LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 2/4] tickless idle cpu: Skip ticks when CPU is idle
From: Srivatsa Vaddagiri @ 2006-04-10 12:23 UTC (permalink / raw)
  To: Kumar Gala; +Cc: sri_vatsa_v, paulus, linuxppc-dev
In-Reply-To: <981C3B4E-7336-403D-AF58-3B36AA071866@kernel.crashing.org>

On Fri, Apr 07, 2006 at 09:16:58AM -0500, Kumar Gala wrote:
> >+config NO_IDLE_HZ
> >+	depends on EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC || PPC_MAPLE)
> >+	bool "Switch off timer ticks on idle CPUs"
> >+	help
> >+	  Switches the HZ timer interrupts off when a CPU is idle.
> >+
> 
> any reason not to provide this for all 6xx class processors?

I think the same patch would mostly work for 6xx CPUs as well. However, I
don't think I have any hardware to test it on. If I am not mistaken, to
support 6xx CPUs, only ppc6xx_idle needs to be modified to call stop_hz_timer
before going into power-save mode?


-- 
Regards,
vatsa

^ permalink raw reply

* RE: Accessing physical memory
From: Fillod Stephane @ 2006-04-10 12:43 UTC (permalink / raw)
  To: Antonio Di Bacco, linuxppc-embedded

Antonio Di Bacco wrote:
>How can I access the physical memory? Can I MMAP for example /dev/mem?
>Is there a simpler way?

Your question is a linuxppc-embedded FAQ.
It is documented in Denx's FAQ[1], and accessible through shorter
URL[2].
For more information, please follow this thread[3] (not ppc specific
actually).

[1] http://www.denx.de/twiki/bin/view/PPCEmbedded/DeviceDrivers#Section_AccessingPeripheralsFromUserSpace
[2] http://tinyurl.com/6c7th
[3] http://lists.linuxppc.org/linuxppc-embedded/200403/msg00059.html

CIAO,
-- 
Stephane

^ permalink raw reply

* [PATCH 2/2] tickless idle cpus: allow boot cpu to skip ticks
From: Srivatsa Vaddagiri @ 2006-04-10 12:19 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev
In-Reply-To: <17462.61423.42032.559627@cargo.ozlabs.ibm.com>

This patch (version 2) lets the boot CPU skip ticks. Tested against
2.6.17-rc1-mm1.

Signed-off-by: Srivatsa Vaddagiri <vatsa@in.ibm.com>

---

 linux-2.6.17-rc1-root/arch/powerpc/kernel/time.c |   71 ++++++++++++++++++++---
 1 file changed, 63 insertions(+), 8 deletions(-)

diff -puN arch/powerpc/kernel/time.c~boot_cpu_fix arch/powerpc/kernel/time.c
--- linux-2.6.17-rc1/arch/powerpc/kernel/time.c~boot_cpu_fix	2006-04-10 17:43:11.000000000 +0530
+++ linux-2.6.17-rc1-root/arch/powerpc/kernel/time.c	2006-04-10 17:44:32.000000000 +0530
@@ -637,6 +637,39 @@ static void iSeries_tb_recal(void)
 
 static void account_ticks(struct pt_regs *regs);
 
+static spinlock_t do_timer_cpulock = SPIN_LOCK_UNLOCKED;
+static int do_timer_cpu;	/* Which CPU should call do_timer? */
+
+static int __devinit do_timer_cpucallback(struct notifier_block *self,
+					  unsigned long action, void *hcpu)
+{
+	int cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_DOWN_PREPARE:
+		spin_lock(&do_timer_cpulock);
+		if (do_timer_cpu == cpu) {
+			cpumask_t tmpmask;
+			int new_cpu;
+
+			cpus_complement(tmpmask, nohz_cpu_mask);
+			cpu_clear(cpu, tmpmask);
+			new_cpu = any_online_cpu(tmpmask);
+			if (new_cpu != NR_CPUS)
+				do_timer_cpu = new_cpu;
+		}
+		spin_unlock(&do_timer_cpulock);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __devinitdata do_timer_notifier =
+{
+	.notifier_call = do_timer_cpucallback
+};
+
 /* Returns 1 if this CPU was set in the mask */
 static inline int clear_hzless_mask(void)
 {
@@ -645,8 +678,12 @@ static inline int clear_hzless_mask(void
 
 	if (unlikely(cpu_isset(cpu, nohz_cpu_mask))) {
 		cpu_clear(cpu, nohz_cpu_mask);
-		rc = 1;
-	}
+		spin_lock(&do_timer_cpulock);
+		if (do_timer_cpu == NR_CPUS)
+			do_timer_cpu = cpu;
+		spin_unlock(&do_timer_cpulock);
+  		rc = 1;
+  	}
 
 	return rc;
 }
@@ -684,6 +721,15 @@ void stop_hz_timer(void)
 		return;
 	}
 
+	spin_lock(&do_timer_cpulock);
+	if (do_timer_cpu == cpu) {
+		cpumask_t tmpmask;
+
+		cpus_complement(tmpmask, nohz_cpu_mask);
+		do_timer_cpu = any_online_cpu(tmpmask);
+	}
+	spin_unlock(&do_timer_cpulock);
+
 	do {
 		seq = read_seqbegin(&xtime_lock);
 
@@ -716,6 +762,7 @@ void start_hz_timer(struct pt_regs *regs
 
 #else
 static inline int clear_hzless_mask(void) { return 0;}
+#define do_timer_cpu	boot_cpuid
 #endif
 
 static void account_ticks(struct pt_regs *regs)
@@ -742,16 +789,15 @@ static void account_ticks(struct pt_regs
 		if (!cpu_is_offline(cpu))
 			account_process_time(regs);
 
-		/*
-		 * No need to check whether cpu is offline here; boot_cpuid
-		 * should have been fixed up by now.
-		 */
-		if (cpu != boot_cpuid)
+		if (cpu != do_timer_cpu)
 			continue;
 
 		write_seqlock(&xtime_lock);
 		tb_last_jiffy += tb_ticks_per_jiffy;
-		tb_last_stamp = per_cpu(last_jiffy, cpu);
+		tb_last_stamp += tb_ticks_per_jiffy;
+		/* Handle RTCL overflow on 601 */
+		if (__USE_RTC() && tb_last_stamp >= 1000000000)
+			tb_last_stamp -= 1000000000;
 		do_timer(regs);
 		timer_recalc_offset(tb_last_jiffy);
 		timer_check_rtc();
@@ -836,6 +882,13 @@ void __init smp_space_timers(unsigned in
 	unsigned long offset = tb_ticks_per_jiffy / max_cpus;
 	unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid);
 
+#ifdef CONFIG_NO_IDLE_HZ
+	/* Don't space timers - we want to let any CPU call do_timer to
+	 * increment xtime.
+	 */
+	half = offset = 0;
+#endif
+
 	/* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */
 	previous_tb -= tb_ticks_per_jiffy;
 	/*
@@ -1051,6 +1104,8 @@ void __init time_init(void)
 	calc_cputime_factors();
 #ifdef CONFIG_NO_IDLE_HZ
 	max_skip = __USE_RTC() ? HZ : MAX_DEC_COUNT / tb_ticks_per_jiffy;
+	do_timer_cpu = boot_cpuid;
+	register_cpu_notifier(&do_timer_notifier);
 #endif
 
 	/*

_
-- 
Regards,
vatsa

^ permalink raw reply

* [PATCH 1/2] tickless idle cpus: core patch - v2
From: Srivatsa Vaddagiri @ 2006-04-10 12:18 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev
In-Reply-To: <17462.61423.42032.559627@cargo.ozlabs.ibm.com>

This is the v2 of the core patch to skip ticks when a CPU is idle.
Changes since v1:

	-  fix the buggy call to stop_hz_timer in idle_power4.S (hopefully it 
	   is correct now!).
	-  Don't allow the boot CPU to skip ticks (a follow-on patch will
	   remove this restriction)

Signed-off-by: Srivatsa Vaddagiri <vatsa@in.ibm.com>

---

 linux-2.6.17-rc1-root/arch/powerpc/Kconfig                   |    6 
 linux-2.6.17-rc1-root/arch/powerpc/kernel/idle_power4.S      |    5 
 linux-2.6.17-rc1-root/arch/powerpc/kernel/irq.c              |    3 
 linux-2.6.17-rc1-root/arch/powerpc/kernel/time.c             |  143 +++++++++--
 linux-2.6.17-rc1-root/arch/powerpc/kernel/traps.c            |    1 
 linux-2.6.17-rc1-root/arch/powerpc/platforms/pseries/setup.c |    6 
 linux-2.6.17-rc1-root/include/asm-powerpc/time.h             |    8 
 7 files changed, 147 insertions(+), 25 deletions(-)

diff -puN arch/powerpc/kernel/time.c~no_idle_hz arch/powerpc/kernel/time.c
--- linux-2.6.17-rc1/arch/powerpc/kernel/time.c~no_idle_hz	2006-04-09 10:40:58.000000000 +0530
+++ linux-2.6.17-rc1-root/arch/powerpc/kernel/time.c	2006-04-10 14:32:04.000000000 +0530
@@ -633,40 +633,97 @@ static void iSeries_tb_recal(void)
 }
 #endif
 
-/*
- * For iSeries shared processors, we have to let the hypervisor
- * set the hardware decrementer.  We set a virtual decrementer
- * in the lppaca and call the hypervisor if the virtual
- * decrementer is less than the current value in the hardware
- * decrementer. (almost always the new decrementer value will
- * be greater than the current hardware decementer so the hypervisor
- * call will not be needed)
- */
+#ifdef CONFIG_NO_IDLE_HZ
 
-/*
- * timer_interrupt - gets called when the decrementer overflows,
- * with interrupts disabled.
+static void account_ticks(struct pt_regs *regs);
+
+/* Returns 1 if this CPU was set in the mask */
+static inline int clear_hzless_mask(void)
+{
+	int cpu = smp_processor_id();
+	int rc = 0;
+
+	if (unlikely(cpu_isset(cpu, nohz_cpu_mask))) {
+		cpu_clear(cpu, nohz_cpu_mask);
+		rc = 1;
+	}
+
+	return rc;
+}
+
+#define MAX_DEC_COUNT	UINT_MAX	/* Decrementer is 32-bit */
+static int min_skip = 2;		/* Minimum number of ticks to skip */
+static int max_skip;			/* Maximum number of ticks to skip */
+
+
+int sysctl_hz_timer = 1;
+
+/* Defer timer interrupt for as long as possible. This is accomplished by
+ * programming the decrementer to a suitable value such that it raises the
+ * exception after desired interval. This features allows CPUs to
+ * be used more efficiently in virtualized environments and/or allows for
+ * lower power consumption.
+ *
+ * Called with interrupts disabled on an idle CPU. Caller has to ensure that
+ * idle loop is not exited w/o start_hz_timer being called via an interrupt
+ * to restore timer interrupt frequency.
  */
-void timer_interrupt(struct pt_regs * regs)
+
+void stop_hz_timer(void)
 {
+	unsigned long cpu = smp_processor_id(), seq, delta;
 	int next_dec;
-	int cpu = smp_processor_id();
-	unsigned long ticks;
 
-#ifdef CONFIG_PPC32
-	if (atomic_read(&ppc_n_lost_interrupts) != 0)
-		do_IRQ(regs);
-#endif
+	if (sysctl_hz_timer != 0 || cpu == boot_cpuid)
+		return;
 
-	irq_enter();
+	cpu_set(cpu, nohz_cpu_mask);
+	mb();
+	if (rcu_pending(cpu) || local_softirq_pending()) {
+		cpu_clear(cpu, nohz_cpu_mask);
+		return;
+	}
 
-	profile_tick(CPU_PROFILING, regs);
-	calculate_steal_time();
+	do {
+		seq = read_seqbegin(&xtime_lock);
 
-#ifdef CONFIG_PPC_ISERIES
-	get_lppaca()->int_dword.fields.decr_int = 0;
+		delta = next_timer_interrupt() - jiffies;
+
+		if (delta < min_skip) {
+			cpu_clear(cpu, nohz_cpu_mask);
+			return;
+		}
+
+		if (delta > max_skip)
+			delta = max_skip;
+
+		next_dec = tb_last_stamp + delta * tb_ticks_per_jiffy;
+
+	} while (read_seqretry(&xtime_lock, seq));
+
+	next_dec -= get_tb();
+	set_dec(next_dec);
+
+	return;
+}
+
+/* Take into account skipped ticks and restore the HZ timer frequency */
+void start_hz_timer(struct pt_regs *regs)
+{
+	if (clear_hzless_mask())
+		account_ticks(regs);
+}
+
+#else
+static inline int clear_hzless_mask(void) { return 0;}
 #endif
 
+static void account_ticks(struct pt_regs *regs)
+{
+	int next_dec;
+	int cpu = smp_processor_id();
+	unsigned long ticks;
+
 	while ((ticks = tb_ticks_since(per_cpu(last_jiffy, cpu)))
 	       >= tb_ticks_per_jiffy) {
 		/* Update last_jiffy */
@@ -703,6 +760,41 @@ void timer_interrupt(struct pt_regs * re
 	
 	next_dec = tb_ticks_per_jiffy - ticks;
 	set_dec(next_dec);
+}
+
+/*
+ * For iSeries shared processors, we have to let the hypervisor
+ * set the hardware decrementer.  We set a virtual decrementer
+ * in the lppaca and call the hypervisor if the virtual
+ * decrementer is less than the current value in the hardware
+ * decrementer. (almost always the new decrementer value will
+ * be greater than the current hardware decementer so the hypervisor
+ * call will not be needed)
+ */
+
+/*
+ * timer_interrupt - gets called when the decrementer overflows,
+ * with interrupts disabled.
+ */
+void timer_interrupt(struct pt_regs * regs)
+{
+#ifdef CONFIG_PPC32
+	if (atomic_read(&ppc_n_lost_interrupts) != 0)
+		do_IRQ(regs);
+#endif
+
+	irq_enter();
+
+	clear_hzless_mask();
+
+	profile_tick(CPU_PROFILING, regs);
+	calculate_steal_time();
+
+#ifdef CONFIG_PPC_ISERIES
+	get_lppaca()->int_dword.fields.decr_int = 0;
+#endif
+
+	account_ticks(regs);
 
 #ifdef CONFIG_PPC_ISERIES
 	if (hvlpevent_is_pending())
@@ -957,6 +1049,9 @@ void __init time_init(void)
 	tb_ticks_per_usec = ppc_tb_freq / 1000000;
 	tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
 	calc_cputime_factors();
+#ifdef CONFIG_NO_IDLE_HZ
+	max_skip = __USE_RTC() ? HZ : MAX_DEC_COUNT / tb_ticks_per_jiffy;
+#endif
 
 	/*
 	 * Calculate the length of each tick in ns.  It will not be
diff -puN arch/powerpc/kernel/irq.c~no_idle_hz arch/powerpc/kernel/irq.c
--- linux-2.6.17-rc1/arch/powerpc/kernel/irq.c~no_idle_hz	2006-04-09 10:40:58.000000000 +0530
+++ linux-2.6.17-rc1-root/arch/powerpc/kernel/irq.c	2006-04-09 10:40:59.000000000 +0530
@@ -60,6 +60,7 @@
 #ifdef CONFIG_PPC_ISERIES
 #include <asm/paca.h>
 #endif
+#include <asm/time.h>
 
 int __irq_offset_value;
 #ifdef CONFIG_PPC32
@@ -189,6 +190,8 @@ void do_IRQ(struct pt_regs *regs)
 
         irq_enter();
 
+	start_hz_timer(regs);
+
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	/* Debugging check for stack overflow: is there less than 2KB free? */
 	{
diff -puN include/asm-powerpc/time.h~no_idle_hz include/asm-powerpc/time.h
--- linux-2.6.17-rc1/include/asm-powerpc/time.h~no_idle_hz	2006-04-09 10:40:59.000000000 +0530
+++ linux-2.6.17-rc1-root/include/asm-powerpc/time.h	2006-04-09 10:40:59.000000000 +0530
@@ -198,6 +198,14 @@ static inline unsigned long tb_ticks_sin
 	return get_tbl() - tstamp;
 }
 
+#ifdef CONFIG_NO_IDLE_HZ
+extern void stop_hz_timer(void);
+extern void start_hz_timer(struct pt_regs *);
+#else
+static inline void stop_hz_timer(void) { }
+static inline void start_hz_timer(struct pt_regs *regs) { }
+#endif
+
 #define mulhwu(x,y) \
 ({unsigned z; asm ("mulhwu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;})
 
diff -puN arch/powerpc/Kconfig~no_idle_hz arch/powerpc/Kconfig
--- linux-2.6.17-rc1/arch/powerpc/Kconfig~no_idle_hz	2006-04-09 10:40:59.000000000 +0530
+++ linux-2.6.17-rc1-root/arch/powerpc/Kconfig	2006-04-09 10:40:59.000000000 +0530
@@ -593,6 +593,12 @@ config HOTPLUG_CPU
 
 	  Say N if you are unsure.
 
+config NO_IDLE_HZ
+	depends on EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC || PPC_MAPLE)
+	bool "Switch off timer ticks on idle CPUs"
+	help
+	  Switches the HZ timer interrupts off when a CPU is idle.
+
 config KEXEC
 	bool "kexec system call (EXPERIMENTAL)"
 	depends on PPC_MULTIPLATFORM && EXPERIMENTAL
diff -puN arch/powerpc/kernel/traps.c~no_idle_hz arch/powerpc/kernel/traps.c
--- linux-2.6.17-rc1/arch/powerpc/kernel/traps.c~no_idle_hz	2006-04-09 10:40:59.000000000 +0530
+++ linux-2.6.17-rc1-root/arch/powerpc/kernel/traps.c	2006-04-09 10:40:59.000000000 +0530
@@ -875,6 +875,7 @@ void altivec_unavailable_exception(struc
 
 void performance_monitor_exception(struct pt_regs *regs)
 {
+	start_hz_timer(regs);
 	perf_irq(regs);
 }
 
diff -puN arch/powerpc/platforms/pseries/setup.c~no_idle_hz arch/powerpc/platforms/pseries/setup.c
--- linux-2.6.17-rc1/arch/powerpc/platforms/pseries/setup.c~no_idle_hz	2006-04-09 10:40:59.000000000 +0530
+++ linux-2.6.17-rc1-root/arch/powerpc/platforms/pseries/setup.c	2006-04-09 10:40:59.000000000 +0530
@@ -463,8 +463,10 @@ static void pseries_dedicated_idle_sleep
 	 * very low priority.  The cede enables interrupts, which
 	 * doesn't matter here.
 	 */
-	if (!lppaca[cpu ^ 1].idle || poll_pending() == H_PENDING)
+	if (!lppaca[cpu ^ 1].idle || poll_pending() == H_PENDING) {
+		stop_hz_timer();
 		cede_processor();
+	}
 
 out:
 	HMT_medium();
@@ -479,6 +481,8 @@ static void pseries_shared_idle_sleep(vo
 	 */
 	get_lppaca()->idle = 1;
 
+	stop_hz_timer();
+
 	/*
 	 * Yield the processor to the hypervisor.  We return if
 	 * an external interrupt occurs (which are driven prior
diff -puN arch/powerpc/kernel/idle_power4.S~no_idle_hz arch/powerpc/kernel/idle_power4.S
--- linux-2.6.17-rc1/arch/powerpc/kernel/idle_power4.S~no_idle_hz	2006-04-09 10:40:59.000000000 +0530
+++ linux-2.6.17-rc1-root/arch/powerpc/kernel/idle_power4.S	2006-04-10 14:50:36.000000000 +0530
@@ -30,6 +30,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
 	cmpwi	0,r4,0
 	beqlr
 
+	mflr	r0
+	std	r0,16(r1)
+	bl	.stop_hz_timer
+	ld	r0,16(r1)
+	mtlr	r0
 	/* Go to NAP now */
 BEGIN_FTR_SECTION
 	DSSALL

_

-- 
Regards,
vatsa

^ permalink raw reply

* Re: [PATCH 1/4] tickless idle cpu - Allow any CPU to update jiffies
From: Srivatsa Vaddagiri @ 2006-04-10 11:49 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev
In-Reply-To: <17462.61423.42032.559627@cargo.ozlabs.ibm.com>

On Sat, Apr 08, 2006 at 09:04:15AM +1000, Paul Mackerras wrote:
> Srivatsa Vaddagiri writes:
> 
> > Currently, only boot CPU calls do_timer to update jiffies. This prevents
> > idle boot CPU from skipping ticks. Patch below, against 2.6.17-rc1-mm1,
> > allows jiffies to be updated from any CPU.
> 
> We have to be very careful here.  The code that keeps xtime and
> gettimeofday in sync relies on xtime being incremented as close as
> possible in time to when the timebase passes specific values.  Since
> we currently stagger the timer interrupts for the cpus throughout a
> jiffy, having cpus other than the boot cpus calling do_timer will
> break this and introduce inaccuracies.  There are also implications
> for the stolen time accounting on shared-processor LPAR systems.
> 
> I think we need to remove the staggering, thus having all cpus take
> their timer interrupt at the same time.  That way, any of them can
> call do_timer.  However we then have to be much more careful about
> possible contention, e.g. on xtime_lock.  Your patch has every cpu
> taking xtime_lock for writing rather than just the boot cpu.  I'd like
> to see if there is some way to avoid that (while still having just one
> cpu call do_timer, of course).

Paul,
	Thanks for the feedback on the patches.

Avoiding contention on xtime_lock doesn't seem to be trivial. Any
solution to it is fraught with races. Anyway, I have attempted one
solution (in the follow-on Patch 2/2) which keeps the overhead in the timer
interrupt handler low.

Let me know if you have other suggestions to avoid xtime_lock
contention!

Following patches are sent in separate mails:

Patch 1/2	-	Core patch to skip ticks - v2
Patch 2/2	- 	Allow boot CPU to skip ticks - v2

The sysctl control patch and decrementer statistics patch are as before
and hence I am not resending them this time.


-- 
Regards,
vatsa

^ permalink raw reply

* [ANNOUNCE] socket-can for linux
From: Andrey Volkov @ 2006-04-10 10:39 UTC (permalink / raw)
  To: linuxppc-embedded; +Cc: linux-kernel

Hi all,

FYI, as noted in the subject line, the socket-can project was finally
created yesterday at berlios.de. Project page:
http://developer.berlios.de/projects/socketcan/

Happy hacking.

Andrey Volkov

^ permalink raw reply

* Re: [Fastboot] [PATCH]ppc64 kexec tools rm platform fix
From: Michael Ellerman @ 2006-04-10  8:53 UTC (permalink / raw)
  To: David Wilder; +Cc: Mohan Kumar, fastboot, linuxppc-dev list
In-Reply-To: <443700CB.3090909@us.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 6035 bytes --]

Hi David,

Thanks for working on this one. Few comments below ...

On Fri, 2006-04-07 at 17:16 -0700, David Wilder wrote:
> This patch was discussed earlier on this list.  (see posting by Haren 
> Myneni <hbabu@us.ibm.com>). 
> 
> In recent kernels, the platform property has been removed from
> /proc/device-tree. This property was used to determine whether the
> platform is LPAR or non-LPAR, and the htab-* and tce-* properties were
> read based on the platform. This patch fixes the issue by reading these
> properties if they exist and otherwise continuing, instead of exiting
> with an error message.

(Copied from attachment, won't apply, original here
http://lists.osdl.org/pipermail/fastboot/2006-April/002765.html)

> ---
> kexec-tools-1.101/kexec/arch/ppc64/kexec-ppc64.c.orig       2006-04-08
> 16:09:20.000000000 -0700
> +++ kexec-tools-1.101/kexec/arch/ppc64/kexec-ppc64.c    2006-04-08
> 16:23:26.000000000 -0700
> @@ -34,13 +34,8 @@
>  #include "crashdump-ppc64.h"
>  #include <arch/options.h>
>  
> -/* Platforms supported by kexec on PPC64 */
> -#define PLATFORM_PSERIES       0x0100
> -#define PLATFORM_PSERIES_LPAR  0x0101
> -
>  static struct exclude_range exclude_range[MAX_MEMORY_RANGES];
>  static unsigned long long rmo_top;
> -static unsigned int platform;
>  static struct memory_range memory_range[MAX_MEMORY_RANGES];
>  static struct memory_range base_memory_range[MAX_MEMORY_RANGES];
>  unsigned long long memory_max = 0;
> @@ -201,26 +196,6 @@ static int get_devtree_details(unsigned 
>                 }
>  
>                 if (strncmp(dentry->d_name, "chosen", 6) == 0) {
> -                       /* get platform details from /chosen node */
> -                       strcat(fname, "/linux,platform");
> -                       if ((file = fopen(fname, "r")) == NULL) {
> -                               perror(fname);
> -                               closedir(cdir);
> -                               closedir(dir);
> -                               return -1;
> -                       }
> -                       if (fread(&platform, sizeof(int), 1, file) !=
> 1) {
> -                               perror(fname);
> -                               fclose(file);
> -                               closedir(cdir);
> -                               closedir(dir);
> -                               return -1;
> -                       }
> -                       fclose(file);
> -
> -                       memset(fname, 0, sizeof(fname));
> -                       strcpy(fname, device_tree);
> -                       strcat(fname, dentry->d_name);
>                         strcat(fname, "/linux,kernel-end");
>                         if ((file = fopen(fname, "r")) == NULL) {
>                                 perror(fname);
> @@ -291,18 +266,18 @@ static int get_devtree_details(unsigned 
>                                 reserve(KDUMP_BACKUP_LIMIT,
> crash_base-KDUMP_BACKUP_LIMIT);
>                         }
>  
> -                       /* if LPAR, no need to read any more
> from /chosen */
> -                       if (platform != PLATFORM_PSERIES) {
> -                               closedir(cdir);
> -                               continue;
> -                       }
>                         memset(fname, 0, sizeof(fname));
>                         strcpy(fname, device_tree);
>                         strcat(fname, dentry->d_name);
>                         strcat(fname, "/linux,htab-base");
>                         if ((file = fopen(fname, "r")) == NULL) {
> -                               perror(fname);
>                                 closedir(cdir);
> +                               if (errno == ENOENT) {
> +                                       /* Non LPAR */
> +                                       errno = 0;
> +                                       continue;
> +                                }
> +                               perror(fname);
>                                 closedir(dir);
>                                 return -1;

I don't think you want to do the closedir() before the if. You certainly
don't need to do it twice?

>                         }
> @@ -394,23 +369,23 @@ static int get_devtree_details(unsigned 
>                         }
>                         rmo_base = ((unsigned long long *)buf)[0];
>                         rmo_top = rmo_base + ((unsigned long long
> *)buf)[1];
> -                       if (platform == PLATFORM_PSERIES) {
> -                               if (rmo_top > 0x30000000UL)
> -                                       rmo_top = 0x30000000UL;
> -                       }
> +                       if (rmo_top > 0x30000000UL)
> +                               rmo_top = 0x30000000UL;
> +
>                         fclose(file);
>                         closedir(cdir);
>                 } /* memory */
>  
>                 if (strncmp(dentry->d_name, "pci@", 4) == 0) {
> -                       if (platform != PLATFORM_PSERIES) {
> -                               closedir(cdir);
> -                               continue;
> -                       }
>                         strcat(fname, "/linux,tce-base");
>                         if ((file = fopen(fname, "r")) == NULL) {
> -                               perror(fname);
>                                 closedir(cdir);
> +                               if (errno == ENOENT) {
> +                                       /* Non LPAR */
> +                                       errno = 0;
> +                                       continue;
> +                               }
> +                               perror(fname);
>                                 closedir(dir);

Same comment here.

cheers

-- 
Michael Ellerman
IBM OzLabs

wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)

We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 191 bytes --]

^ permalink raw reply

* Re: Slab errors on 4xx (STB04)
From: Eugene Surovegin @ 2006-04-10  7:14 UTC (permalink / raw)
  To: Andre Draszik; +Cc: linuxppc-embedded
In-Reply-To: <44398BD4.60904@andred.net>

On Mon, Apr 10, 2006 at 12:33:56AM +0200, Andre Draszik wrote:
> 
> Since it _seems_ to work nevertheless - is CONFIG_DEBUG_SLAB known to be
> broken on this platform?

Yes, it's very likely that CONFIG_DEBUG_SLAB is the culprit here. This
config option changes the allocation alignment (allocations are no longer
L1 cache line aligned). On 4xx, which has a non-coherent L1 cache, this
will almost certainly break DMA, resulting in memory corruption.

You can try changing __dma_sync() to do flush_dcache_range() even for 
DMA_FROM_DEVICE case. However, do this only to check this theory, not 
as a permanent solution :).

-- 
Eugene

^ permalink raw reply

* Re: Slab errors on 4xx (STB04)
From: Roland Dreier @ 2006-04-10  1:42 UTC (permalink / raw)
  To: Andre Draszik; +Cc: linuxppc-embedded
In-Reply-To: <44398BD4.60904@andred.net>

    Andre> Since it _seems_ to work nevertheless - is
    Andre> CONFIG_DEBUG_SLAB known to be broken on this platform?
    Andre> (Although such stack traces are only printed when doing sth
    Andre> with the USB). Or is this caused by some other (memory
    Andre> initialization?) error?

Last time I looked, I think CONFIG_DEBUG_SLAB was broken on
non-cache-coherent architectures (such as PPC 4xx).  The reason is
that the slab poisoning breaks the cacheline alignment of slabs, and
then invalidating the cache can trash stuff that slab debugging ends
up thinking is overwritten.

 - R.

^ permalink raw reply

* Slab errors on 4xx (STB04)
From: Andre Draszik @ 2006-04-09 22:33 UTC (permalink / raw)
  To: linuxppc-embedded

Hi,

I am trying to use the "ppc-soc-ohci" driver on a redwood5 based board
w/ kernel 2.6.17-rc1. It's not really inspiring confidence, because I
get tons of messages similar to these

[  595.693614] slab error in cache_free_debugcheck(): cache `size-32':
double free, or memory outside object was overwritten
[  595.704278] Call Trace:
[  595.706683] [C056BE10] [C0009DD8] show_stack+0x58/0x194 (unreliable)
[  595.712967] [C056BE40] [C0009F2C] dump_stack+0x18/0x28
[  595.718028] [C056BE50] [C006103C] __slab_error+0x2c/0x3c
[  595.723265] [C056BE60] [C0062FF8] cache_free_debugcheck+0x23c/0x2e0
[  595.729427] [C056BE90] [C0063F64] kfree+0x8c/0x118
[  595.734152] [C056BEC0] [C68F46D0] usb_get_status+0x94/0xac [usbcore]
[  595.740718] [C056BEF0] [C68EEA44] choose_configuration+0x2c/0x1b8
[usbcore]
[  595.747758] [C056BF10] [C68EECCC] usb_new_device+0xfc/0x1bc [usbcore]
[  595.754286] [C056BF30] [C68F0034] hub_port_connect_change+0x24c/0x3dc
[usbcore]
[  595.761669] [C056BF60] [C68F0530] hub_events+0x36c/0x498 [usbcore]
[  595.767955] [C056BF90] [C68F066C] hub_thread+0x10/0xf0 [usbcore]
[  595.774073] [C056BFC0] [C0037CA0] kthread+0xbc/0xc4
[  595.778889] [C056BFF0] [C000525C] kernel_thread+0x44/0x60
[  595.784228] c0dec610: redzone 1:0x5a2cf071, redzone 2:0x170fc2a5.

when accessing devices.

Since it _seems_ to work nevertheless - is CONFIG_DEBUG_SLAB known to be
broken on this platform? (Although such stack traces are only printed
when doing sth with the USB). Or is this caused by some other (memory
initialization?) error?


Greetings,
Andre'

^ permalink raw reply

* Re: Oops: machine check, sig: 7 [#1] - 16-bit Pccard - SOLVED!!!
From: Daniel Ritz @ 2006-04-09 20:57 UTC (permalink / raw)
  To: Edward Felberbaum; +Cc: linuxppc-dev, linux-pcmcia, paulus
In-Reply-To: <BAY104-F3E7D812AD1E707C145856ABC90@phx.gbl>

On Friday 07 April 2006 08.25, Edward Felberbaum wrote:
> >From: Daniel Ritz <daniel.ritz-ml@swissonline.ch>
> >To: Edward Felberbaum <efelberbaum@hotmail.com>
> >CC: "linux-pcmcia" <linux-pcmcia@lists.infradead.org>
> >Subject: Re: Oops: machine check, sig: 7 [#1] - 16-bit Pccard - CardBus OK 
> >Edward Felberbaum
> >Date: Thu, 6 Apr 2006 20:11:50 +0200
> >
> > > >Can you try booting with the boot parameter
> > > >
> > > >reserve=0xfd000000-0xfdffffff
> > > >
> > > >?
> >
> >errm...that should have been:
> >	reserve=0xfd000000,0xffffff
> >ie. reserve=start,size
> >
> > >
> > > I added the above reserve to the boot parameters, it shows up on the 
> >Kernel
> > > command line in dmesg,  but dmesg still displays
> > >
> > > pcmcia: parent PCI bridge Memory window: 0xfd000000 - 0xfdffffff
> > >
> > > I would have expected the above line to not appear - use a different 
> >memory
> > > range due to the kernel command line "reserve".
> >
> >it will...:)
> >
> >rgds
> >-daniel
> 
> I followed your advice and inserted a 3Com 589 card and there was NO Oops!  
> WOW!
> 
> I built the 3c589 driver and the card works too.
> 
> Now I'm trying to get my Belkin F5D6020 v2 Wifi card to work.
> 
> Thanks very much for your help!
> 
> I see from the dmesg output from my original post that memory ranges 
> 0xfdd7f000 and 0xfddff000 are used by the Gatwick and Heathrow mac io 
> controllers.  That explains the conflict with PCMCIA over 0xfd000000.

interesting...the memory ranges are used by other devices yet the
request_resource() call in PCMCIA succeeds...and PCI resources shouldn't
be there in the first place then...

ok, it's in file arch/powerpc/platforms/powermac/feature.c...
i can't see any request_resource() calls in there...so CC'ing the PPC guys..
they can sure comment...

> 
> Question, can I minimize the range of memory that is reserved 0xffffff - or 
> is it a waste of time?
> 

yeah, you probably could, but it sounds like a waste of time...

> Eddie
> 

rgds
-daniel

^ permalink raw reply

* PRAMFS
From: Antonio Di Bacco @ 2006-04-09 19:20 UTC (permalink / raw)
  To: linuxppc-embedded

Hi,
does anyone know if pramfs for kernel 2.4 is stable? Has anyone used it? When 
downloading it I saw that the date is very old (2004). I saw people using 
jffs2 on pram, but I think there is a big waste of memory in using it, isn't 
there? My pram is only 512KB.

Bye,
Antonio.

^ permalink raw reply

* Re: Accessing physical memory
From: dwh @ 2006-04-09  4:20 UTC (permalink / raw)
  To: Antonio Di Bacco; +Cc: linuxppc-embedded
In-Reply-To: <200604082352.55490.antonio.dibacco@aruba.it>

Quoting Antonio Di Bacco <antonio.dibacco@aruba.it>:

> How can I access the physical memory? Can I MMAP for example /dev/mem? Is
> there a simpler way?
>

Hi Antonio,

What would you like to do? If you just want some arbitrary
page of memory, then look at the 'nopage' example in Rubini,
or ask me and I'll send you some code.

If you want a specific memory location, then you'll need
to claim and ioremap the memory, and again, I can give you
some code.

So, explain a little more and I can help.

Dave


----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.

^ permalink raw reply

* Re: Accessing physical memory
From: Arnd Bergmann @ 2006-04-08 23:07 UTC (permalink / raw)
  To: linuxppc-embedded
In-Reply-To: <200604082352.55490.antonio.dibacco@aruba.it>

On Saturday 08 April 2006 23:52, Antonio Di Bacco wrote:
> How can I access the physical memory? Can I MMAP for example /dev/mem? Is 
> there a simpler way?

/dev/mem access is the most simple way. A cleaner solution is usually to
write your own simple character device driver for the stuff you want to
access in memory.

Depending on why you want to access memory, slram may be the right
driver, e.g. when you want to store a file system there.

	Arnd <><

^ permalink raw reply

* Accessing physical memory
From: Antonio Di Bacco @ 2006-04-08 21:52 UTC (permalink / raw)
  To: linuxppc-embedded

How can I access the physical memory? Can I MMAP for example /dev/mem? Is 
there a simpler way?

Bye,
Antonio. 

^ permalink raw reply

* slram
From: Antonio Di Bacco @ 2006-04-08 19:02 UTC (permalink / raw)
  To: linuxppc-embedded

Does anyone know what the slram driver is meant for?

Bye,
Antonio.

^ permalink raw reply

* Re: [PATCH] PCI Error Recovery: e100 network device driver
From: Francois Romieu @ 2006-04-08  8:12 UTC (permalink / raw)
  To: Linas Vepstas
  Cc: Greg KH, linux-kernel, jesse.brandeburg, linuxppc-dev,
	john.ronciak, jeffrey.t.kirsher, netdev, linux-pci, Jeff Garzik
In-Reply-To: <20060407231134.GN25225@austin.ibm.com>

Linas Vepstas <linas@austin.ibm.com> :
> Index: linux-2.6.17-rc1/drivers/net/e100.c
> ===================================================================
> --- linux-2.6.17-rc1.orig/drivers/net/e100.c	2006-04-07 16:21:46.000000000 -0500
> +++ linux-2.6.17-rc1/drivers/net/e100.c	2006-04-07 18:10:52.411266545 -0500
[...]
> +static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)

80 cols limit.

[...]
> +static pci_ers_result_t e100_io_slot_reset(struct pci_dev *pdev)
> +{
> +	struct net_device *netdev = pci_get_drvdata(pdev);
> +	struct nic *nic = netdev_priv(netdev);
> +
> +	if (pci_enable_device(pdev)) {
> +		printk(KERN_ERR "e100: Cannot re-enable PCI device after reset.\n");

- The driver supports {get/set}_msglevel. Please consider using netif_msg_xxx
  (see include/linux/netdevice.h).

- s/e100/DRV_NAME/ (or netdev->name, or pci_name(...) depending on the
  context).

[...]
> +static struct pci_error_handlers e100_err_handler = {
> +	.error_detected = e100_io_error_detected,
> +	.slot_reset = e100_io_slot_reset,
> +	.resume = e100_io_resume,
> +};

Nit: I'd rather follow the style in the declaration of e100_driver.

-- 
Ueimor

^ permalink raw reply

* Re: freescale lite 5200 board and kernel 2.6
From: Matthias Fechner @ 2006-04-08  8:21 UTC (permalink / raw)
  To: linuxppc-embedded
In-Reply-To: <20060406221056.GA15540@raptus.dandreoli.com>

[-- Attachment #1: Type: text/plain, Size: 381 bytes --]

Hello Domenico,

* Domenico Andreoli <cavokz@gmail.com> [07-04-06 00:10]:
> kernel is built following the instructions on your wiki, i attached
> the config file. please have a look, let me know if any check/test may
> be advised.

sry, but I have no time to try your kernel config, but I attached
mine which is working fine for me.
Maybe this helps you.


Best regards,
Matthias

[-- Attachment #2: config-mpc52xx.bz2 --]
[-- Type: application/octet-stream, Size: 4800 bytes --]

^ permalink raw reply

* Re: [PATCH] PCI Error Recovery: e100 network device driver
From: Alexey Dobriyan @ 2006-04-08  0:03 UTC (permalink / raw)
  To: Linas Vepstas
  Cc: Greg KH, linux-kernel, jesse.brandeburg, linuxppc-dev,
	john.ronciak, jeffrey.t.kirsher, netdev, linux-pci, Jeff Garzik
In-Reply-To: <20060407231134.GN25225@austin.ibm.com>

On Fri, Apr 07, 2006 at 06:11:34PM -0500, Linas Vepstas wrote:
> --- linux-2.6.17-rc1.orig/drivers/net/e100.c
> +++ linux-2.6.17-rc1/drivers/net/e100.c

> + * @state: The current pci conneection state

connection

> +static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
> +{
> +	struct net_device *netdev = pci_get_drvdata(pdev);
> +
> +	/* Similar to calling e100_down(), but avoids adpater I/O. */

adapter

> +static pci_ers_result_t e100_io_slot_reset(struct pci_dev *pdev)
> +{
> +	struct net_device *netdev = pci_get_drvdata(pdev);
> +	struct nic *nic = netdev_priv(netdev);
> +
> +	if (pci_enable_device(pdev)) {
> +		printk(KERN_ERR "e100: Cannot re-enable PCI device after reset.\n");
> +		return PCI_ERS_RESULT_DISCONNECT;
> +	}
> +	pci_set_master(pdev);
> +
> +	/* Only one device per card can do a reset */
> +	if (0 != PCI_FUNC(pdev->devfn))

Wrong order.

^ permalink raw reply

* Re: [PATCH 2/4] tickless idle cpu: Skip ticks when CPU is idle
From: Paul Mackerras @ 2006-04-07 23:40 UTC (permalink / raw)
  To: vatsa; +Cc: sri_vatsa_v, linuxppc-dev
In-Reply-To: <20060407063131.GB22416@in.ibm.com>

Srivatsa Vaddagiri writes:

> diff -puN arch/powerpc/kernel/idle_power4.S~no_idle_hz arch/powerpc/kernel/idle_power4.S
> --- linux-2.6.17-rc1/arch/powerpc/kernel/idle_power4.S~no_idle_hz	2006-04-07 04:14:39.000000000 +0530
> +++ linux-2.6.17-rc1-root/arch/powerpc/kernel/idle_power4.S	2006-04-07 04:14:58.000000000 +0530
> @@ -30,6 +30,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
>  	cmpwi	0,r4,0
>  	beqlr
>  
> +	mflr	r4
> +	bl	.stop_hz_timer
> +	mtlr	r4

This won't work - r4 is volatile across function calls, that is,
stop_hz_timer() could change r4 and is not required to save and
restore it.

Paul.

^ permalink raw reply

* Re: [PATCH] PCI Error Recovery: e100 network device driver
From: Linas Vepstas @ 2006-04-07 23:11 UTC (permalink / raw)
  To: Greg KH
  Cc: netdev, linux-kernel, jesse.brandeburg, linuxppc-dev,
	john.ronciak, jeffrey.t.kirsher, linux-pci, Jeff Garzik
In-Reply-To: <20060406224643.GA6278@kroah.com>

On Thu, Apr 06, 2006 at 03:46:43PM -0700, Greg KH wrote:
> On Thu, Apr 06, 2006 at 05:24:00PM -0500, Linas Vepstas wrote:
> > +	if(pci_enable_device(pdev)) {
> 
> Add a space after "if" and before "(" please.

I guess I'm immune to learning from experience. :-/

Here's a new improved patch.

--linas

[PATCH] PCI Error Recovery: e100 network device driver

Various PCI bus errors can be signaled by newer PCI controllers.  This
patch adds the PCI error recovery callbacks to the intel ethernet e100
device driver. The patch has been tested, and appears to work well.

Signed-off-by: Linas Vepstas <linas@linas.org>
Acked-by: Jesse Brandeburg <jesse.brandeburg@intel.com>

----

 drivers/net/e100.c |   75 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 75 insertions(+)

Index: linux-2.6.17-rc1/drivers/net/e100.c
===================================================================
--- linux-2.6.17-rc1.orig/drivers/net/e100.c	2006-04-07 16:21:46.000000000 -0500
+++ linux-2.6.17-rc1/drivers/net/e100.c	2006-04-07 18:10:52.411266545 -0500
@@ -2780,6 +2780,80 @@ static void e100_shutdown(struct pci_dev
 		DPRINTK(PROBE,ERR, "Error enabling wake\n");
 }
 
+/* ------------------ PCI Error Recovery infrastructure  -------------- */
+/**
+ * e100_io_error_detected - called when PCI error is detected.
+ * @pdev: Pointer to PCI device
+ * @state: The current pci conneection state
+ */
+static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+
+	/* Similar to calling e100_down(), but avoids adpater I/O. */
+	netdev->stop(netdev);
+
+	/* Detach; put netif into state similar to hotplug unplug. */
+	netif_poll_enable(netdev);
+	netif_device_detach(netdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * e100_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch.
+ */
+static pci_ers_result_t e100_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
+
+	if (pci_enable_device(pdev)) {
+		printk(KERN_ERR "e100: Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	pci_set_master(pdev);
+
+	/* Only one device per card can do a reset */
+	if (0 != PCI_FUNC(pdev->devfn))
+		return PCI_ERS_RESULT_RECOVERED;
+	e100_hw_reset(nic);
+	e100_phy_init(nic);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * e100_io_resume - resume normal operations
+ * @pdev: Pointer to PCI device
+ *
+ * Resume normal operations after an error recovery
+ * sequence has been completed.
+ */
+static void e100_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
+
+	/* ack any pending wake events, disable PME */
+	pci_enable_wake(pdev, 0, 0);
+
+	netif_device_attach(netdev);
+	if (netif_running(netdev)) {
+		e100_open(netdev);
+		mod_timer(&nic->watchdog, jiffies);
+	}
+}
+
+static struct pci_error_handlers e100_err_handler = {
+	.error_detected = e100_io_error_detected,
+	.slot_reset = e100_io_slot_reset,
+	.resume = e100_io_resume,
+};
 
 static struct pci_driver e100_driver = {
 	.name =         DRV_NAME,
@@ -2791,6 +2865,7 @@ static struct pci_driver e100_driver = {
 	.resume =       e100_resume,
 #endif
 	.shutdown =     e100_shutdown,
+	.err_handler = &e100_err_handler,
 };
 
 static int __init e100_init_module(void)

^ permalink raw reply

* Re: [PATCH 1/4] tickless idle cpu - Allow any CPU to update jiffies
From: Paul Mackerras @ 2006-04-07 23:04 UTC (permalink / raw)
  To: vatsa; +Cc: sri_vatsa_v, linuxppc-dev
In-Reply-To: <20060407063044.GA22416@in.ibm.com>

Srivatsa Vaddagiri writes:

> Currently, only boot CPU calls do_timer to update jiffies. This prevents
> idle boot CPU from skipping ticks. Patch below, against 2.6.17-rc1-mm1,
> allows jiffies to be updated from any CPU.

We have to be very careful here.  The code that keeps xtime and
gettimeofday in sync relies on xtime being incremented as close as
possible in time to when the timebase passes specific values.  Since
we currently stagger the timer interrupts for the cpus throughout a
jiffy, having cpus other than the boot cpu calling do_timer will
break this and introduce inaccuracies.  There are also implications
for the stolen time accounting on shared-processor LPAR systems.

I think we need to remove the staggering, thus having all cpus take
their timer interrupt at the same time.  That way, any of them can
call do_timer.  However we then have to be much more careful about
possible contention, e.g. on xtime_lock.  Your patch has every cpu
taking xtime_lock for writing rather than just the boot cpu.  I'd like
to see if there is some way to avoid that (while still having just one
cpu call do_timer, of course).

Regards,
Paul.

^ permalink raw reply

* Virtex-4 FX12 Mini-Module support
From: Aidan Williams @ 2006-04-07 22:42 UTC (permalink / raw)
  To: linuxppc-embedded
In-Reply-To: <4418EA57.6060308@petalogix.com>

[-- Attachment #1: Type: text/plain, Size: 828 bytes --]

Hi All,

I'm using the UQ powerpc uclinux code on the
Memec Virtex-4 FX12 Mini-Module Development Kit.

I have attached a patch with our modifications:

   - switch to set cache policy (OFF, WriteThru, WriteBack)
   - switch to enable PPC405 CPU_213 errata workaround
   - cosmetic update to cputable
   - view ccr0 register in /proc/cpu

The patch is against:
http://www.itee.uq.edu.au/~pml/uclinux_powerpc/linuxppc-2.4-20051021.tgz


The specific modules/chips we're using have a silicon bug,
See the euphemistically named "Solution 13:"
http://www.xilinx.com/xlnx/xil_ans_display.jsp?iLanguageID=1&iCountryID=1&getPagePath=20658

The board boots and runs reliably with the caches OFF.
WriteThru and WriteBack caching cause memory corruption and
this is why we implemented the cache policy switch.


regards
	aidan
____
:wq!


[-- Attachment #2: virtex-4-fx12-minimodule.txt --]
[-- Type: plain/text, Size: 23729 bytes --]

^ permalink raw reply

* Re: question about Linux 2.6 with Xilinx ML-403
From: Grant Likely @ 2006-04-07 22:18 UTC (permalink / raw)
  To: yding, linuxppc-embedded list
In-Reply-To: <4436BD2B.9050306@lnxw.com>

On 4/7/06, yding <yding@lnxw.com> wrote:
>  HI, Grant,
>
>  I found this message :
> http://patchwork.ozlabs.org/linuxppc/patch?id=3841 on
> Internet.
>  It looks like you created some patch files for supporting Linux 2.6 with
> Xilinx ML-403.
>
> how can download the whole kernel source tree with your patched files (via
> cvs or bitkeeper) ?

I believe they are now in Linus' mainline git tree.  If not, they are
in Paul's powerpc git tree.

BTW, please CC the linuxppc-embedded mailing list when emailing me directly.

Cheers,
g.
--
Grant Likely, B.Sc. P.Eng.
Secret Lab Technologies Ltd.
(403) 399-0195

^ permalink raw reply

* [PATCH] powerpc/pseries: clear PCI failure counter if no new failures.
From: Linas Vepstas @ 2006-04-07 21:18 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev, linux-pci, linux-kernel


[PATCH] powerpc/pseries: clear PCI failure counter if no new failures.

The current PCI error recovery system keeps track of the number of 
PCI card resets, and refuses to bring a card back up if this number 
is too large. The goal of doing this was to avoid an infinite loop 
of resets if a card is obviously dead.  However, if the failures are
rare, but the machine has a high uptime, this mechanism might still
be triggered; this is too harsh.

This patch avoids this problem by decrementing the fail count 
after an hour. Thus, as long as a pci card BSOD's less than 6 times 
an hour, it will continue to be reset indefinitely. If its failure 
rate is greater than that, it will be taken off-line permanently.

This patch is larger than it might otherwise be because it 
changes indentation by removing a pointless while-loop. The while 
loop is not needed, as the handler is invoked once for each event 
(by schedule_work()); the loop is leftover cruft from an earlier 
implementation. 

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>

----
 arch/powerpc/platforms/pseries/eeh_driver.c |   13 +++---
 arch/powerpc/platforms/pseries/eeh_event.c  |   60 +++++++++++++++-------------
 include/asm-powerpc/eeh_event.h             |   10 ++--
 3 files changed, 45 insertions(+), 38 deletions(-)

Index: linux-2.6.17-rc1/arch/powerpc/platforms/pseries/eeh_driver.c
===================================================================
--- linux-2.6.17-rc1.orig/arch/powerpc/platforms/pseries/eeh_driver.c	2006-04-04 15:28:59.000000000 -0500
+++ linux-2.6.17-rc1/arch/powerpc/platforms/pseries/eeh_driver.c	2006-04-07 16:08:27.000000000 -0500
@@ -23,9 +23,8 @@
  *
  */
 #include <linux/delay.h>
-#include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <linux/notifier.h>
+#include <linux/irq.h>
 #include <linux/pci.h>
 #include <asm/eeh.h>
 #include <asm/eeh_event.h>
@@ -250,7 +249,7 @@ static int eeh_reset_device (struct pci_
  */
 #define MAX_WAIT_FOR_RECOVERY 15
 
-void handle_eeh_events (struct eeh_event *event)
+struct pci_dn * handle_eeh_events (struct eeh_event *event)
 {
 	struct device_node *frozen_dn;
 	struct pci_dn *frozen_pdn;
@@ -265,7 +264,7 @@ void handle_eeh_events (struct eeh_event
 	if (!frozen_dn) {
 		printk(KERN_ERR "EEH: Error: Cannot find partition endpoint for %s\n",
 		        pci_name(event->dev));
-		return;
+		return NULL;
 	}
 
 	/* There are two different styles for coming up with the PE.
@@ -280,7 +279,7 @@ void handle_eeh_events (struct eeh_event
 	if (!frozen_bus) {
 		printk(KERN_ERR "EEH: Cannot find PCI bus for %s\n",
 		        frozen_dn->full_name);
-		return;
+		return NULL;
 	}
 
 #if 0
@@ -355,7 +354,7 @@ void handle_eeh_events (struct eeh_event
 	/* Tell all device drivers that they can resume operations */
 	pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
 
-	return;
+	return frozen_pdn;
 	
 excess_failures:
 	/*
@@ -384,6 +383,8 @@ perm_error:
 
 	/* Shut down the device drivers for good. */
 	pcibios_remove_pci_devices(frozen_bus);
+
+	return NULL;
 }
 
 /* ---------- end of file ---------- */
Index: linux-2.6.17-rc1/arch/powerpc/platforms/pseries/eeh_event.c
===================================================================
--- linux-2.6.17-rc1.orig/arch/powerpc/platforms/pseries/eeh_event.c	2006-04-04 15:28:59.000000000 -0500
+++ linux-2.6.17-rc1/arch/powerpc/platforms/pseries/eeh_event.c	2006-04-05 09:56:38.000000000 -0500
@@ -18,6 +18,7 @@
  * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
  */
 
+#include <linux/delay.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/pci.h>
@@ -56,38 +57,43 @@ static int eeh_event_handler(void * dumm
 {
 	unsigned long flags;
 	struct eeh_event	*event;
+	struct pci_dn *pdn;
 
 	daemonize ("eehd");
+	set_current_state(TASK_INTERRUPTIBLE);
 
-	while (1) {
-		set_current_state(TASK_INTERRUPTIBLE);
+	spin_lock_irqsave(&eeh_eventlist_lock, flags);
+	event = NULL;
+
+	/* Unqueue the event, get ready to process. */
+	if (!list_empty(&eeh_eventlist)) {
+		event = list_entry(eeh_eventlist.next, struct eeh_event, list);
+		list_del(&event->list);
+	}
+	spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
 
-		spin_lock_irqsave(&eeh_eventlist_lock, flags);
-		event = NULL;
+	if (event == NULL)
+		return 0;
 
-		/* Unqueue the event, get ready to process. */
-		if (!list_empty(&eeh_eventlist)) {
-			event = list_entry(eeh_eventlist.next, struct eeh_event, list);
-			list_del(&event->list);
-		}
-		spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
-
-		if (event == NULL)
-			break;
-
-		/* Serialize processing of EEH events */
-		mutex_lock(&eeh_event_mutex);
-		eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);
-
-		printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
-		       pci_name(event->dev));
-
-		handle_eeh_events(event);
-
-		eeh_clear_slot(event->dn, EEH_MODE_RECOVERING);
-		pci_dev_put(event->dev);
-		kfree(event);
-		mutex_unlock(&eeh_event_mutex);
+	/* Serialize processing of EEH events */
+	mutex_lock(&eeh_event_mutex);
+	eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);
+
+	printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
+	       pci_name(event->dev));
+
+	pdn = handle_eeh_events(event);
+
+	eeh_clear_slot(event->dn, EEH_MODE_RECOVERING);
+	pci_dev_put(event->dev);
+	kfree(event);
+	mutex_unlock(&eeh_event_mutex);
+
+	/* If there are no new errors after an hour, clear the counter. */
+	if (pdn && pdn->eeh_freeze_count>0) {
+		msleep_interruptible (3600*1000);
+		if (pdn->eeh_freeze_count>0)
+			pdn->eeh_freeze_count--;
 	}
 
 	return 0;
Index: linux-2.6.17-rc1/include/asm-powerpc/eeh_event.h
===================================================================
--- linux-2.6.17-rc1.orig/include/asm-powerpc/eeh_event.h	2006-03-19 23:53:29.000000000 -0600
+++ linux-2.6.17-rc1/include/asm-powerpc/eeh_event.h	2006-04-04 15:37:22.000000000 -0500
@@ -18,8 +18,8 @@
  * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
  */
 
-#ifndef ASM_PPC64_EEH_EVENT_H
-#define ASM_PPC64_EEH_EVENT_H
+#ifndef ASM_POWERPC_EEH_EVENT_H
+#define ASM_POWERPC_EEH_EVENT_H
 #ifdef __KERNEL__
 
 /** EEH event -- structure holding pci controller data that describes
@@ -39,7 +39,7 @@ struct eeh_event {
  * @dev pci device
  *
  * This routine builds a PCI error event which will be delivered
- * to all listeners on the peh_notifier_chain.
+ * to all listeners on the eeh_notifier_chain.
  *
  * This routine can be called within an interrupt context;
  * the actual event will be delivered in a normal context
@@ -51,7 +51,7 @@ int eeh_send_failure_event (struct devic
                             int time_unavail);
 
 /* Main recovery function */
-void handle_eeh_events (struct eeh_event *);
+struct pci_dn * handle_eeh_events (struct eeh_event *);
 
 #endif /* __KERNEL__ */
-#endif /* ASM_PPC64_EEH_EVENT_H */
+#endif /* ASM_POWERPC_EEH_EVENT_H */

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox