linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Support for relocatable kdump kernel
@ 2008-10-01 18:26 Mohan Kumar M
  2008-10-09  5:27 ` Paul Mackerras
  0 siblings, 1 reply; 27+ messages in thread
From: Mohan Kumar M @ 2008-10-01 18:26 UTC (permalink / raw)
  To: paulus; +Cc: ppcdev, kexec

Support for relocatable kdump kernel

This patch adds relocatable kernel support for kdump. With this one can
use the same regular kernel to capture the kdump. A signature (0xfeed1234)
is passed in r8 from panic code to the next kernel through kexec_sequence
and purgatory code. The signature is used to differentiate between
relocatable kdump kernel and non-kdump kernels.

The purgatory code compares the signature and sets the __kdump_flag in
head_64.S.  During the boot up, kernel code checks __kdump_flag and if it
is set, the kernel will behave as relocatable kdump kernel. This kernel
will boot at the address where it was loaded by kexec-tools ie at the
address reserved through crashkernel boot parameter.

Enabling both the CONFIG_RELOCATABLE and CONFIG_CRASH_DUMP options makes the
kdump kernel relocatable, so the same kernel can be used as both the
production and the kdump kernel.

Signed-off-by: Mohan Kumar M <mohan@in.ibm.com>
---
 Documentation/kdump/kdump.txt          |   14 ++++++--
 arch/powerpc/Kconfig                   |    4 +-
 arch/powerpc/include/asm/kdump.h       |   16 ++++++++
 arch/powerpc/kernel/crash_dump.c       |    2 +
 arch/powerpc/kernel/head_64.S          |   60 +++++++++++++++++++++++++++++---
 arch/powerpc/kernel/iommu.c            |    2 +-
 arch/powerpc/kernel/machine_kexec.c    |    2 +
 arch/powerpc/kernel/machine_kexec_64.c |   12 ++++--
 arch/powerpc/kernel/misc_64.S          |   10 ++++--
 9 files changed, 104 insertions(+), 18 deletions(-)

diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 0705040..3f4bc84 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -109,7 +109,8 @@ There are two possible methods of using Kdump.
 2) Or use the system kernel binary itself as dump-capture kernel and there is
    no need to build a separate dump-capture kernel. This is possible
    only with the architecutres which support a relocatable kernel. As
-   of today, i386, x86_64 and ia64 architectures support relocatable kernel.
+   of today, i386, x86_64, ppc64 and ia64 architectures support relocatable
+   kernel.
 
 Building a relocatable kernel is advantageous from the point of view that
 one does not have to build a second kernel for capturing the dump. But
@@ -207,8 +208,15 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
 Dump-capture kernel config options (Arch Dependent, ppc64)
 ----------------------------------------------------------
 
-*  Make and install the kernel and its modules. DO NOT add this kernel
-   to the boot loader configuration files.
+1) Enable "Build a kdump crash kernel" support under "Kernel" options:
+
+   CONFIG_CRASH_DUMP=y
+
+2)   Enable "Build a relocatable kernel" support
+
+   CONFIG_RELOCATABLE=y
+
+   Make and install the kernel and its modules.
 
 Dump-capture kernel config options (Arch Dependent, ia64)
 ----------------------------------------------------------
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 17c988b..f04a96a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -824,11 +824,11 @@ config PAGE_OFFSET
 	default "0xc000000000000000"
 config KERNEL_START
 	hex
-	default "0xc000000002000000" if CRASH_DUMP
+	default "0xc000000002000000" if CRASH_DUMP && !RELOCATABLE
 	default "0xc000000000000000"
 config PHYSICAL_START
 	hex
-	default "0x02000000" if CRASH_DUMP
+	default "0x02000000" if CRASH_DUMP && !RELOCATABLE
 	default "0x00000000"
 endif
 
diff --git a/arch/powerpc/include/asm/kdump.h b/arch/powerpc/include/asm/kdump.h
index f6c93c7..5308754 100644
--- a/arch/powerpc/include/asm/kdump.h
+++ b/arch/powerpc/include/asm/kdump.h
@@ -9,6 +9,12 @@
  * Reserve to the end of the FWNMI area, see head_64.S */
 #define KDUMP_RESERVE_LIMIT	0x10000 /* 64K */
 
+/*
+ * Used to differentiate between relocatable kdump kernel and other
+ * kernels
+ */
+#define KDUMP_SIGNATURE	0xfeed1234
+
 #ifdef CONFIG_CRASH_DUMP
 
 #define KDUMP_TRAMPOLINE_START	0x0100
@@ -19,11 +25,21 @@
 #endif /* CONFIG_CRASH_DUMP */
 
 #ifndef __ASSEMBLY__
+
+extern unsigned long long __kdump_flag;
+
 #ifdef CONFIG_CRASH_DUMP
+#ifdef CONFIG_RELOCATABLE
+
+static inline void reserve_kdump_trampoline(void) { ; }
+static inline void setup_kdump_trampoline(void) { ; }
+
+#else
 
 extern void reserve_kdump_trampoline(void);
 extern void setup_kdump_trampoline(void);
 
+#endif /* CONFIG_RELOCATABLE */
 #else /* !CONFIG_CRASH_DUMP */
 
 static inline void reserve_kdump_trampoline(void) { ; }
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index a323c9b..eaf9d6d 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -27,6 +27,7 @@
 #define DBG(fmt...)
 #endif
 
+#ifndef CONFIG_RELOCATABLE
 void __init reserve_kdump_trampoline(void)
 {
 	lmb_reserve(0, KDUMP_RESERVE_LIMIT);
@@ -65,6 +66,7 @@ void __init setup_kdump_trampoline(void)
 
 	DBG(" <- setup_kdump_trampoline()\n");
 }
+#endif /* CONFIG_RELOCATABLE */
 
 #ifdef CONFIG_PROC_VMCORE
 static int __init parse_elfcorehdr(char *p)
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 8934500..29c2c34 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -97,6 +97,14 @@ __secondary_hold_spinloop:
 __secondary_hold_acknowledge:
 	.llong	0x0
 
+	/* This flag is set only for kdump kernels so that */
+	/* it will be relocatable. The purgatory code in user-space */
+	/* kexec-tools sets this flag. Do not move this variable, as the */
+	/* purgatory code relies on its position. */
+	.globl	__kdump_flag
+__kdump_flag:
+	.llong	0x0
+
 #ifdef CONFIG_PPC_ISERIES
 	/*
 	 * At offset 0x20, there is a pointer to iSeries LPAR data.
@@ -1384,7 +1392,15 @@ _STATIC(__after_prom_start)
 	/* process relocations for the final address of the kernel */
 	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
 	sldi	r25,r25,32
-	mr	r3,r25
+#ifdef CONFIG_CRASH_DUMP
+	ld	r7,__kdump_flag@got(r2)
+	add	r7,r7,r26
+	ld	r7,0(r7)
+	cmpldi	cr0,r7,1	/* relocatable kernel ? */
+	bne	1f
+	add	r25,r25,r26
+#endif
+1:	mr	r3,r25
 	bl	.relocate
 #endif
 
@@ -1398,10 +1414,26 @@ _STATIC(__after_prom_start)
 	LOAD_REG_IMMEDIATE(r3, PHYSICAL_START) /* target addr */
 	mr.	r4,r26			/* In some cases the loader may  */
 	beq	9f			/* have already put us at zero */
-	lis	r5,(copy_to_here - _stext)@ha
-	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
 	li	r6,0x100		/* Start offset, the first 0x100 */
 					/* bytes were copied earlier.	 */
+#ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * Check if the kernel has to be running as relocatable kernel based on the
+ * variable __kdump_flag, if it is set the kernel is treated as relocatable
+ * kernel, otherwise it will be moved to PHYSICAL_START
+ */
+	ld	r7,__kdump_flag@got(r2)
+	ld	r7,0(r7)
+	cmpldi	cr0,r7,1
+	bne	regular
+
+	li	r5,__end_interrupts - _stext	/* just copy interrupts */
+	b	5f
+regular:
+#endif
+	lis	r5,(copy_to_here - _stext)@ha
+	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
 
 	bl	.copy_and_flush		/* copy the first n bytes	 */
 					/* this includes the code being	 */
@@ -1411,15 +1443,33 @@ _STATIC(__after_prom_start)
 	mtctr	r8
 	bctr
 
+p_end:	.llong	_end - _stext
+
 4:	/* Now copy the rest of the kernel up to _end */
 	addis	r5,r26,(p_end - _stext)@ha
 	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
-	bl	.copy_and_flush		/* copy the rest */
+#else
+	lis	r5,(copy_to_here - _stext)@ha
+	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
 
-9:	b	.start_here_multiplatform
+	bl	.copy_and_flush		/* copy the first n bytes	 */
+					/* this includes the code being	 */
+					/* executed here.		 */
+	addis	r8,r3,(4f - _stext)@ha	/* Jump to the copy of this code */
+	addi	r8,r8,(4f - _stext)@l	/* that we just made */
+	mtctr	r8
+	bctr
 
 p_end:	.llong	_end - _stext
 
+4:	/* Now copy the rest of the kernel up to _end */
+	addis	r5,r26,(p_end - _stext)@ha
+	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
+#endif
+5:	bl	.copy_and_flush		/* copy the rest */
+
+9:	b	.start_here_multiplatform
+
 /*
  * Copy routine used to copy the kernel to start at physical address 0
  * and flush and invalidate the caches as needed.
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 550a193..24f7797 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -494,7 +494,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 	spin_lock_init(&tbl->it_lock);
 
 #ifdef CONFIG_CRASH_DUMP
-	if (ppc_md.tce_get) {
+	if (ppc_md.tce_get && __kdump_flag) {
 		unsigned long index;
 		unsigned long tceval;
 		unsigned long tcecount = 0;
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index aab7688..ac2a21f 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -88,11 +88,13 @@ void __init reserve_crashkernel(void)
 
 	crash_size = crashk_res.end - crashk_res.start + 1;
 
+#ifndef CONFIG_RELOCATABLE
 	if (crashk_res.start != KDUMP_KERNELBASE)
 		printk("Crash kernel location must be 0x%x\n",
 				KDUMP_KERNELBASE);
 
 	crashk_res.start = KDUMP_KERNELBASE;
+#endif
 	crash_size = PAGE_ALIGN(crash_size);
 	crashk_res.end = crashk_res.start + crash_size - 1;
 
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index a168514..6a45a9e 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -255,11 +255,13 @@ static union thread_union kexec_stack
 /* Our assembly helper, in kexec_stub.S */
 extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
 					void *image, void *control,
-					void (*clear_all)(void)) ATTRIB_NORET;
+					void (*clear_all)(void),
+				unsigned long long kdump_flag) ATTRIB_NORET;
 
 /* too late to fail here */
 void default_machine_kexec(struct kimage *image)
 {
+	unsigned long long kdump_flag = 0;
 	/* prepare control code if any */
 
 	/*
@@ -270,8 +272,10 @@ void default_machine_kexec(struct kimage *image)
         * using debugger IPI.
         */
 
-       if (crashing_cpu == -1)
-               kexec_prepare_cpus();
+	if (crashing_cpu == -1)
+		kexec_prepare_cpus();
+	else
+		kdump_flag = KDUMP_SIGNATURE;
 
 	/* switch to a staticly allocated stack.  Based on irq stack code.
 	 * XXX: the task struct will likely be invalid once we do the copy!
@@ -284,7 +288,7 @@ void default_machine_kexec(struct kimage *image)
 	 */
 	kexec_sequence(&kexec_stack, image->start, image,
 			page_address(image->control_code_page),
-			ppc_md.hpte_clear_all);
+			ppc_md.hpte_clear_all, kdump_flag);
 	/* NOTREACHED */
 }
 
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 4dd70cf..c93e5f7 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -609,10 +609,13 @@ real_mode:	/* assume normal blr return */
 
 
 /*
- * kexec_sequence(newstack, start, image, control, clear_all())
+ * kexec_sequence(newstack, start, image, control, clear_all(), kdump_flag)
  *
  * does the grungy work with stack switching and real mode switches
  * also does simple calls to other code
+ *
+ * kdump_flag says whether the next kernel should be running at the reserved
+ * load address as needed for relocatable kdump kernel
  */
 
 _GLOBAL(kexec_sequence)
@@ -645,7 +648,7 @@ _GLOBAL(kexec_sequence)
 	mr	r29,r5			/* image (virt) */
 	mr	r28,r6			/* control, unused */
 	mr	r27,r7			/* clear_all() fn desc */
-	mr	r26,r8			/* spare */
+	mr	r26,r8			/* kdump flag */
 	lhz	r25,PACAHWCPUID(r13)	/* get our phys cpu from paca */
 
 	/* disable interrupts, we are overwriting kernel data next */
@@ -707,5 +710,6 @@ _GLOBAL(kexec_sequence)
 	mr	r4,r30	# start, aka phys mem offset
 	mtlr	4
 	li	r5,0
-	blr	/* image->start(physid, image->start, 0); */
+	mr	r6,r26			/* kdump_flag */
+	blr	/* image->start(physid, image->start, 0, kdump_flag); */
 #endif /* CONFIG_KEXEC */
-- 
1.5.5.1

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] Support for relocatable kdump kernel
  2008-10-01 18:26 Mohan Kumar M
@ 2008-10-09  5:27 ` Paul Mackerras
  2008-10-09 16:35   ` Mohan Kumar M
  0 siblings, 1 reply; 27+ messages in thread
From: Paul Mackerras @ 2008-10-09  5:27 UTC (permalink / raw)
  To: mohan; +Cc: ppcdev, kexec

Mohan Kumar M writes:

> Support for relocatable kdump kernel

[snip]

> @@ -1384,7 +1392,15 @@ _STATIC(__after_prom_start)
>  	/* process relocations for the final address of the kernel */
>  	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
>  	sldi	r25,r25,32
> -	mr	r3,r25
> +#ifdef CONFIG_CRASH_DUMP
> +	ld	r7,__kdump_flag@got(r2)
> +	add	r7,r7,r26
> +	ld	r7,0(r7)

I think it is dangerous to use an address from the GOT at this point
when we haven't called relocate() yet.  In fact those 3 instructions
can be replaced by one:

	ld	r7,__kdump_flag-_stext(r26)

since we have our base address (i.e. the address of _stext) in r26 at
this point.

> +#ifdef CONFIG_RELOCATABLE
> +#ifdef CONFIG_CRASH_DUMP
> +/*
> + * Check if the kernel has to be running as relocatable kernel based on the
> + * variable __kdump_flag, if it is set the kernel is treated as relocatble
> + * kernel, otherwise it will be moved to PHYSICAL_START
> + */
> +	ld	r7,__kdump_flag@got(r2)
> +	ld	r7,0(r7)

Here again I would rather you did

	ld	r7,__kdump_flag-_stext(r26)

since the kernel is relocated for its final location by this point,
but it is not running at the final location yet.

> +	cmpldi	cr0,r7,1
> +	bne	regular
> +
> +	li	r5,__end_interrupts - _stext	/* just copy interrupts */
> +	b	5f
> +regular:
> +#endif
> +	lis	r5,(copy_to_here - _stext)@ha
> +	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
>  
>  	bl	.copy_and_flush		/* copy the first n bytes	 */
>  					/* this includes the code being	 */
> @@ -1411,15 +1443,33 @@ _STATIC(__after_prom_start)
>  	mtctr	r8
>  	bctr
>  
> +p_end:	.llong	_end - _stext
> +
>  4:	/* Now copy the rest of the kernel up to _end */
>  	addis	r5,r26,(p_end - _stext)@ha
>  	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
> -	bl	.copy_and_flush		/* copy the rest */
> +#else
> +	lis	r5,(copy_to_here - _stext)@ha
> +	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
>  
> -9:	b	.start_here_multiplatform
> +	bl	.copy_and_flush		/* copy the first n bytes	 */
> +					/* this includes the code being	 */
> +					/* executed here.		 */
> +	addis	r8,r3,(4f - _stext)@ha	/* Jump to the copy of this code */
> +	addi	r8,r8,(4f - _stext)@l	/* that we just made */
> +	mtctr	r8
> +	bctr
>  
>  p_end:	.llong	_end - _stext
>  
> +4:	/* Now copy the rest of the kernel up to _end */
> +	addis	r5,r26,(p_end - _stext)@ha
> +	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
> +#endif
> +5:	bl	.copy_and_flush		/* copy the rest */
> +
> +9:	b	.start_here_multiplatform

You have ended up with two separate copies of the code here depending
on whether or not we have CONFIG_RELOCATABLE set.  I don't think the
code paths should be different to such an extent.  Please try to make
the ifdef as small as possible (ideally, nonexistent).

Paul.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] Support for relocatable kdump kernel
  2008-10-09  5:27 ` Paul Mackerras
@ 2008-10-09 16:35   ` Mohan Kumar M
  0 siblings, 0 replies; 27+ messages in thread
From: Mohan Kumar M @ 2008-10-09 16:35 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: ppcdev, kexec

Hi Paul,

Thank you for the review. I will implement the changes you suggested and 
send the patches.

Regards,
Mohan.

> Mohan Kumar M writes:
> 
>> Support for relocatable kdump kernel
> 
> [snip]
> 
>> @@ -1384,7 +1392,15 @@ _STATIC(__after_prom_start)
>>  	/* process relocations for the final address of the kernel */
>>  	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
>>  	sldi	r25,r25,32
>> -	mr	r3,r25
>> +#ifdef CONFIG_CRASH_DUMP
>> +	ld	r7,__kdump_flag@got(r2)
>> +	add	r7,r7,r26
>> +	ld	r7,0(r7)
> 
> I think it is dangerous to use an address from the GOT at this point
> when we haven't called relocate() yet.  In fact those 3 instructions
> can be replaced by one:
> 
> 	ld	r7,__kdump_flag-_stext(r26)
> 
> since we have our base address (i.e. the address of _stext) in r26 at
> this point.
> 
>> +#ifdef CONFIG_RELOCATABLE
>> +#ifdef CONFIG_CRASH_DUMP
>> +/*
>> + * Check if the kernel has to be running as relocatable kernel based on the
>> + * variable __kdump_flag, if it is set the kernel is treated as relocatble
>> + * kernel, otherwise it will be moved to PHYSICAL_START
>> + */
>> +	ld	r7,__kdump_flag@got(r2)
>> +	ld	r7,0(r7)
> 
> Here again I would rather you did
> 
> 	ld	r7,__kdump_flag-_stext(r26)
> 
> since the kernel is relocated for its final location by this point,
> but it is not running at the final location yet.
> 
>> +	cmpldi	cr0,r7,1
>> +	bne	regular
>> +
>> +	li	r5,__end_interrupts - _stext	/* just copy interrupts */
>> +	b	5f
>> +regular:
>> +#endif
>> +	lis	r5,(copy_to_here - _stext)@ha
>> +	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
>>  
>>  	bl	.copy_and_flush		/* copy the first n bytes	 */
>>  					/* this includes the code being	 */
>> @@ -1411,15 +1443,33 @@ _STATIC(__after_prom_start)
>>  	mtctr	r8
>>  	bctr
>>  
>> +p_end:	.llong	_end - _stext
>> +
>>  4:	/* Now copy the rest of the kernel up to _end */
>>  	addis	r5,r26,(p_end - _stext)@ha
>>  	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
>> -	bl	.copy_and_flush		/* copy the rest */
>> +#else
>> +	lis	r5,(copy_to_here - _stext)@ha
>> +	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
>>  
>> -9:	b	.start_here_multiplatform
>> +	bl	.copy_and_flush		/* copy the first n bytes	 */
>> +					/* this includes the code being	 */
>> +					/* executed here.		 */
>> +	addis	r8,r3,(4f - _stext)@ha	/* Jump to the copy of this code */
>> +	addi	r8,r8,(4f - _stext)@l	/* that we just made */
>> +	mtctr	r8
>> +	bctr
>>  
>>  p_end:	.llong	_end - _stext
>>  
>> +4:	/* Now copy the rest of the kernel up to _end */
>> +	addis	r5,r26,(p_end - _stext)@ha
>> +	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
>> +#endif
>> +5:	bl	.copy_and_flush		/* copy the rest */
>> +
>> +9:	b	.start_here_multiplatform
> 
> You have ended up with two separate copies of the code here depending
> on whether or not we have CONFIG_RELOCATABLE set.  I don't think the
> code paths should be different to such an extent.  Please try to make
> the ifdef as small as possible (ideally, nonexistent).
> 
> Paul.
> 
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH] Support for relocatable kdump kernel
@ 2008-10-12 23:34 Mohan Kumar M
  2008-10-13  1:30 ` Paul Mackerras
  0 siblings, 1 reply; 27+ messages in thread
From: Mohan Kumar M @ 2008-10-12 23:34 UTC (permalink / raw)
  To: paulus; +Cc: linuxppc-dev, kexec

Support for relocatable kdump kernel

This patch adds relocatable kernel support for kdump. With this one can
use the same regular kernel to capture the kdump. A signature (0xfeed1234)
is passed in r8 from panic code to the next kernel through kexec_sequence
and purgatory code. The signature is used to differentiate between
relocatable kdump kernel and non-kdump kernels.

The purgatory code compares the signature and sets the __kdump_flag in
head_64.S.  During the boot up, kernel code checks __kdump_flag and if it
is set, the kernel will behave as relocatable kdump kernel. This kernel
will boot at the address where it was loaded by kexec-tools ie at the
address reserved through the crashkernel boot parameter.

CONFIG_CRASH_DUMP now depends on CONFIG_RELOCATABLE, so the kdump kernel is
always built as relocatable and the same kernel can be used as both the
production and the kdump kernel.

This patch incorporates the changes suggested by Paul Mackerras to avoid
GOT use and to avoid two copies of the code.

Signed-off-by: Mohan Kumar M <mohan@in.ibm.com>
---
 Documentation/kdump/kdump.txt          |   14 ++++++++++---
 arch/powerpc/Kconfig                   |    8 ++-----
 arch/powerpc/include/asm/kdump.h       |   16 +++++++++++++++
 arch/powerpc/kernel/crash_dump.c       |    2 +
 arch/powerpc/kernel/head_64.S          |   34 ++++++++++++++++++++++++++++-----
 arch/powerpc/kernel/iommu.c            |    2 -
 arch/powerpc/kernel/machine_kexec.c    |    2 +
 arch/powerpc/kernel/machine_kexec_64.c |   12 +++++++----
 arch/powerpc/kernel/misc_64.S          |   10 ++++++---
 9 files changed, 79 insertions(+), 21 deletions(-)

diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 0705040..3f4bc84 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -109,7 +109,8 @@ There are two possible methods of using Kdump.
 2) Or use the system kernel binary itself as dump-capture kernel and there is
    no need to build a separate dump-capture kernel. This is possible
    only with the architecutres which support a relocatable kernel. As
-   of today, i386, x86_64 and ia64 architectures support relocatable kernel.
+   of today, i386, x86_64, ppc64 and ia64 architectures support relocatable
+   kernel.
 
 Building a relocatable kernel is advantageous from the point of view that
 one does not have to build a second kernel for capturing the dump. But
@@ -207,8 +208,15 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
 Dump-capture kernel config options (Arch Dependent, ppc64)
 ----------------------------------------------------------
 
-*  Make and install the kernel and its modules. DO NOT add this kernel
-   to the boot loader configuration files.
+1) Enable "Build a kdump crash kernel" support under "Kernel" options:
+
+   CONFIG_CRASH_DUMP=y
+
+2)   Enable "Build a relocatable kernel" support
+
+   CONFIG_RELOCATABLE=y
+
+   Make and install the kernel and its modules.
 
 Dump-capture kernel config options (Arch Dependent, ia64)
 ----------------------------------------------------------
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 17c988b..6b3e840 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -321,11 +321,11 @@ config KEXEC
 
 config CRASH_DUMP
 	bool "Build a kdump crash kernel"
-	depends on PPC_MULTIPLATFORM && PPC64
+	depends on PPC_MULTIPLATFORM && PPC64 && RELOCATABLE
 	help
 	  Build a kernel suitable for use as a kdump capture kernel.
-	  The kernel will be linked at a different address than normal, and
-	  so can only be used for Kdump.
+	  The same kernel binary can be used as production kernel and dump capture
+	  kernel
 
 	  Don't change this unless you know what you are doing.
 
@@ -824,11 +824,9 @@ config PAGE_OFFSET
 	default "0xc000000000000000"
 config KERNEL_START
 	hex
-	default "0xc000000002000000" if CRASH_DUMP
 	default "0xc000000000000000"
 config PHYSICAL_START
 	hex
-	default "0x02000000" if CRASH_DUMP
 	default "0x00000000"
 endif
 
diff --git a/arch/powerpc/include/asm/kdump.h b/arch/powerpc/include/asm/kdump.h
index f6c93c7..5308754 100644
--- a/arch/powerpc/include/asm/kdump.h
+++ b/arch/powerpc/include/asm/kdump.h
@@ -9,6 +9,12 @@
  * Reserve to the end of the FWNMI area, see head_64.S */
 #define KDUMP_RESERVE_LIMIT	0x10000 /* 64K */
 
+/*
+ * Used to differentiate between relocatable kdump kernel and other
+ * kernels
+ */
+#define KDUMP_SIGNATURE	0xfeed1234
+
 #ifdef CONFIG_CRASH_DUMP
 
 #define KDUMP_TRAMPOLINE_START	0x0100
@@ -19,11 +25,21 @@
 #endif /* CONFIG_CRASH_DUMP */
 
 #ifndef __ASSEMBLY__
+
+extern unsigned long long __kdump_flag;
+
 #ifdef CONFIG_CRASH_DUMP
+#ifdef CONFIG_RELOCATABLE
+
+static inline void reserve_kdump_trampoline(void) { ; }
+static inline void setup_kdump_trampoline(void) { ; }
+
+#else
 
 extern void reserve_kdump_trampoline(void);
 extern void setup_kdump_trampoline(void);
 
+#endif /* CONFIG_RELOCATABLE */
 #else /* !CONFIG_CRASH_DUMP */
 
 static inline void reserve_kdump_trampoline(void) { ; }
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index a323c9b..eaf9d6d 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -27,6 +27,7 @@
 #define DBG(fmt...)
 #endif
 
+#ifndef CONFIG_RELOCATABLE
 void __init reserve_kdump_trampoline(void)
 {
 	lmb_reserve(0, KDUMP_RESERVE_LIMIT);
@@ -65,6 +66,7 @@ void __init setup_kdump_trampoline(void)
 
 	DBG(" <- setup_kdump_trampoline()\n");
 }
+#endif /* CONFIG_RELOCATABLE */
 
 #ifdef CONFIG_PROC_VMCORE
 static int __init parse_elfcorehdr(char *p)
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index e409338..5b12b10 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -97,6 +97,14 @@ __secondary_hold_spinloop:
 __secondary_hold_acknowledge:
 	.llong	0x0
 
+	/* This flag is set only for kdump kernels so that */
+	/* it will be relocatable. The purgatory code in user-space */
+	/* kexec-tools sets this flag. Do not move this variable, as the */
+	/* purgatory code relies on its position. */
+	.globl	__kdump_flag
+__kdump_flag:
+	.llong	0x0
+
 #ifdef CONFIG_PPC_ISERIES
 	/*
 	 * At offset 0x20, there is a pointer to iSeries LPAR data.
@@ -1384,8 +1392,13 @@ _STATIC(__after_prom_start)
 	/* process relocations for the final address of the kernel */
 	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
 	sldi	r25,r25,32
+#ifdef CONFIG_CRASH_DUMP
+	ld	r7,__kdump_flag-_stext(r26)
+	cmpldi	cr0,r7,1	/* relocatable kernel ? */
+	bne	1f
 	add	r25,r25,r26
-	mr	r3,r25
+#endif
+1:	mr	r3,r25
 	bl	.relocate
 #endif
 
@@ -1401,9 +1414,21 @@ _STATIC(__after_prom_start)
 	beq	9f			/* have already put us at zero */
 	li	r6,0x100		/* Start offset, the first 0x100 */
 					/* bytes were copied earlier.	 */
-#ifdef CONFIG_RELOCATABLE
+
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * Check if the kernel has to be running as relocatable kernel based on the
+ * variable __kdump_flag, if it is set the kernel is treated as relocatable
+ * kernel, otherwise it will be moved to PHYSICAL_START
+ */
+	ld	r7,__kdump_flag-_stext(r26)
+	cmpldi	cr0,r7,1
+	bne	regular
+
 	li	r5,__end_interrupts - _stext	/* just copy interrupts */
-#else
+	b	5f
+regular:
+#endif
 	lis	r5,(copy_to_here - _stext)@ha
 	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
 
@@ -1420,8 +1445,7 @@ p_end:	.llong	_end - _stext
 4:	/* Now copy the rest of the kernel up to _end */
 	addis	r5,r26,(p_end - _stext)@ha
 	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
-#endif
-	bl	.copy_and_flush		/* copy the rest */
+5:	bl	.copy_and_flush		/* copy the rest */
 
 9:	b	.start_here_multiplatform
 
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 550a193..24f7797 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -494,7 +494,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 	spin_lock_init(&tbl->it_lock);
 
 #ifdef CONFIG_CRASH_DUMP
-	if (ppc_md.tce_get) {
+	if (ppc_md.tce_get && __kdump_flag) {
 		unsigned long index;
 		unsigned long tceval;
 		unsigned long tcecount = 0;
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index aab7688..ac2a21f 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -88,11 +88,13 @@ void __init reserve_crashkernel(void)
 
 	crash_size = crashk_res.end - crashk_res.start + 1;
 
+#ifndef CONFIG_RELOCATABLE
 	if (crashk_res.start != KDUMP_KERNELBASE)
 		printk("Crash kernel location must be 0x%x\n",
 				KDUMP_KERNELBASE);
 
 	crashk_res.start = KDUMP_KERNELBASE;
+#endif
 	crash_size = PAGE_ALIGN(crash_size);
 	crashk_res.end = crashk_res.start + crash_size - 1;
 
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index a168514..6a45a9e 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -255,11 +255,13 @@ static union thread_union kexec_stack
 /* Our assembly helper, in kexec_stub.S */
 extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
 					void *image, void *control,
-					void (*clear_all)(void)) ATTRIB_NORET;
+					void (*clear_all)(void),
+				unsigned long long kdump_flag) ATTRIB_NORET;
 
 /* too late to fail here */
 void default_machine_kexec(struct kimage *image)
 {
+	unsigned long long kdump_flag = 0;
 	/* prepare control code if any */
 
 	/*
@@ -270,8 +272,10 @@ void default_machine_kexec(struct kimage *image)
         * using debugger IPI.
         */
 
-       if (crashing_cpu == -1)
-               kexec_prepare_cpus();
+	if (crashing_cpu == -1)
+		kexec_prepare_cpus();
+	else
+		kdump_flag = KDUMP_SIGNATURE;
 
 	/* switch to a staticly allocated stack.  Based on irq stack code.
 	 * XXX: the task struct will likely be invalid once we do the copy!
@@ -284,7 +288,7 @@ void default_machine_kexec(struct kimage *image)
 	 */
 	kexec_sequence(&kexec_stack, image->start, image,
 			page_address(image->control_code_page),
-			ppc_md.hpte_clear_all);
+			ppc_md.hpte_clear_all, kdump_flag);
 	/* NOTREACHED */
 }
 
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 4dd70cf..c93e5f7 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -609,10 +609,13 @@ real_mode:	/* assume normal blr return */
 
 
 /*
- * kexec_sequence(newstack, start, image, control, clear_all())
+ * kexec_sequence(newstack, start, image, control, clear_all(), kdump_flag)
  *
  * does the grungy work with stack switching and real mode switches
  * also does simple calls to other code
+ *
+ * kdump_flag says whether the next kernel should be running at the reserved
+ * load address as needed for relocatable kdump kernel
  */
 
 _GLOBAL(kexec_sequence)
@@ -645,7 +648,7 @@ _GLOBAL(kexec_sequence)
 	mr	r29,r5			/* image (virt) */
 	mr	r28,r6			/* control, unused */
 	mr	r27,r7			/* clear_all() fn desc */
-	mr	r26,r8			/* spare */
+	mr	r26,r8			/* kdump flag */
 	lhz	r25,PACAHWCPUID(r13)	/* get our phys cpu from paca */
 
 	/* disable interrupts, we are overwriting kernel data next */
@@ -707,5 +710,6 @@ _GLOBAL(kexec_sequence)
 	mr	r4,r30	# start, aka phys mem offset
 	mtlr	4
 	li	r5,0
-	blr	/* image->start(physid, image->start, 0); */
+	mr	r6,r26			/* kdump_flag */
+	blr	/* image->start(physid, image->start, 0, kdump_flag); */
 #endif /* CONFIG_KEXEC */

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] Support for relocatable kdump kernel
  2008-10-12 23:34 Mohan Kumar M
@ 2008-10-13  1:30 ` Paul Mackerras
  2008-10-16 10:33   ` Mohan Kumar M
  0 siblings, 1 reply; 27+ messages in thread
From: Paul Mackerras @ 2008-10-13  1:30 UTC (permalink / raw)
  To: mohan; +Cc: linuxppc-dev, kexec

Mohan Kumar M writes:

> Support for relocatable kdump kernel

> @@ -1401,9 +1414,21 @@ _STATIC(__after_prom_start)
>  	beq	9f			/* have already put us at zero */
>  	li	r6,0x100		/* Start offset, the first 0x100 */
>  					/* bytes were copied earlier.	 */
> -#ifdef CONFIG_RELOCATABLE
> +
> +#ifdef CONFIG_CRASH_DUMP

What tree is this patch against?  No tree that I can find has a line
saying "#ifdef CONFIG_RELOCATABLE" at that point.  Or is there a
prerequisite patch that I haven't seen yet?

Paul.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] Support for relocatable kdump kernel
  2008-10-13  1:30 ` Paul Mackerras
@ 2008-10-16 10:33   ` Mohan Kumar M
  0 siblings, 0 replies; 27+ messages in thread
From: Mohan Kumar M @ 2008-10-16 10:33 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev, kexec

Paul Mackerras wrote:
> Mohan Kumar M writes:
> 
>> Support for relocatable kdump kernel
> 
>> @@ -1401,9 +1414,21 @@ _STATIC(__after_prom_start)
>>  	beq	9f			/* have already put us at zero */
>>  	li	r6,0x100		/* Start offset, the first 0x100 */
>>  					/* bytes were copied earlier.	 */
>> -#ifdef CONFIG_RELOCATABLE
>> +
>> +#ifdef CONFIG_CRASH_DUMP
> 
> What tree is this patch against?  No tree that I can find has a line
> saying "#ifdef CONFIG_RELOCATABLE" at that point.  Or is there a
> prerequisite patch that I haven't seen yet?

Hi Paul,

My patch is based on PATCH 5 of your relocatable kernel patchset (which 
demonstrates that the kernel can be relocatable).

Do I need to resend the patch by generating the diff with your patch5 also?

Regards,
Mohan.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] Support for relocatable kdump kernel
       [not found] <18684.5062.154465.668614@drongo.ozlabs.ibm.com>
@ 2008-10-20  6:43 ` Michael Ellerman
  2008-10-20  9:34   ` Mohan Kumar M
  0 siblings, 1 reply; 27+ messages in thread
From: Michael Ellerman @ 2008-10-20  6:43 UTC (permalink / raw)
  To: Mohan Kumar; +Cc: linuxppc-dev list, kexec

[-- Attachment #1: Type: text/plain, Size: 13832 bytes --]

> > -------- Forwarded Message --------
> > From: Mohan Kumar M <mohan@in.ibm.com>
> > To: paulus@samba.org
> > Cc: linuxppc-dev@ozlabs.org, kexec@lists.infradead.org
> > Subject: [PATCH] Support for relocatable kdump kernel
> > Date: Mon, 13 Oct 2008 05:04:20 +0530
> > 
> > Support for relocatable kdump kernel
> > 
> > This patch adds relocatable kernel support for kdump. With this one can
> > use the same regular kernel to capture the kdump. A signature (0xfeed1234)
> > is passed in r8 from panic code to the next kernel through kexec_sequence
> > and purgatory code. The signature is used to differentiate between
> > relocatable kdump kernel and non-kdump kernels.

You should put a big fat warning here in the changelog. By changing the
calling sequence (adding to it), we now require that for a new kernel to
work as a kdump kernel it has to be loaded with new kexec tools.

> > The purgatory code compares the signature and sets the __kdump_flag in
> > head_64.S.  During the boot up, kernel code checks __kdump_flag and if it
> > is set, the kernel will behave as relocatable kdump kernel. This kernel
> > will boot at the address where it was loaded by kexec-tools ie at the
> > address reserved through crashkernel boot parameter
> > 
> > CONFIG_CRASH_DUMP depends on CONFIG_RELOCATABLE option to build kdump
> > kernel as relocatable. So the same kernel can be used as production and
> > kdump kernel.

Those two statements aren't really related. A CONFIG_RELOCATABLE kernel
can be used as both a kdump and a normal kernel, and we need to make
sure that a CONFIG_CRASH_DUMP kernel can be used as both - ie. that
there's no code that uses CONFIG_CRASH_DUMP to do anything we /don't/
want in a normal kernel.

> > This patch incorporates the changes suggested by Paul Mackerrass to avoid
> > GOT use and to avoid two copies of the code.
> > 
> > Signed-off-by: Mohan Kumar M <mohan@in.ibm.com>

> > diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> > index 0705040..3f4bc84 100644
> > --- a/Documentation/kdump/kdump.txt
> > +++ b/Documentation/kdump/kdump.txt
> > @@ -109,7 +109,8 @@ There are two possible methods of using Kdump.
> >  2) Or use the system kernel binary itself as dump-capture kernel and there is
> >     no need to build a separate dump-capture kernel. This is possible
> >     only with the architecutres which support a relocatable kernel. As
> > -   of today, i386, x86_64 and ia64 architectures support relocatable kernel.
> > +   of today, i386, x86_64, ppc64 and ia64 architectures support relocatable
> > +   kernel.

This is a little bit unclear as the kernel now doesn't have a ppc64
architecture. You might want to say "64-bit powerpc (ppc64)", because
that matches the kernel arch and also kexec-tools (which still has
ppc32/64 IIRC)

> >  
> >  Building a relocatable kernel is advantageous from the point of view that
> >  one does not have to build a second kernel for capturing the dump. But
> > @@ -207,8 +208,15 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
> >  Dump-capture kernel config options (Arch Dependent, ppc64)
> >  ----------------------------------------------------------
> >  
> > -*  Make and install the kernel and its modules. DO NOT add this kernel
> > -   to the boot loader configuration files.
> > +1) Enable "Build a kdump crash kernel" support under "Kernel" options:
> > +
> > +   CONFIG_CRASH_DUMP=y
> > +
> > +2)   Enable "Build a relocatable kernel" support
> > +
> > +   CONFIG_RELOCATABLE=y
> > +
> > +   Make and install the kernel and its modules.
> >  
> >  Dump-capture kernel config options (Arch Dependent, ia64)
> >  ----------------------------------------------------------
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index 17c988b..6b3e840 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -321,11 +321,11 @@ config KEXEC
> >  
> >  config CRASH_DUMP
> >  	bool "Build a kdump crash kernel"
> > -	depends on PPC_MULTIPLATFORM && PPC64
> > +	depends on PPC_MULTIPLATFORM && PPC64 && RELOCATABLE
> >  	help
> >  	  Build a kernel suitable for use as a kdump capture kernel.
> > -	  The kernel will be linked at a different address than normal, and
> > -	  so can only be used for Kdump.
> > +	  The same kernel binary can be used as production kernel and dump capture
> > +	  kernel
> >  
> >  	  Don't change this unless you know what you are doing.
> >  
> > @@ -824,11 +824,9 @@ config PAGE_OFFSET
> >  	default "0xc000000000000000"
> >  config KERNEL_START
> >  	hex
> > -	default "0xc000000002000000" if CRASH_DUMP
> >  	default "0xc000000000000000"
> >  config PHYSICAL_START
> >  	hex
> > -	default "0x02000000" if CRASH_DUMP
> >  	default "0x00000000"
> >  endif
> >  
> > diff --git a/arch/powerpc/include/asm/kdump.h b/arch/powerpc/include/asm/kdump.h
> > index f6c93c7..5308754 100644
> > --- a/arch/powerpc/include/asm/kdump.h
> > +++ b/arch/powerpc/include/asm/kdump.h
> > @@ -9,6 +9,12 @@
> >   * Reserve to the end of the FWNMI area, see head_64.S */
> >  #define KDUMP_RESERVE_LIMIT	0x10000 /* 64K */
> >  
> > +/*
> > + * Used to differentiate between relocatable kdump kernel and other
> > + * kernels
> > + */
> > +#define KDUMP_SIGNATURE	0xfeed1234
> > +
> >  #ifdef CONFIG_CRASH_DUMP
> >  
> >  #define KDUMP_TRAMPOLINE_START	0x0100
> > @@ -19,11 +25,21 @@
> >  #endif /* CONFIG_CRASH_DUMP */
> >  
> >  #ifndef __ASSEMBLY__
> > +
> > +extern unsigned long long __kdump_flag;

Why long long ?

> >  #ifdef CONFIG_CRASH_DUMP
> > +#ifdef CONFIG_RELOCATABLE
> > +
> > +static inline void reserve_kdump_trampoline(void) { ; }
> > +static inline void setup_kdump_trampoline(void) { ; }
> > +
> > +#else
> >  
> >  extern void reserve_kdump_trampoline(void);
> >  extern void setup_kdump_trampoline(void);
> >  
> > +#endif /* CONFIG_RELOCATABLE */

You've disabled the else case with your Kconfig changes, so you should
just rip all that code out.

> >  static inline void reserve_kdump_trampoline(void) { ; }
> > diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
> > index a323c9b..eaf9d6d 100644
> > --- a/arch/powerpc/kernel/crash_dump.c
> > +++ b/arch/powerpc/kernel/crash_dump.c
> > @@ -27,6 +27,7 @@
> >  #define DBG(fmt...)
> >  #endif
> >  
> > +#ifndef CONFIG_RELOCATABLE
> >  void __init reserve_kdump_trampoline(void)
> >  {
> >  	lmb_reserve(0, KDUMP_RESERVE_LIMIT);
> > @@ -65,6 +66,7 @@ void __init setup_kdump_trampoline(void)
> >  
> >  	DBG(" <- setup_kdump_trampoline()\n");
> >  }
> > +#endif /* CONFIG_RELOCATABLE */
> >  
> >  #ifdef CONFIG_PROC_VMCORE
> >  static int __init parse_elfcorehdr(char *p)
> > diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
> > index e409338..5b12b10 100644
> > --- a/arch/powerpc/kernel/head_64.S
> > +++ b/arch/powerpc/kernel/head_64.S
> > @@ -97,6 +97,14 @@ __secondary_hold_spinloop:
> >  __secondary_hold_acknowledge:
> >  	.llong	0x0
> >  
> > +	/* This flag is set only for kdump kernels so that */
> > +	/* it will be relocatable. Purgatory code user space kexec-tools */
> > +	/* sets this flag. Do not move this variable as purgatory code */
> > +	/* relies on the position of this variables */
> > +	.globl	__kdump_flag
> > +__kdump_flag:
> > +	.llong	0x0

I guess the __ matches the other flags here, it's not the prettiest
though. For client code (like in iommu.c) it'd be nice to have static
inline, perhaps is_kdump_kernel() that hides this.

> >  #ifdef CONFIG_PPC_ISERIES
> >  	/*
> >  	 * At offset 0x20, there is a pointer to iSeries LPAR data.
> > @@ -1384,8 +1392,13 @@ _STATIC(__after_prom_start)
> >  	/* process relocations for the final address of the kernel */
> >  	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
> >  	sldi	r25,r25,32
> > +#ifdef CONFIG_CRASH_DUMP
> > +	ld	r7,__kdump_flag-_stext(r26)
> > +	cmpldi	cr0,r7,1	/* relocatable kernel ? */

You don't use the signature here?

> > +	bne	1f
> >  	add	r25,r25,r26
> > -	mr	r3,r25
> > +#endif
> > +1:	mr	r3,r25
> >  	bl	.relocate
> >  #endif
> >  
> > @@ -1401,9 +1414,21 @@ _STATIC(__after_prom_start)
> >  	beq	9f			/* have already put us at zero */
> >  	li	r6,0x100		/* Start offset, the first 0x100 */
> >  					/* bytes were copied earlier.	 */
> > -#ifdef CONFIG_RELOCATABLE
> > +
> > +#ifdef CONFIG_CRASH_DUMP
> > +/*
> > + * Check if the kernel has to be running as relocatable kernel based on the
> > + * variable __kdump_flag, if it is set the kernel is treated as relocatble
> > + * kernel, otherwise it will be moved to PHYSICAL_START
> > + */
> > +	ld	r7,__kdump_flag-_stext(r26)
> > +	cmpldi	cr0,r7,1
> > +	bne	regular
> > +
> >  	li	r5,__end_interrupts - _stext	/* just copy interrupts */
> > -#else
> > +	b	5f
> > +regular:
> > +#endif
> >  	lis	r5,(copy_to_here - _stext)@ha
> >  	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */

I'm jet lagged to hell, so I'm not sure I can trust my parsing of this.
But I think this definitely breaks CONFIG_RELOCATABLE without
CRASH_DUMP, and I'm not sure it's right otherwise.

> > @@ -1420,8 +1445,7 @@ p_end:	.llong	_end - _stext
> >  4:	/* Now copy the rest of the kernel up to _end */
> >  	addis	r5,r26,(p_end - _stext)@ha
> >  	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
> > -#endif
> > -	bl	.copy_and_flush		/* copy the rest */
> > +5:	bl	.copy_and_flush		/* copy the rest */
> >  
> >  9:	b	.start_here_multiplatform
> >  
> > diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> > index 550a193..24f7797 100644
> > --- a/arch/powerpc/kernel/iommu.c
> > +++ b/arch/powerpc/kernel/iommu.c
> > @@ -494,7 +494,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
> >  	spin_lock_init(&tbl->it_lock);
> >  
> >  #ifdef CONFIG_CRASH_DUMP
> > -	if (ppc_md.tce_get) {
> > +	if (ppc_md.tce_get && __kdump_flag) {
> >  		unsigned long index;
> >  		unsigned long tceval;
> >  		unsigned long tcecount = 0;


I see more code that needs this sort of treatment in pseries/iommu.c and
cell/ras.c

> > diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
> > index aab7688..ac2a21f 100644
> > --- a/arch/powerpc/kernel/machine_kexec.c
> > +++ b/arch/powerpc/kernel/machine_kexec.c
> > @@ -88,11 +88,13 @@ void __init reserve_crashkernel(void)
> >  
> >  	crash_size = crashk_res.end - crashk_res.start + 1;
> >  
> > +#ifndef CONFIG_RELOCATABLE
> >  	if (crashk_res.start != KDUMP_KERNELBASE)
> >  		printk("Crash kernel location must be 0x%x\n",
> >  				KDUMP_KERNELBASE);

We still need code here for the RELOCATABLE case that checks a) the
kernel is being allocated inside the RMO, and b) that it's 64k aligned.

> >  
> >  	crashk_res.start = KDUMP_KERNELBASE;
> > +#endif
> >  	crash_size = PAGE_ALIGN(crash_size);
> >  	crashk_res.end = crashk_res.start + crash_size - 1;
> >  
> > diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
> > index a168514..6a45a9e 100644
> > --- a/arch/powerpc/kernel/machine_kexec_64.c
> > +++ b/arch/powerpc/kernel/machine_kexec_64.c
> > @@ -255,11 +255,13 @@ static union thread_union kexec_stack
> >  /* Our assembly helper, in kexec_stub.S */
> >  extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
> >  					void *image, void *control,
> > -					void (*clear_all)(void)) ATTRIB_NORET;
> > +					void (*clear_all)(void),
> > +				unsigned long long kdump_flag) ATTRIB_NORET;
> >  
> >  /* too late to fail here */
> >  void default_machine_kexec(struct kimage *image)
> >  {
> > +	unsigned long long kdump_flag = 0;
> >  	/* prepare control code if any */
> >  
> >  	/*
> > @@ -270,8 +272,10 @@ void default_machine_kexec(struct kimage *image)
> >          * using debugger IPI.
> >          */
> >  
> > -       if (crashing_cpu == -1)
> > -               kexec_prepare_cpus();
> > +	if (crashing_cpu == -1)
> > +		kexec_prepare_cpus();
> > +	else
> > +		kdump_flag = KDUMP_SIGNATURE;
> >  
> >  	/* switch to a staticly allocated stack.  Based on irq stack code.
> >  	 * XXX: the task struct will likely be invalid once we do the copy!
> > @@ -284,7 +288,7 @@ void default_machine_kexec(struct kimage *image)
> >  	 */
> >  	kexec_sequence(&kexec_stack, image->start, image,
> >  			page_address(image->control_code_page),
> > -			ppc_md.hpte_clear_all);
> > +			ppc_md.hpte_clear_all, kdump_flag);
> >  	/* NOTREACHED */
> >  }
> >  
> > diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
> > index 4dd70cf..c93e5f7 100644
> > --- a/arch/powerpc/kernel/misc_64.S
> > +++ b/arch/powerpc/kernel/misc_64.S
> > @@ -609,10 +609,13 @@ real_mode:	/* assume normal blr return */
> >  
> > 
> >  /*
> > - * kexec_sequence(newstack, start, image, control, clear_all())
> > + * kexec_sequence(newstack, start, image, control, clear_all(), kdump_flag)
> >   *
> >   * does the grungy work with stack switching and real mode switches
> >   * also does simple calls to other code
> > + *
> > + * kdump_flag says whether the next kernel should be running at the reserved
> > + * load address as needed for relocatable kdump kernel
> >   */

Doesn't it just say "we crashed in the first kernel" - what the 2nd
kernel does is up to it.

cheers

-- 
Michael Ellerman
OzLabs, IBM Australia Development Lab

wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)

We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] Support for relocatable kdump kernel
  2008-10-20  6:43 ` Michael Ellerman
@ 2008-10-20  9:34   ` Mohan Kumar M
  2008-10-21  6:03     ` Michael Ellerman
  0 siblings, 1 reply; 27+ messages in thread
From: Mohan Kumar M @ 2008-10-20  9:34 UTC (permalink / raw)
  To: michael; +Cc: linuxppc-dev list, kexec

Michael Ellerman wrote:
>>> -------- Forwarded Message --------
>>> The purgatory code compares the signature and sets the __kdump_flag in
>>> head_64.S.  During the boot up, kernel code checks __kdump_flag and if it
>>> is set, the kernel will behave as relocatable kdump kernel. This kernel
>>> will boot at the address where it was loaded by kexec-tools ie at the
>>> address reserved through crashkernel boot parameter
>>>
>>> CONFIG_CRASH_DUMP depends on CONFIG_RELOCATABLE option to build kdump
>>> kernel as relocatable. So the same kernel can be used as production and
>>> kdump kernel.
> 
> Those two statements aren't really related. A CONFIG_RELOCATABLE kernel
> can be used as both a kdump and a normal kernel, and we need to make
> sure that a CONFIG_CRASH_DUMP kernel can be used as both - ie. that
> there's no code that uses CONFIG_CRASH_DUMP to do anything we /don't/
> want in a normal kernel.
> 
Hi Mike,

Thank you very much for the detailed review.

For 64 bit powerpc, we are going to support only relocatable kdump 
kernel (as per Paulus' suggestions). To enable CRASH_DUMP one needs 
CONFIG_RELOCATABLE to be enabled first.

>>>  #ifdef CONFIG_CRASH_DUMP
>>> +#ifdef CONFIG_RELOCATABLE
>>> +
>>> +static inline void reserve_kdump_trampoline(void) { ; }
>>> +static inline void setup_kdump_trampoline(void) { ; }
>>> +
>>> +#else
>>>  
>>>  extern void reserve_kdump_trampoline(void);
>>>  extern void setup_kdump_trampoline(void);
>>>  
>>> +#endif /* CONFIG_RELOCATABLE */
> 
> You've disabled the else case with your Kconfig changes, so you should
> just rip all that code out.

I made Kconfig changes only to the 64 bit powerpc path and still the 32 
bit powerpc code uses the legacy kdump code. So we need to retain some 
of legacy kdump code.

> 
>>>  static inline void reserve_kdump_trampoline(void) { ; }
>>> diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
>>> index a323c9b..eaf9d6d 100644
>>> --- a/arch/powerpc/kernel/crash_dump.c
>>> +++ b/arch/powerpc/kernel/crash_dump.c
>>> @@ -27,6 +27,7 @@
>>>  #define DBG(fmt...)
>>>  #endif
>>>  
>>> +#ifndef CONFIG_RELOCATABLE
>>>  void __init reserve_kdump_trampoline(void)
>>>  {
>>>  	lmb_reserve(0, KDUMP_RESERVE_LIMIT);
>>> @@ -65,6 +66,7 @@ void __init setup_kdump_trampoline(void)
>>>  
>>>  	DBG(" <- setup_kdump_trampoline()\n");
>>>  }
>>> +#endif /* CONFIG_RELOCATABLE */
>>>  
>>>  #ifdef CONFIG_PROC_VMCORE
>>>  static int __init parse_elfcorehdr(char *p)
>>> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
>>> index e409338..5b12b10 100644
>>> --- a/arch/powerpc/kernel/head_64.S
>>> +++ b/arch/powerpc/kernel/head_64.S
>>> @@ -97,6 +97,14 @@ __secondary_hold_spinloop:
>>>  __secondary_hold_acknowledge:
>>>  	.llong	0x0
>>>  
>>> +	/* This flag is set only for kdump kernels so that */
>>> +	/* it will be relocatable. Purgatory code user space kexec-tools */
>>> +	/* sets this flag. Do not move this variable as purgatory code */
>>> +	/* relies on the position of this variables */
>>> +	.globl	__kdump_flag
>>> +__kdump_flag:
>>> +	.llong	0x0
> 
> I guess the __ matches the other flags here, it's not the prettiest
> though. For client code (like in iommu.c) it'd be nice to have static
> inline, perhaps is_kdump_kernel() that hides this.
> 
Do you expect a function to do the checking in iommu.c?

>>>  #ifdef CONFIG_PPC_ISERIES
>>>  	/*
>>>  	 * At offset 0x20, there is a pointer to iSeries LPAR data.
>>> @@ -1384,8 +1392,13 @@ _STATIC(__after_prom_start)
>>>  	/* process relocations for the final address of the kernel */
>>>  	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
>>>  	sldi	r25,r25,32
>>> +#ifdef CONFIG_CRASH_DUMP
>>> +	ld	r7,__kdump_flag-_stext(r26)
>>> +	cmpldi	cr0,r7,1	/* relocatable kernel ? */
> 
> You don't use the signature here?

kexec-tools checks the signature and, based on the signature, it sets 
__kdump_flag to 1 (or 0). So the kernel code just checks whether it's set or not.

> 
>>> +	bne	1f
>>>  	add	r25,r25,r26
>>> -	mr	r3,r25
>>> +#endif
>>> +1:	mr	r3,r25
>>>  	bl	.relocate
>>>  #endif
>>>  
>>> @@ -1401,9 +1414,21 @@ _STATIC(__after_prom_start)
>>>  	beq	9f			/* have already put us at zero */
>>>  	li	r6,0x100		/* Start offset, the first 0x100 */
>>>  					/* bytes were copied earlier.	 */
>>> -#ifdef CONFIG_RELOCATABLE
>>> +
>>> +#ifdef CONFIG_CRASH_DUMP
>>> +/*
>>> + * Check if the kernel has to be running as relocatable kernel based on the
>>> + * variable __kdump_flag, if it is set the kernel is treated as relocatble
>>> + * kernel, otherwise it will be moved to PHYSICAL_START
>>> + */
>>> +	ld	r7,__kdump_flag-_stext(r26)
>>> +	cmpldi	cr0,r7,1
>>> +	bne	regular
>>> +
>>>  	li	r5,__end_interrupts - _stext	/* just copy interrupts */
>>> -#else
>>> +	b	5f
>>> +regular:
>>> +#endif
>>>  	lis	r5,(copy_to_here - _stext)@ha
>>>  	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
> 
> I'm jet lagged to hell, so I'm not sure I can trust my parsing of this.
> But I think this definitely breaks CONFIG_RELOCATABLE without
> CRASH_DUMP, and I'm not sure it's right otherwise.
>
Hmmm, I compiled and tried the kernel with 3 config option combinations: 
1. CONFIG_RELOCATABLE and CONFIG_CRASH_DUMP 2. CONFIG_RELOCATABLE 3. 
Without CONFIG_RELOCATABLE (without CONFIG_CRASH_DUMP)

All of the above 3 combinations worked. This patch relies on Paul's 
patch 5 in the relocatable kernel patchset.

Regards,
Mohan.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] Support for relocatable kdump kernel
  2008-10-20  9:34   ` Mohan Kumar M
@ 2008-10-21  6:03     ` Michael Ellerman
  2008-10-21 18:21       ` Mohan Kumar M
  0 siblings, 1 reply; 27+ messages in thread
From: Michael Ellerman @ 2008-10-21  6:03 UTC (permalink / raw)
  To: Mohan Kumar M; +Cc: linuxppc-dev list, kexec

[-- Attachment #1: Type: text/plain, Size: 4568 bytes --]

On Mon, 2008-10-20 at 15:04 +0530, Mohan Kumar M wrote:
> Michael Ellerman wrote:
> >>>  #ifdef CONFIG_CRASH_DUMP
> >>> +#ifdef CONFIG_RELOCATABLE
> >>> +
> >>> +static inline void reserve_kdump_trampoline(void) { ; }
> >>> +static inline void setup_kdump_trampoline(void) { ; }
> >>> +
> >>> +#else
> >>>  
> >>>  extern void reserve_kdump_trampoline(void);
> >>>  extern void setup_kdump_trampoline(void);
> >>>  
> >>> +#endif /* CONFIG_RELOCATABLE */
> > 
> > You've disabled the else case with your Kconfig changes, so you should
> > just rip all that code out.
> 
> I made Kconfig changes only to the 64 bit powerpc path and still the 32 
> bit powerpc code uses the legacy kdump code. So we need to retain some 
> of legacy kdump code.

Does it? I see CONFIG_CRASH_DUMP depending on PPC64, so there is no
32-bit kdump possible. Or is someone working on it out-of-tree?

> >>> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
> >>> index e409338..5b12b10 100644
> >>> --- a/arch/powerpc/kernel/head_64.S
> >>> +++ b/arch/powerpc/kernel/head_64.S
> >>> @@ -97,6 +97,14 @@ __secondary_hold_spinloop:
> >>>  __secondary_hold_acknowledge:
> >>>  	.llong	0x0
> >>>  
> >>> +	/* This flag is set only for kdump kernels so that */
> >>> +	/* it will be relocatable. Purgatory code user space kexec-tools */
> >>> +	/* sets this flag. Do not move this variable as purgatory code */
> >>> +	/* relies on the position of this variables */
> >>> +	.globl	__kdump_flag
> >>> +__kdump_flag:
> >>> +	.llong	0x0
> > 
> > I guess the __ matches the other flags here, it's not the prettiest
> > though. For client code (like in iommu.c) it'd be nice to have static
> > inline, perhaps is_kdump_kernel() that hides this.
> > 
> Do you expect a function to do the checking in iommu.c?

You'd use the function in iommu.c, but it should be defined in some
header.

> >>>  #ifdef CONFIG_PPC_ISERIES
> >>>  	/*
> >>>  	 * At offset 0x20, there is a pointer to iSeries LPAR data.
> >>> @@ -1384,8 +1392,13 @@ _STATIC(__after_prom_start)
> >>>  	/* process relocations for the final address of the kernel */
> >>>  	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
> >>>  	sldi	r25,r25,32
> >>> +#ifdef CONFIG_CRASH_DUMP
> >>> +	ld	r7,__kdump_flag-_stext(r26)
> >>> +	cmpldi	cr0,r7,1	/* relocatable kernel ? */
> > 
> > You don't use the signature here?
> 
> kexec-tools check the signature and based on the signature it sets 
> __kdump_flag to 1 (or 0). So kernel code just checks whether its set or not.

OK. Does old purgatory ensure that the register is 0? Otherwise I think
it's possible that a new kernel could get confused by cruft left in that
register by an old purgatory - causing the 2nd kernel to think it's a
kdump kernel when it shouldn't be.

> >>> @@ -1401,9 +1414,21 @@ _STATIC(__after_prom_start)
> >>>  	beq	9f			/* have already put us at zero */
> >>>  	li	r6,0x100		/* Start offset, the first 0x100 */
> >>>  					/* bytes were copied earlier.	 */
> >>> -#ifdef CONFIG_RELOCATABLE
> >>> +
> >>> +#ifdef CONFIG_CRASH_DUMP
> >>> +/*
> >>> + * Check if the kernel has to be running as relocatable kernel based on the
> >>> + * variable __kdump_flag, if it is set the kernel is treated as relocatble
> >>> + * kernel, otherwise it will be moved to PHYSICAL_START
> >>> + */
> >>> +	ld	r7,__kdump_flag-_stext(r26)
> >>> +	cmpldi	cr0,r7,1
> >>> +	bne	regular
> >>> +
> >>>  	li	r5,__end_interrupts - _stext	/* just copy interrupts */
> >>> -#else
> >>> +	b	5f
> >>> +regular:
> >>> +#endif
> >>>  	lis	r5,(copy_to_here - _stext)@ha
> >>>  	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
> > 
> > I'm jet lagged to hell, so I'm not sure I can trust my parsing of this.
> > But I think this definitely breaks CONFIG_RELOCATABLE without
> > CRASH_DUMP, and I'm not sure it's right otherwise.
> >
> Hmmm, I compiled and tried the kernel with 3 config option combinations: 
> 1. CONFIG_RELOCATABLE and CONFIG_CRASH_DUMP 2. CONFIG_RELOCATABLE 3. 
> Without CONFIG_RELOCATABLE (without CONFIG_CRASH_DUMP)
> 
> All of the above 3 combinations worked. This patch relies on Pauls' 
> patch5 in the relocatable kernel patcheset.

OK if you've tested.

cheers

-- 
Michael Ellerman
OzLabs, IBM Australia Development Lab

wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)

We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] Support for relocatable kdump kernel
  2008-10-21  6:03     ` Michael Ellerman
@ 2008-10-21 18:21       ` Mohan Kumar M
  0 siblings, 0 replies; 27+ messages in thread
From: Mohan Kumar M @ 2008-10-21 18:21 UTC (permalink / raw)
  To: michael; +Cc: linuxppc-dev list, kexec

Michael Ellerman wrote:

> Does it? I see CONFIG_CRASH_DUMP depending on PPC64, so there is no
> 32-bit kdump possible. Or is someone working on it out-of-tree?
> 

IIUC Anton Vorontsov is working on the 32-bit kdump kernel support.

>> Do you expect a function to do the checking in iommu.c?
> 
> You'd use the function in iommu.c, but it should be defined in some
> header.
> 
Yeah, I will do that.

> OK. Does old purgatory ensure that the register is 0? Otherwise I think
> it's possible that a new kernel could get confused by cruft left in that
> register by an old purgatory - causing the 2nd kernel to think it's a
> kdump kernel when it shouldn't be.

__kdump_flag is 0 by default, and old purgatory code won't even know 
that it needs to modify __kdump_flag. So unless __kdump_flag is 1, the 
kernel will behave as a normal one.

Regards,
Mohan.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH] Support for relocatable kdump kernel
@ 2008-10-22  3:38 Michael Ellerman
  0 siblings, 0 replies; 27+ messages in thread
From: Michael Ellerman @ 2008-10-22  3:38 UTC (permalink / raw)
  To: linuxppc-dev

From: Mohan Kumar M <mohan@in.ibm.com>

This adds relocatable kernel support for kdump. With this one can
use the same regular kernel to capture the kdump. A signature (0xfeed1234)
is passed in r6 from panic code to the next kernel through kexec_sequence
and purgatory code. The signature is used to differentiate between
kdump kernel and non-kdump kernels.

The purgatory code compares the signature and sets the __kdump_flag in
head_64.S.  During the boot up, kernel code checks __kdump_flag and if it
is set, the kernel will behave as relocatable kdump kernel. This kernel
will boot at the address where it was loaded by kexec-tools ie. at the
address reserved through crashkernel boot parameter.

CONFIG_CRASH_DUMP depends on CONFIG_RELOCATABLE option to build kdump
kernel as relocatable. So the same kernel can be used as production and
kdump kernel.

This patch incorporates the changes suggested by Paul Mackerras to avoid
GOT use and to avoid two copies of the code.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Mohan Kumar M <mohan@in.ibm.com>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---
 Documentation/kdump/kdump.txt          |   14 +++++-
 arch/powerpc/Kconfig                   |   10 +---
 arch/powerpc/include/asm/kdump.h       |   17 ++++++--
 arch/powerpc/kernel/crash_dump.c       |    2 +
 arch/powerpc/kernel/head_64.S          |   39 +++++++++++++++---
 arch/powerpc/kernel/iommu.c            |   69 +++++++++++++++++---------------
 arch/powerpc/kernel/machine_kexec.c    |    2 +
 arch/powerpc/kernel/machine_kexec_64.c |   13 ++++--
 arch/powerpc/kernel/misc_64.S          |    9 +++-
 arch/powerpc/platforms/cell/ras.c      |    6 +-
 arch/powerpc/platforms/pseries/iommu.c |    6 +-
 11 files changed, 121 insertions(+), 66 deletions(-)

diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 0705040..3f4bc84 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -109,7 +109,8 @@ There are two possible methods of using Kdump.
 2) Or use the system kernel binary itself as dump-capture kernel and there is
    no need to build a separate dump-capture kernel. This is possible
    only with the architecutres which support a relocatable kernel. As
-   of today, i386, x86_64 and ia64 architectures support relocatable kernel.
+   of today, i386, x86_64, ppc64 and ia64 architectures support relocatable
+   kernel.
 
 Building a relocatable kernel is advantageous from the point of view that
 one does not have to build a second kernel for capturing the dump. But
@@ -207,8 +208,15 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
 Dump-capture kernel config options (Arch Dependent, ppc64)
 ----------------------------------------------------------
 
-*  Make and install the kernel and its modules. DO NOT add this kernel
-   to the boot loader configuration files.
+1) Enable "Build a kdump crash kernel" support under "Kernel" options:
+
+   CONFIG_CRASH_DUMP=y
+
+2)   Enable "Build a relocatable kernel" support
+
+   CONFIG_RELOCATABLE=y
+
+   Make and install the kernel and its modules.
 
 Dump-capture kernel config options (Arch Dependent, ia64)
 ----------------------------------------------------------
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 369d93e..5b15278 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -323,13 +323,11 @@ config KEXEC
 
 config CRASH_DUMP
 	bool "Build a kdump crash kernel"
-	depends on PPC_MULTIPLATFORM && PPC64
+	depends on PPC_MULTIPLATFORM && PPC64 && RELOCATABLE
 	help
 	  Build a kernel suitable for use as a kdump capture kernel.
-	  The kernel will be linked at a different address than normal, and
-	  so can only be used for Kdump.
-
-	  Don't change this unless you know what you are doing.
+	  The same kernel binary can be used as production kernel and dump
+	  capture kernel.
 
 config PHYP_DUMP
 	bool "Hypervisor-assisted dump (EXPERIMENTAL)"
@@ -829,11 +827,9 @@ config PAGE_OFFSET
 	default "0xc000000000000000"
 config KERNEL_START
 	hex
-	default "0xc000000002000000" if CRASH_DUMP
 	default "0xc000000000000000"
 config PHYSICAL_START
 	hex
-	default "0x02000000" if CRASH_DUMP
 	default "0x00000000"
 endif
 
diff --git a/arch/powerpc/include/asm/kdump.h b/arch/powerpc/include/asm/kdump.h
index f6c93c7..a503da9 100644
--- a/arch/powerpc/include/asm/kdump.h
+++ b/arch/powerpc/include/asm/kdump.h
@@ -9,6 +9,12 @@
  * Reserve to the end of the FWNMI area, see head_64.S */
 #define KDUMP_RESERVE_LIMIT	0x10000 /* 64K */
 
+/*
+ * Used to differentiate between relocatable kdump kernel and other
+ * kernels
+ */
+#define KDUMP_SIGNATURE	0xfeed1234
+
 #ifdef CONFIG_CRASH_DUMP
 
 #define KDUMP_TRAMPOLINE_START	0x0100
@@ -19,17 +25,18 @@
 #endif /* CONFIG_CRASH_DUMP */
 
 #ifndef __ASSEMBLY__
-#ifdef CONFIG_CRASH_DUMP
 
+extern unsigned long __kdump_flag;
+
+#if defined(CONFIG_CRASH_DUMP) && !defined(CONFIG_RELOCATABLE)
 extern void reserve_kdump_trampoline(void);
 extern void setup_kdump_trampoline(void);
-
-#else /* !CONFIG_CRASH_DUMP */
-
+#else
+/* !CRASH_DUMP || RELOCATABLE */
 static inline void reserve_kdump_trampoline(void) { ; }
 static inline void setup_kdump_trampoline(void) { ; }
+#endif
 
-#endif /* CONFIG_CRASH_DUMP */
 #endif /* __ASSEMBLY__ */
 
 #endif /* __PPC64_KDUMP_H */
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 97e0563..19671ac 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -30,6 +30,7 @@
 /* Stores the physical address of elf header of crash image. */
 unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
 
+#ifndef CONFIG_RELOCATABLE
 void __init reserve_kdump_trampoline(void)
 {
 	lmb_reserve(0, KDUMP_RESERVE_LIMIT);
@@ -68,6 +69,7 @@ void __init setup_kdump_trampoline(void)
 
 	DBG(" <- setup_kdump_trampoline()\n");
 }
+#endif /* CONFIG_RELOCATABLE */
 
 /*
  * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 84856be..69489bd 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -97,6 +97,12 @@ __secondary_hold_spinloop:
 __secondary_hold_acknowledge:
 	.llong	0x0
 
+	/* This flag is set by purgatory if we should be a kdump kernel. */
+	/* Do not move this variable as purgatory knows about it. */
+	.globl	__kdump_flag
+__kdump_flag:
+	.llong	0x0
+
 #ifdef CONFIG_PPC_ISERIES
 	/*
 	 * At offset 0x20, there is a pointer to iSeries LPAR data.
@@ -1384,7 +1390,13 @@ _STATIC(__after_prom_start)
 	/* process relocations for the final address of the kernel */
 	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
 	sldi	r25,r25,32
-	mr	r3,r25
+#ifdef CONFIG_CRASH_DUMP
+	ld	r7,__kdump_flag-_stext(r26)
+	cmpldi	cr0,r7,1	/* kdump kernel ? - stay where we are */
+	bne	1f
+	add	r25,r25,r26
+#endif
+1:	mr	r3,r25
 	bl	.relocate
 #endif
 
@@ -1398,11 +1410,26 @@ _STATIC(__after_prom_start)
 	li	r3,0			/* target addr */
 	mr.	r4,r26			/* In some cases the loader may  */
 	beq	9f			/* have already put us at zero */
-	lis	r5,(copy_to_here - _stext)@ha
-	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
 	li	r6,0x100		/* Start offset, the first 0x100 */
 					/* bytes were copied earlier.	 */
 
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * Check if the kernel has to be running as relocatable kernel based on the
+ * variable __kdump_flag, if it is set the kernel is treated as relocatable
+ * kernel, otherwise it will be moved to PHYSICAL_START
+ */
+	ld	r7,__kdump_flag-_stext(r26)
+	cmpldi	cr0,r7,1
+	bne	3f
+
+	li	r5,__end_interrupts - _stext	/* just copy interrupts */
+	b	5f
+3:
+#endif
+	lis	r5,(copy_to_here - _stext)@ha
+	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
+
 	bl	.copy_and_flush		/* copy the first n bytes	 */
 					/* this includes the code being	 */
 					/* executed here.		 */
@@ -1411,15 +1438,15 @@ _STATIC(__after_prom_start)
 	mtctr	r8
 	bctr
 
+p_end:	.llong	_end - _stext
+
 4:	/* Now copy the rest of the kernel up to _end */
 	addis	r5,r26,(p_end - _stext)@ha
 	ld	r5,(p_end - _stext)@l(r5)	/* get _end */
-	bl	.copy_and_flush		/* copy the rest */
+5:	bl	.copy_and_flush		/* copy the rest */
 
 9:	b	.start_here_multiplatform
 
-p_end:	.llong	_end - _stext
-
 /*
  * Copy routine used to copy the kernel to start at physical address 0
  * and flush and invalidate the caches as needed.
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index ea1ba89..3857d7e 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -458,6 +458,42 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 	spin_unlock_irqrestore(&(tbl->it_lock), flags);
 }
 
+static void iommu_table_clear(struct iommu_table *tbl)
+{
+	if (!__kdump_flag) {
+		/* Clear the table in case firmware left allocations in it */
+		ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+		return;
+	}
+
+#ifdef CONFIG_CRASH_DUMP
+	if (ppc_md.tce_get) {
+		unsigned long index, tceval, tcecount = 0;
+
+		/* Reserve the existing mappings left by the first kernel. */
+		for (index = 0; index < tbl->it_size; index++) {
+			tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
+			/*
+			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
+			 */
+			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
+				__set_bit(index, tbl->it_map);
+				tcecount++;
+			}
+		}
+
+		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
+			printk(KERN_WARNING "TCE table is full; freeing ");
+			printk(KERN_WARNING "%d entries for the kdump boot\n",
+				KDUMP_MIN_TCE_ENTRIES);
+			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
+				index < tbl->it_size; index++)
+				__clear_bit(index, tbl->it_map);
+		}
+	}
+#endif
+}
+
 /*
  * Build a iommu_table structure.  This contains a bit map which
  * is used to manage allocation of the tce space.
@@ -484,38 +520,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 	tbl->it_largehint = tbl->it_halfpoint;
 	spin_lock_init(&tbl->it_lock);
 
-#ifdef CONFIG_CRASH_DUMP
-	if (ppc_md.tce_get) {
-		unsigned long index;
-		unsigned long tceval;
-		unsigned long tcecount = 0;
-
-		/*
-		 * Reserve the existing mappings left by the first kernel.
-		 */
-		for (index = 0; index < tbl->it_size; index++) {
-			tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
-			/*
-			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
-			 */
-			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
-				__set_bit(index, tbl->it_map);
-				tcecount++;
-			}
-		}
-		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
-			printk(KERN_WARNING "TCE table is full; ");
-			printk(KERN_WARNING "freeing %d entries for the kdump boot\n",
-				KDUMP_MIN_TCE_ENTRIES);
-			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
-				index < tbl->it_size; index++)
-				__clear_bit(index, tbl->it_map);
-		}
-	}
-#else
-	/* Clear the hardware table in case firmware left allocations in it */
-	ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
-#endif
+	iommu_table_clear(tbl);
 
 	if (!welcomed) {
 		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index aab7688..ac2a21f 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -88,11 +88,13 @@ void __init reserve_crashkernel(void)
 
 	crash_size = crashk_res.end - crashk_res.start + 1;
 
+#ifndef CONFIG_RELOCATABLE
 	if (crashk_res.start != KDUMP_KERNELBASE)
 		printk("Crash kernel location must be 0x%x\n",
 				KDUMP_KERNELBASE);
 
 	crashk_res.start = KDUMP_KERNELBASE;
+#endif
 	crash_size = PAGE_ALIGN(crash_size);
 	crashk_res.end = crashk_res.start + crash_size - 1;
 
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 4bd8b4f..e6efec7 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -255,11 +255,14 @@ static union thread_union kexec_stack
 /* Our assembly helper, in kexec_stub.S */
 extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
 					void *image, void *control,
-					void (*clear_all)(void)) ATTRIB_NORET;
+					void (*clear_all)(void),
+					unsigned long kdump_flag) ATTRIB_NORET;
 
 /* too late to fail here */
 void default_machine_kexec(struct kimage *image)
 {
+	unsigned long kdump_flag = 0;
+
 	/* prepare control code if any */
 
 	/*
@@ -270,8 +273,10 @@ void default_machine_kexec(struct kimage *image)
         * using debugger IPI.
         */
 
-       if (crashing_cpu == -1)
-               kexec_prepare_cpus();
+	if (crashing_cpu == -1)
+		kexec_prepare_cpus();
+	else
+		kdump_flag = KDUMP_SIGNATURE;
 
 	/* switch to a staticly allocated stack.  Based on irq stack code.
 	 * XXX: the task struct will likely be invalid once we do the copy!
@@ -284,7 +289,7 @@ void default_machine_kexec(struct kimage *image)
 	 */
 	kexec_sequence(&kexec_stack, image->start, image,
 			page_address(image->control_code_page),
-			ppc_md.hpte_clear_all);
+			ppc_md.hpte_clear_all, kdump_flag);
 	/* NOTREACHED */
 }
 
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 3053fe5..a243fd0 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -611,10 +611,12 @@ real_mode:	/* assume normal blr return */
 
 
 /*
- * kexec_sequence(newstack, start, image, control, clear_all())
+ * kexec_sequence(newstack, start, image, control, clear_all(), kdump_flag)
  *
  * does the grungy work with stack switching and real mode switches
  * also does simple calls to other code
+ *
+ * kdump_flag says whether the next kernel should be a kdump kernel.
  */
 
 _GLOBAL(kexec_sequence)
@@ -647,7 +649,7 @@ _GLOBAL(kexec_sequence)
 	mr	r29,r5			/* image (virt) */
 	mr	r28,r6			/* control, unused */
 	mr	r27,r7			/* clear_all() fn desc */
-	mr	r26,r8			/* spare */
+	mr	r26,r8			/* kdump flag */
 	lhz	r25,PACAHWCPUID(r13)	/* get our phys cpu from paca */
 
 	/* disable interrupts, we are overwriting kernel data next */
@@ -709,5 +711,6 @@ _GLOBAL(kexec_sequence)
 	mr	r4,r30	# start, aka phys mem offset
 	mtlr	4
 	li	r5,0
-	blr	/* image->start(physid, image->start, 0); */
+	mr	r6,r26			/* kdump_flag */
+	blr	/* image->start(physid, image->start, 0, kdump_flag); */
 #endif /* CONFIG_KEXEC */
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 2a14b05..665af1c 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -21,6 +21,7 @@
 #include <asm/machdep.h>
 #include <asm/rtas.h>
 #include <asm/cell-regs.h>
+#include <asm/kdump.h>
 
 #include "ras.h"
 
@@ -111,9 +112,8 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order)
 	int ret = -ENOMEM;
 	unsigned long addr;
 
-#ifdef CONFIG_CRASH_DUMP
-	rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
-#endif
+	if (__kdump_flag)
+		rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
 
 	area = kmalloc(sizeof(*area), GFP_KERNEL);
 	if (!area)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index a8c4466..d56491d 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -44,6 +44,7 @@
 #include <asm/tce.h>
 #include <asm/ppc-pci.h>
 #include <asm/udbg.h>
+#include <asm/kdump.h>
 
 #include "plpar_wrappers.h"
 
@@ -291,9 +292,8 @@ static void iommu_table_setparms(struct pci_controller *phb,
 
 	tbl->it_base = (unsigned long)__va(*basep);
 
-#ifndef CONFIG_CRASH_DUMP
-	memset((void *)tbl->it_base, 0, *sizep);
-#endif
+	if (!__kdump_flag)
+		memset((void *)tbl->it_base, 0, *sizep);
 
 	tbl->it_busno = phb->bus->number;
 
-- 
1.5.5

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] Support for relocatable kdump kernel
@ 2008-10-22  4:56 Milton Miller
  2008-10-22 20:39 ` [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable Milton Miller
                   ` (3 more replies)
  0 siblings, 4 replies; 27+ messages in thread
From: Milton Miller @ 2008-10-22  4:56 UTC (permalink / raw)
  To: Michael Ellerman, Mohan Kumar M; +Cc: ppcdev, Paul Mackerras

On Wed Oct 22 at 14:38:10 EST in 2008, Michael Ellerman wrote:
> This adds relocatable kernel support for kdump. With this one can
> use the same regular kernel to capture the kdump. A signature 
> (0xfeed1234)
> is passed in r6 from panic code to the next kernel through 
> kexec_sequence
> and purgatory code. The signature is used to differentiate between
> kdump kernel and non-kdump kernels.

I object to this signature being passed from the kernel.  The
purgatory code should be modified to set this based on a memory
variable in its space, and that variable should be set by the
userspace code that prepares purgatory.  (The standard
kexec-tools has library code to do this and we already use it
to tell purgatory the location of the device tree and kernel).

> The purgatory code compares the signature and sets the __kdump_flag in
> head_64.S.  During the boot up, kernel code checks __kdump_flag and if 
> it
> is set, the kernel will behave as relocatable kdump kernel. This kernel
> will boot at the address where it was loaded by kexec-tools ie. at the
> address reserved through crashkernel boot parameter.

I was hoping for a more generic test, but I haven't had the time to
write code for this so I'll accept this part for now.


> CONFIG_CRASH_DUMP depends on CONFIG_RELOCATABLE option to build kdump
> kernel as relocatable. So the same kernel can be used as production and
> kdump kernel.
>
>

>
> diff --git a/arch/powerpc/include/asm/kdump.h 
> b/arch/powerpc/include/asm/kdump.h
> index f6c93c7..a503da9 100644
> --- a/arch/powerpc/include/asm/kdump.h
> +++ b/arch/powerpc/include/asm/kdump.h
> @@ -9,6 +9,12 @@
>   * Reserve to the end of the FWNMI area, see head_64.S */
>  #define KDUMP_RESERVE_LIMIT    0x10000 /* 64K */
>
> +/*
> + * Used to differentiate between relocatable kdump kernel and other
> + * kernels
> + */
> +#define KDUMP_SIGNATURE        0xfeed1234
> +

This can go

>  #ifdef CONFIG_CRASH_DUMP
>
>  #define KDUMP_TRAMPOLINE_START 0x0100
> @@ -19,17 +25,18 @@
>  #endif /* CONFIG_CRASH_DUMP */
>
>  #ifndef __ASSEMBLY__
> -#ifdef CONFIG_CRASH_DUMP
>
> +extern unsigned long __kdump_flag;
> +

This will not be needed.  It will only be referenced by the
assembly code in head.S.

> +#if defined(CONFIG_CRASH_DUMP) && !defined(CONFIG_RELOCATABLE)
>  extern void reserve_kdump_trampoline(void);
>  extern void setup_kdump_trampoline(void);
> -
> -#else /* !CONFIG_CRASH_DUMP */
> -
> +#else
> +/* !CRASH_DUMP || RELOCATABLE */
>  static inline void reserve_kdump_trampoline(void) { ; }
>  static inline void setup_kdump_trampoline(void) { ; }
> +#endif
>
> -#endif /* CONFIG_CRASH_DUMP */
>  #endif /* __ASSEMBLY__ */
>
>  #endif /* __PPC64_KDUMP_H */
> diff --git a/arch/powerpc/kernel/crash_dump.c 
> b/arch/powerpc/kernel/crash_dump.c
> index 97e0563..19671ac 100644
> --- a/arch/powerpc/kernel/crash_dump.c
> +++ b/arch/powerpc/kernel/crash_dump.c
> @@ -30,6 +30,7 @@
>  /* Stores the physical address of elf header of crash image. */
>  unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
>
> +#ifndef CONFIG_RELOCATABLE

is this done somewhere else for the relocatable case?
a comment pointing to where would be helpful.

>  void __init reserve_kdump_trampoline(void)
>  {
>         lmb_reserve(0, KDUMP_RESERVE_LIMIT);
> @@ -68,6 +69,7 @@ void __init setup_kdump_trampoline(void)
>
>         DBG(" <- setup_kdump_trampoline()\n");
>  }
> +#endif /* CONFIG_RELOCATABLE */
>
>  /*
>   * Note: elfcorehdr_addr is not just limited to vmcore. It is also 
> used by
> diff --git a/arch/powerpc/kernel/head_64.S 
> b/arch/powerpc/kernel/head_64.S
> index 84856be..69489bd 100644
> --- a/arch/powerpc/kernel/head_64.S
> +++ b/arch/powerpc/kernel/head_64.S
> @@ -97,6 +97,12 @@ __secondary_hold_spinloop:
>  __secondary_hold_acknowledge:
>         .llong  0x0
>
> +       /* This flag is set by purgatory if we should be a kdump 
> kernel. */
> +       /* Do not move this variable as purgatory knows about it. */
> +       .globl  __kdump_flag
> +__kdump_flag:
> +       .llong  0x0
> +
>  #ifdef CONFIG_PPC_ISERIES
>         /*
>          * At offset 0x20, there is a pointer to iSeries LPAR data.
> @@ -1384,7 +1390,13 @@ _STATIC(__after_prom_start)
>         /* process relocations for the final address of the kernel */
>         lis     r25,PAGE_OFFSET at highest      /* compute virtual 
> base of kernel */
>         sldi    r25,r25,32
> -       mr      r3,r25
> +#ifdef CONFIG_CRASH_DUMP
> +       ld      r7,__kdump_flag-_stext(r26)
> +       cmpldi  cr0,r7,1        /* kdump kernel ? - stay where we are 
> */
> +       bne     1f
> +       add     r25,r25,r26
> +#endif
> +1:     mr      r3,r25
>         bl      .relocate
>  #endif
>
> @@ -1398,11 +1410,26 @@ _STATIC(__after_prom_start)
>         li      r3,0                    /* target addr */
>         mr.     r4,r26                  /* In some cases the loader 
> may  */
>         beq     9f                      /* have already put us at zero 
> */
> -       lis     r5,(copy_to_here - _stext)@ha
> -       addi    r5,r5,(copy_to_here - _stext)@l /* # bytes of memory 
> to copy */
>         li      r6,0x100                /* Start offset, the first 
> 0x100 */
>                                         /* bytes were copied earlier.  
>   */
>
> +#ifdef CONFIG_CRASH_DUMP
> +/*
> + * Check if the kernel has to be running as relocatable kernel based 
> on the
> + * variable __kdump_flag, if it is set the kernel is treated as 
> relocatable
> + * kernel, otherwise it will be moved to PHYSICAL_START
> + */
> +       ld      r7,__kdump_flag-_stext(r26)
> +       cmpldi  cr0,r7,1
> +       bne     3f
> +
> +       li      r5,__end_interrupts - _stext    /* just copy 
> interrupts */
> +       b       5f
> +3:
> +#endif
> +       lis     r5,(copy_to_here - _stext)@ha
> +       addi    r5,r5,(copy_to_here - _stext)@l /* # bytes of memory 
> to copy */
> +
>         bl      .copy_and_flush         /* copy the first n bytes      
>   */
>                                         /* this includes the code 
> being  */
>                                         /* executed here.              
>   */
> @@ -1411,15 +1438,15 @@ _STATIC(__after_prom_start)
>         mtctr   r8
>         bctr
>
> +p_end: .llong  _end - _stext
> +
>  4:     /* Now copy the rest of the kernel up to _end */
>         addis   r5,r26,(p_end - _stext)@ha
>         ld      r5,(p_end - _stext)@l(r5)       /* get _end */
> -       bl      .copy_and_flush         /* copy the rest */
> +5:     bl      .copy_and_flush         /* copy the rest */
>
>  9:     b       .start_here_multiplatform
>
> -p_end: .llong  _end - _stext
> -
>  /*
>   * Copy routine used to copy the kernel to start at physical address 0
>   * and flush and invalidate the caches as needed.
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index ea1ba89..3857d7e 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -458,6 +458,42 @@ void iommu_unmap_sg(struct iommu_table *tbl, 
> struct scatterlist *sglist,
>         spin_unlock_irqrestore(&(tbl->it_lock), flags);
>  }
>
> +static void iommu_table_clear(struct iommu_table *tbl)
> +{
> +       if (!__kdump_flag) {

This test should be using the new is_kdump_kernel() in
include/linux/crash_dump.h.   (That tests for the presence
of the elfcorehdr= command line option, but it (1) is common
with other architectures, and (2) allows the kernel
to be running offset for other uses.)

> +               /* Clear the table in case firmware left allocations 
> in it */
> +               ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
> +               return;
> +       }
>
> +
> +#ifdef CONFIG_CRASH_DUMP
> +       if (ppc_md.tce_get) {
> +               unsigned long index, tceval, tcecount = 0;
> +
>

> --- a/arch/powerpc/kernel/machine_kexec.c
> +++ b/arch/powerpc/kernel/machine_kexec.c
> @@ -88,11 +88,13 @@ void __init reserve_crashkernel(void)
>
>         crash_size = crashk_res.end - crashk_res.start + 1;
>
> +#ifndef CONFIG_RELOCATABLE
>         if (crashk_res.start != KDUMP_KERNELBASE)
>                 printk("Crash kernel location must be 0x%x\n",
>                                 KDUMP_KERNELBASE);
>
>         crashk_res.start = KDUMP_KERNELBASE;
> +#endif

actually, we should never test on ppc64, as we don't care
if the base kernel is compiled CONFIG_RELOCATABLE but only
that the dump kernel must be.

>         crash_size = PAGE_ALIGN(crash_size);
>         crashk_res.end = crashk_res.start + crash_size - 1;
>
> diff --git a/arch/powerpc/kernel/machine_kexec_64.c 
> b/arch/powerpc/kernel/machine_kexec_64.c
> index 4bd8b4f..e6efec7 100644
> --- a/arch/powerpc/kernel/machine_kexec_64.c
> +++ b/arch/powerpc/kernel/machine_kexec_64.c
> @@ -255,11 +255,14 @@ static union thread_union kexec_stack
>  /* Our assembly helper, in kexec_stub.S */
>  extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long 
> start,
>                                         void *image, void *control,
> -                                       void (*clear_all)(void)) 
> ATTRIB_NORET;
> +                                       void (*clear_all)(void),
> +                                       unsigned long kdump_flag) 
> ATTRIB_NORET;
>
>  /* too late to fail here */
>  void default_machine_kexec(struct kimage *image)
>  {
> +       unsigned long kdump_flag = 0;
> +
>         /* prepare control code if any */
>
>         /*
> @@ -270,8 +273,10 @@ void default_machine_kexec(struct kimage *image)
>          * using debugger IPI.
>          */
>
> -       if (crashing_cpu == -1)
> -               kexec_prepare_cpus();
> +       if (crashing_cpu == -1)
> +               kexec_prepare_cpus();
> +       else
> +               kdump_flag = KDUMP_SIGNATURE;
>
>         /* switch to a staticly allocated stack.  Based on irq stack 
> code.
>          * XXX: the task struct will likely be invalid once we do the 
> copy!
> @@ -284,7 +289,7 @@ void default_machine_kexec(struct kimage *image)
>          */
>         kexec_sequence(&kexec_stack, image->start, image,
>                         page_address(image->control_code_page),
> -                       ppc_md.hpte_clear_all);
> +                       ppc_md.hpte_clear_all, kdump_flag);
>         /* NOTREACHED */
>  }
>
>
> diff --git a/arch/powerpc/kernel/misc_64.S 
> b/arch/powerpc/kernel/misc_64.S
> index 3053fe5..a243fd0 100644
> --- a/arch/powerpc/kernel/misc_64.S
> +++ b/arch/powerpc/kernel/misc_64.S
> @@ -611,10 +611,12 @@ real_mode:        /* assume normal blr return */
>
>
>  /*
> - * kexec_sequence(newstack, start, image, control, clear_all())
> + * kexec_sequence(newstack, start, image, control, clear_all(), 
> kdump_flag)
>   *
>   * does the grungy work with stack switching and real mode switches
>   * also does simple calls to other code
> + *
> + * kdump_flag says whether the next kernel should be a kdump kernel.
>   */
>
>  _GLOBAL(kexec_sequence)
> @@ -647,7 +649,7 @@ _GLOBAL(kexec_sequence)
>         mr      r29,r5                  /* image (virt) */
>         mr      r28,r6                  /* control, unused */
>         mr      r27,r7                  /* clear_all() fn desc */
> -       mr      r26,r8                  /* spare */
> +       mr      r26,r8                  /* kdump flag */
>         lhz     r25,PACAHWCPUID(r13)    /* get our phys cpu from paca 
> */
>
>         /* disable interrupts, we are overwriting kernel data next */
> @@ -709,5 +711,6 @@ _GLOBAL(kexec_sequence)
>         mr      r4,r30  # start, aka phys mem offset
>         mtlr    4
>         li      r5,0
> -       blr     /* image->start(physid, image->start, 0); */
> +       mr      r6,r26                  /* kdump_flag */
> +       blr     /* image->start(physid, image->start, 0, kdump_flag); 
> */
>  #endif /* CONFIG_KEXEC */

please remove these changes and signature.
as explained above, this should be handled by the user space loading
the kexec kernel.   We never use the same kexec target for kdump and
a normal reboot.


> diff --git a/arch/powerpc/platforms/cell/ras.c 
> b/arch/powerpc/platforms/cell/ras.c
> index 2a14b05..665af1c 100644
> --- a/arch/powerpc/platforms/cell/ras.c
> +++ b/arch/powerpc/platforms/cell/ras.c
> @@ -21,6 +21,7 @@
>  #include <asm/machdep.h>
>  #include <asm/rtas.h>
>  #include <asm/cell-regs.h>
> +#include <asm/kdump.h>
>
>  #include "ras.h"
>
> @@ -111,9 +112,8 @@ static int __init cbe_ptcal_enable_on_node(int 
> nid, int order)
>         int ret = -ENOMEM;
>         unsigned long addr;
>
> -#ifdef CONFIG_CRASH_DUMP
> -       rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
> -#endif
> +       if (__kdump_flag)
> +               rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
>

This should use is_kdump_kernel() from include/linux/crash_dump.h

>         area = kmalloc(sizeof(*area), GFP_KERNEL);
>         if (!area)
> diff --git a/arch/powerpc/platforms/pseries/iommu.c 
> b/arch/powerpc/platforms/pseries/iommu.c
> index a8c4466..d56491d 100644
> --- a/arch/powerpc/platforms/pseries/iommu.c
> +++ b/arch/powerpc/platforms/pseries/iommu.c
> @@ -44,6 +44,7 @@
>  #include <asm/tce.h>
>  #include <asm/ppc-pci.h>
>  #include <asm/udbg.h>
> +#include <asm/kdump.h>
>
>  #include "plpar_wrappers.h"
>
> @@ -291,9 +292,8 @@ static void iommu_table_setparms(struct 
> pci_controller *phb,
>
>         tbl->it_base = (unsigned long)__va(*basep);
>
> -#ifndef CONFIG_CRASH_DUMP
> -       memset((void *)tbl->it_base, 0, *sizep);
> -#endif
> +       if (!__kdump_flag)
> +               memset((void *)tbl->it_base, 0, *sizep);
>

as should this.

>
>         tbl->it_busno = phb->bus->number;
>


thanks,
milton

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable
  2008-10-22  4:56 [PATCH] Support for relocatable kdump kernel Milton Miller
@ 2008-10-22 20:39 ` Milton Miller
  2008-10-23  3:23   ` Michael Neuling
                     ` (2 more replies)
  2008-10-22 20:39 ` [PATCH 2/2 kexec-tools] ppc64: segemments are sorted Milton Miller
                   ` (2 subsequent siblings)
  3 siblings, 3 replies; 27+ messages in thread
From: Milton Miller @ 2008-10-22 20:39 UTC (permalink / raw)
  To: Ben Herrenschmidt; +Cc: linuxppc-dev, Simon Horman, kexec, Paul Mackerras

The __kdump_flag ABI is overly constraining for future development.  

As of 2.6.27, the kernel entry point has 4 constraints:  Offset 0 is
the starting point for the master (boot) cpu (entered with r3 pointing
to the device tree structure), offset 0x60 is code for the slave cpus
(entered with r3 set to their device tree physical id), offset 0x20 is
used by the iseries hypervisor, and secondary cpus must be well behaved
when the first 256 bytes are copied to address 0.

Placing the __kdump_flag at 0x18 is bad because:

- It was taking the last 8 bytes before the iseries hypervisor data.  
- It was 8 bytes for a boolean flag
- It had no way of identifying that the flag was present
- It does not leave any room for the master to add any additional code
  before branching, which hurts debug.
- It will be unnecessarily hard for 32 bit code to be common (8 bytes)

Now that we have eliminated the use of __kdump_flag in favor of
the standard is_kdump_kernel(), this flag only controls whether the
kernel runs at its load address instead of being relocated to
PHYSICAL_START (0), so rename it __run_at_load.

Move the flag to 0x5c, 1 word before the secondary cpu entry point at
0x60.  Use the copy at address 0 not the one in the base kernel image to
make it easier on kexec-tools.  Initialize it with "run0" to say it will
run at 0 unless it is set to 1.  It only exists if we are relocatable.

Signed-off-by: Milton Miller <miltonm@bga.com>
---
I left it global so it appears that way in System.map, but it would
not need to be.

I kept the guards with CONFIG_CRASH_DUMP for now.  They could be relaxed
to just CONFIG_RELOCATABLE.

Tested with normal kexec (kernel moved to 0) and a custom boot-loader
(kernel stayed at loaded 16MB start).

Index: next.git/arch/powerpc/kernel/head_64.S
===================================================================
--- next.git.orig/arch/powerpc/kernel/head_64.S	2008-10-22 04:30:08.000000000 -0500
+++ next.git/arch/powerpc/kernel/head_64.S	2008-10-22 04:59:55.000000000 -0500
@@ -97,12 +97,6 @@ __secondary_hold_spinloop:
 __secondary_hold_acknowledge:
 	.llong	0x0
 
-	/* This flag is set by purgatory if we should be a kdump kernel. */
-	/* Do not move this variable as purgatory knows about it. */
-	.globl	__kdump_flag
-__kdump_flag:
-	.llong	0x0
-
 #ifdef CONFIG_PPC_ISERIES
 	/*
 	 * At offset 0x20, there is a pointer to iSeries LPAR data.
@@ -112,6 +106,20 @@ __kdump_flag:
 	.llong hvReleaseData-KERNELBASE
 #endif /* CONFIG_PPC_ISERIES */
 
+#ifdef CONFIG_CRASH_DUMP
+	/* This flag is set to 1 by a loader if the kernel should run
+	 * at the loaded address instead of the linked address.  This
+	 * is used by kexec-tools to keep the kdump kernel in the
+	 * crash_kernel region.  The loader is responsible for
+	 * observing the alignment requirement.
+	 */
+	/* Do not move this variable as kexec-tools knows about it. */
+	. = 0x5c
+	.globl	__run_at_load
+__run_at_load:
+	.long	0x72756e30	/* "run0" -- relocate to 0 by default */
+#endif
+
 	. = 0x60
 /*
  * The following code is used to hold secondary processors
@@ -1391,8 +1399,8 @@ _STATIC(__after_prom_start)
 	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
 	sldi	r25,r25,32
 #ifdef CONFIG_CRASH_DUMP
-	ld	r7,__kdump_flag-_stext(r26)
-	cmpldi	cr0,r7,1	/* kdump kernel ? - stay where we are */
+	lwz	r7,__run_at_load-_stext(0)
+	cmplwi	cr0,r7,1	/* kdump kernel ? - stay where we are */
 	bne	1f
 	add	r25,r25,r26
 #endif
@@ -1416,11 +1424,11 @@ _STATIC(__after_prom_start)
 #ifdef CONFIG_CRASH_DUMP
 /*
  * Check if the kernel has to be running as relocatable kernel based on the
- * variable __kdump_flag, if it is set the kernel is treated as relocatable
+ * variable __run_at_load, if it is set the kernel is treated as relocatable
  * kernel, otherwise it will be moved to PHYSICAL_START
  */
-	ld	r7,__kdump_flag-_stext(r26)
-	cmpldi	cr0,r7,1
+	lwz	r7,__run_at_load-_stext(0)
+	cmplwi	cr0,r7,1
 	bne	3f
 
 	li	r5,__end_interrupts - _stext	/* just copy interrupts */

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/2 kexec-tools] ppc64: new relocatble kernel activation ABI
  2008-10-22  4:56 [PATCH] Support for relocatable kdump kernel Milton Miller
  2008-10-22 20:39 ` [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable Milton Miller
  2008-10-22 20:39 ` [PATCH 2/2 kexec-tools] ppc64: segemments are sorted Milton Miller
@ 2008-10-22 20:39 ` Milton Miller
  2008-10-22 20:39 ` [PATCH 1/3] powerpc: kexec exit should not use magic numbers Milton Miller
  3 siblings, 0 replies; 27+ messages in thread
From: Milton Miller @ 2008-10-22 20:39 UTC (permalink / raw)
  To: Simon Horman; +Cc: linuxppc-dev, Simon Horman, kexec, Paul Mackerras


This updates kexec-tools to match the kernel after the patches

"kexec exit should not use magic numbers" and 

"better flag for running relocatable" are applied.



Signed-off-by: Milton Miller <miltonm@bga.com>
---
Still proposed change

Index: kexec-tools/purgatory/arch/ppc64/v2wrap.S
===================================================================
--- kexec-tools.orig/purgatory/arch/ppc64/v2wrap.S	2008-10-22 06:14:44.000000000 -0500
+++ kexec-tools/purgatory/arch/ppc64/v2wrap.S	2008-10-22 06:14:48.000000000 -0500
@@ -45,11 +45,13 @@
 	oris    rn,rn,name##@h;         \
 	ori     rn,rn,name##@l
 
-#define KDUMP_SIGNATURE 0xfeed1234
-
 	.machine ppc64
 	.globl purgatory_start
 purgatory_start:	b	master
+	.org purgatory_start + 0x5c     # ABI: possible run_at_load flag at 0x5c
+run_at_load:
+	.long 0
+	.size run_at_load, . - run_at_load
 	.org purgatory_start + 0x60     # ABI: slaves start at 60 with r3=phys
 slave:	b $
 	.org purgatory_start + 0x100    # ABI: end of copied region
@@ -65,7 +67,6 @@ master:
 	isync
 	mr      17,3            # save cpu id to r17
 	mr      15,4            # save physical address in reg15
-	mr      18,6            # save kdump flag in reg18
 
 	LOADADDR(6,my_toc)
 	ld      2,0(6)          #setup toc
@@ -96,14 +97,6 @@ master:
 	mtctr	4		# prepare branch too
 	mr      3,16            # restore dt address
 
-	LOADADDR(6,KDUMP_SIGNATURE)
-	cmpd	18,6
-	bne	regular
-	li	7,1
-	std	7,24(4)		# mark kdump flag at kernel
-regular:
-	lwz	7,0(4)		# get the first instruction that we stole
-	stw	7,0(0)		# and put it in the slave loop at 0
 				# skip cache flush, do we care?
 
 	bctr			# start kernel
Index: kexec-tools/kexec/arch/ppc64/crashdump-ppc64.h
===================================================================
--- kexec-tools.orig/kexec/arch/ppc64/crashdump-ppc64.h	2008-10-22 06:14:44.000000000 -0500
+++ kexec-tools/kexec/arch/ppc64/crashdump-ppc64.h	2008-10-22 06:14:48.000000000 -0500
@@ -23,6 +23,8 @@ void add_usable_mem_rgns(unsigned long l
 #define _ALIGN_UP(addr,size)	(((addr)+((size)-1))&(~((size)-1)))
 #define _ALIGN_DOWN(addr,size)	((addr)&(~((size)-1)))
 
+#define KERNEL_RUN_AT_ZERO_MAGIC 0x72756e30	/* "run0" */
+
 extern uint64_t crash_base;
 extern uint64_t crash_size;
 extern unsigned int rtas_base;
Index: kexec-tools/kexec/arch/ppc64/kexec-elf-ppc64.c
===================================================================
--- kexec-tools.orig/kexec/arch/ppc64/kexec-elf-ppc64.c	2008-10-22 06:14:44.000000000 -0500
+++ kexec-tools/kexec/arch/ppc64/kexec-elf-ppc64.c	2008-10-22 06:14:48.000000000 -0500
@@ -93,6 +93,7 @@ int elf_ppc64_load(int argc, char **argv
 	uint64_t my_stack, my_backup_start;
 	uint64_t toc_addr;
 	unsigned int slave_code[256/sizeof (unsigned int)], master_entry;
+	unsigned int run_at_load;
 
 #define OPT_APPEND     (OPT_ARCH_MAX+0)
 #define OPT_RAMDISK     (OPT_ARCH_MAX+1)
@@ -307,6 +308,18 @@ int elf_ppc64_load(int argc, char **argv
 		my_backup_start = info->backup_start;
 		elf_rel_set_symbol(&info->rhdr, "backup_start",
 				&my_backup_start, sizeof(my_backup_start));
+
+		/* Tell relocatable kernel to run at load address
+		 * via word before slave code in purgatory
+		 */
+
+		elf_rel_get_symbol(&info->rhdr, "run_at_load", &run_at_load,
+				sizeof(run_at_load));
+		if (run_at_load == KERNEL_RUN_AT_ZERO_MAGIC)
+			run_at_load = 1;
+			/* else it should be a fixed offset image */
+		elf_rel_set_symbol(&info->rhdr, "run_at_load", &run_at_load,
+				sizeof(run_at_load));
 	}
 
 	/* Set stack address */
@@ -325,10 +338,13 @@ int elf_ppc64_load(int argc, char **argv
 	my_backup_start = 0;
 	my_stack = 0;
 	toc_addr = 0;
+	run_at_load = 0;
 
 	elf_rel_get_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel));
 	elf_rel_get_symbol(&info->rhdr, "dt_offset", &my_dt_offset,
 				sizeof(my_dt_offset));
+	elf_rel_get_symbol(&info->rhdr, "run_at_load", &run_at_load,
+				sizeof(run_at_load));
 	elf_rel_get_symbol(&info->rhdr, "panic_kernel", &my_panic_kernel,
 				sizeof(my_panic_kernel));
 	elf_rel_get_symbol(&info->rhdr, "backup_start", &my_backup_start,
@@ -341,6 +357,7 @@ int elf_ppc64_load(int argc, char **argv
 	fprintf(stderr, "kernel is %llx\n", (unsigned long long)my_kernel);
 	fprintf(stderr, "dt_offset is %llx\n",
 		(unsigned long long)my_dt_offset);
+	fprintf(stderr, "run_at_load flag is %x\n", run_at_load);
 	fprintf(stderr, "panic_kernel is %x\n", my_panic_kernel);
 	fprintf(stderr, "backup_start is %llx\n",
 		(unsigned long long)my_backup_start);

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 2/2 kexec-tools] ppc64: segemments are sorted
  2008-10-22  4:56 [PATCH] Support for relocatable kdump kernel Milton Miller
  2008-10-22 20:39 ` [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable Milton Miller
@ 2008-10-22 20:39 ` Milton Miller
  2008-10-22 20:47   ` Milton Miller
  2008-10-22 20:39 ` [PATCH 1/2 kexec-tools] ppc64: new relocatble kernel activation ABI Milton Miller
  2008-10-22 20:39 ` [PATCH 1/3] powerpc: kexec exit should not use magic numbers Milton Miller
  3 siblings, 1 reply; 27+ messages in thread
From: Milton Miller @ 2008-10-22 20:39 UTC (permalink / raw)
  To: Simon Horman; +Cc: linuxppc-dev, Simon Horman, kexec, Paul Mackerras

Every time add_segment is called, the segments are sorted.  If the first
hole in memory is not big enough for the kernel then something besides
the kernel may be at info->segment[0].

Signed-off-by: Milton Miller <miltonm@bga.com>
---
Found during custom environment testing with several reserved blocks of
memory, not the usual case.

Index: kexec-tools/kexec/arch/ppc64/kexec-elf-ppc64.c
===================================================================
--- kexec-tools.orig/kexec/arch/ppc64/kexec-elf-ppc64.c	2008-10-22 06:14:48.000000000 -0500
+++ kexec-tools/kexec/arch/ppc64/kexec-elf-ppc64.c	2008-10-22 06:14:54.000000000 -0500
@@ -86,7 +86,7 @@ int elf_ppc64_load(int argc, char **argv
 	size_t size;
 	uint64_t *rsvmap_ptr;
 	struct bootblock *bb_ptr;
-	unsigned int nr_segments, i;
+	unsigned int i;
 	int result, opt;
 	uint64_t my_kernel, my_dt_offset;
 	unsigned int my_panic_kernel;
@@ -187,7 +187,7 @@ int elf_ppc64_load(int argc, char **argv
 	if (size > phdr->p_memsz)
 		size = phdr->p_memsz;
 
-	hole_addr = (uint64_t)locate_hole(info, size, 0, 0,
+	my_kernel = hole_addr = (uint64_t)locate_hole(info, size, 0, 0,
 			max_addr, 1);
 	ehdr.e_phdr[0].p_paddr = hole_addr;
 	result = elf_exec_load(&ehdr, info);
@@ -233,12 +233,10 @@ int elf_ppc64_load(int argc, char **argv
 			return -1;
 		}
 		seg_buf = (unsigned char *)slurp_file(ramdisk, &seg_size);
-		add_buffer(info, seg_buf, seg_size, seg_size, 0, 0, max_addr, 1);
-		hole_addr = (uintptr_t)
-			info->segment[info->nr_segments-1].mem;
+		hole_addr = add_buffer(info, seg_buf, seg_size, seg_size,
+			0, 0, max_addr, 1);
 		initrd_base = hole_addr;
-		initrd_size = (uint64_t)
-			info->segment[info->nr_segments-1].memsz;
+		initrd_size = seg_size;
 	} /* ramdisk */
 
 	if (devicetreeblob) {
@@ -248,16 +246,18 @@ int elf_ppc64_load(int argc, char **argv
 		/* Grab device tree from buffer */
 		blob_buf =
 			(unsigned char *)slurp_file(devicetreeblob, &blob_size);
-		add_buffer(info, blob_buf, blob_size, blob_size, 0, 0,
-				max_addr, -1);
+		my_dt_offset = add_buffer(info, blob_buf, blob_size, blob_size,
+				0, 0, max_addr, -1);
 
+		seg_buf = blob_buf;
+		seg_size = blob_size;
 	} else {
 		/* create from fs2dt */
 		seg_buf = NULL;
 		seg_size = 0;
 		create_flatten_tree(info, (unsigned char **)&seg_buf,
 				(unsigned long *)&seg_size,cmdline);
-		add_buffer(info, seg_buf, seg_size, seg_size,
+		my_dt_offset = add_buffer(info, seg_buf, seg_size, seg_size,
 				0, 0, max_addr, -1);
 	}
 
@@ -265,27 +265,20 @@ int elf_ppc64_load(int argc, char **argv
 	 * find last entry (both 0) in the reserve mem list.  Assume DT
 	 * entry is before this one
 	 */
-	bb_ptr = (struct bootblock *)(
-		(unsigned char *)info->segment[(info->nr_segments)-1].buf);
-	rsvmap_ptr = (uint64_t *)(
-		(unsigned char *)info->segment[(info->nr_segments)-1].buf +
-		bb_ptr->off_mem_rsvmap);
+	bb_ptr = (struct bootblock *)(seg_buf);
+	rsvmap_ptr = (uint64_t *)
+		(((char *)seg_buf) + bb_ptr->off_mem_rsvmap);
 	while (*rsvmap_ptr || *(rsvmap_ptr+1))
 		rsvmap_ptr += 2;
 	rsvmap_ptr -= 2;
-	*rsvmap_ptr = (uintptr_t)(
-		info->segment[(info->nr_segments)-1].mem);
+	*rsvmap_ptr = my_dt_offset;
 	rsvmap_ptr++;
 	*rsvmap_ptr = (uint64_t)bb_ptr->totalsize;
 
-	nr_segments = info->nr_segments;
-
 	/* Set kernel */
-	my_kernel = (uintptr_t)info->segment[0].mem;
 	elf_rel_set_symbol(&info->rhdr, "kernel", &my_kernel, sizeof(my_kernel));
 
 	/* Set dt_offset */
-	my_dt_offset = (uintptr_t)info->segment[nr_segments-1].mem;
 	elf_rel_set_symbol(&info->rhdr, "dt_offset", &my_dt_offset,
 				sizeof(my_dt_offset));
 
@@ -293,7 +286,7 @@ int elf_ppc64_load(int argc, char **argv
 	elf_rel_get_symbol(&info->rhdr, "purgatory_start", slave_code,
 			sizeof(slave_code));
 	master_entry = slave_code[0];
-	memcpy(slave_code, info->segment[0].buf, sizeof(slave_code));
+	memcpy(slave_code, phdr->p_data, sizeof(slave_code));
 	slave_code[0] = master_entry;
 	elf_rel_set_symbol(&info->rhdr, "purgatory_start", slave_code,
 				sizeof(slave_code));
@@ -366,7 +359,7 @@ int elf_ppc64_load(int argc, char **argv
 	fprintf(stderr, "purgatory size is %zu\n", purgatory_size);
 #endif
 
-	for (i = 0; i < nr_segments; i++)
+	for (i = 0; i < info->nr_segments; i++)
 		fprintf(stderr, "segment[%d].mem:%p memsz:%zu\n", i,
 			info->segment[i].mem, info->segment[i].memsz);
 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/3] powerpc: kexec exit should not use magic numbers
  2008-10-22  4:56 [PATCH] Support for relocatable kdump kernel Milton Miller
                   ` (2 preceding siblings ...)
  2008-10-22 20:39 ` [PATCH 1/2 kexec-tools] ppc64: new relocatble kernel activation ABI Milton Miller
@ 2008-10-22 20:39 ` Milton Miller
  2008-10-22 23:18   ` Simon Horman
  3 siblings, 1 reply; 27+ messages in thread
From: Milton Miller @ 2008-10-22 20:39 UTC (permalink / raw)
  To: Ben Herrenschmidt; +Cc: linuxppc-dev, Simon Horman, kexec, Paul Mackerras

The relocatable kernel kdump patch (54622f10a6aabb8bb2bdacf3dd070046f03dc246)
added a magic flag value in a register to tell purgatory that it should
be a panic kernel.  This part is wrong and is reverted by this patch.

The kernel gets a list of memory blocks and an entry point from user space.
Its job is to copy the blocks into place and then branch to the designated
entry point (after turning "off" the mmu).

The user space tool inserts a trampoline, called purgatory, that runs
before the user supplied code.   Its job is to establish the entry
environment for the new kernel or other application based on the contents
of memory.  The purgatory code is compiled and embedded in the tool,
where it is later patched via its ELF symbol table.

Since the tool knows it is creating a purgatory that will run after a
kernel crash, it should just patch purgatory (or the kernel directly)
if something needs to happen.

Signed-off-by: Milton Miller <miltonm@bga.com>
---
keep the whitespace fix at if(crashing_cpu == -1)

Index: next.git/arch/powerpc/include/asm/kdump.h
===================================================================
--- next.git.orig/arch/powerpc/include/asm/kdump.h	2008-10-22 06:53:22.000000000 -0500
+++ next.git/arch/powerpc/include/asm/kdump.h	2008-10-22 06:54:12.000000000 -0500
@@ -9,12 +9,6 @@
  * Reserve to the end of the FWNMI area, see head_64.S */
 #define KDUMP_RESERVE_LIMIT	0x10000 /* 64K */
 
-/*
- * Used to differentiate between relocatable kdump kernel and other
- * kernels
- */
-#define KDUMP_SIGNATURE	0xfeed1234
-
 #ifdef CONFIG_CRASH_DUMP
 
 #define KDUMP_TRAMPOLINE_START	0x0100
Index: next.git/arch/powerpc/kernel/machine_kexec_64.c
===================================================================
--- next.git.orig/arch/powerpc/kernel/machine_kexec_64.c	2008-10-22 06:53:22.000000000 -0500
+++ next.git/arch/powerpc/kernel/machine_kexec_64.c	2008-10-22 06:54:12.000000000 -0500
@@ -255,14 +255,11 @@ static union thread_union kexec_stack
 /* Our assembly helper, in kexec_stub.S */
 extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
 					void *image, void *control,
-					void (*clear_all)(void),
-					unsigned long kdump_flag) ATTRIB_NORET;
+					void (*clear_all)(void)) ATTRIB_NORET;
 
 /* too late to fail here */
 void default_machine_kexec(struct kimage *image)
 {
-	unsigned long kdump_flag = 0;
-
 	/* prepare control code if any */
 
 	/*
@@ -275,8 +272,6 @@ void default_machine_kexec(struct kimage
 
 	if (crashing_cpu == -1)
 		kexec_prepare_cpus();
-	else
-		kdump_flag = KDUMP_SIGNATURE;
 
 	/* switch to a staticly allocated stack.  Based on irq stack code.
 	 * XXX: the task struct will likely be invalid once we do the copy!
@@ -289,7 +284,7 @@ void default_machine_kexec(struct kimage
 	 */
 	kexec_sequence(&kexec_stack, image->start, image,
 			page_address(image->control_code_page),
-			ppc_md.hpte_clear_all, kdump_flag);
+			ppc_md.hpte_clear_all);
 	/* NOTREACHED */
 }
 
Index: next.git/arch/powerpc/kernel/misc_64.S
===================================================================
--- next.git.orig/arch/powerpc/kernel/misc_64.S	2008-10-22 06:53:22.000000000 -0500
+++ next.git/arch/powerpc/kernel/misc_64.S	2008-10-22 06:54:12.000000000 -0500
@@ -611,12 +611,10 @@ real_mode:	/* assume normal blr return *
 
 
 /*
- * kexec_sequence(newstack, start, image, control, clear_all(), kdump_flag)
+ * kexec_sequence(newstack, start, image, control, clear_all())
  *
  * does the grungy work with stack switching and real mode switches
  * also does simple calls to other code
- *
- * kdump_flag says whether the next kernel should be a kdump kernel.
  */
 
 _GLOBAL(kexec_sequence)
@@ -649,7 +647,7 @@ _GLOBAL(kexec_sequence)
 	mr	r29,r5			/* image (virt) */
 	mr	r28,r6			/* control, unused */
 	mr	r27,r7			/* clear_all() fn desc */
-	mr	r26,r8			/* kdump flag */
+	mr	r26,r8			/* spare */
 	lhz	r25,PACAHWCPUID(r13)	/* get our phys cpu from paca */
 
 	/* disable interrupts, we are overwriting kernel data next */
@@ -711,6 +709,5 @@ _GLOBAL(kexec_sequence)
 	mr	r4,r30	# start, aka phys mem offset
 	mtlr	4
 	li	r5,0
-	mr	r6,r26			/* kdump_flag */
-	blr	/* image->start(physid, image->start, 0, kdump_flag); */
+	blr	/* image->start(physid, image->start, 0); */
 #endif /* CONFIG_KEXEC */

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/2 kexec-tools] ppc64: segemments are sorted
  2008-10-22 20:39 ` [PATCH 2/2 kexec-tools] ppc64: segemments are sorted Milton Miller
@ 2008-10-22 20:47   ` Milton Miller
  0 siblings, 0 replies; 27+ messages in thread
From: Milton Miller @ 2008-10-22 20:47 UTC (permalink / raw)
  To: Milton Miller; +Cc: kexec, Simon Horman, Paul Mackerras, linuxppc-dev


On Oct 22, 2008, at 3:39 PM, Milton Miller wrote:

> Every time add_segment is called, the segments are sorted.  If the 
> first
> hole in memory is not big enough for the kernel then something besides
> the kernel may be at

info->segment[0].

> ---
> Found during custom environment testing with several reserved blocks of
> memory, not the usual case.

milton

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/3] powerpc: kexec exit should not use magic numbers
  2008-10-22 20:39 ` [PATCH 1/3] powerpc: kexec exit should not use magic numbers Milton Miller
@ 2008-10-22 23:18   ` Simon Horman
  0 siblings, 0 replies; 27+ messages in thread
From: Simon Horman @ 2008-10-22 23:18 UTC (permalink / raw)
  To: Milton Miller; +Cc: kexec, linuxppc-dev, Paul Mackerras

[ Added Mohan Kumar to CC list ]

On Wed, Oct 22, 2008 at 03:39:18PM -0500, Milton Miller wrote:
> The relocatable kernel kdump patch (54622f10a6aabb8bb2bdacf3dd070046f03dc246)
> added a magic flag value in a register to tell purgatory that it should
> be a panic kernel.  This part is wrong and is reverted by this patch.
> 
> The kernel gets a list of memory blocks and a entry point from user space.
> Its job is to copy the blocks into place and then branch to the designated
> entry point (after turning "off" the mmu).
> 
> The user space tool inserts a trampoline, called purgatory, that runs
> before the user supplied code.   Its job is to establish the entry
> environment for the new kernel or other application based on the contents
> of memory.  The purgatory code is compiled and embedded in the tool,
> where it is later patched using the elf symbol table using elf symbols.
> 
> Since the tool knows it is creating a purgatory that will run after a
> kernel crash, it should just patch purgatory (or the kernel directly)
> if something needs to happen.

Hi Milton,

All of these patches look fine to me.

On the kernel side:
Acked-by: Simon Horman <horms@verge.net.au>

On the kexec-tools side:
I'd rather wait until the kernel changes get merged before merging
the kexec-tools portion. Please ping me at that point.


I'd like to note that these changes really ought to go into the same kernel
(and kexec-tools) release as the relocatable kdump patches, as they will
introduce an incompatibility. For example, crash-dump kernels with only the
relocatable kdump changes will not be usable if the first kernel includes
these changes. I think that means that this needs to go into 2.6.28 -
assuming that Linus accepts the pull request that Ben Herrenschmidt sent
recently.

-- 
Simon Horman
  VA Linux Systems Japan K.K., Sydney, Australia Satellite Office
  H: www.vergenet.net/~horms/             W: www.valinux.co.jp/en

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable
  2008-10-22 20:39 ` [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable Milton Miller
@ 2008-10-23  3:23   ` Michael Neuling
  2008-10-23  3:32   ` Paul Mackerras
  2008-10-23 15:15   ` Mohan Kumar M
  2 siblings, 0 replies; 27+ messages in thread
From: Michael Neuling @ 2008-10-23  3:23 UTC (permalink / raw)
  To: Milton Miller; +Cc: Simon Horman, kexec, Paul Mackerras, linuxppc-dev

In message <kexec-kern-2@bga.com> you wrote:
> The __kdump_flag ABI is overly constraining for future development.  
> 
> As of 2.6.27, the kernel entry point has 4 constraints:  Offset 0 is
> the starting point for the master (boot) cpu (entered with r3 pointing
> to the device tree structure), offset 0x60 is code for the slave cpus
> (entered with r3 set to their device tree physical id), offset 0x20 is
> used by the iseries hypervisor, and secondary cpus must be well behaved
> when the first 256 bytes are copied to address 0.
> 
> Placing the __kdump_flag at 0x18 is bad because:
> 
> - It was taking the last 8 bytes before the iseries hypervisor data.  
> - It was 8 bytes for a boolean flag
> - It had no way of identifying that the flag was present
> - It does not leave any room for the master to add any additional code
>   before branching, which hurts debug.
> - It will be unnecessarily hard for 32 bit code to be common (8 bytes)
> 
> Now that we have eliminated the use of __kdump_flag in favor of
> the standard is_kdump_kernel(), this flag only controls run without
> relocating the kernel to PHYSICAL_START (0), so rename it __run_at_load.
> 
> Move the flag to 0x5c, 1 word before the secondary cpu entry point at
> 0x60.  Use the copy at address 0 not the one in the base kernel image to
> make it easier on kexec-tools.  Initialize it with "run0" to say it will
> run at 0 unless it is set to 1.  It only exists if we are relocatable.
> 
> Signed-off-by: Milton Miller <miltonm@bga.com>
> ---
> I left it global so it appears that way in System.map, but it would
> not need to be.
> 
> I kept the guards with CONFIG_CRASH_DUMP for now.  They could be relaxed
> to just CONFIG_RELOCATABLE.
> 
> Tested with normal kexec (kernel moved to 0) and a custom boot-loader
> (kernel stayed at loaded 16MB start).
> 
> Index: next.git/arch/powerpc/kernel/head_64.S
> ===================================================================
> --- next.git.orig/arch/powerpc/kernel/head_64.S	2008-10-22 04:30:08.000
000000 -0500
> +++ next.git/arch/powerpc/kernel/head_64.S	2008-10-22 04:59:55.000000000 -
0500
> @@ -97,12 +97,6 @@ __secondary_hold_spinloop:
>  __secondary_hold_acknowledge:
>  	.llong	0x0
>  
> -	/* This flag is set by purgatory if we should be a kdump kernel. */
> -	/* Do not move this variable as purgatory knows about it. */
> -	.globl	__kdump_flag
> -__kdump_flag:
> -	.llong	0x0
> -
>  #ifdef CONFIG_PPC_ISERIES
>  	/*
>  	 * At offset 0x20, there is a pointer to iSeries LPAR data.
> @@ -112,6 +106,20 @@ __kdump_flag:
>  	.llong hvReleaseData-KERNELBASE
>  #endif /* CONFIG_PPC_ISERIES */
>  
> +#ifdef CONFIG_CRASH_DUMP
> +	/* This flag is set to 1 by a loader if the kernel should run
> +	 * at the loaded address instead of the linked address.  This
> +	 * is used by kexec-tools to keep the the kdump kernel in the
> +	 * crash_kernel region.  The loader is responsible for
> +	 * observing the alignment requirement.
> +	 */
> +	/* Do not move this variable as kexec-tools knows about it. */
> +	. = 0x5c
> +	.globl	__run_at_load
> +__run_at_load:
> +	.long	0x72756e30	/* "run0" -- relocate to 0 by default */
> +#endif
> +
>  	. = 0x60
>  /*
>   * The following code is used to hold secondary processors
> @@ -1391,8 +1399,8 @@ _STATIC(__after_prom_start)
>  	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
>  	sldi	r25,r25,32
>  #ifdef CONFIG_CRASH_DUMP
> -	ld	r7,__kdump_flag-_stext(r26)
> -	cmpldi	cr0,r7,1	/* kdump kernel ? - stay where we are */
> +	lwz	r7,__run_at_load-_stext(0)
> +	cmplwi	cr0,r7,1	/* kdump kernel ? - stay where we are */

Do we really want the flag to always be at 0x5c not 0x5c + kernel offset?

Also, comment "kdump kernel" needs to be updated to reflect the new
name.  

Other than that, the patch series works for me.

Mikey

>  	bne	1f
>  	add	r25,r25,r26
>  #endif
> @@ -1416,11 +1424,11 @@ _STATIC(__after_prom_start)
>  #ifdef CONFIG_CRASH_DUMP
>  /*
>   * Check if the kernel has to be running as relocatable kernel based on the
> - * variable __kdump_flag, if it is set the kernel is treated as relocatable
> + * variable __run_at_load, if it is set the kernel is treated as relocatable
>   * kernel, otherwise it will be moved to PHYSICAL_START
>   */
> -	ld	r7,__kdump_flag-_stext(r26)
> -	cmpldi	cr0,r7,1
> +	lwz	r7,__run_at_load-_stext(0)
> +	cmplwi	cr0,r7,1
>  	bne	3f
>  
>  	li	r5,__end_interrupts - _stext	/* just copy interrupts */
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable
  2008-10-22 20:39 ` [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable Milton Miller
  2008-10-23  3:23   ` Michael Neuling
@ 2008-10-23  3:32   ` Paul Mackerras
  2008-10-23  3:43     ` Paul Mackerras
  2008-10-23 15:15   ` Mohan Kumar M
  2 siblings, 1 reply; 27+ messages in thread
From: Paul Mackerras @ 2008-10-23  3:32 UTC (permalink / raw)
  To: Milton Miller; +Cc: Simon Horman, kexec, linuxppc-dev

Milton Miller writes:

> Move the flag to 0x5c, 1 word before the secondary cpu entry point at
> 0x60.  Use the copy at address 0 not the one in the base kernel image to
> make it easier on kexec-tools.

Why is it easier on kexec-tools?  Doesn't kexec-tools know where it
put the kernel?

I'd much rather keep the flag inside the kdump kernel image, rather
than having kexec/kdump start using random fixed locations outside the
new kernel image.

Paul.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable
  2008-10-23  3:32   ` Paul Mackerras
@ 2008-10-23  3:43     ` Paul Mackerras
  2008-10-24  4:41       ` Michael Neuling
  2008-11-07 13:52       ` Milton Miller
  0 siblings, 2 replies; 27+ messages in thread
From: Paul Mackerras @ 2008-10-23  3:43 UTC (permalink / raw)
  To: Milton Miller, Ben Herrenschmidt, linuxppc-dev, Michael Ellerman,
	kexec, Simon Horman

Paul Mackerras writes:
> Milton Miller writes:
> 
> > Move the flag to 0x5c, 1 word before the secondary cpu entry point at
> > 0x60.  Use the copy at address 0 not the one in the base kernel image to
> > make it easier on kexec-tools.
> 
> Why is it easier on kexec-tools?  Doesn't kexec-tools know where it
> put the kernel?
> 
> I'd much rather keep the flag inside the kdump kernel image, rather
> than having kexec/kdump start using random fixed locations outside the
> new kernel image.

In fact the clinching argument is that when the kernel is loaded by OF
or yaboot, we have no way to tell what will be at location 0x5c,
whereas we know that the word at offset 0x5c in the kernel image will
have been initialized to 0.  So we had better put the flag inside the
kernel image.

Paul.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable
  2008-10-22 20:39 ` [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable Milton Miller
  2008-10-23  3:23   ` Michael Neuling
  2008-10-23  3:32   ` Paul Mackerras
@ 2008-10-23 15:15   ` Mohan Kumar M
  2008-11-07 13:59     ` Milton Miller
  2 siblings, 1 reply; 27+ messages in thread
From: Mohan Kumar M @ 2008-10-23 15:15 UTC (permalink / raw)
  To: Milton Miller; +Cc: kexec, linuxppc-dev, Simon Horman, Paul Mackerras

Hi Milton,

My suggestions:

Milton Miller wrote:
> The __kdump_flag ABI is overly constraining for future development.  
> 
> As of 2.6.27, the kernel entry point has 4 constraints:  Offset 0 is
> the starting point for the master (boot) cpu (entered with r3 pointing
> to the device tree structure), offset 0x60 is code for the slave cpus
> (entered with r3 set to their device tree physical id), offset 0x20 is
> used by the iseries hypervisor, and secondary cpus must be well behaved
> when the first 256 bytes are copied to address 0.
> 
> Placing the __kdump_flag at 0x18 is bad because:
> 
> - It was taking the last 8 bytes before the iseries hypervisor data.  
> - It was 8 bytes for a boolean flag
> - It had no way of identifying that the flag was present
> - It does not leave any room for the master to add any additional code
>   before branching, which hurts debug.
> - It will be unnecessarily hard for 32 bit code to be common (8 bytes)
> 
> Now that we have eliminated the use of __kdump_flag in favor of
> the standard is_kdump_kernel(), this flag only controls run without
> relocating the kernel to PHYSICAL_START (0), so rename it __run_at_load.
>
We could combine both of our approaches. Instead of passing the information 
that the next kernel should be relocatable from kexec_sequence to the purgatory 
code, we will do it from the kexec-tools path (following your approach). But 
instead of setting the __run_at_load value in the purgatory code (i.e. at 
physical address 0x5c), we will set the variable __run_at_load in the kernel 
image itself.

i.e.,
[code snip 1]
	lwz	r7,__run_at_load-_stext(r26)
	cmplwi	cr0,r7,1	/* kdump kernel ? - stay where we are */
  	bne	1f
  	add	r25,r25,r26

	lwz	r7,__run_at_load-_stext(r26)
	cmplwi	cr0,r7,1
  	bne	3f

kexec-tools
[code snip 2]
	LOADADDR(6,run_at_load)
	ld	18,0(6)
	cmpd	18,1
	bne	skip
	li	7,1
	stw	7,92(4)		# mark __run_at_load flag at kernel
skip:
	lwz	7,0(4)		# get the first instruction that we stole
	stw	7,0(0)		# and put it in the slave loop at 0
  				# skip cache flush, do we care?

[code snip 3]
	if (info->kexec_flags & KEXEC_ON_CRASH) {
		....
		elf_rel_set_symbol(&info->rhdr, "run_at_load",
                                 &my_run_at_load, 							 
sizeof(my_run_at_load));
	}

Using this approach we are not breaking the kexec_sequence ABI and we 
are directly modifying the flag in the kernel image.

Regards,
Mohan.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable
  2008-10-23  3:43     ` Paul Mackerras
@ 2008-10-24  4:41       ` Michael Neuling
  2008-11-07 13:52       ` Milton Miller
  1 sibling, 0 replies; 27+ messages in thread
From: Michael Neuling @ 2008-10-24  4:41 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: kexec, Milton Miller, linuxppc-dev, Simon Horman

From: Milton Miller <miltonm@bga.com>

The __kdump_flag ABI is overly constraining for future development.  

As of 2.6.27, the kernel entry point has 4 constraints:  Offset 0 is
the starting point for the master (boot) cpu (entered with r3 pointing
to the device tree structure), offset 0x60 is code for the slave cpus
(entered with r3 set to their device tree physical id), offset 0x20 is
used by the iseries hypervisor, and secondary cpus must be well behaved
when the first 256 bytes are copied to address 0.

Placing the __kdump_flag at 0x18 is bad because:

- It was taking the last 8 bytes before the iseries hypervisor data.  
- It was 8 bytes for a boolean flag
- It had no way of identifying that the flag was present
- It does not leave any room for the master to add any additional code
  before branching, which hurts debug.
- It will be unnecessarily hard for 32 bit code to be common (8 bytes)

Now that we have eliminated the use of __kdump_flag in favor of
the standard is_kdump_kernel(), this flag only controls run without
relocating the kernel to PHYSICAL_START (0), so rename it __run_at_load.

Move the flag to 0x5c, 1 word before the secondary cpu entry point at
0x60.  Initialize it with "run0" to say it will run at 0 unless it is
set to 1.  It only exists if we are relocatable.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/kernel/head_64.S |   30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

As discussed, this changes the __run_at_load location to 
0x5c + load offset rather than 0x5c + 0.

Index: linux-2.6-ozlabs/arch/powerpc/kernel/head_64.S
===================================================================
--- linux-2.6-ozlabs.orig/arch/powerpc/kernel/head_64.S
+++ linux-2.6-ozlabs/arch/powerpc/kernel/head_64.S
@@ -104,12 +104,6 @@ __secondary_hold_spinloop:
 __secondary_hold_acknowledge:
 	.llong	0x0
 
-	/* This flag is set by purgatory if we should be a kdump kernel. */
-	/* Do not move this variable as purgatory knows about it. */
-	.globl	__kdump_flag
-__kdump_flag:
-	.llong	0x0
-
 #ifdef CONFIG_PPC_ISERIES
 	/*
 	 * At offset 0x20, there is a pointer to iSeries LPAR data.
@@ -119,6 +113,20 @@ __kdump_flag:
 	.llong hvReleaseData-KERNELBASE
 #endif /* CONFIG_PPC_ISERIES */
 
+#ifdef CONFIG_CRASH_DUMP
+	/* This flag is set to 1 by a loader if the kernel should run
+	 * at the loaded address instead of the linked address.  This
+	 * is used by kexec-tools to keep the the kdump kernel in the
+	 * crash_kernel region.  The loader is responsible for
+	 * observing the alignment requirement.
+	 */
+	/* Do not move this variable as kexec-tools knows about it. */
+	. = 0x5c
+	.globl	__run_at_load
+__run_at_load:
+	.long	0x72756e30	/* "run0" -- relocate to 0 by default */
+#endif
+
 	. = 0x60
 /*
  * The following code is used to hold secondary processors
@@ -1407,8 +1415,8 @@ _STATIC(__after_prom_start)
 	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
 	sldi	r25,r25,32
 #ifdef CONFIG_CRASH_DUMP
-	ld	r7,__kdump_flag-_stext(r26)
-	cmpldi	cr0,r7,1	/* kdump kernel ? - stay where we are */
+	lwz	r7,__run_at_load-_stext(r26)
+	cmplwi	cr0,r7,1	/* kdump kernel ? - stay where we are */
 	bne	1f
 	add	r25,r25,r26
 #endif
@@ -1432,11 +1440,11 @@ _STATIC(__after_prom_start)
 #ifdef CONFIG_CRASH_DUMP
 /*
  * Check if the kernel has to be running as relocatable kernel based on the
- * variable __kdump_flag, if it is set the kernel is treated as relocatable
+ * variable __run_at_load, if it is set the kernel is treated as relocatable
  * kernel, otherwise it will be moved to PHYSICAL_START
  */
-	ld	r7,__kdump_flag-_stext(r26)
-	cmpldi	cr0,r7,1
+	lwz	r7,__run_at_load-_stext(r26)
+	cmplwi	cr0,r7,1
 	bne	3f
 
 	li	r5,__end_interrupts - _stext	/* just copy interrupts */

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable
  2008-10-23  3:43     ` Paul Mackerras
  2008-10-24  4:41       ` Michael Neuling
@ 2008-11-07 13:52       ` Milton Miller
  1 sibling, 0 replies; 27+ messages in thread
From: Milton Miller @ 2008-11-07 13:52 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: Simon Horman, kexec, linuxppc-dev

On Oct 22, 2008, at 10:43 PM, Paul Mackerras wrote:
> Paul Mackerras writes:
>> Milton Miller writes:
>>> Move the flag to 0x5c, 1 word before the secondary cpu entry point at
>>> 0x60.  Use the copy at address 0 not the one in the base kernel 
>>> image to
>>> make it easier on kexec-tools.
>>
>> Why is it easier on kexec-tools?  Doesn't kexec-tools know where it
>> put the kernel?

The architecture code calls cross-platform code to identify what is
loaded.   It isn't specified if this is a shared mmap or a read into
a buffer.

>>
>> I'd much rather keep the flag inside the kdump kernel image, rather
>> than having kexec/kdump start using random fixed locations outside the
>> new kernel image.
>
> In fact the clinching argument is that when the kernel is loaded by OF
> or yaboot, we have no way to tell what will be at location 0x5c,
> whereas we know that the word at offset 0x5c in the kernel image will
> have been initialized to 0.  So we had better put the flag inside the
> kernel image.

Well, prom_init will copy the 256 bytes to 0 before the code checks
that location.

However, there is an argument for using the same code from an epapr
or book-e relocatable, and that would need it at 0.   And today
the kexec tool does not do a shared mmap.  Since the change has
been made, I will make a new patch for kexec-tools.

milton

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable
  2008-10-23 15:15   ` Mohan Kumar M
@ 2008-11-07 13:59     ` Milton Miller
  2008-11-10 15:22       ` Mohan Kumar M
  0 siblings, 1 reply; 27+ messages in thread
From: Milton Miller @ 2008-11-07 13:59 UTC (permalink / raw)
  To: Mohan Kumar M; +Cc: kexec, linuxppc-dev, Simon Horman, Paul Mackerras

On Oct 23, 2008, at 10:15 AM, Mohan Kumar M wrote:
> Hi Milton,
> My suggestions:
> Milton Miller wrote:
>> The __kdump_flag ABI is overly constraining for future development.
...
>> Now that we have eliminated the use of __kdump_flag in favor of
>> the standard is_kdump_kernel(), this flag only controls run without
>> relocating the kernel to PHYSICAL_START (0), so rename it 
>> __run_at_load.
>>
> We could try both of our approaches. Instead of passing the 
> information that next kernel should be relocatable from kexec_sequence 
> to purgatory code, we will do it from kexec-tools path (following your 
> approach). But instead of setting the __run_at_load value in the 
> purgatory code (ie at physical address 0x5c), we will set the variable 
> __run_at_load at kernel  image itself.
>
> i.e.,
> [code snip 1]
> 	lwz	r7,__run_at_load-_stext(r26)
> 	cmplwi	cr0,r7,1	/* kdump kernel ? - stay where we are */
>  	bne	1f
>  	add	r25,r25,r26
>
> 	lwz	r7,__run_at_load-_stext(r26)
> 	cmplwi	cr0,r7,1
>  	bne	3f
>
> kexec-tools
> [code snip 2]
> 	LOADADDR(6,run_at_load)
> 	ld	18,0(6)
> 	cmpd	18,1
> 	bne	skip
> 	li	7,1
> 	stw	7,92(4)		# mark __run_at_load flag at kernel
> skip:
> 	lwz	7,0(4)		# get the first instruction that we stole
> 	stw	7,0(0)		# and put it in the slave loop at 0
>  				# skip cache flush, do we care?
>
> [code snip 3]
> 	if (info->kexec_flags & KEXEC_ON_CRASH) {
> 		....
> 		elf_rel_set_symbol(&info->rhdr, "run_at_load",
>                                 &my_run_at_load, 							 
> sizeof(my_run_at_load));
> 	}


This elf_rel_set_symbol sets the copy in purgatory,
after we have copied the code from the kernel.  It
is this copy that gets copied to address 0.

However this information is not in the code that
is at the start of the kernel.  We don't have any
symbols for the kernel itself, it might be stripped.
So we can't use the elf_set_symbol api.  (The kernel
may not be relocatable either).

> Using this approach we are not breaking the kexec_sequence
> ABI and we are directly modifying the flag in the kernel image.
>
> Regards,
> Mohan.

I'll prepare a patch, but it might be a few days
while I catch up from my 2 week vacation.

milton

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable
  2008-11-07 13:59     ` Milton Miller
@ 2008-11-10 15:22       ` Mohan Kumar M
  2008-11-11 16:06         ` Milton Miller
  0 siblings, 1 reply; 27+ messages in thread
From: Mohan Kumar M @ 2008-11-10 15:22 UTC (permalink / raw)
  To: Milton Miller; +Cc: kexec, linuxppc-dev, Simon Horman, Paul Mackerras

Milton Miller wrote:
> On Oct 23, 2008, at 10:15 AM, Mohan Kumar M wrote:
>> Hi Milton,
>> My suggestions:
>> Milton Miller wrote:
>>
>> i.e.,
>> [code snip 1]
>> 	lwz	r7,__run_at_load-_stext(r26)
>> 	cmplwi	cr0,r7,1	/* kdump kernel ? - stay where we are */
>>  	bne	1f
>>  	add	r25,r25,r26
>>
>> 	lwz	r7,__run_at_load-_stext(r26)
>> 	cmplwi	cr0,r7,1
>>  	bne	3f
>>
>> kexec-tools
>> [code snip 2]
>> 	LOADADDR(6,run_at_load)
>> 	ld	18,0(6)
>> 	cmpd	18,1
>> 	bne	skip
>> 	li	7,1
>> 	stw	7,92(4)		# mark __run_at_load flag at kernel
>> skip:
>> 	lwz	7,0(4)		# get the first instruction that we stole
>> 	stw	7,0(0)		# and put it in the slave loop at 0
>>  				# skip cache flush, do we care?
>>
>> [code snip 3]
>> 	if (info->kexec_flags & KEXEC_ON_CRASH) {
>> 		....
>> 		elf_rel_set_symbol(&info->rhdr, "run_at_load",
>>                                 &my_run_at_load, 							 
>> sizeof(my_run_at_load));
>> 	}
> 
> 
> This elf_rel_set_symbol sets the copy in purgatory,
> after we have copied the code from the kernel.  It
> is this copy that gets copied to address 0.
> 

Yes, elf_rel_set_symbol sets the copy in purgatory. But the following code 
in purgatory (to be introduced)

  	LOADADDR(6,run_at_load)
  	ld	18,0(6)
  	cmpd	18,1
  	bne	skip
  	li	7,1
  	stw	7,92(4)		# mark __run_at_load flag at kernel

will set the __run_at_load in the kernel image (i.e. wherever the kernel is 
loaded + 0x5c (92)). Or am I missing something?

> However this information is not in the code that
> is at the start of the kernel.  We don't have any
> symbols for the kernel itself, it might be stripped.
> So we can't use the elf_set_symbol api.  (The kernel
> may not be relocatable either).

Regards,
Mohan.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable
  2008-11-10 15:22       ` Mohan Kumar M
@ 2008-11-11 16:06         ` Milton Miller
  0 siblings, 0 replies; 27+ messages in thread
From: Milton Miller @ 2008-11-11 16:06 UTC (permalink / raw)
  To: Mohan Kumar M; +Cc: kexec, linuxppc-dev, Simon Horman, Paul Mackerras

On Nov 10, 2008, at 9:22 AM, Mohan Kumar M wrote:
> Yes, elf_rel_set_symbol sets the copy in purgatory. But the following code 
> in purgatory (to be introduced)
>
>  	LOADADDR(6,run_at_load)
>  	ld	18,0(6)
>  	cmpd	18,1
>  	bne	skip
>  	li	7,1
>  	stw	7,92(4)		# mark __run_at_load flag at kernel
>
> will set the __run_at_load in the kernel image (i.e. wherever the kernel 
> is loaded + 0x5c (92)). Or am I missing something?

That would work, but I prefer to keep the change in the userspace side. 
  Partly because I don't want to link setting the relocatable flag to 
purgatory starting a dump kernel, and partly because I think 
kexec-tools should be verifying that the loaded kernel will run where 
it expects, either by finding the relocatable flag, inspecting the 
elf header for the linked address, or some other method (like elf type 
is dynamic for some platforms).  Oh, and it's more readable in C.

If someone switches the common code to mmap files instead of reading 
them, then we will just have to make sure they use MAP_PRIVATE instead 
of MAP_SHARED.  Today it's not an issue.

milton

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2008-11-11 16:19 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-22  4:56 [PATCH] Support for relocatable kdump kernel Milton Miller
2008-10-22 20:39 ` [PATCH 3/3] powerpc/ppc64/kdump: better flag for running relocatable Milton Miller
2008-10-23  3:23   ` Michael Neuling
2008-10-23  3:32   ` Paul Mackerras
2008-10-23  3:43     ` Paul Mackerras
2008-10-24  4:41       ` Michael Neuling
2008-11-07 13:52       ` Milton Miller
2008-10-23 15:15   ` Mohan Kumar M
2008-11-07 13:59     ` Milton Miller
2008-11-10 15:22       ` Mohan Kumar M
2008-11-11 16:06         ` Milton Miller
2008-10-22 20:39 ` [PATCH 2/2 kexec-tools] ppc64: segemments are sorted Milton Miller
2008-10-22 20:47   ` Milton Miller
2008-10-22 20:39 ` [PATCH 1/2 kexec-tools] ppc64: new relocatble kernel activation ABI Milton Miller
2008-10-22 20:39 ` [PATCH 1/3] powerpc: kexec exit should not use magic numbers Milton Miller
2008-10-22 23:18   ` Simon Horman
  -- strict thread matches above, loose matches on Subject: below --
2008-10-22  3:38 [PATCH] Support for relocatable kdump kernel Michael Ellerman
     [not found] <18684.5062.154465.668614@drongo.ozlabs.ibm.com>
2008-10-20  6:43 ` Michael Ellerman
2008-10-20  9:34   ` Mohan Kumar M
2008-10-21  6:03     ` Michael Ellerman
2008-10-21 18:21       ` Mohan Kumar M
2008-10-12 23:34 Mohan Kumar M
2008-10-13  1:30 ` Paul Mackerras
2008-10-16 10:33   ` Mohan Kumar M
2008-10-01 18:26 Mohan Kumar M
2008-10-09  5:27 ` Paul Mackerras
2008-10-09 16:35   ` Mohan Kumar M

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).