* [PATCH 1/2] ARM: improvements to compressed/head.S
@ 2011-02-16 21:39 Nicolas Pitre
2011-02-16 21:39 ` [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel Nicolas Pitre
0 siblings, 1 reply; 14+ messages in thread
From: Nicolas Pitre @ 2011-02-16 21:39 UTC (permalink / raw)
To: linux-arm-kernel
In the case of a conflict between the memory used by the compressed
kernel with its decompressor code and the memory used for the
decompressed kernel, we currently store the later after the former and
relocate it afterwards.
This would be more efficient to do this the other way around i.e.
relocate the compressed data up front instead, resulting in a smaller
copy. That also has the advantage of making the code smaller and more
straight forward.
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
arch/arm/boot/compressed/head.S | 239 ++++++++++++++++++---------------------
1 files changed, 110 insertions(+), 129 deletions(-)
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 7193884..200625c 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -174,9 +174,7 @@ not_angel:
*/
.text
- adr r0, LC0
- ldmia r0, {r1, r2, r3, r5, r6, r11, ip}
- ldr sp, [r0, #28]
+
#ifdef CONFIG_AUTO_ZRELADDR
@ determine final kernel image address
mov r4, pc
@@ -185,35 +183,108 @@ not_angel:
#else
ldr r4, =zreladdr
#endif
- subs r0, r0, r1 @ calculate the delta offset
- @ if delta is zero, we are
- beq not_relocated @ running at the address we
- @ were linked at.
+ bl cache_on
+
+restart: adr r0, LC0
+ ldmia r0, {r1, r2, r3, r5, r6, r9, r11, r12}
+ ldr sp, [r0, #32]
+
+ /*
+ * We might be running at a different address. We need
+ * to fix up various pointers.
+ */
+ sub r0, r0, r1 @ calculate the delta offset
+ add r5, r5, r0 @ _start
+ add r6, r6, r0 @ _edata
+#ifndef CONFIG_ZBOOT_ROM
+ /* malloc space is above the relocated stack (64k max) */
+ add sp, sp, r0
+ add r10, sp, #0x10000
+#else
/*
- * We're running at a different address. We need to fix
- * up various pointers:
- * r5 - zImage base address (_start)
- * r6 - size of decompressed image
- * r11 - GOT start
- * ip - GOT end
+ * With ZBOOT_ROM the bss/stack is non relocatable,
+ * but someone could still run this code from RAM,
+ * in which case our reference is _edata.
*/
- add r5, r5, r0
+ mov r10, r6
+#endif
+
+/*
+ * Check to see if we will overwrite ourselves.
+ * r4 = final kernel address
+ * r5 = start of this image
+ * r9 = size of decompressed image
+ * r10 = end of this image, including bss/stack/malloc space if non XIP
+ * We basically want:
+ * r4 >= r10 -> OK
+ * r4 + image length <= r5 -> OK
+ */
+ cmp r4, r10
+ bhs wont_overwrite
+ add r10, r4, r9
+ cmp r10, r5
+ bls wont_overwrite
+
+/*
+ * Relocate ourselves past the end of the decompressed kernel.
+ * r5 = start of this image
+ * r6 = _edata
+ * r10 = end of the decompressed kernel
+ * Because we always copy ahead, we need to do it from the end and go
+ * backward in case the source and destination overlap.
+ */
+ /* Round up to next 256-byte boundary. */
+ add r10, r10, #256
+ bic r10, r10, #255
+
+ sub r9, r6, r5 @ size to copy
+ add r9, r9, #31 @ rounded up to a multiple
+ bic r9, r9, #31 @ ... of 32 bytes
+ add r6, r9, r5
+ add r9, r9, r10
+
+1: ldmdb r6!, {r0 - r3, r10 - r12, lr}
+ cmp r6, r5
+ stmdb r9!, {r0 - r3, r10 - r12, lr}
+ bhi 1b
+
+ /* Preserve offset to relocated code. */
+ sub r6, r9, r6
+
+ bl cache_clean_flush
+
+ adr r0, BSYM(restart)
+ add r0, r0, r6
+ mov pc, r0
+
+wont_overwrite:
+/*
+ * If delta is zero, we are running at the address we were linked at.
+ * r0 = delta
+ * r2 = BSS start
+ * r3 = BSS end
+ * r4 = kernel execution address
+ * r7 = architecture ID
+ * r8 = atags pointer
+ * r11 = GOT start
+ * r12 = GOT end
+ * sp = stack pointer
+ */
+ teq r0, #0
+ beq not_relocated
add r11, r11, r0
- add ip, ip, r0
+ add r12, r12, r0
#ifndef CONFIG_ZBOOT_ROM
/*
* If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
* we need to fix up pointers into the BSS region.
- * r2 - BSS start
- * r3 - BSS end
- * sp - stack pointer
+ * Note that the stack pointer has already been fixed up.
*/
add r2, r2, r0
add r3, r3, r0
- add sp, sp, r0
/*
* Relocate all entries in the GOT table.
@@ -221,7 +292,7 @@ not_angel:
1: ldr r1, [r11, #0] @ relocate entries in the GOT
add r1, r1, r0 @ table. This fixes up the
str r1, [r11], #4 @ C references.
- cmp r11, ip
+ cmp r11, r12
blo 1b
#else
@@ -234,7 +305,7 @@ not_angel:
cmphs r3, r1 @ _end < entry
addlo r1, r1, r0 @ table. This fixes up the
str r1, [r11], #4 @ C references.
- cmp r11, ip
+ cmp r11, r12
blo 1b
#endif
@@ -246,76 +317,24 @@ not_relocated: mov r0, #0
cmp r2, r3
blo 1b
- /*
- * The C runtime environment should now be setup
- * sufficiently. Turn the cache on, set up some
- * pointers, and start decompressing.
- */
- bl cache_on
-
- mov r1, sp @ malloc space above stack
- add r2, sp, #0x10000 @ 64k max
-
/*
- * Check to see if we will overwrite ourselves.
- * r4 = final kernel address
- * r5 = start of this image
- * r6 = size of decompressed image
- * r2 = end of malloc space (and therefore this image)
- * We basically want:
- * r4 >= r2 -> OK
- * r4 + image length <= r5 -> OK
+ * The C runtime environment should now be setup sufficiently.
+ * Set up some pointers, and start decompressing.
+ * r4 = kernel execution address
+ * r7 = architecture ID
+ * r8 = atags pointer
*/
- cmp r4, r2
- bhs wont_overwrite
- add r0, r4, r6
- cmp r0, r5
- bls wont_overwrite
-
- mov r5, r2 @ decompress after malloc space
- mov r0, r5
+ mov r0, r4
+ mov r1, sp @ malloc space above stack
+ add r2, sp, #0x10000 @ 64k max
mov r3, r7
bl decompress_kernel
-
- add r0, r0, #127 + 128 @ alignment + stack
- bic r0, r0, #127 @ align the kernel length
-/*
- * r0 = decompressed kernel length
- * r1-r3 = unused
- * r4 = kernel execution address
- * r5 = decompressed kernel start
- * r7 = architecture ID
- * r8 = atags pointer
- * r9-r12,r14 = corrupted
- */
- add r1, r5, r0 @ end of decompressed kernel
- adr r2, reloc_start
- ldr r3, LC1
- add r3, r2, r3
-1: ldmia r2!, {r9 - r12, r14} @ copy relocation code
- stmia r1!, {r9 - r12, r14}
- ldmia r2!, {r9 - r12, r14}
- stmia r1!, {r9 - r12, r14}
- cmp r2, r3
- blo 1b
- mov sp, r1
- add sp, sp, #128 @ relocate the stack
-
bl cache_clean_flush
- ARM( add pc, r5, r0 ) @ call relocation code
- THUMB( add r12, r5, r0 )
- THUMB( mov pc, r12 ) @ call relocation code
-
-/*
- * We're not in danger of overwriting ourselves. Do this the simple way.
- *
- * r4 = kernel execution address
- * r7 = architecture ID
- */
-wont_overwrite: mov r0, r4
- mov r3, r7
- bl decompress_kernel
- b call_kernel
+ bl cache_off
+ mov r0, #0 @ must be zero
+ mov r1, r7 @ restore architecture number
+ mov r2, r8 @ restore atags pointer
+ mov pc, r4 @ call kernel
.align 2
.type LC0, #object
@@ -323,11 +342,11 @@ LC0: .word LC0 @ r1
.word __bss_start @ r2
.word _end @ r3
.word _start @ r5
- .word _image_size @ r6
+ .word _edata @ r6
+ .word _image_size @ r9
.word _got_start @ r11
.word _got_end @ ip
.word user_stack_end @ sp
-LC1: .word reloc_end - reloc_start
.size LC0, . - LC0
#ifdef CONFIG_ARCH_RPC
@@ -353,7 +372,7 @@ params: ldr r0, =0x10000100 @ params_phys for RPC
* On exit,
* r0, r1, r2, r3, r9, r10, r12 corrupted
* This routine must preserve:
- * r4, r5, r6, r7, r8
+ * r4, r7, r8
*/
.align 5
cache_on: mov r3, #8 @ cache_on function
@@ -551,43 +570,6 @@ __common_mmu_cache_on:
#endif
/*
- * All code following this line is relocatable. It is relocated by
- * the above code to the end of the decompressed kernel image and
- * executed there. During this time, we have no stacks.
- *
- * r0 = decompressed kernel length
- * r1-r3 = unused
- * r4 = kernel execution address
- * r5 = decompressed kernel start
- * r7 = architecture ID
- * r8 = atags pointer
- * r9-r12,r14 = corrupted
- */
- .align 5
-reloc_start: add r9, r5, r0
- sub r9, r9, #128 @ do not copy the stack
- debug_reloc_start
- mov r1, r4
-1:
- .rept 4
- ldmia r5!, {r0, r2, r3, r10 - r12, r14} @ relocate kernel
- stmia r1!, {r0, r2, r3, r10 - r12, r14}
- .endr
-
- cmp r5, r9
- blo 1b
- mov sp, r1
- add sp, sp, #128 @ relocate the stack
- debug_reloc_end
-
-call_kernel: bl cache_clean_flush
- bl cache_off
- mov r0, #0 @ must be zero
- mov r1, r7 @ restore architecture number
- mov r2, r8 @ restore atags pointer
- mov pc, r4 @ call kernel
-
-/*
* Here follow the relocatable cache support functions for the
* various processors. This is a generic hook for locating an
* entry and jumping to an instruction at the specified offset
@@ -791,7 +773,7 @@ proc_types:
* On exit,
* r0, r1, r2, r3, r9, r12 corrupted
* This routine must preserve:
- * r4, r6, r7
+ * r4, r7, r8
*/
.align 5
cache_off: mov r3, #12 @ cache_off function
@@ -866,7 +848,7 @@ __armv3_mmu_cache_off:
* On exit,
* r1, r2, r3, r9, r10, r11, r12 corrupted
* This routine must preserve:
- * r0, r4, r5, r6, r7
+ * r4, r6, r7, r8
*/
.align 5
cache_clean_flush:
@@ -1088,7 +1070,6 @@ memdump: mov r12, r0
#endif
.ltorg
-reloc_end:
.align
.section ".stack", "aw", %nobits
--
1.7.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-16 21:39 [PATCH 1/2] ARM: improvements to compressed/head.S Nicolas Pitre
@ 2011-02-16 21:39 ` Nicolas Pitre
2011-02-16 21:55 ` Stephen Boyd
` (2 more replies)
0 siblings, 3 replies; 14+ messages in thread
From: Nicolas Pitre @ 2011-02-16 21:39 UTC (permalink / raw)
To: linux-arm-kernel
We currently presume a 4x expansion to guess the decompressed kernel size
in order to determine if the decompressed kernel is in conflict with
the location where zImage is loaded. This guess may cause many issues
by overestimating the final kernel image size:
- This may force a needless relocation if the location of zImage was
fine, wasting some precious microseconds of boot time.
- The relocation may be located way too far, possibly overwriting the
initrd image in RAM.
- If the kernel image includes a large already-compressed initramfs image
then the problem is even more exacerbated.
And if by some strange means the 4x guess is too low then we may overwrite
ourselves with the decompressed image.
So let's use the exact decompressed kernel image size instead. For that
we need to rely on the stat command, but this is hardly a new build
dependency as the kernel already depends on many commands provided by
the same coreutils package where stat is found to be built.
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
arch/arm/boot/compressed/Makefile | 4 +++-
arch/arm/boot/compressed/vmlinux.lds.in | 3 ---
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 0a8f748..9d328be 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -83,9 +83,11 @@ endif
EXTRA_CFLAGS := -fpic -fno-builtin
EXTRA_AFLAGS := -Wa,-march=all
+# Provide size of uncompressed kernel to the decompressor via a linker symbol.
+LDFLAGS_vmlinux := --defsym _image_size=$(shell stat -c "%s" $(obj)/../Image)
# Supply ZRELADDR to the decompressor via a linker symbol.
ifneq ($(CONFIG_AUTO_ZRELADDR),y)
-LDFLAGS_vmlinux := --defsym zreladdr=$(ZRELADDR)
+LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR)
endif
ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
LDFLAGS_vmlinux += --be8
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index 366a924..5309909 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -43,9 +43,6 @@ SECTIONS
_etext = .;
- /* Assume size of decompressed image is 4x the compressed image */
- _image_size = (_etext - _text) * 4;
-
_got_start = .;
.got : { *(.got) }
_got_end = .;
--
1.7.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-16 21:39 ` [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel Nicolas Pitre
@ 2011-02-16 21:55 ` Stephen Boyd
2011-02-16 22:11 ` Nicolas Pitre
2011-02-17 0:09 ` Grant Likely
2011-03-08 6:37 ` Jean-Christophe PLAGNIOL-VILLARD
2 siblings, 1 reply; 14+ messages in thread
From: Stephen Boyd @ 2011-02-16 21:55 UTC (permalink / raw)
To: linux-arm-kernel
On 02/16/2011 01:39 PM, Nicolas Pitre wrote:
> -LDFLAGS_vmlinux := --defsym zreladdr=$(ZRELADDR)
> +LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR)
What is this for?
--
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-16 21:55 ` Stephen Boyd
@ 2011-02-16 22:11 ` Nicolas Pitre
2011-02-16 22:14 ` Stephen Boyd
0 siblings, 1 reply; 14+ messages in thread
From: Nicolas Pitre @ 2011-02-16 22:11 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, 16 Feb 2011, Stephen Boyd wrote:
> On 02/16/2011 01:39 PM, Nicolas Pitre wrote:
> > -LDFLAGS_vmlinux := --defsym zreladdr=$(ZRELADDR)
> > +LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR)
>
> What is this for?
It replaces the assignment operator with an addition operator ?
Nicolas
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-16 22:11 ` Nicolas Pitre
@ 2011-02-16 22:14 ` Stephen Boyd
0 siblings, 0 replies; 14+ messages in thread
From: Stephen Boyd @ 2011-02-16 22:14 UTC (permalink / raw)
To: linux-arm-kernel
On 2/16/2011 2:11 PM, Nicolas Pitre wrote:
> On Wed, 16 Feb 2011, Stephen Boyd wrote:
>
>> On 02/16/2011 01:39 PM, Nicolas Pitre wrote:
>>> -LDFLAGS_vmlinux := --defsym zreladdr=$(ZRELADDR)
>>> +LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR)
>> What is this for?
> It replaces the assignment operator with an addition operator ?
Ah sorry, ignore me.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-16 21:39 ` [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel Nicolas Pitre
2011-02-16 21:55 ` Stephen Boyd
@ 2011-02-17 0:09 ` Grant Likely
2011-02-17 1:48 ` Nicolas Pitre
2011-02-17 9:26 ` Russell King - ARM Linux
2011-03-08 6:37 ` Jean-Christophe PLAGNIOL-VILLARD
2 siblings, 2 replies; 14+ messages in thread
From: Grant Likely @ 2011-02-17 0:09 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Feb 16, 2011 at 04:39:03PM -0500, Nicolas Pitre wrote:
> We currently presume a 4x expansion to guess the decompressed kernel size
> in order to determine if the decompressed kernel is in conflict with
> the location where zImage is loaded. This guess may cause many issues
> by overestimating the final kernel image size:
>
> - This may force a needless relocation if the location of zImage was
> fine, wasting some precious microseconds of boot time.
>
> - The relocation may be located way too far, possibly overwriting the
> initrd image in RAM.
>
> - If the kernel image includes a large already-compressed initramfs image
> then the problem is even more exacerbated.
>
> And if by some strange means the 4x guess is too low then we may overwrite
> ourselves with the decompressed image.
>
> So let's use the exact decompressed kernel image size instead. For that
> we need to rely on the stat command, but this is hardly a new build
> dependency as the kernel already depends on many commands provided by
> the same coreutils package where stat is found to be built.
>
> Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
> ---
> arch/arm/boot/compressed/Makefile | 4 +++-
> arch/arm/boot/compressed/vmlinux.lds.in | 3 ---
> 2 files changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
> index 0a8f748..9d328be 100644
> --- a/arch/arm/boot/compressed/Makefile
> +++ b/arch/arm/boot/compressed/Makefile
> @@ -83,9 +83,11 @@ endif
> EXTRA_CFLAGS := -fpic -fno-builtin
> EXTRA_AFLAGS := -Wa,-march=all
>
> +# Provide size of uncompressed kernel to the decompressor via a linker symbol.
> +LDFLAGS_vmlinux := --defsym _image_size=$(shell stat -c "%s" $(obj)/../Image)
Patch looks good to me, but on a related note, I'm looking for a way
to also find the end of .bss.
When John looks at adding support for appending an initrd and/or dtb
to the zimage, it will be important to make sure the start of the
initrd/dtb does not overlap the ram the kernel is expecting to be
empty and available. I was thinking about simply grepping System.map
for __end and doing some sed magic to extract the offset from the
beginning of the kernel.
Do you think that is sufficient to protect appended data images?
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-17 0:09 ` Grant Likely
@ 2011-02-17 1:48 ` Nicolas Pitre
2011-02-17 4:33 ` Grant Likely
2011-02-17 9:26 ` Russell King - ARM Linux
1 sibling, 1 reply; 14+ messages in thread
From: Nicolas Pitre @ 2011-02-17 1:48 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, 16 Feb 2011, Grant Likely wrote:
> Patch looks good to me, but on a related note, I'm looking for a way
> to also find the end of .bss.
>
> When John looks at adding support for appending an initrd and/or dtb
> to the zimage, it will be important to make sure the start of the
> initrd/dtb does not overlap the ram the kernel is expecting to be
> empty and available. I was thinking about simply grepping System.map
> for __end and doing some sed magic to extract the offset from the
> beginning of the kernel.
I'd use the 'size' command instead:
$ arm-linux-size vmlinux
text data bss dec hex filename
3707759 135316 137808 3980883 3cbe53 vmlinux
> Do you think that is sufficient to protect appended data images?
Starting with my compressed/head.S rework, you'd have to:
1) Detect if there is some valuable data past _edata, and if so then
increase the cached _edata value in r6 accordingly. This way, if
zImage needs to relocate itself then that will include that data.
2) Modify the conditions and parameters for a zImage relocation to
ensure the end of the kernel .bss is below the start of your data.
3) Modify the relocation of .bss entries in the GOT table so .bss is
effectively moved after that appended data.
4) Tell the kernel about the data via extra ATAG/DTB entries.
Except for (4), this can be done with only a few assembly instructions.
And even in the ATAG case there is a good example in
arch/arm/boot/bootp/init.S for (4), but that could as well be done in C
instead, like the initial patch that John Bonesio did.
Nicolas
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-17 1:48 ` Nicolas Pitre
@ 2011-02-17 4:33 ` Grant Likely
0 siblings, 0 replies; 14+ messages in thread
From: Grant Likely @ 2011-02-17 4:33 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Feb 16, 2011 at 6:48 PM, Nicolas Pitre <nico@fluxnic.net> wrote:
> On Wed, 16 Feb 2011, Grant Likely wrote:
>
>> Patch looks good to me, but on a related note, I'm looking for a way
>> to also find the end of .bss.
>>
>> When John looks at adding support for appending an initrd and/or dtb
>> to the zimage, it will be important to make sure the start of the
>> initrd/dtb does not overlap the ram the kernel is expecting to be
>> empty and available. ?I was thinking about simply grepping System.map
>> for __end and doing some sed magic to extract the offset from the
>> beginning of the kernel.
>
> I'd use the 'size' command instead:
>
> $ arm-linux-size vmlinux
> ? text ? ?data ? ? bss ? ? dec ? ? hex filename
> 3707759 ?135316 ?137808 3980883 ?3cbe53 vmlinux
>
>> Do you think that is sufficient to protect appended data images?
>
> Starting with my compressed/head.S rework, you'd have to:
>
> 1) Detect if there is some valuable data past _edata, and if so then
> ? increase the cached _edata value in r6 accordingly. ?This way, if
> ? zImage needs to relocate itself then that will include that data.
>
> 2) Modify the conditions and parameters for a zImage relocation to
> ? ensure the end of the kernel .bss is below the start of your data.
>
> 3) Modify the relocation of .bss entries in the GOT table so .bss is
> ? effectively moved after that appended data.
>
> 4) Tell the kernel about the data via extra ATAG/DTB entries.
>
> Except for (4), this can be done with only a few assembly instructions.
> And even in the ATAG case there is a good example in
> arch/arm/boot/bootp/init.S for (4), but that could as well be done in C
> instead, like the initial patch that John Bonesio did.
Cool, that matches my thinking. We'll start working on this.
g.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-17 0:09 ` Grant Likely
2011-02-17 1:48 ` Nicolas Pitre
@ 2011-02-17 9:26 ` Russell King - ARM Linux
2011-02-17 17:51 ` Grant Likely
1 sibling, 1 reply; 14+ messages in thread
From: Russell King - ARM Linux @ 2011-02-17 9:26 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Feb 16, 2011 at 05:09:23PM -0700, Grant Likely wrote:
> Patch looks good to me, but on a related note, I'm looking for a way
> to also find the end of .bss.
>
> When John looks at adding support for appending an initrd and/or dtb
> to the zimage, it will be important to make sure the start of the
> initrd/dtb does not overlap the ram the kernel is expecting to be
> empty and available.
Appending data to the zImage has already been solved with the 'bootp'
stuff - and I'm afraid to say that merely catting a zImage with something
else just isn't going to work. There's no way for the zImage decompressor
to know that there's anything on the end of the image, and there's no way
for it to know where it can relocate itself to or where in memory it can
safely decompress to in such scenarios.
While such stuff may be possible with architectures where you know memory
always starts at zero and extends contiguously for N MB where N is some
number larger than 16, we don't have that luxury on ARM. Sometimes it
might be in 4MB chunks scattered throughout the physical address space
at maybe 256MB offset from zero. Or maybe 32MB chunks starting at 3GB.
etc.
And that's why the 'bootp' stuff takes a fixed address for where to
locate the initrd - the only person who knows where its safe to do so is
the person building the image for the platform.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-17 9:26 ` Russell King - ARM Linux
@ 2011-02-17 17:51 ` Grant Likely
2011-02-17 18:52 ` Russell King - ARM Linux
0 siblings, 1 reply; 14+ messages in thread
From: Grant Likely @ 2011-02-17 17:51 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Feb 17, 2011 at 09:26:17AM +0000, Russell King - ARM Linux wrote:
> On Wed, Feb 16, 2011 at 05:09:23PM -0700, Grant Likely wrote:
> > Patch looks good to me, but on a related note, I'm looking for a way
> > to also find the end of .bss.
> >
> > When John looks at adding support for appending an initrd and/or dtb
> > to the zimage, it will be important to make sure the start of the
> > initrd/dtb does not overlap the ram the kernel is expecting to be
> > empty and available.
>
> Appending data to the zImage has already been solved with the 'bootp'
> stuff - and I'm afraid to say that merely catting a zImage with something
> else just isn't going to work. There's no way for the zImage decompressor
> to know that there's anything on the end of the image, and there's no way
> for it to know where it can relocate itself to or where in memory it can
> safely decompress to in such scenarios.
Even with weird memory, looking for an image header immediately after
the end of the image is a pretty safe thing to do. As long as the
type and size of the appended data can be determined from the data
header it should work just fine. John is investigating this now for
compressed ramdisks.
As for knowing where it can relocate to, we already know (actually
current code just makes an assumption) about how long the kernel
proper image is and where it wants to live in the zImage's head.S.
Assuming enough ram, if r6 reflects the size of the zImage + the size
of the appended data then it should be good.
>
> While such stuff may be possible with architectures where you know memory
> always starts at zero and extends contiguously for N MB where N is some
> number larger than 16, we don't have that luxury on ARM. Sometimes it
> might be in 4MB chunks scattered throughout the physical address space
> at maybe 256MB offset from zero. Or maybe 32MB chunks starting at 3GB.
> etc.
Of course this makes assumptions about the size of memory available
to the image (which the current zImage also does), and that it is safe
to use a large hunk of ram immediately after the zImage. If your
system has weird memory, then you *have* to know the constraints of
your system and make sure the zImage (+ appended data) is:
1) in a region large enough to hold it, and
2a) doesn't overlap what the kernel proper needs, or
2b) has enough space behind it that it can be relocated safely
before uncompressing the kernel
This is not really any different from today, the zImage still must
make assumptions about where it can do a relocation to or from (either
by relocating vmlinux after decompressing as in current code, or by
relocating the wrapped image before decompressing as in Nicolas'
patch). The data size can be up being a lot larger though.
So, I agree, when RAM is weird this may not be feasible, but there are
also a large number of systems where ram is *not* weird and this is a
useful feature...
Actually concerning this; the initial work that John completed was to
meet the requirements of a client. I posted it for interest sake, but
I wasn't planning to try and get it merged. I didn't think it would
be of much use outside of my client's use case, but after talking with
Nicolas and others we decided it could be useful and we're going to
spend the time to see if we can make it a general solution. If it
doesn't work, then all we've lost is a bit of time, but I do think it
is a viable feature.
g.
>
> And that's why the 'bootp' stuff takes a fixed address for where to
> locate the initrd - the only person who knows where its safe to do so is
> the person building the image for the platform.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-17 17:51 ` Grant Likely
@ 2011-02-17 18:52 ` Russell King - ARM Linux
2011-02-17 19:56 ` Rob Herring
2011-02-17 20:40 ` Nicolas Pitre
0 siblings, 2 replies; 14+ messages in thread
From: Russell King - ARM Linux @ 2011-02-17 18:52 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Feb 17, 2011 at 10:51:56AM -0700, Grant Likely wrote:
> As for knowing where it can relocate to, we already know (actually
> current code just makes an assumption) about how long the kernel
> proper image is and where it wants to live in the zImage's head.S.
> Assuming enough ram, if r6 reflects the size of the zImage + the size
> of the appended data then it should be good.
That isn't a valid argument. Just because "the decompressor already
does it" does not mean that it'll work with ramdisks. We've been
lucky so far that it has worked with the kernel - and that's exactly
what it is - luck.
It works for a 1-2MB kernel. A 16MB ramdisk is a completely different
kettle of bytes.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-17 18:52 ` Russell King - ARM Linux
@ 2011-02-17 19:56 ` Rob Herring
2011-02-17 20:40 ` Nicolas Pitre
1 sibling, 0 replies; 14+ messages in thread
From: Rob Herring @ 2011-02-17 19:56 UTC (permalink / raw)
To: linux-arm-kernel
On 02/17/2011 12:52 PM, Russell King - ARM Linux wrote:
> On Thu, Feb 17, 2011 at 10:51:56AM -0700, Grant Likely wrote:
>> As for knowing where it can relocate to, we already know (actually
>> current code just makes an assumption) about how long the kernel
>> proper image is and where it wants to live in the zImage's head.S.
>> Assuming enough ram, if r6 reflects the size of the zImage + the size
>> of the appended data then it should be good.
>
> That isn't a valid argument. Just because "the decompressor already
> does it" does not mean that it'll work with ramdisks. We've been
> lucky so far that it has worked with the kernel - and that's exactly
> what it is - luck.
>
> It works for a 1-2MB kernel. A 16MB ramdisk is a completely different
> kettle of bytes.
The capability to add an initramfs into the kernel binary already exists
within the kernel build system. The primary difference here is when the
initramfs is added. Doing that after the kernel build is the more sane
approach. For example, if you want to include kernel modules in your
initramfs and a single image, the current method is kind of broken. You
have to build the kernel once to build the modules, then build it again
to update the initramfs.
If the ramdisk size is the only issue, then you would be okay with
adding couple of KB of dtb image?
Rob
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-17 18:52 ` Russell King - ARM Linux
2011-02-17 19:56 ` Rob Herring
@ 2011-02-17 20:40 ` Nicolas Pitre
1 sibling, 0 replies; 14+ messages in thread
From: Nicolas Pitre @ 2011-02-17 20:40 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, 17 Feb 2011, Russell King - ARM Linux wrote:
> On Thu, Feb 17, 2011 at 10:51:56AM -0700, Grant Likely wrote:
> > As for knowing where it can relocate to, we already know (actually
> > current code just makes an assumption) about how long the kernel
> > proper image is and where it wants to live in the zImage's head.S.
> > Assuming enough ram, if r6 reflects the size of the zImage + the size
> > of the appended data then it should be good.
>
> That isn't a valid argument. Just because "the decompressor already
> does it" does not mean that it'll work with ramdisks. We've been
> lucky so far that it has worked with the kernel - and that's exactly
> what it is - luck.
Two things here:
1) It is possible to know the length of an ATAG or DT block. The size
of a ramdisk image is not known without external information.
2) If the size of a ramdisk was discoverable by the zImage code, and if
the ramdisk image was simply appended to zImage itself, then there
would be no guessing as to where to allocate and load things as
separate blobs.
Let's suppose zImage is 1MB in size, the appended ramdisk image is 16MB,
and the decompressed kernel is 3MB. And for simplicity let's ignore
TEXT_OFFSET for the moment.
If zImage is loaded at 3MB from start of physical memory (or above) then
nothing needs to be relocated. It will simply decompress the kernel
from 0MB up to 3MB from start of physical memory. If a ramdisk is
appended to zImage then that location can be passed straight to the
kernel. We only need to make sure the kernel .bss is not larger than
1MB in that case which should be the norm. No guessing, no reliance on
luck. The only limitation is that you need at least 20MB of RAM here,
and there is no way around it whatever the booting method. (Actually you
could save 1MB by not using zImage but a straight Image, but if you're
so tight in RAM you shouldn't use a ramdisk to start with.)
If zImage is loaded at 0MB (typical) then it has to move things around.
Currently the code decompress the kernel and store it from the 1MB
address up to the 4MB address, and then move the kernel back down to 0MB
(a 3MB copy). What my patch does is to have zImage copy itself (1MB
large) to the 3MB address and then decompress the kernel from 0MB to
3MB.
Now if there is a ramdisk image appended to zImage, the zImage
relocation becomes a 17MB copy. And to be 100% safe, we might consider
the end of the kernel .bss to determine the relocation address for
zImage. But still, the final situation is the same as the case where no
relocation is needed. The ramdisk image is relocated along with zImage
to the optimal location i.e. from 4MB upwards. Again, no guessing, no
reliance on luck, no risk of overwriting a separately loaded ramdisk.
Of course this doesn't work on targets with segmented memory such as on
SA1100. For those the conventional method of loading the kernel in one
segment and the ramdisk in another must be preserved.
But with modern ARM machines, it would be far simpler to just
cat zImage ramdisk.gz > boot_image
and load that boot_image anywhere in memory and no bother with guesses
as to where to separately load the ramdisk and hope it won't get
overwritten at some point by the kernel decompressor.
But we're not there with initrd yet. I believe the rework of the
decompressor code has great value on its own. And then the
concatenation trick could be used for DTB which can be easily be self
sized. Doing that with a ramdisk image would require some information
header in between. But one step at a time.
Nicolas
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel
2011-02-16 21:39 ` [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel Nicolas Pitre
2011-02-16 21:55 ` Stephen Boyd
2011-02-17 0:09 ` Grant Likely
@ 2011-03-08 6:37 ` Jean-Christophe PLAGNIOL-VILLARD
2 siblings, 0 replies; 14+ messages in thread
From: Jean-Christophe PLAGNIOL-VILLARD @ 2011-03-08 6:37 UTC (permalink / raw)
To: linux-arm-kernel
On 16:39 Wed 16 Feb , Nicolas Pitre wrote:
> We currently presume a 4x expansion to guess the decompressed kernel size
> in order to determine if the decompressed kernel is in conflict with
> the location where zImage is loaded. This guess may cause many issues
> by overestimating the final kernel image size:
>
> - This may force a needless relocation if the location of zImage was
> fine, wasting some precious microseconds of boot time.
>
> - The relocation may be located way too far, possibly overwriting the
> initrd image in RAM.
>
> - If the kernel image includes a large already-compressed initramfs image
> then the problem is even more exacerbated.
>
> And if by some strange means the 4x guess is too low then we may overwrite
> ourselves with the decompressed image.
>
> So let's use the exact decompressed kernel image size instead. For that
> we need to rely on the stat command, but this is hardly a new build
> dependency as the kernel already depends on many commands provided by
> the same coreutils package where stat is found to be built.
>
> Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
> ---
> arch/arm/boot/compressed/Makefile | 4 +++-
> arch/arm/boot/compressed/vmlinux.lds.in | 3 ---
> 2 files changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
> index 0a8f748..9d328be 100644
> --- a/arch/arm/boot/compressed/Makefile
> +++ b/arch/arm/boot/compressed/Makefile
> @@ -83,9 +83,11 @@ endif
> EXTRA_CFLAGS := -fpic -fno-builtin
> EXTRA_AFLAGS := -Wa,-march=all
>
> +# Provide size of uncompressed kernel to the decompressor via a linker symbol.
> +LDFLAGS_vmlinux := --defsym _image_size=$(shell stat -c "%s" $(obj)/../Image)
the build fail when compiling out of tree with -j9
$ make O=sx5 V=1 -j9
=>
scripts/mod/modpost -o /opt/work/linux-2.6/sx5/Module.symvers -S -c -s
make -f /opt/work/linux-2.6/scripts/Makefile.fwinst obj=firmware __fw_modbuild
arm-none-linux-gnueabi-gcc -Wp,-MD,arch/arm/boot/compressed/.piggy.gzip.o.d -nostdinc -isystem /opt/arm-2010/bin/../lib/gcc/arm-none-linux-gnueabi/4.5.1/include -I/opt/work/linux-2.6/arch/arm/include -Iinclude -I/opt/work/linux-2.6/include -include include/generated/autoconf.h -D__KERNEL__ -mlittle-endian -I/opt/work/linux-2.6/arch/arm/mach-at91/include -D__ASSEMBLY__ -mabi=aapcs-linux -mno-thumb-interwork -funwind-tables -D__LINUX_ARM_ARCH__=5 -march=armv5te -mtune=arm9tdmi -include asm/unified.h -msoft-float -Wa,-march=all -c -o arch/arm/boot/compressed/piggy.gzip.o /opt/work/linux-2.6/arch/arm/boot/compressed/piggy.gzip.S
arm-none-linux-gnueabi-ld -EL --defsym _image_size= --defsym zreladdr=0x20008000 -p --no-undefined -X -T arch/arm/boot/compressed/vmlinux.lds arch/arm/boot/compressed/head.o arch/arm/boot/compressed/piggy.gzip.o arch/arm/boot/compressed/misc.o arch/arm/boot/compressed/decompress.o arch/arm/boot/compressed/lib1funcs.o -o arch/arm/boot/compressed/vmlinux
arm-none-linux-gnueabi-ld:--defsym _image_size=: syntax error
make[3]: *** [arch/arm/boot/compressed/vmlinux] Error 1
make[2]: *** [arch/arm/boot/compressed/vmlinux] Error 2
make[1]: *** [zImage] Error 2
make: *** [sub-make] Error 2
Best Regards,
J.
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2011-03-08 6:37 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-02-16 21:39 [PATCH 1/2] ARM: improvements to compressed/head.S Nicolas Pitre
2011-02-16 21:39 ` [PATCH 2/2] ARM: remove the 4x expansion presumption while decompressing the kernel Nicolas Pitre
2011-02-16 21:55 ` Stephen Boyd
2011-02-16 22:11 ` Nicolas Pitre
2011-02-16 22:14 ` Stephen Boyd
2011-02-17 0:09 ` Grant Likely
2011-02-17 1:48 ` Nicolas Pitre
2011-02-17 4:33 ` Grant Likely
2011-02-17 9:26 ` Russell King - ARM Linux
2011-02-17 17:51 ` Grant Likely
2011-02-17 18:52 ` Russell King - ARM Linux
2011-02-17 19:56 ` Rob Herring
2011-02-17 20:40 ` Nicolas Pitre
2011-03-08 6:37 ` Jean-Christophe PLAGNIOL-VILLARD
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).