[PATCH 2/2] P2V: Thumb2 support

public inbox for linux-arm-kernel@lists.infradead.org
 help / color / mirror / Atom feed

* [PATCH 2/2] P2V: Thumb2 support
@ 2011-02-12 18:33 Nicolas Pitre
  2011-02-13 11:42 ` Russell King - ARM Linux
  2011-02-14 12:51 ` Dave Martin
  0 siblings, 2 replies; 7+ messages in thread
From: Nicolas Pitre @ 2011-02-12 18:33 UTC (permalink / raw)
  To: linux-arm-kernel

From: Nicolas Pitre <nicolas.pitre@linaro.org>

Adding Thumb2 support to the runtime patching of the virt_to_phys and
phys_to_virt opcodes.

Tested both the 8-bit and the 16-bit fixups, using different placements
in memory to exercize all code paths.

Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/Kconfig       |    4 ++--
 arch/arm/kernel/head.S |   48 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d5eb308..8d5643d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -194,13 +194,13 @@ config VECTORS_BASE
 config ARM_PATCH_PHYS_VIRT
 	bool "Patch physical to virtual translations at runtime (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on !XIP_KERNEL && !THUMB2_KERNEL && MMU
+	depends on !XIP_KERNEL && MMU
 	depends on !ARCH_REALVIEW || !SPARSEMEM
 	help
 	  Patch phys-to-virt translation functions at runtime according to
 	  the position of the kernel in system memory.
 
-	  This can only be used with non-XIP, non-Thumb2, MMU kernels where
+	  This can only be used with non-XIP with MMU kernels where
 	  the base of physical memory is at a 16MB boundary.
 
 config ARM_PATCH_PHYS_VIRT_16BIT
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 8f96ca0..575bb3e 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -475,7 +475,8 @@ __fixup_pv_table:
 	sub	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
 	add	r4, r4, r3	@ adjust table start address
 	add	r5, r5, r3	@ adjust table end address
-	str	r8, [r7, r3]!	@ save computed PHYS_OFFSET to __pv_phys_offset
+	add	r7, r7, r3	@ adjust __pv_phys_offset address
+	str	r8, [r7]	@ save computed PHYS_OFFSET to __pv_phys_offset
 #ifndef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
 	mov	r6, r3, lsr #24	@ constant for add/sub instructions
 	teq	r3, r6, lsl #24 @ must be 16MiB aligned
@@ -483,6 +484,7 @@ __fixup_pv_table:
 	mov	r6, r3, lsr #16	@ constant for add/sub instructions
 	teq	r3, r6, lsl #16	@ must be 64kiB aligned
 #endif
+THUMB(	it	ne		@ cross section branch )
 	bne	__error
 	str	r6, [r7, #4]	@ save to __pv_offset
 	b	__fixup_a_pv_table
@@ -496,6 +498,49 @@ ENDPROC(__fixup_pv_table)
 
 	.text
 __fixup_a_pv_table:
+#ifdef CONFIG_THUMB2_KERNEL
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
+	lsls	r0, r6, #24
+	lsr	r6, #8
+	beq	1f
+	clz	r7, r0
+	lsrs	r0, #24
+	lsls	r0, r7
+	bic	r0, 0x0080
+	lsrs	r7, #1
+	orrcs   r0, #0x0080
+	orr	r0, r0, r7, lsl #12
+#endif
+1:	lsls	r6, #24
+	beq	4f
+	clz	r7, r6
+	lsrs	r6, #24
+	lsls	r6, r7
+	bic	r6, #0x0080
+	lsrs	r7, #1
+	orrcs	r6, #0x0080
+	orr	r6, r6, r7, lsl #12
+	orr	r6, #0x4000
+	b	4f
+2:	add     r7, r3
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
+	ldrh	ip, [r7]
+	tst	ip, 0x0400	@ the i bit tells us LS or MS byte
+	beq	3f
+	cmp	r0, #0		@ set C flag, and ...
+	biceq	ip, 0x0400	@ immediate zero value has a special encoding
+	streqh	ip, [r7]	@ that requires the i bit cleared
+#endif
+3:	ldrh	ip, [r7, #2]
+	and	ip, 0x8f00
+	orrcc	ip, r6	@ mask in offset bits 31-24
+	orrcs	ip, r0	@ mask in offset bits 23-16
+	strh	ip, [r7, #2]
+4:	cmp	r4, r5
+        ldrcc   r7, [r4], #4	@ use branch for delay slot
+        bcc	2b
+        bx	lr
+#else
 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
 	and	r0, r6, #255	@ offset bits 23-16
 	mov	r6, r6, lsr #8	@ offset bits 31-24
@@ -513,6 +558,7 @@ __fixup_a_pv_table:
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
 	bcc	2b
 	mov	pc, lr
+#endif
 ENDPROC(__fixup_a_pv_table)
 
 ENTRY(fixup_pv_table)
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] P2V: Thumb2 support
  2011-02-12 18:33 [PATCH 2/2] P2V: Thumb2 support Nicolas Pitre
@ 2011-02-13 11:42 ` Russell King - ARM Linux
  2011-02-13 13:56   ` Nicolas Pitre
  2011-02-14 12:51 ` Dave Martin
  1 sibling, 1 reply; 7+ messages in thread
From: Russell King - ARM Linux @ 2011-02-13 11:42 UTC (permalink / raw)
  To: linux-arm-kernel

On Sat, Feb 12, 2011 at 01:33:42PM -0500, Nicolas Pitre wrote:
>  __fixup_a_pv_table:
> +#ifdef CONFIG_THUMB2_KERNEL
> +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
> +	lsls	r0, r6, #24
> +	lsr	r6, #8
> +	beq	1f
> +	clz	r7, r0
> +	lsrs	r0, #24
> +	lsls	r0, r7

Why do these instructions need to update the PSR?

> +	bic	r0, 0x0080
> +	lsrs	r7, #1
> +	orrcs   r0, #0x0080
> +	orr	r0, r0, r7, lsl #12
> +#endif
> +1:	lsls	r6, #24
> +	beq	4f
> +	clz	r7, r6
> +	lsrs	r6, #24
> +	lsls	r6, r7
> +	bic	r6, #0x0080
> +	lsrs	r7, #1
> +	orrcs	r6, #0x0080
> +	orr	r6, r6, r7, lsl #12
> +	orr	r6, #0x4000
> +	b	4f
> +2:	add     r7, r3
> +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
> +	ldrh	ip, [r7]
> +	tst	ip, 0x0400	@ the i bit tells us LS or MS byte
> +	beq	3f
> +	cmp	r0, #0		@ set C flag, and ...
> +	biceq	ip, 0x0400	@ immediate zero value has a special encoding
> +	streqh	ip, [r7]	@ that requires the i bit cleared
> +#endif
> +3:	ldrh	ip, [r7, #2]
> +	and	ip, 0x8f00
> +	orrcc	ip, r6	@ mask in offset bits 31-24
> +	orrcs	ip, r0	@ mask in offset bits 23-16
> +	strh	ip, [r7, #2]
> +4:	cmp	r4, r5
> +        ldrcc   r7, [r4], #4	@ use branch for delay slot
> +        bcc	2b
> +        bx	lr

Something's wrong with the indentation here.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] P2V: Thumb2 support
  2011-02-13 11:42 ` Russell King - ARM Linux
@ 2011-02-13 13:56   ` Nicolas Pitre
  2011-02-14 12:51     ` Dave Martin
  0 siblings, 1 reply; 7+ messages in thread
From: Nicolas Pitre @ 2011-02-13 13:56 UTC (permalink / raw)
  To: linux-arm-kernel

On Sun, 13 Feb 2011, Russell King - ARM Linux wrote:

> On Sat, Feb 12, 2011 at 01:33:42PM -0500, Nicolas Pitre wrote:
> >  __fixup_a_pv_table:
> > +#ifdef CONFIG_THUMB2_KERNEL
> > +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
> > +	lsls	r0, r6, #24
> > +	lsr	r6, #8
> > +	beq	1f
> > +	clz	r7, r0
> > +	lsrs	r0, #24
> > +	lsls	r0, r7
> 
> Why do these instructions need to update the PSR?

Strictly speaking they don't.  It's just a size optimization.  There is 
a 16-bit encoding for them while their non-PSR-updating counterparts 
have only a 32-bit encoding.

> > +	strh	ip, [r7, #2]
> > +4:	cmp	r4, r5
> > +        ldrcc   r7, [r4], #4	@ use branch for delay slot
> > +        bcc	2b
> > +        bx	lr
> 
> Something's wrong with the indentation here.

Right, spaces instead of tabs, fixed.


Nicolas

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] P2V: Thumb2 support
  2011-02-13 13:56   ` Nicolas Pitre
@ 2011-02-14 12:51     ` Dave Martin
  0 siblings, 0 replies; 7+ messages in thread
From: Dave Martin @ 2011-02-14 12:51 UTC (permalink / raw)
  To: linux-arm-kernel

On Sun, Feb 13, 2011 at 1:56 PM, Nicolas Pitre <nico@fluxnic.net> wrote:
> On Sun, 13 Feb 2011, Russell King - ARM Linux wrote:
>
>> On Sat, Feb 12, 2011 at 01:33:42PM -0500, Nicolas Pitre wrote:
>> > ?__fixup_a_pv_table:
>> > +#ifdef CONFIG_THUMB2_KERNEL
>> > +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
>> > + ? lsls ? ?r0, r6, #24
>> > + ? lsr ? ? r6, #8
>> > + ? beq ? ? 1f
>> > + ? clz ? ? r7, r0
>> > + ? lsrs ? ?r0, #24
>> > + ? lsls ? ?r0, r7
>>
>> Why do these instructions need to update the PSR?
>
> Strictly speaking they don't. ?It's just a size optimization. ?There is
> a 16-bit encoding for them while their non-PSR-updating counterparts
> have only a 32-bit encoding.

More optimisations are available, but in general I'd advise against
this kind of optimisation for hand-written assembler -- it's unlikely
to help performance and impacts readability.

Using a low register in place of ip would shrink most of the loads and
stores to 16 bits for free though, if a suitable register is available
(r0?).

Cheers
---Dave

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] P2V: Thumb2 support
  2011-02-12 18:33 [PATCH 2/2] P2V: Thumb2 support Nicolas Pitre
  2011-02-13 11:42 ` Russell King - ARM Linux
@ 2011-02-14 12:51 ` Dave Martin
  2011-02-14 14:48   ` Nicolas Pitre
  1 sibling, 1 reply; 7+ messages in thread
From: Dave Martin @ 2011-02-14 12:51 UTC (permalink / raw)
  To: linux-arm-kernel

On Sat, Feb 12, 2011 at 6:33 PM, Nicolas Pitre <nico@fluxnic.net> wrote:
> From: Nicolas Pitre <nicolas.pitre@linaro.org>
>
> Adding Thumb2 support to the runtime patching of the virt_to_phys and
> phys_to_virt opcodes.
>
> Tested both the 8-bit and the 16-bit fixups, using different placements
> in memory to exercize all code paths.
>
> Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
> ---
> ?arch/arm/Kconfig ? ? ? | ? ?4 ++--
> ?arch/arm/kernel/head.S | ? 48 +++++++++++++++++++++++++++++++++++++++++++++++-
> ?2 files changed, 49 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index d5eb308..8d5643d 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -194,13 +194,13 @@ config VECTORS_BASE
> ?config ARM_PATCH_PHYS_VIRT
> ? ? ? ?bool "Patch physical to virtual translations at runtime (EXPERIMENTAL)"
> ? ? ? ?depends on EXPERIMENTAL
> - ? ? ? depends on !XIP_KERNEL && !THUMB2_KERNEL && MMU
> + ? ? ? depends on !XIP_KERNEL && MMU
> ? ? ? ?depends on !ARCH_REALVIEW || !SPARSEMEM
> ? ? ? ?help
> ? ? ? ? ?Patch phys-to-virt translation functions at runtime according to
> ? ? ? ? ?the position of the kernel in system memory.
>
> - ? ? ? ? This can only be used with non-XIP, non-Thumb2, MMU kernels where
> + ? ? ? ? This can only be used with non-XIP with MMU kernels where
> ? ? ? ? ?the base of physical memory is at a 16MB boundary.
>
> ?config ARM_PATCH_PHYS_VIRT_16BIT
> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index 8f96ca0..575bb3e 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -475,7 +475,8 @@ __fixup_pv_table:
> ? ? ? ?sub ? ? r3, r0, r3 ? ? ?@ PHYS_OFFSET - PAGE_OFFSET
> ? ? ? ?add ? ? r4, r4, r3 ? ? ?@ adjust table start address
> ? ? ? ?add ? ? r5, r5, r3 ? ? ?@ adjust table end address
> - ? ? ? str ? ? r8, [r7, r3]! ? @ save computed PHYS_OFFSET to __pv_phys_offset
> + ? ? ? add ? ? r7, r7, r3 ? ? ?@ adjust __pv_phys_offset address
> + ? ? ? str ? ? r8, [r7] ? ? ? ?@ save computed PHYS_OFFSET to __pv_phys_offset
> ?#ifndef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
> ? ? ? ?mov ? ? r6, r3, lsr #24 @ constant for add/sub instructions
> ? ? ? ?teq ? ? r3, r6, lsl #24 @ must be 16MiB aligned
> @@ -483,6 +484,7 @@ __fixup_pv_table:
> ? ? ? ?mov ? ? r6, r3, lsr #16 @ constant for add/sub instructions
> ? ? ? ?teq ? ? r3, r6, lsl #16 @ must be 64kiB aligned
> ?#endif
> +THUMB( it ? ? ?ne ? ? ? ? ? ? ?@ cross section branch )
> ? ? ? ?bne ? ? __error
> ? ? ? ?str ? ? r6, [r7, #4] ? ?@ save to __pv_offset
> ? ? ? ?b ? ? ? __fixup_a_pv_table
> @@ -496,6 +498,49 @@ ENDPROC(__fixup_pv_table)
>
> ? ? ? ?.text
> ?__fixup_a_pv_table:
> +#ifdef CONFIG_THUMB2_KERNEL
> +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
> + ? ? ? lsls ? ?r0, r6, #24
> + ? ? ? lsr ? ? r6, #8
> + ? ? ? beq ? ? 1f
> + ? ? ? clz ? ? r7, r0
> + ? ? ? lsrs ? ?r0, #24
> + ? ? ? lsls ? ?r0, r7
> + ? ? ? bic ? ? r0, 0x0080
> + ? ? ? lsrs ? ?r7, #1
> + ? ? ? orrcs ? r0, #0x0080
> + ? ? ? orr ? ? r0, r0, r7, lsl #12
> +#endif
> +1: ? ? lsls ? ?r6, #24
> + ? ? ? beq ? ? 4f
> + ? ? ? clz ? ? r7, r6
> + ? ? ? lsrs ? ?r6, #24
> + ? ? ? lsls ? ?r6, r7
> + ? ? ? bic ? ? r6, #0x0080
> + ? ? ? lsrs ? ?r7, #1
> + ? ? ? orrcs ? r6, #0x0080
> + ? ? ? orr ? ? r6, r6, r7, lsl #12
> + ? ? ? orr ? ? r6, #0x4000
> + ? ? ? b ? ? ? 4f

We do almost the same, complex, operation twice here ... can it be
factorised with a macro or something?  This may also help readability.
 Not essential though.

> +2: ? ? add ? ? r7, r3
> +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
> + ? ? ? ldrh ? ?ip, [r7]
> + ? ? ? tst ? ? ip, 0x0400 ? ? ?@ the i bit tells us LS or MS byte

Might be helpful to comment that we rely on TST resetting the C flag?

> + ? ? ? beq ? ? 3f
> + ? ? ? cmp ? ? r0, #0 ? ? ? ? ?@ set C flag, and ...
> + ? ? ? biceq ? ip, 0x0400 ? ? ?@ immediate zero value has a special encoding
> + ? ? ? streqh ?ip, [r7] ? ? ? ?@ that requires the i bit cleared
> +#endif
> +3: ? ? ldrh ? ?ip, [r7, #2]
> + ? ? ? and ? ? ip, 0x8f00
> + ? ? ? orrcc ? ip, r6 ?@ mask in offset bits 31-24
> + ? ? ? orrcs ? ip, r0 ?@ mask in offset bits 23-16
> + ? ? ? strh ? ?ip, [r7, #2]
> +4: ? ? cmp ? ? r4, r5
> + ? ? ? ?ldrcc ? r7, [r4], #4 ? @ use branch for delay slot
> + ? ? ? ?bcc ? ?2b
> + ? ? ? ?bx ? ? lr
> +#else
> ?#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
> ? ? ? ?and ? ? r0, r6, #255 ? ?@ offset bits 23-16
> ? ? ? ?mov ? ? r6, r6, lsr #8 ?@ offset bits 31-24
> @@ -513,6 +558,7 @@ __fixup_a_pv_table:
> ? ? ? ?ldrcc ? r7, [r4], #4 ? ?@ use branch for delay slot
> ? ? ? ?bcc ? ? 2b
> ? ? ? ?mov ? ? pc, lr
> +#endif
> ?ENDPROC(__fixup_a_pv_table)
>
> ?ENTRY(fixup_pv_table)
> --
> 1.7.4
>

Not sure I entirely all the implications of this code, and I haven't
tested it yet, but it looks sound if I've understood it correctly.

Reviewed-by: Dave Martin <dave.martin@linaro.org>

Cheers
---Dave

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] P2V: Thumb2 support
  2011-02-14 12:51 ` Dave Martin
@ 2011-02-14 14:48   ` Nicolas Pitre
  2011-02-14 14:59     ` Dave Martin
  0 siblings, 1 reply; 7+ messages in thread
From: Nicolas Pitre @ 2011-02-14 14:48 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, 14 Feb 2011, Dave Martin wrote:

> On Sat, Feb 12, 2011 at 6:33 PM, Nicolas Pitre <nico@fluxnic.net> wrote:
> > ?__fixup_a_pv_table:
> > +#ifdef CONFIG_THUMB2_KERNEL
> > +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
> > + ? ? ? lsls ? ?r0, r6, #24
> > + ? ? ? lsr ? ? r6, #8
> > + ? ? ? beq ? ? 1f
> > + ? ? ? clz ? ? r7, r0
> > + ? ? ? lsrs ? ?r0, #24
> > + ? ? ? lsls ? ?r0, r7
> > + ? ? ? bic ? ? r0, 0x0080
> > + ? ? ? lsrs ? ?r7, #1
> > + ? ? ? orrcs ? r0, #0x0080
> > + ? ? ? orr ? ? r0, r0, r7, lsl #12
> > +#endif
> > +1: ? ? lsls ? ?r6, #24
> > + ? ? ? beq ? ? 4f
> > + ? ? ? clz ? ? r7, r6
> > + ? ? ? lsrs ? ?r6, #24
> > + ? ? ? lsls ? ?r6, r7
> > + ? ? ? bic ? ? r6, #0x0080
> > + ? ? ? lsrs ? ?r7, #1
> > + ? ? ? orrcs ? r6, #0x0080
> > + ? ? ? orr ? ? r6, r6, r7, lsl #12
> > + ? ? ? orr ? ? r6, #0x4000
> > + ? ? ? b ? ? ? 4f
> 
> We do almost the same, complex, operation twice here ... can it be
> factorised with a macro or something?  This may also help readability.
>  Not essential though.

I'm not sure.  There is potentially only 7 instructions which are common 
to both cases which would form a logical group.  Hiding that behind a 
macro doesn't save much here.

> > +2: ? ? add ? ? r7, r3
> > +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
> > + ? ? ? ldrh ? ?ip, [r7]
> > + ? ? ? tst ? ? ip, 0x0400 ? ? ?@ the i bit tells us LS or MS byte
> 
> Might be helpful to comment that we rely on TST resetting the C flag?

The tst instruction doesn't touch the C flag.  It is already cleared by 
the loop condition.  But yes, a comment to that effect might be good.

> > + ? ? ? beq ? ? 3f
> > + ? ? ? cmp ? ? r0, #0 ? ? ? ? ?@ set C flag, and ...
> > + ? ? ? biceq ? ip, 0x0400 ? ? ?@ immediate zero value has a special encoding
> > + ? ? ? streqh ?ip, [r7] ? ? ? ?@ that requires the i bit cleared
> > +#endif
> > +3: ? ? ldrh ? ?ip, [r7, #2]
> > + ? ? ? and ? ? ip, 0x8f00
> > + ? ? ? orrcc ? ip, r6 ?@ mask in offset bits 31-24
> > + ? ? ? orrcs ? ip, r0 ?@ mask in offset bits 23-16
> > + ? ? ? strh ? ?ip, [r7, #2]
> > +4: ? ? cmp ? ? r4, r5
> > + ? ? ? ?ldrcc ? r7, [r4], #4 ? @ use branch for delay slot
> > + ? ? ? ?bcc ? ?2b
> > + ? ? ? ?bx ? ? lr
> > +#else
> > ?#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
> > ? ? ? ?and ? ? r0, r6, #255 ? ?@ offset bits 23-16
> > ? ? ? ?mov ? ? r6, r6, lsr #8 ?@ offset bits 31-24
> > @@ -513,6 +558,7 @@ __fixup_a_pv_table:
> > ? ? ? ?ldrcc ? r7, [r4], #4 ? ?@ use branch for delay slot
> > ? ? ? ?bcc ? ? 2b
> > ? ? ? ?mov ? ? pc, lr
> > +#endif
> > ?ENDPROC(__fixup_a_pv_table)
> >
> > ?ENTRY(fixup_pv_table)
> > --
> > 1.7.4
> >
> 
> Not sure I entirely all the implications of this code, and I haven't
> tested it yet, but it looks sound if I've understood it correctly.
> 
> Reviewed-by: Dave Martin <dave.martin@linaro.org>

Thanks.  I'll also remove the s flag to those insns that don't need it 
for clarity.


Nicolas

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/2] P2V: Thumb2 support
  2011-02-14 14:48   ` Nicolas Pitre
@ 2011-02-14 14:59     ` Dave Martin
  0 siblings, 0 replies; 7+ messages in thread
From: Dave Martin @ 2011-02-14 14:59 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

On Mon, Feb 14, 2011 at 2:48 PM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Mon, 14 Feb 2011, Dave Martin wrote:
>
>> On Sat, Feb 12, 2011 at 6:33 PM, Nicolas Pitre <nico@fluxnic.net> wrote:
>> > ?__fixup_a_pv_table:
>> > +#ifdef CONFIG_THUMB2_KERNEL
>> > +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
>> > + ? ? ? lsls ? ?r0, r6, #24
>> > + ? ? ? lsr ? ? r6, #8
>> > + ? ? ? beq ? ? 1f
>> > + ? ? ? clz ? ? r7, r0
>> > + ? ? ? lsrs ? ?r0, #24
>> > + ? ? ? lsls ? ?r0, r7
>> > + ? ? ? bic ? ? r0, 0x0080
>> > + ? ? ? lsrs ? ?r7, #1
>> > + ? ? ? orrcs ? r0, #0x0080
>> > + ? ? ? orr ? ? r0, r0, r7, lsl #12
>> > +#endif
>> > +1: ? ? lsls ? ?r6, #24
>> > + ? ? ? beq ? ? 4f
>> > + ? ? ? clz ? ? r7, r6
>> > + ? ? ? lsrs ? ?r6, #24
>> > + ? ? ? lsls ? ?r6, r7
>> > + ? ? ? bic ? ? r6, #0x0080
>> > + ? ? ? lsrs ? ?r7, #1
>> > + ? ? ? orrcs ? r6, #0x0080
>> > + ? ? ? orr ? ? r6, r6, r7, lsl #12
>> > + ? ? ? orr ? ? r6, #0x4000
>> > + ? ? ? b ? ? ? 4f
>>
>> We do almost the same, complex, operation twice here ... can it be
>> factorised with a macro or something? ?This may also help readability.
>> ?Not essential though.
>
> I'm not sure. ?There is potentially only 7 instructions which are common
> to both cases which would form a logical group. ?Hiding that behind a
> macro doesn't save much here.

I was thinking more of avoiding mis-maintanance by ensuring there's
only one copy of the code to edit.  It wouldn't be a saving as such.
Either way, I agree that it's not that important, since it's not a lot
of code.

>> > +2: ? ? add ? ? r7, r3
>> > +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
>> > + ? ? ? ldrh ? ?ip, [r7]
>> > + ? ? ? tst ? ? ip, 0x0400 ? ? ?@ the i bit tells us LS or MS byte
>>
>> Might be helpful to comment that we rely on TST resetting the C flag?
>
> The tst instruction doesn't touch the C flag. ?It is already cleared by
> the loop condition. ?But yes, a comment to that effect might be good.

You're right ... the comment would have helped me, then :)

>
>> > + ? ? ? beq ? ? 3f
>> > + ? ? ? cmp ? ? r0, #0 ? ? ? ? ?@ set C flag, and ...
>> > + ? ? ? biceq ? ip, 0x0400 ? ? ?@ immediate zero value has a special encoding
>> > + ? ? ? streqh ?ip, [r7] ? ? ? ?@ that requires the i bit cleared
>> > +#endif
>> > +3: ? ? ldrh ? ?ip, [r7, #2]
>> > + ? ? ? and ? ? ip, 0x8f00
>> > + ? ? ? orrcc ? ip, r6 ?@ mask in offset bits 31-24
>> > + ? ? ? orrcs ? ip, r0 ?@ mask in offset bits 23-16
>> > + ? ? ? strh ? ?ip, [r7, #2]
>> > +4: ? ? cmp ? ? r4, r5
>> > + ? ? ? ?ldrcc ? r7, [r4], #4 ? @ use branch for delay slot
>> > + ? ? ? ?bcc ? ?2b
>> > + ? ? ? ?bx ? ? lr
>> > +#else
>> > ?#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
>> > ? ? ? ?and ? ? r0, r6, #255 ? ?@ offset bits 23-16
>> > ? ? ? ?mov ? ? r6, r6, lsr #8 ?@ offset bits 31-24
>> > @@ -513,6 +558,7 @@ __fixup_a_pv_table:
>> > ? ? ? ?ldrcc ? r7, [r4], #4 ? ?@ use branch for delay slot
>> > ? ? ? ?bcc ? ? 2b
>> > ? ? ? ?mov ? ? pc, lr
>> > +#endif
>> > ?ENDPROC(__fixup_a_pv_table)
>> >
>> > ?ENTRY(fixup_pv_table)
>> > --
>> > 1.7.4
>> >
>>
>> Not sure I entirely all the implications of this code, and I haven't
>> tested it yet, but it looks sound if I've understood it correctly.
>>
>> Reviewed-by: Dave Martin <dave.martin@linaro.org>
>
> Thanks. ?I'll also remove the s flag to those insns that don't need it
> for clarity.

OK, sounds fine.
---Dave

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2011-02-14 14:59 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-02-12 18:33 [PATCH 2/2] P2V: Thumb2 support Nicolas Pitre
2011-02-13 11:42 ` Russell King - ARM Linux
2011-02-13 13:56   ` Nicolas Pitre
2011-02-14 12:51     ` Dave Martin
2011-02-14 12:51 ` Dave Martin
2011-02-14 14:48   ` Nicolas Pitre
2011-02-14 14:59     ` Dave Martin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox