linux-arm-kernel.lists.infradead.org archive mirror
* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
From: Eric Miao @ 2010-11-05 18:40 UTC
  To: linux-arm-kernel

Changes since the last version: fixup of the patching stub instructions is
now performed in assembly code before the MMU is on, which means no cache
flush is necessary.

I found myself clumsy in handling assembly. The load of PHYS_OFFSET needs
to be handled differently if it is going to be made into a variable, though.
This is not verified to work and is just for overview; I'll have a bit of
time for that next week.


commit 89609f0d15a582d393576438038234898e49820c
Author: Eric Miao <eric.miao@canonical.com>
Date:   Thu Aug 5 17:23:36 2010 +0800

    ARM: Introduce patching of phys_to_virt and vice versa

    In most cases, the delta between PHYS_OFFSET and PAGE_OFFSET is
    16MiB aligned, which means the difference can be handled by a simple ADD
    or SUB instruction with an immediate shifter operand in ARM.  This will
    be a bit more efficient and generic when PHYS_OFFSET becomes a run-time
    variable.

    This idea can be made generic to allow more conversions than phys_to_virt
    and virt_to_phys. A stub instruction is inserted where applicable, and it
    has the form 'add rd, rn, #imm', where the lowest 8 bits of #imm are used
    to identify the type of patching.  Currently, only two types are defined,
    but in my view this could be expanded to definitions like __io(),
    __mem_pci() and so on. A __patch_table section is introduced to record
    the addresses of all these stub instructions.

    There are several places for improvement:

    1. constant parameters which could previously be optimized by the
       compiler now need one additional instruction (although that
       optimization is not possible either once PHYS_OFFSET becomes a
       variable)

    2. Thumb-2 can be supported in the same way, but that is left for a
       future enhancement.

    The general idea comes from Nicolas Pitre, and is drafted at
        https://wiki.ubuntu.com/Specs/ARMSingleKernel

    Signed-off-by: Nicolas Pitre <nicolas.pitre@canonical.com>
    Signed-off-by: Eric Miao <eric.miao@canonical.com>
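
To make the immediate shifter operand concrete, here is a minimal C sketch
(illustrative only, not part of the patch below; the example offsets are
assumed).  An ARM data-processing immediate is an 8-bit constant rotated
right by twice the 4-bit rotate field, so a 16MiB-aligned delta encodes as
imm8 = delta >> 24 with rotate field 4, i.e. the low 12 bits of the
instruction become (delta >> 24) | 0x400:

	#include <assert.h>
	#include <stdint.h>

	/* Decode the 12-bit immediate field of an ARM data-processing
	 * instruction: an 8-bit constant rotated right by 2 * rotate.
	 */
	static uint32_t arm_decode_imm(uint32_t imm12)
	{
		uint32_t imm8 = imm12 & 0xff;
		uint32_t rot  = 2 * ((imm12 >> 8) & 0xf);

		return rot ? (imm8 >> rot) | (imm8 << (32 - rot)) : imm8;
	}

	int main(void)
	{
		/* Assumed example: PHYS_OFFSET 0xa0000000, PAGE_OFFSET 0xc0000000. */
		uint32_t delta = 0xc0000000u - 0xa0000000u;	/* 16MiB aligned */

		/* imm8 = delta >> 24, rotate field 4 (rotate right by 8): this
		 * is the 0x400 OR-ed into the stub instructions by the fixup.
		 */
		uint32_t imm12 = (delta >> 24) | 0x400;

		assert(arm_decode_imm(imm12) == delta);
		return 0;
	}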

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b527bf5..fc9b96e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -187,6 +187,16 @@ config VECTORS_BASE
 	help
 	  The base address of exception vectors.

+config ARM_PATCH_PHYS_VIRT
+	bool
+	help
+	  Note this is only for non-XIP and non-Thumb2 kernels. There is
+	  also CPU support which needs to read data in order to write back
+	  dirty entries in the cache (e.g. the StrongARM platforms ebsa110,
+	  footbridge, rpc, sa1100, and shark). The mappings in those cases
+	  do not exist before paging_init() has completed, so this option
+	  does not support these CPUs at the moment.
+
 source "init/Kconfig"

 source "kernel/Kconfig.freezer"
@@ -590,6 +600,7 @@ config ARCH_PXA
 	select TICK_ONESHOT
 	select PLAT_PXA
 	select SPARSE_IRQ
+	select ARM_PATCH_PHYS_VIRT
 	help
 	  Support for Intel/Marvell's PXA2xx/PXA3xx processor line.

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 23c2e8e..4b8b8da 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -154,6 +154,11 @@
 #define page_to_phys(page)	(__pfn_to_phys(page_to_pfn(page)))
 #define phys_to_page(phys)	(pfn_to_page(__phys_to_pfn(phys)))

+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+#define PATCH_TYPE_PHYS_TO_VIRT		(0)
+#define PATCH_TYPE_VIRT_TO_PHYS		(1)
+#endif
+
 #ifndef __ASSEMBLY__

 /*
@@ -182,6 +187,34 @@
  */
 #define PHYS_PFN_OFFSET	(PHYS_OFFSET >> PAGE_SHIFT)

+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+#define __patch_stub(from,to,type)			\
+	__asm__(					\
+	"1:	add	%0, %1, %2\n"			\
+	"\n"						\
+	"	.pushsection __patch_table,\"a\"\n"	\
+	"	.long	1b\n"				\
+	"	.popsection\n"				\
+	: "=r" (to)					\
+	: "r" (from), "I" (type))
+
+static inline unsigned long virt_to_phys(void *x)
+{
+	unsigned long t;
+
+	__patch_stub(x, t, PATCH_TYPE_VIRT_TO_PHYS);
+	return t;
+}
+
+static inline void *phys_to_virt(unsigned long x)
+{
+	void *t;
+
+	__patch_stub(x, t, PATCH_TYPE_PHYS_TO_VIRT);
+	return t;
+}
+#else
 /*
  * These are *only* valid on the kernel direct mapped RAM memory.
  * Note: Drivers should NOT use these.  They are the wrong
@@ -197,6 +230,7 @@ static inline void *phys_to_virt(unsigned long x)
 {
 	return (void *)(__phys_to_virt((unsigned long)(x)));
 }
+#endif

 /*
  * Drivers should NOT use these either.
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index dd6b369..973efcc 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -426,4 +426,67 @@ smp_on_up:

 #endif

+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+#define PATCH_INSTR_ADD		(0x00800000)
+#define PATCH_INSTR_SUB		(0x00400000)
+
+/* __fixup_phys_virt - patch the stub instructions with the delta between
+ * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
+ * can be expressed by an immediate shifter operand. The stub instruction
+ * has the form 'add rd, rn, #imm', where the lowest 8 bits of #imm are
+ * used to identify the type of patching.
+ */
+__fixup_phys_virt:
+ 	/*
+	 * r0 - PHYS_OFFSET
+	 * r6 - bits to set in phys_to_virt stub instructions
+	 * r7 - bits to set in virt_to_phys stub instructions
+	 */
+	ldr	r0, =PHYS_OFFSET
+	cmp	r0, #PAGE_OFFSET
+	subhi	r1, r0, #PAGE_OFFSET
+	rsbls	r1, r0, #PAGE_OFFSET
+	lsr	r1, r1, #24
+	orr	r1, r1, #0x400
+	orrhi	r6, r1, #PATCH_INSTR_SUB
+	orrhi	r7, r1, #PATCH_INSTR_ADD
+	orrls	r6, r1, #PATCH_INSTR_ADD
+	orrls	r7, r1, #PATCH_INSTR_SUB
+
+	/* r0 - instruction to patch
+	 * r1 - address offset
+	 * r2 - index into __patch_table
+	 * r3 - __patch_table_end
+	 */
+	adr	r0, 1f
+	ldmia	r0, {r1, r2, r3}
+	sub	r1, r0, r1
+	add	r2, r2, r1
+	add	r3, r3, r1
+	cmp	r2, r3
+	bhs	3f
+2:	ldr	ip, [r2]
+	add	r2, r2, #4
+	ldr	r0, [ip, r1]
+	and	r9, r0, #0x000000ff		@ to decide the patch type
+	bic	r0, r0, #0x00e00000
+	bic	r0, r0, #0x00000fc0
+	bic	r0, r0, #0x0000003f
+	cmp	r9, #PATCH_TYPE_PHYS_TO_VIRT
+	orreq	r0, r0, r6
+	cmp	r9, #PATCH_TYPE_VIRT_TO_PHYS
+	orreq	r0, r0, r7
+	str	r0, [ip, r1]
+	cmp	r2, r3
+	blo	2b
+3:
+	mov	pc, lr
+ENDPROC(__fixup_phys_virt)
+
+1:	.word	.
+	.word	__patch_table_begin
+	.word	__patch_table_end
+#endif
+
 #include "head-common.S"
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 1953e3d..c221b61 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -57,6 +57,10 @@ SECTIONS
 		__smpalt_end = .;
 #endif

+		__patch_table_begin = .;
+			*(__patch_table)
+		__patch_table_end = .;
+
 		INIT_SETUP(16)

 		INIT_CALLS


* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
From: Russell King - ARM Linux @ 2010-11-07 16:48 UTC
  To: linux-arm-kernel

On Sat, Nov 06, 2010 at 02:40:46AM +0800, Eric Miao wrote:
> Changes since the last version: fixup of the patching stub instructions is
> now performed in assembly code before the MMU is on, which means no cache
> flush is necessary.

I assume that there's more to this, because this doesn't include code to
call __fixup_phys_virt.  As it corrupts r1/r2, I'm not sure where you
intend to call this from, as they must be preserved around the time that
__fixup_smp is called.

Also, I assume this only works with ARM code, not Thumb2 ?


* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
From: Nicolas Pitre @ 2010-11-08  4:38 UTC
  To: linux-arm-kernel

On Sun, 7 Nov 2010, Russell King - ARM Linux wrote:

> On Sat, Nov 06, 2010 at 02:40:46AM +0800, Eric Miao wrote:
> > Changes since the last version: fixup of the patching stub instructions is
> > now performed in assembly code before the MMU is on, which means no cache
> > flush is necessary.
> 
> I assume that there's more to this, because this doesn't include code to
> call __fixup_phys_virt.  As it corrupts r1/r2, I'm not sure where you
> intend to call this from, as they must be preserved around the time that
> __fixup_smp is called.

I intend to have a look and provide the necessary fixes soon.


Nicolas


* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
From: Russell King - ARM Linux @ 2010-11-08 11:49 UTC
  To: linux-arm-kernel

On Sat, Nov 06, 2010 at 02:40:46AM +0800, Eric Miao wrote:
> +__fixup_phys_virt:
> + 	/*
> +	 * r0 - PHYS_OFFSET
> +	 * r6 - bits to set in phys_to_virt stub instructions
> +	 * r7 - bits to set in virt_to_phys stub instructions
> +	 */
> +	ldr	r0, =PHYS_OFFSET
> +	cmp	r0, #PAGE_OFFSET
> +	subhi	r1, r0, #PAGE_OFFSET
> +	rsbls	r1, r0, #PAGE_OFFSET
> +	lsr	r1, r1, #24
> +	orr	r1, r1, #0x400
> +	orrhi	r6, r1, #PATCH_INSTR_SUB
> +	orrhi	r7, r1, #PATCH_INSTR_ADD
> +	orrls	r6, r1, #PATCH_INSTR_ADD
> +	orrls	r7, r1, #PATCH_INSTR_SUB
> +
> +	/* r0 - instruction to patch
> +	 * r1 - address offset
> +	 * r2 - index into __patch_table
> +	 * r3 - __patch_table_end
> +	 */
> +	adr	r0, 1f
> +	ldmia	r0, {r1, r2, r3}
> +	sub	r1, r0, r1

Also note that r1 here is (PHYS_OFFSET - PAGE_OFFSET): r0 was the physical
address of '1f', and the loaded value of r1 is the virtual address of '1f'.
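
(To spell the trick out in C, with assumed example addresses: the word
stored at '1f' is its link-time, i.e. virtual, address, while adr computes
its run-time, i.e. physical, address, so subtracting one from the other
yields the v:p delta.)

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		/* Assumed example: kernel linked at PAGE_OFFSET 0xc0000000 but
		 * executing at PHYS_OFFSET 0xa0000000 before the MMU is on.
		 */
		uint32_t virt_1f = 0xc0008040u;	/* '.word .': link-time address of 1f */
		uint32_t phys_1f = 0xa0008040u;	/* what 'adr r0, 1f' computes */

		/* sub r1, r0, r1 */
		uint32_t r1 = phys_1f - virt_1f;

		assert(r1 == 0xa0000000u - 0xc0000000u);	/* PHYS_OFFSET - PAGE_OFFSET */
		return 0;
	}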

So, I think the above code can be replaced by:

	adr	r0, 1f
	ldmia	r0, {r1-r3}
	sub	r1, r0, r1
	mov	r4, r1, lsr #24
	orr	r4, r4, #0x0400
	orr	r6, r4, #PATCH_INSTR_SUB
	orr	r7, r4, #PATCH_INSTR_ADD
	teq	r1, r4, lsl #24
	bne	error

noting that:

	add	rd, rn, #PAGE_OFFSET - PHYS_OFFSET
	sub	rd, rn, #PHYS_OFFSET - PAGE_OFFSET

are equivalent.
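
(A quick way to convince yourself in C: 32-bit arithmetic wraps around, so
adding a constant and subtracting its two's-complement negation give the
same result.  The offsets below are assumed example values.)

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t page_offset = 0xc0000000u;	/* assumed example values */
		uint32_t phys_offset = 0xa0000000u;
		uint32_t rn = 0xc0008000u;		/* any 32-bit value */

		uint32_t add_form = rn + (page_offset - phys_offset);
		uint32_t sub_form = rn - (phys_offset - page_offset);

		assert(add_form == sub_form);	/* holds for every rn */
		return 0;
	}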

We can do better than this - just make sure that all virt_to_phys() are an
add instruction, and all phys_to_virt() are a sub instruction.  Then we only
need to fixup the constant.  IOW, virt_to_phys() is:

	add	rd, rn, #PHYS_OFFSET - PAGE_OFFSET

and phys_to_virt() is:

	sub	rd, rn, #PHYS_OFFSET - PAGE_OFFSET


* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
From: Russell King - ARM Linux @ 2010-11-10 16:45 UTC
  To: linux-arm-kernel

On Mon, Nov 08, 2010 at 11:49:48AM +0000, Russell King - ARM Linux wrote:
> Also note that r1 here is (PHYS_OFFSET - PAGE_OFFSET): r0 was the physical
> address of '1f', and the loaded value of r1 is the virtual address of '1f'.
> 
> So, I think the above code can be replaced by:
> 
> 	adr	r0, 1f
> 	ldmia	r0, {r1-r3}
> 	sub	r1, r0, r1
> 	mov	r4, r1, lsr #24
> 	orr	r4, r4, #0x0400
> 	orr	r6, r4, #PATCH_INSTR_SUB
> 	orr	r7, r4, #PATCH_INSTR_ADD
> 	teq	r1, r4, lsl #24
> 	bne	error
> 
> noting that:
> 
> 	add	rd, rn, #PAGE_OFFSET - PHYS_OFFSET
> 	sub	rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> 
> are equivalent.
> 
> We can do better than this - just make sure that all virt_to_phys() are an
> add instruction, and all phys_to_virt() are a sub instruction.  Then we only
> need to fixup the constant.  IOW, virt_to_phys() is:
> 
> 	add	rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> 
> and phys_to_virt() is:
> 
> 	sub	rd, rn, #PHYS_OFFSET - PAGE_OFFSET

Here's something which uses the above ideas (untested).  I think this is
something we can (and should) do unconditionally for the !XIP cases.  We
also need to fixup modules in a similar manner, so we want to place the
__fixup_pv_table function in the .text, and give it a wrapper along the
lines of:

fixup_pv_table:
	stmfd	sp!, {r4 - r7, lr}
	mov	r3, #0		@ offset (zero as we're in virtual space)
	mov	r4, r0		@ loop start
	mov	r5, r1		@ loop end
	orr	r6, r2, #0x400	@ mask in rotate right 8 bits
	bl	2f		@ branch to __fixup_pv_table loop
	ldmfd	sp!, {r4 - r7, pc}

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8ae3d48..b6b6dcf 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -187,6 +187,16 @@ config VECTORS_BASE
 	help
 	  The base address of exception vectors.
 
+config ARM_PATCH_PHYS_VIRT
+	bool
+	help
+	  Note this is only for non-XIP and non-Thumb2 kernels. There is
+	  also CPU support which needs to read data in order to write back
+	  dirty entries in the cache (e.g. the StrongARM platforms ebsa110,
+	  footbridge, rpc, sa1100, and shark). The mappings in those cases
+	  do not exist before paging_init() has completed, so this option
+	  does not support these CPUs at the moment.
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -590,6 +600,7 @@ config ARCH_PXA
 	select TICK_ONESHOT
 	select PLAT_PXA
 	select SPARSE_IRQ
+	select ARM_PATCH_PHYS_VIRT
 	help
 	  Support for Intel/Marvell's PXA2xx/PXA3xx processor line.
 
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 23c2e8e..3c1d3e3 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -182,6 +182,34 @@
  */
 #define PHYS_PFN_OFFSET	(PHYS_OFFSET >> PAGE_SHIFT)
 
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+#define __pv_stub(from,to,instr)			\
+	__asm__(					\
+	"1:	" instr "	%0, %1, %2\n"		\
+	"\n"						\
+	"	.pushsection .pv_table,\"a\"\n"		\
+	"	.long	1b\n"				\
+	"	.popsection\n"				\
+	: "=r" (to)					\
+	: "r" (from), "I" (1))
+
+static inline unsigned long virt_to_phys(void *x)
+{
+	unsigned long t;
+
+	__pv_stub(x, t, "add");
+	return t;
+}
+
+static inline void *phys_to_virt(unsigned long x)
+{
+	void *t;
+
+	__pv_stub(x, t, "sub");
+	return t;
+}
+#else
 /*
  * These are *only* valid on the kernel direct mapped RAM memory.
  * Note: Drivers should NOT use these.  They are the wrong
@@ -197,6 +225,7 @@ static inline void *phys_to_virt(unsigned long x)
 {
 	return (void *)(__phys_to_virt((unsigned long)(x)));
 }
+#endif
 
 /*
  * Drivers should NOT use these either.
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index dd6b369..bcc502f 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -93,6 +93,9 @@ ENTRY(stext)
 #ifdef CONFIG_SMP_ON_UP
 	bl	__fixup_smp
 #endif
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+	bl	__fixup_pv_table
+#endif
 	bl	__create_page_tables
 
 	/*
@@ -426,4 +429,37 @@ smp_on_up:
 
 #endif
 
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+/* __fixup_pv_table - patch the stub instructions with the delta between
+ * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
+ * can be expressed by an immediate shifter operand. The stub instruction
+ * has the form '(add|sub) rd, rn, #imm'.
+ */
+__fixup_pv_table:
+	adr	r0, 1f
+	ldmia	r0, {r3-r5}
+	sub	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
+	mov	r6, r3, lsr #24	@ constant for add/sub instructions
+	teq	r3, r6, lsl #24 @ must be 16MiB aligned
+	bne	__error
+	orr	r6, r6, #0x400	@ mask in rotate right 8 bits
+	add	r4, r4, r3
+	add	r5, r5, r3
+2:	cmp	r4, r5
+	ldrlo	r7, [r4], #4
+	ldrlo	ip, [r7, r3]
+	bic	ip, ip, #0x000000ff
+	bic	ip, ip, #0x00000f00
+	orr	ip, ip, r6
+	strlo	ip, [r7, r3]
+	blo	2b
+	mov	pc, lr
+ENDPROC(__fixup_pv_table)
+
+1:	.word	.
+	.word	__pv_table_begin
+	.word	__pv_table_end
+#endif
+
 #include "head-common.S"
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index cead889..fb32c9d 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -57,6 +57,10 @@ SECTIONS
 		__smpalt_end = .;
 #endif
 
+		__pv_table_begin = .;
+			*(.pv_table)
+		__pv_table_end = .;
+
 		INIT_SETUP(16)
 
 		INIT_CALLS


* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
From: Russell King - ARM Linux @ 2010-11-10 17:55 UTC
  To: linux-arm-kernel

On Wed, Nov 10, 2010 at 04:45:08PM +0000, Russell King - ARM Linux wrote:
> Here's something which uses the above ideas (untested).  I think this is
> something we can (and should) do unconditionally for the !XIP cases.

Second version - let's get _all_ v:p translations, not just virt_to_phys
and phys_to_virt (iow, __phys_to_virt/__virt_to_phys/__pa/__va), which will
include all the page table manipulations and the default bus<->virt
translations.
This results in a _much_ bigger fixup table.

It also results in no fixup table being generated for platforms which
have complex v:p translations (cns3xxx, iop13xx, ixp2xxx, ks8695,
realview, s3c24a0).

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8ae3d48..6758df1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -187,6 +187,18 @@ config VECTORS_BASE
 	help
 	  The base address of exception vectors.
 
+config ARM_PATCH_PHYS_VIRT
+	bool
+	depends on EXPERIMENTAL
+	depends on !XIP_KERNEL && !THUMB2_KERNEL
+	help
+	  Note this is only for non-XIP and non-Thumb2 kernels. There is
+	  also CPU support which needs to read data in order to write back
+	  dirty entries in the cache (e.g. the StrongARM platforms ebsa110,
+	  footbridge, rpc, sa1100, and shark). The mappings in those cases
+	  do not exist before paging_init() has completed, so this option
+	  does not support these CPUs at the moment.
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -590,6 +602,7 @@ config ARCH_PXA
 	select TICK_ONESHOT
 	select PLAT_PXA
 	select SPARSE_IRQ
+	select ARM_PATCH_PHYS_VIRT
 	help
 	  Support for Intel/Marvell's PXA2xx/PXA3xx processor line.
 
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 23c2e8e..219d125 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -133,16 +133,6 @@
 #endif
 
 /*
- * Physical vs virtual RAM address space conversion.  These are
- * private definitions which should NOT be used outside memory.h
- * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
- */
-#ifndef __virt_to_phys
-#define __virt_to_phys(x)	((x) - PAGE_OFFSET + PHYS_OFFSET)
-#define __phys_to_virt(x)	((x) - PHYS_OFFSET + PAGE_OFFSET)
-#endif
-
-/*
  * Convert a physical address to a Page Frame Number and back
  */
 #define	__phys_to_pfn(paddr)	((paddr) >> PAGE_SHIFT)
@@ -157,6 +147,45 @@
 #ifndef __ASSEMBLY__
 
 /*
+ * Physical vs virtual RAM address space conversion.  These are
+ * private definitions which should NOT be used outside memory.h
+ * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
+ */
+#ifndef __virt_to_phys
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+#define __pv_stub(from,to,instr)			\
+	__asm__(					\
+	"1:	" instr "	%0, %1, %2\n"		\
+	"\n"						\
+	"	.pushsection .pv_table,\"a\"\n"		\
+	"	.long	1b\n"				\
+	"	.popsection\n"				\
+	: "=r" (to)					\
+	: "r" (from), "I" (1))
+
+static inline unsigned long __virt_to_phys(unsigned long x)
+{
+	unsigned long t;
+
+	__pv_stub(x, t, "add");
+	return t;
+}
+
+static inline unsigned long __phys_to_virt(unsigned long x)
+{
+	unsigned long t;
+
+	__pv_stub(x, t, "sub");
+	return t;
+}
+#else
+#define __virt_to_phys(x)	((x) - PAGE_OFFSET + PHYS_OFFSET)
+#define __phys_to_virt(x)	((x) - PHYS_OFFSET + PAGE_OFFSET)
+#endif
+#endif
+
+/*
  * The DMA mask corresponding to the maximum bus address allocatable
  * using GFP_DMA.  The default here places no restriction on DMA
  * allocations.  This must be the smallest DMA mask in the system,
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index dd6b369..bcc502f 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -93,6 +93,9 @@ ENTRY(stext)
 #ifdef CONFIG_SMP_ON_UP
 	bl	__fixup_smp
 #endif
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+	bl	__fixup_pv_table
+#endif
 	bl	__create_page_tables
 
 	/*
@@ -426,4 +429,37 @@ smp_on_up:
 
 #endif
 
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+/* __fixup_pv_table - patch the stub instructions with the delta between
+ * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
+ * can be expressed by an immediate shifter operand. The stub instruction
+ * has the form '(add|sub) rd, rn, #imm'.
+ */
+__fixup_pv_table:
+	adr	r0, 1f
+	ldmia	r0, {r3-r5}
+	sub	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
+	mov	r6, r3, lsr #24	@ constant for add/sub instructions
+	teq	r3, r6, lsl #24 @ must be 16MiB aligned
+	bne	__error
+	orr	r6, r6, #0x400	@ mask in rotate right 8 bits
+	add	r4, r4, r3
+	add	r5, r5, r3
+2:	cmp	r4, r5
+	ldrlo	r7, [r4], #4
+	ldrlo	ip, [r7, r3]
+	bic	ip, ip, #0x000000ff
+	bic	ip, ip, #0x00000f00
+	orr	ip, ip, r6
+	strlo	ip, [r7, r3]
+	blo	2b
+	mov	pc, lr
+ENDPROC(__fixup_pv_table)
+
+1:	.word	.
+	.word	__pv_table_begin
+	.word	__pv_table_end
+#endif
+
 #include "head-common.S"
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index cead889..fb32c9d 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -57,6 +57,10 @@ SECTIONS
 		__smpalt_end = .;
 #endif
 
+		__pv_table_begin = .;
+			*(.pv_table)
+		__pv_table_end = .;
+
 		INIT_SETUP(16)
 
 		INIT_CALLS


* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
From: Nicolas Pitre @ 2010-11-10 20:23 UTC
  To: linux-arm-kernel

On Mon, 8 Nov 2010, Russell King - ARM Linux wrote:

> noting that:
> 
> 	add	rd, rn, #PAGE_OFFSET - PHYS_OFFSET
> 	sub	rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> 
> are equivalent.
> 
> We can do better than this - just make sure that all virt_to_phys() are an
> add instruction, and all phys_to_virt() are a sub instruction.  Then we only
> need to fixup the constant.  IOW, virt_to_phys() is:
> 
> 	add	rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> 
> and phys_to_virt() is:
> 
> 	sub	rd, rn, #PHYS_OFFSET - PAGE_OFFSET

Does this work even if PHYS_OFFSET - PAGE_OFFSET goes negative?
Wouldn't we need to switch the encoding of an add into a sub and vice
versa, with a positive immediate value, in that case?  (I don't have the
instruction encoding info handy at the moment.)

On Wed, 10 Nov 2010, Russell King - ARM Linux wrote:

> Here's something which uses the above ideas (untested).  I think this is
> something we can (and should) do unconditionally for the !XIP cases.

What do you mean by "unconditionally"?

> We also need to fixup modules in a similar manner, so we want to place 
> the __fixup_pv_table function in the .text, and give it a wrapper 
> along the lines of:
[...]

Modules... of course.  I hadn't thought about them.

> Second version - let's get _all_ v:p translations, not just virt_to_phys
> and phys_to_virt (iow, __phys_to_virt/__virt_to_phys/__pa/__va), which will
> include all the page table manipulations and the default bus<->virt
> translations.
> This results in a _much_ bigger fixup table.
> 
> It also results in no fixup table being generated for platforms which
> have complex v:p translations (cns3xxx, iop13xx, ixp2xxx, ks8695,
> realview, s3c24a0).

Obviously.

> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index dd6b369..bcc502f 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -93,6 +93,9 @@ ENTRY(stext)
>  #ifdef CONFIG_SMP_ON_UP
>  	bl	__fixup_smp
>  #endif
> +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
> +	bl	__fixup_pv_table
> +#endif
>  	bl	__create_page_tables
>  
>  	/*
> @@ -426,4 +429,37 @@ smp_on_up:
>  
>  #endif
>  
> +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
> +
> +/* __fixup_pv_table - patch the stub instructions with the delta between
> + * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
> + * can be expressed by an immediate shifter operand. The stub instruction
> + * has the form '(add|sub) rd, rn, #imm'.
> + */
> +__fixup_pv_table:
> +	adr	r0, 1f
> +	ldmia	r0, {r3-r5}
> +	sub	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
> +	mov	r6, r3, lsr #24	@ constant for add/sub instructions
> +	teq	r3, r6, lsl #24 @ must be 16MiB aligned
> +	bne	__error
> +	orr	r6, r6, #0x400	@ mask in rotate right 8 bits
> +	add	r4, r4, r3
> +	add	r5, r5, r3
> +2:	cmp	r4, r5
> +	ldrlo	r7, [r4], #4
> +	ldrlo	ip, [r7, r3]
> +	bic	ip, ip, #0x000000ff
> +	bic	ip, ip, #0x00000f00
> +	orr	ip, ip, r6

The above 3 insns could be replaced with:

	mov	ip, ip, lsr #12
	orr	ip, r6, ip, lsl #12
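
(Both sequences clear the low 12 bits of the instruction word and OR in
r6, which fits in 12 bits.  A C sketch of the equivalence, using an assumed
instruction word:)

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t ip = 0xe28f3101u;	/* assumed add/sub instruction word */
		uint32_t r6 = 0x420u;		/* rotate-right-8 field | imm8 */

		/* bic #0xff; bic #0xf00; orr r6 */
		uint32_t three_insns = ((ip & ~0xffu) & ~0xf00u) | r6;

		/* mov ip, ip, lsr #12; orr ip, r6, ip, lsl #12 */
		uint32_t two_insns = r6 | ((ip >> 12) << 12);

		assert(three_insns == two_insns);
		return 0;
	}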

> +	strlo	ip, [r7, r3]
> +	blo	2b
> +	mov	pc, lr
> +ENDPROC(__fixup_pv_table)


Nicolas


* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
From: Russell King - ARM Linux @ 2010-11-10 20:37 UTC
  To: linux-arm-kernel

On Wed, Nov 10, 2010 at 03:23:50PM -0500, Nicolas Pitre wrote:
> > 	add	rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> > 
> > and phys_to_virt() is:
> > 
> > 	sub	rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> 
> Does this work even if PHYS_OFFSET - PAGE_OFFSET goes negative? 

Yes.

	add	rd, rn, #0xc0000000
	sub	rd, rn, #0x40000000

are identical operations.  The operand is an 8-bit constant, zero padded,
rotated right by (in this case) 8 bits - there can be no sign extension.

> On Wed, 10 Nov 2010, Russell King - ARM Linux wrote:
> > Here's something which uses the above ideas (untested).  I think this is
> > something we can (and should) do unconditionally for the !XIP cases.
> 
> What do you mean by "unconditionally"?

For any machine where the v:p offset is representable.  It means that
we increase the amount of testing, and actually this becomes the
standard way.

> > +2:	cmp	r4, r5
> > +	ldrlo	r7, [r4], #4
> > +	ldrlo	ip, [r7, r3]
> > +	bic	ip, ip, #0x000000ff
> > +	bic	ip, ip, #0x00000f00
> > +	orr	ip, ip, r6
> 
> The above 3 insns could be replaced with:
> 
> 	mov	ip, ip, lsr #12
> 	orr	ip, r6, ip, lsl #12

They could be, but it's not worth optimizing to that extent - we're
already hitting load delays for both of those ldr instructions, so
this isn't going to be blindingly fast... at least there aren't
thousands of them to fix up.


* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
From: Nicolas Pitre @ 2010-11-10 21:43 UTC
  To: linux-arm-kernel

On Wed, 10 Nov 2010, Russell King - ARM Linux wrote:

> On Wed, Nov 10, 2010 at 03:23:50PM -0500, Nicolas Pitre wrote:
> > > 	add	rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> > > 
> > > and phys_to_virt() is:
> > > 
> > > 	sub	rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> > 
> > Does this work even if PHYS_OFFSET - PAGE_OFFSET goes negative? 
> 
> Yes.
> 
> 	add	rd, rn, #0xc0000000
> 	sub	rd, rn, #0x40000000
> 
> are identical operations.  The operand is an 8-bit constant, zero padded,
> rotated right by (in this case) 8 bits - there can be no sign extension.

OK.

> > On Wed, 10 Nov 2010, Russell King - ARM Linux wrote:
> > > Here's something which uses the above ideas (untested).  I think this is
> > > something we can (and should) do unconditionally for the !XIP cases.
> > 
> > What do you mean by "unconditionally"?
> 
> For any machine where the v:p offset is respresentable.  It means that
> we increase the amount of testing, and actually this becomes the
> standard way.

Well, sure.  This can become the default, but I wouldn't remove the 
ability to optimize everything at compile time for those who prefer the 
tightest kernel possible.

> > > +2:	cmp	r4, r5
> > > +	ldrlo	r7, [r4], #4
> > > +	ldrlo	ip, [r7, r3]
> > > +	bic	ip, ip, #0x000000ff
> > > +	bic	ip, ip, #0x00000f00
> > > +	orr	ip, ip, r6
> > 
> > The above 3 insns could be replaced with:
> > 
> > 	mov	ip, ip, lsr #12
> > 	orr	ip, r6, ip, lsl #12
> 
> They could be, but it's not worth optimizing to that extent - we're
> already hitting load delays for both of those ldr instructions, so
> this isn't going to be blindingly fast... at least there aren't
> thousands of them to fix up.

Sure.  This is not critical, but even worse is the fact that this is all
running with all caches off.  So having one less insn in the loop may
help, even if only to serve as a good ARM coding example (it will be more
significant with Thumb-2).


Nicolas

