* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
@ 2010-11-05 18:40 Eric Miao
2010-11-07 16:48 ` Russell King - ARM Linux
2010-11-08 11:49 ` Russell King - ARM Linux
0 siblings, 2 replies; 9+ messages in thread
From: Eric Miao @ 2010-11-05 18:40 UTC (permalink / raw)
To: linux-arm-kernel
Changes since last version, fixup of the patching stub instructions is now
performed in assembly code before MMU is on, that means no flush cache is
necessary.
Found myself clumsy in handling assembly. The load of PHYS_OFFSET needs
to be handled differently if that's going to be made into a variable
though. This
is not verified to work and is just for overview; I'll have a bit of time
for that next week.
commit 89609f0d15a582d393576438038234898e49820c
Author: Eric Miao <eric.miao@canonical.com>
Date: Thu Aug 5 17:23:36 2010 +0800
ARM: Introduce patching of phys_to_virt and vice versa
In most cases, the delta between PHYS_OFFSET and PAGE_OFFSET is normally
16MiB aligned, which means the difference can be handled by a simple ADD
or SUB instruction with an immediate shift operand in ARM. This will be
a bit more efficient and generic when PHYS_OFFSET goes run-time.
This idea can be made generic to allow conversions more than phys_to_virt
and virt_to_phys. A stub instruction is inserted where applicable, and it
has a form of 'add rn, rd, #imm', where the lowest 8-bit of #imm is used
to identify the type of patching. Currently, only two types are defined,
but could be expanded in my POV to definitions like __io(), __mem_pci()
and so on. A __patch_table section is introduced to include the addresses
of all these stub instructions.
There are several places for improvement:
1. constant parameters which can be optimized by the compiler now needs
one additional instruction (although the optimization is neither
possible when PHYS_OFFSET goes as a variable)
2. thumb2 can be supported in a same way, but will leave that for future
enhancement.
The general idea comes from Nicolas Pitre, and is drafted at
https://wiki.ubuntu.com/Specs/ARMSingleKernel
Signed-off-by: Nicolas Pitre <nicolas.pitre@canonical.com>
Signed-off-by: Eric Miao <eric.miao@canonical.com>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b527bf5..fc9b96e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -187,6 +187,16 @@ config VECTORS_BASE
help
The base address of exception vectors.
+config ARM_PATCH_PHYS_VIRT
+ bool
+ help
+ Note this is only for non-XIP and non-Thumb2 kernels. And there
+ is CPU support which needs to read data in order to writeback
+ dirty entries in the cache. (e.g. StrongARM, ebsa110, footbridge,
+ rpc, sa1100, and shark). The mappings in the above cases do not
+ exist before paging_init() has completed. Thus this option does
+ not support these CPUs at this moment.
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -590,6 +600,7 @@ config ARCH_PXA
select TICK_ONESHOT
select PLAT_PXA
select SPARSE_IRQ
+ select ARM_PATCH_PHYS_VIRT
help
Support for Intel/Marvell's PXA2xx/PXA3xx processor line.
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 23c2e8e..4b8b8da 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -154,6 +154,11 @@
#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page)))
#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys)))
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+#define PATCH_TYPE_PHYS_TO_VIRT (0)
+#define PATCH_TYPE_VIRT_TO_PHYS (1)
+#endif
+
#ifndef __ASSEMBLY__
/*
@@ -182,6 +187,34 @@
*/
#define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT)
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+#define __patch_stub(from,to,type) \
+ __asm__( \
+ "1: add %0, %1, %2\n" \
+ "\n" \
+ " .pushsection __patch_table,\"a\"\n" \
+ " .long 1b\n" \
+ " .popsection\n" \
+ : "=r" (to) \
+ : "r" (from), "I" (type))
+
+static inline unsigned long virt_to_phys(void *x)
+{
+ unsigned long t;
+
+ __patch_stub(x, t, PATCH_TYPE_VIRT_TO_PHYS);
+ return t;
+}
+
+static inline void *phys_to_virt(unsigned long x)
+{
+ void *t;
+
+ __patch_stub(x, t, PATCH_TYPE_PHYS_TO_VIRT);
+ return t;
+}
+#else
/*
* These are *only* valid on the kernel direct mapped RAM memory.
* Note: Drivers should NOT use these. They are the wrong
@@ -197,6 +230,7 @@ static inline void *phys_to_virt(unsigned long x)
{
return (void *)(__phys_to_virt((unsigned long)(x)));
}
+#endif
/*
* Drivers should NOT use these either.
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index dd6b369..973efcc 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -426,4 +426,67 @@ smp_on_up:
#endif
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+#define PATCH_INSTR_ADD (0x00800000)
+#define PATCH_INSTR_SUB (0x00400000)
+
+/* __fixup_phys_virt - patch the stub instructions with the delta between
+ * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
+ * can be expressed by an immediate shifter operand. The stub instruction
+ * has a form of 'add rd, rn, #imm', where the lowest 8-bit of #imm is
+ * used to identify the type of patching.
+ */
+__fixup_phys_virt:
+ /*
+ * r0 - PHYS_OFFSET
+ * r6 - bits to set in phys_to_virt stub instructions
+ * r7 - bits to set in virt_to_phys stub instructions
+ */
+ ldr r0, =PHYS_OFFSET
+ cmp r0, #PAGE_OFFSET
+ subhi r1, r0, #PAGE_OFFSET
+ rsbls r1, r0, #PAGE_OFFSET
+ lsr r1, r1, #24
+ orr r1, r1, #0x400
+ orrhi r6, r1, #PATCH_INSTR_SUB
+ orrhi r7, r1, #PATCH_INSTR_ADD
+ orrls r6, r1, #PATCH_INSTR_ADD
+ orrls r7, r1, #PATCH_INSTR_SUB
+
+ /* r0 - instruction to patch
+ * r1 - address offset
+ * r2 - index into __patch_table
+ * r3 - __patch_table_end
+ */
+ adr r0, 1f
+ ldmia r0, {r1, r2, r3}
+ sub r1, r0, r1
+ add r2, r2, r1
+ add r3, r3, r1
+ cmp r2, r3
+ bhs 3f
+2: ldr ip, [r2]
+ add r2, r2, #4
+ ldr r0, [ip, r1]
+ and r9, r0, #0x000000ff @ to decide the patch type
+ bic r0, r0, #0x00e00000
+ bic r0, r0, #0x00000fc0
+ bic r0, r0, #0x0000003f
+ cmp r9, #PATCH_TYPE_PHYS_TO_VIRT
+ orreq r0, r0, r6
+ cmp r9, #PATCH_TYPE_VIRT_TO_PHYS
+ orreq r0, r0, r7
+ str r0, [ip, r1]
+ cmp r2, r3
+ blo 2b
+3:
+ mov pc, lr
+ENDPROC(__fixup_phys_virt)
+
+1: .word .
+ .word __patch_table_begin
+ .word __patch_table_end
+#endif
+
#include "head-common.S"
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 1953e3d..c221b61 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -57,6 +57,10 @@ SECTIONS
__smpalt_end = .;
#endif
+ __patch_table_begin = .;
+ *(__patch_table)
+ __patch_table_end = .;
+
INIT_SETUP(16)
INIT_CALLS
^ permalink raw reply related [flat|nested] 9+ messages in thread* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
2010-11-05 18:40 [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa Eric Miao
@ 2010-11-07 16:48 ` Russell King - ARM Linux
2010-11-08 4:38 ` Nicolas Pitre
2010-11-08 11:49 ` Russell King - ARM Linux
1 sibling, 1 reply; 9+ messages in thread
From: Russell King - ARM Linux @ 2010-11-07 16:48 UTC (permalink / raw)
To: linux-arm-kernel
On Sat, Nov 06, 2010 at 02:40:46AM +0800, Eric Miao wrote:
> Changes since last version, fixup of the patching stub instructions is now
> performed in assembly code before MMU is on, that means no flush cache is
> necessary.
I assume that there's more to this, because this doesn't include code to
call __fixup_phys_virt. As it corrupts r1/r2, I'm not sure where you
intend to call this from, as they must be preserved around the time that
__fixup_smp is called.
Also, I assume this only works with ARM code, not Thumb2 ?
^ permalink raw reply [flat|nested] 9+ messages in thread
* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
2010-11-07 16:48 ` Russell King - ARM Linux
@ 2010-11-08 4:38 ` Nicolas Pitre
0 siblings, 0 replies; 9+ messages in thread
From: Nicolas Pitre @ 2010-11-08 4:38 UTC (permalink / raw)
To: linux-arm-kernel
On Sun, 7 Nov 2010, Russell King - ARM Linux wrote:
> On Sat, Nov 06, 2010 at 02:40:46AM +0800, Eric Miao wrote:
> > Changes since last version, fixup of the patching stub instructions is now
> > performed in assembly code before MMU is on, that means no flush cache is
> > necessary.
>
> I assume that there's more to this, because this doesn't include code to
> call __fixup_phys_virt. As it corrupts r1/r2, I'm not sure where you
> intend to call this from, as they must be preserved around the time that
> __fixup_smp is called.
I intend to have a look and provide the necessary fixes soon.
Nicolas
^ permalink raw reply [flat|nested] 9+ messages in thread
* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
2010-11-05 18:40 [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa Eric Miao
2010-11-07 16:48 ` Russell King - ARM Linux
@ 2010-11-08 11:49 ` Russell King - ARM Linux
2010-11-10 16:45 ` Russell King - ARM Linux
1 sibling, 1 reply; 9+ messages in thread
From: Russell King - ARM Linux @ 2010-11-08 11:49 UTC (permalink / raw)
To: linux-arm-kernel
On Sat, Nov 06, 2010 at 02:40:46AM +0800, Eric Miao wrote:
> +__fixup_phys_virt:
> + /*
> + * r0 - PHYS_OFFSET
> + * r6 - bits to set in phys_to_virt stub instructions
> + * r7 - bits to set in virt_to_phys stub instructions
> + */
> + ldr r0, =PHYS_OFFSET
> + cmp r0, #PAGE_OFFSET
> + subhi r1, r0, #PAGE_OFFSET
> + rsbls r1, r0, #PAGE_OFFSET
> + lsr r1, r1, #24
> + orr r1, r1, #0x400
> + orrhi r6, r1, #PATCH_INSTR_SUB
> + orrhi r7, r1, #PATCH_INSTR_ADD
> + orrls r6, r1, #PATCH_INSTR_ADD
> + orrls r7, r1, #PATCH_INSTR_SUB
> +
> + /* r0 - instruction to patch
> + * r1 - address offset
> + * r2 - index into __patch_table
> + * r3 - __patch_table_end
> + */
> + adr r0, 1f
> + ldmia r0, {r1, r2, r3}
> + sub r1, r0, r1
Also note that r1 here is (PHYS_OFFSET - PAGE_OFFSET) - r0 was the physical
address of '1f', and the loaded value of r1 is the virtual address of '1f'.
So, I think the above code can be replaced by:
adr r0, 1f
ldmia r0, {r1-r3}
sub r1, r0, r1
mov r4, r1, lsr #24
orr r4, r4, #0x0400
orr r6, r4, #PATCH_INSTR_SUB
orr r7, r4, #PATCH_INSTR_ADD
teq r1, r4, lsl #24
bne error
noting that:
add rd, rn, #PAGE_OFFSET - PHYS_OFFSET
sub rd, rn, #PHYS_OFFSET - PAGE_OFFSET
are equivalent.
We can do better than this - just make sure that all virt_to_phys() are an
add instruction, and all phys_to_virt() are a sub instruction. Then we only
need to fixup the constant. IOW, virt_to_phys() is:
add rd, rn, #PHYS_OFFSET - PAGE_OFFSET
and phys_to_virt() is:
sub rd, rn, #PHYS_OFFSET - PAGE_OFFSET
^ permalink raw reply [flat|nested] 9+ messages in thread* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
2010-11-08 11:49 ` Russell King - ARM Linux
@ 2010-11-10 16:45 ` Russell King - ARM Linux
2010-11-10 17:55 ` Russell King - ARM Linux
0 siblings, 1 reply; 9+ messages in thread
From: Russell King - ARM Linux @ 2010-11-10 16:45 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Nov 08, 2010 at 11:49:48AM +0000, Russell King - ARM Linux wrote:
> Also note that r1 here is (PHYS_OFFSET - PAGE_OFFSET) - r0 was the physical
> address of '1f', and the loaded value of r1 is the virtual address of '1f'.
>
> So, I think the above code can be replaced by:
>
> adr r0, 1f
> ldmia r0, {r1-r3}
> sub r1, r0, r1
> mov r4, r1, lsr #24
> orr r4, r4, #0x0400
> orr r6, r4, #PATCH_INSTR_SUB
> orr r7, r4, #PATCH_INSTR_ADD
> teq r1, r4, lsl #24
> bne error
>
> noting that:
>
> add rd, rn, #PAGE_OFFSET - PHYS_OFFSET
> sub rd, rn, #PHYS_OFFSET - PAGE_OFFSET
>
> are equivalent.
>
> We can do better than this - just make sure that all virt_to_phys() are an
> add instruction, and all phys_to_virt() are a sub instruction. Then we only
> need to fixup the constant. IOW, virt_to_phys() is:
>
> add rd, rn, #PHYS_OFFSET - PAGE_OFFSET
>
> and phys_to_virt() is:
>
> sub rd, rn, #PHYS_OFFSET - PAGE_OFFSET
Here's something which uses the above ideas (untested). I think this is
something we can (and should) do unconditionally for the !XIP cases. We
also need to fixup modules in a similar manner, so we want to place the
__fixup_pv_table function in the .text, and give it a wrapper along the
lines of:
fixup_pv_table:
stmfd sp!, {r4 - r7, lr}
mov r3, #0 @ offset (zero as we're in virtual space)
mov r4, r0 @ loop start
mov r5, r1 @ loop end
orr r6, r2, #0x400 @ mask in rotate right 8 bits
bl 2 @ branch to __fixup_pv_table loop
ldmfd sp!, {r4 - r7, pc}
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8ae3d48..b6b6dcf 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -187,6 +187,16 @@ config VECTORS_BASE
help
The base address of exception vectors.
+config ARM_PATCH_PHYS_VIRT
+ bool
+ help
+ Note this is only for non-XIP and non-Thumb2 kernels. And there
+ is CPU support which needs to read data in order to writeback
+ dirty entries in the cache. (e.g. StrongARM, ebsa110, footbridge,
+ rpc, sa1100, and shark). The mappings in the above cases do not
+ exist before paging_init() has completed. Thus this option does
+ not support these CPUs at this moment.
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -590,6 +600,7 @@ config ARCH_PXA
select TICK_ONESHOT
select PLAT_PXA
select SPARSE_IRQ
+ select ARM_PATCH_PHYS_VIRT
help
Support for Intel/Marvell's PXA2xx/PXA3xx processor line.
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 23c2e8e..3c1d3e3 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -182,6 +182,34 @@
*/
#define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT)
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+#define __pv_stub(from,to,instr) \
+ __asm__( \
+ "1: " instr " %0, %1, %2\n" \
+ "\n" \
+ " .pushsection .pv_table,\"a\"\n" \
+ " .long 1b\n" \
+ " .popsection\n" \
+ : "=r" (to) \
+ : "r" (from), "I" (1))
+
+static inline unsigned long virt_to_phys(void *x)
+{
+ unsigned long t;
+
+ __pv_stub(x, t, "add");
+ return t;
+}
+
+static inline void *phys_to_virt(unsigned long x)
+{
+ void *t;
+
+ __pv_stub(x, t, "sub");
+ return t;
+}
+#else
/*
* These are *only* valid on the kernel direct mapped RAM memory.
* Note: Drivers should NOT use these. They are the wrong
@@ -197,6 +225,7 @@ static inline void *phys_to_virt(unsigned long x)
{
return (void *)(__phys_to_virt((unsigned long)(x)));
}
+#endif
/*
* Drivers should NOT use these either.
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index dd6b369..bcc502f 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -93,6 +93,9 @@ ENTRY(stext)
#ifdef CONFIG_SMP_ON_UP
bl __fixup_smp
#endif
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+ bl __fixup_pv_table
+#endif
bl __create_page_tables
/*
@@ -426,4 +429,37 @@ smp_on_up:
#endif
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+/* __fixup_pv_table - patch the stub instructions with the delta between
+ * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
+ * can be expressed by an immediate shifter operand. The stub instruction
+ * has a form of '(add|sub) rd, rn, #imm'.
+ */
+__fixup_pv_table:
+ adr r0, 1f
+ ldmia r0, {r3-r5}
+ sub r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET
+ mov r6, r3, lsr #24 @ constant for add/sub instructions
+ teq r3, r6, lsl #24 @ must be 16MiB aligned
+ bne __error
+ orr r6, r6, #0x400 @ mask in rotate right 8 bits
+ add r4, r4, r3
+ add r5, r5, r3
+2: cmp r4, r5
+ ldrlo r7, [r4], #4
+ ldrlo ip, [r7, r3]
+ bic ip, ip, #0x000000ff
+ bic ip, ip, #0x00000f00
+ orr ip, ip, r6
+ strlo ip, [r7, r3]
+ blo 2b
+ mov pc, lr
+ENDPROC(__fixup_phys_virt)
+
+1: .word .
+ .word __pv_table_begin
+ .word __pv_table_end
+#endif
+
#include "head-common.S"
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index cead889..fb32c9d 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -57,6 +57,10 @@ SECTIONS
__smpalt_end = .;
#endif
+ __pv_table_begin = .;
+ *(.pv_table)
+ __pv_table_end = .;
+
INIT_SETUP(16)
INIT_CALLS
^ permalink raw reply related [flat|nested] 9+ messages in thread* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
2010-11-10 16:45 ` Russell King - ARM Linux
@ 2010-11-10 17:55 ` Russell King - ARM Linux
2010-11-10 20:23 ` Nicolas Pitre
0 siblings, 1 reply; 9+ messages in thread
From: Russell King - ARM Linux @ 2010-11-10 17:55 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Nov 10, 2010 at 04:45:08PM +0000, Russell King - ARM Linux wrote:
> Here's something which uses the above ideas (untested). I think this is
> something we can (and should) do unconditionally for the !XIP cases.
Second version - let's get _all_ v:p translations, not just virt_to_phys
and phys_to_virt (iow, __phys_to_virt/__virt_to_phys/__pa/__va) which will
include all the page table manipulations, default bus<->virt translations.
This results in a _much_ bigger fixup table.
It also results in no fixup table being generated for platforms which
have complex v:p translations (cns3xxx, iop13xx, ixp2xxx, ks8695,
realview, s3c24a0).
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8ae3d48..6758df1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -187,6 +187,18 @@ config VECTORS_BASE
help
The base address of exception vectors.
+config ARM_PATCH_PHYS_VIRT
+ bool
+ depends on EXPERIMENTAL
+ depends on !XIP && !THUMB2_KERNEL
+ help
+ Note this is only for non-XIP and non-Thumb2 kernels. And there
+ is CPU support which needs to read data in order to writeback
+ dirty entries in the cache. (e.g. StrongARM, ebsa110, footbridge,
+ rpc, sa1100, and shark). The mappings in the above cases do not
+ exist before paging_init() has completed. Thus this option does
+ not support these CPUs at this moment.
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -590,6 +602,7 @@ config ARCH_PXA
select TICK_ONESHOT
select PLAT_PXA
select SPARSE_IRQ
+ select ARM_PATCH_PHYS_VIRT
help
Support for Intel/Marvell's PXA2xx/PXA3xx processor line.
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 23c2e8e..219d125 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -133,16 +133,6 @@
#endif
/*
- * Physical vs virtual RAM address space conversion. These are
- * private definitions which should NOT be used outside memory.h
- * files. Use virt_to_phys/phys_to_virt/__pa/__va instead.
- */
-#ifndef __virt_to_phys
-#define __virt_to_phys(x) ((x) - PAGE_OFFSET + PHYS_OFFSET)
-#define __phys_to_virt(x) ((x) - PHYS_OFFSET + PAGE_OFFSET)
-#endif
-
-/*
* Convert a physical address to a Page Frame Number and back
*/
#define __phys_to_pfn(paddr) ((paddr) >> PAGE_SHIFT)
@@ -157,6 +147,45 @@
#ifndef __ASSEMBLY__
/*
+ * Physical vs virtual RAM address space conversion. These are
+ * private definitions which should NOT be used outside memory.h
+ * files. Use virt_to_phys/phys_to_virt/__pa/__va instead.
+ */
+#ifndef __virt_to_phys
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+#define __pv_stub(from,to,instr) \
+ __asm__( \
+ "1: " instr " %0, %1, %2\n" \
+ "\n" \
+ " .pushsection .pv_table,\"a\"\n" \
+ " .long 1b\n" \
+ " .popsection\n" \
+ : "=r" (to) \
+ : "r" (from), "I" (1))
+
+static inline unsigned long __virt_to_phys(unsigned long x)
+{
+ unsigned long t;
+
+ __pv_stub(x, t, "add");
+ return t;
+}
+
+static inline unsigned long __phys_to_virt(unsigned long x)
+{
+ unsigned long t;
+
+ __pv_stub(x, t, "sub");
+ return t;
+}
+#else
+#define __virt_to_phys(x) ((x) - PAGE_OFFSET + PHYS_OFFSET)
+#define __phys_to_virt(x) ((x) - PHYS_OFFSET + PAGE_OFFSET)
+#endif
+#endif
+
+/*
* The DMA mask corresponding to the maximum bus address allocatable
* using GFP_DMA. The default here places no restriction on DMA
* allocations. This must be the smallest DMA mask in the system,
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index dd6b369..bcc502f 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -93,6 +93,9 @@ ENTRY(stext)
#ifdef CONFIG_SMP_ON_UP
bl __fixup_smp
#endif
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+ bl __fixup_pv_table
+#endif
bl __create_page_tables
/*
@@ -426,4 +429,37 @@ smp_on_up:
#endif
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
+
+/* __fixup_pv_table - patch the stub instructions with the delta between
+ * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
+ * can be expressed by an immediate shifter operand. The stub instruction
+ * has a form of '(add|sub) rd, rn, #imm'.
+ */
+__fixup_pv_table:
+ adr r0, 1f
+ ldmia r0, {r3-r5}
+ sub r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET
+ mov r6, r3, lsr #24 @ constant for add/sub instructions
+ teq r3, r6, lsl #24 @ must be 16MiB aligned
+ bne __error
+ orr r6, r6, #0x400 @ mask in rotate right 8 bits
+ add r4, r4, r3
+ add r5, r5, r3
+2: cmp r4, r5
+ ldrlo r7, [r4], #4
+ ldrlo ip, [r7, r3]
+ bic ip, ip, #0x000000ff
+ bic ip, ip, #0x00000f00
+ orr ip, ip, r6
+ strlo ip, [r7, r3]
+ blo 2b
+ mov pc, lr
+ENDPROC(__fixup_phys_virt)
+
+1: .word .
+ .word __pv_table_begin
+ .word __pv_table_end
+#endif
+
#include "head-common.S"
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index cead889..fb32c9d 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -57,6 +57,10 @@ SECTIONS
__smpalt_end = .;
#endif
+ __pv_table_begin = .;
+ *(.pv_table)
+ __pv_table_end = .;
+
INIT_SETUP(16)
INIT_CALLS
^ permalink raw reply related [flat|nested] 9+ messages in thread* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
2010-11-10 17:55 ` Russell King - ARM Linux
@ 2010-11-10 20:23 ` Nicolas Pitre
2010-11-10 20:37 ` Russell King - ARM Linux
0 siblings, 1 reply; 9+ messages in thread
From: Nicolas Pitre @ 2010-11-10 20:23 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, 8 Nov 2010, Russell King - ARM Linux wrote:
> noting that:
>
> add rd, rn, #PAGE_OFFSET - PHYS_OFFSET
> sub rd, rn, #PHYS_OFFSET - PAGE_OFFSET
>
> are equivalent.
>
> We can do better than this - just make sure that all virt_to_phys() are an
> > add instruction, and all phys_to_virt() are a sub instruction. Then we only
> need to fixup the constant. IOW, virt_to_phys() is:
>
> add rd, rn, #PHYS_OFFSET - PAGE_OFFSET
>
> and phys_to_virt() is:
>
> sub rd, rn, #PHYS_OFFSET - PAGE_OFFSET
Does this work even if PHYS_OFFSET - PAGE_OFFSET goes negative?
Shouldn't we need to switch the encoding of an add into a sub and vice
versa with a positive immediate value in that case? (I don't have the
instruction encoding info handy at the moment).
On Wed, 10 Nov 2010, Russell King - ARM Linux wrote:
> Here's something which uses the above ideas (untested). I think this is
> something we can (and should) do unconditionally for the !XIP cases.
What do you mean by "unconditionally"?
> We also need to fixup modules in a similar manner, so we want to place
> the __fixup_pv_table function in the .text, and give it a wrapper
> along the lines of:
[...]
Modules... of course. I hadn't thought about them.
> Second version - let's get _all_ v:p translations, not just virt_to_phys
> and phys_to_virt (iow, __phys_to_virt/__virt_to_phys/__pa/__va) which will
> include all the page table manipulations, default bus<->virt translations.
> This results in a _much_ bigger fixup table.
>
> It also results in no fixup table being generated for platforms which
> have complex v:p translations (cns3xxx, iop13xx, ixp2xxx, ks8695,
> realview, s3c24a0).
Obviously.
> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index dd6b369..bcc502f 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -93,6 +93,9 @@ ENTRY(stext)
> #ifdef CONFIG_SMP_ON_UP
> bl __fixup_smp
> #endif
> +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
> + bl __fixup_pv_table
> +#endif
> bl __create_page_tables
>
> /*
> @@ -426,4 +429,37 @@ smp_on_up:
>
> #endif
>
> +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
> +
> +/* __fixup_pv_table - patch the stub instructions with the delta between
> + * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
> + * can be expressed by an immediate shifter operand. The stub instruction
> + * has a form of '(add|sub) rd, rn, #imm'.
> + */
> +__fixup_pv_table:
> + adr r0, 1f
> + ldmia r0, {r3-r5}
> + sub r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET
> + mov r6, r3, lsr #24 @ constant for add/sub instructions
> + teq r3, r6, lsl #24 @ must be 16MiB aligned
> + bne __error
> + orr r6, r6, #0x400 @ mask in rotate right 8 bits
> + add r4, r4, r3
> + add r5, r5, r3
> +2: cmp r4, r5
> + ldrlo r7, [r4], #4
> + ldrlo ip, [r7, r3]
> + bic ip, ip, #0x000000ff
> + bic ip, ip, #0x00000f00
> + orr ip, ip, r6
The above 3 insns could be replaced with:
mov ip, ip, lsr #12
orr ip, r6, ip, lsl #12
> + strlo ip, [r7, r3]
> + blo 2b
> + mov pc, lr
> +ENDPROC(__fixup_phys_virt)
Nicolas
^ permalink raw reply [flat|nested] 9+ messages in thread* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
2010-11-10 20:23 ` Nicolas Pitre
@ 2010-11-10 20:37 ` Russell King - ARM Linux
2010-11-10 21:43 ` Nicolas Pitre
0 siblings, 1 reply; 9+ messages in thread
From: Russell King - ARM Linux @ 2010-11-10 20:37 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Nov 10, 2010 at 03:23:50PM -0500, Nicolas Pitre wrote:
> > add rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> >
> > and phys_to_virt() is:
> >
> > sub rd, rn, #PHYS_OFFSET - PAGE_OFFSET
>
> Does this work even if PHYS_OFFSET - PAGE_OFFSET goes negative?
Yes.
add rd, rn, #0xc0000000
sub rd, rn, #0x40000000
are identical operations. The operand is an 8 bit constant, zero padded,
rotated right by (in this case) 8 bits - there can be no sign extension.
> On Wed, 10 Nov 2010, Russell King - ARM Linux wrote:
> > Here's something which uses the above ideas (untested). I think this is
> > something we can (and should) do unconditionally for the !XIP cases.
>
> What do you mean by "unconditionally"?
For any machine where the v:p offset is representable. It means that
we increase the amount of testing, and actually this becomes the
standard way.
> > +2: cmp r4, r5
> > + ldrlo r7, [r4], #4
> > + ldrlo ip, [r7, r3]
> > + bic ip, ip, #0x000000ff
> > + bic ip, ip, #0x00000f00
> > + orr ip, ip, r6
>
> The above 3 insns could be replaced with:
>
> mov ip, ip, lsr #12
> orr ip, r6, ip, lsl #12
They could be, but it's not worth optimizing to that extent - we're
already hitting load delays for both of those ldr instructions, so
this isn't going to be blindingly fast... at least there aren't
thousands of them to fixup.
^ permalink raw reply [flat|nested] 9+ messages in thread
* [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa
2010-11-10 20:37 ` Russell King - ARM Linux
@ 2010-11-10 21:43 ` Nicolas Pitre
0 siblings, 0 replies; 9+ messages in thread
From: Nicolas Pitre @ 2010-11-10 21:43 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, 10 Nov 2010, Russell King - ARM Linux wrote:
> On Wed, Nov 10, 2010 at 03:23:50PM -0500, Nicolas Pitre wrote:
> > > add rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> > >
> > > and phys_to_virt() is:
> > >
> > > sub rd, rn, #PHYS_OFFSET - PAGE_OFFSET
> >
> > Does this work even if PHYS_OFFSET - PAGE_OFFSET goes negative?
>
> Yes.
>
> add rd, rn, #0xc0000000
> sub rd, rn, #0x40000000
>
> are identical operations. The operand is an 8 bit constant, zero padded,
> rotated right by (in this case) 8 bits - there can be no sign extension.
OK.
> > On Wed, 10 Nov 2010, Russell King - ARM Linux wrote:
> > > Here's something which uses the above ideas (untested). I think this is
> > > something we can (and should) do unconditionally for the !XIP cases.
> >
> > What do you mean by "unconditionally"?
>
> For any machine where the v:p offset is respresentable. It means that
> we increase the amount of testing, and actually this becomes the
> standard way.
Well, sure. This can become the default, but I wouldn't remove the
ability to optimize everything at compile time for those who prefer the
tightest kernel possible.
> > > +2: cmp r4, r5
> > > + ldrlo r7, [r4], #4
> > > + ldrlo ip, [r7, r3]
> > > + bic ip, ip, #0x000000ff
> > > + bic ip, ip, #0x00000f00
> > > + orr ip, ip, r6
> >
> > The above 3 insns could be replaced with:
> >
> > mov ip, ip, lsr #12
> > orr ip, r6, ip, lsl #12
>
> They could be, but it's not worth optimizing to that extent - we're
> already hitting load delays for both of those ldr instructions that
> this isn't going to be blindingly fast... at least there aren't
> thousands of them to fixup.
Sure. This is not critical, but even worse is the fact that this is all
running with all caches off. So having one less insn in the loop may
help, even if only to serve as a good ARM coding example (will be more
significant with thumb2).
Nicolas
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2010-11-10 21:43 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-11-05 18:40 [RFC PATCH v3] ARM: Introduce patching of phys_to_virt and vice versa Eric Miao
2010-11-07 16:48 ` Russell King - ARM Linux
2010-11-08 4:38 ` Nicolas Pitre
2010-11-08 11:49 ` Russell King - ARM Linux
2010-11-10 16:45 ` Russell King - ARM Linux
2010-11-10 17:55 ` Russell King - ARM Linux
2010-11-10 20:23 ` Nicolas Pitre
2010-11-10 20:37 ` Russell King - ARM Linux
2010-11-10 21:43 ` Nicolas Pitre
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).