From mboxrd@z Thu Jan 1 00:00:00 1970
From: r.sricharan@ti.com (Sricharan R)
Date: Fri, 4 Oct 2013 11:07:33 +0530
Subject: [PATCH v3 4/6] ARM: mm: Correct virt_to_phys patching for 64 bit physical addresses
In-Reply-To: 
References: <1380835081-12129-1-git-send-email-santosh.shilimkar@ti.com>
 <1380835081-12129-5-git-send-email-santosh.shilimkar@ti.com>
Message-ID: <524E541D.2030509@ti.com>
To: linux-arm-kernel@lists.infradead.org
List-Id: linux-arm-kernel.lists.infradead.org

Hi,

On Friday 04 October 2013 05:47 AM, Nicolas Pitre wrote:
> On Thu, 3 Oct 2013, Santosh Shilimkar wrote:
>
>> From: Sricharan R
>>
>> The current phys_to_virt patching mechanism works only for 32 bit
>> physical addresses and this patch extends the idea for 64bit physical
>> addresses.
>>
>> The 64bit v2p patching mechanism patches the higher 8 bits of the
>> physical address with a constant using a 'mov' instruction, and the
>> lower 32 bits are patched using 'add'. While this is correct, on those
>> platforms where the lowmem addressable physical memory spans across the
>> 4GB boundary, a carry bit can be produced as a result of the addition of
>> the lower 32 bits. This has to be taken into account and added into the
>> upper 32 bits. The patched __pv_offset and VA are added in the lower
>> 32 bits, where __pv_offset can be in two's complement form when
>> PA_START < VA_START, and that can result in a false carry bit.
>>
>> e.g.
>> 1) PA = 0x80000000; VA = 0xC0000000
>> __pv_offset = PA - VA = 0xC0000000 (2's complement)
>>
>> 2) PA = 0x2 80000000; VA = 0xC0000000
>> __pv_offset = PA - VA = 0x1 C0000000
>>
>> So adding __pv_offset + VA should never result in a true carry for (1).
>> So in order to differentiate a true carry from a false one, __pv_offset
>> is extended to 64 bits, and the upper 32 bits will hold 0xffffffff when
>> __pv_offset is in 2's complement form. 'mvn #0' is therefore patched in
>> instead of 'mov'. Since the mov, add and sub instructions are patched
>> with different constants inside the same stub, the rotation field of the
>> opcode is used to differentiate between them.
>>
>> So the above examples for the v2p translation become, for VA=0xC0000000:
>> 1) PA[63:32] = 0xffffffff
>> PA[31:0] = VA + 0xC0000000 --> results in a carry
>> PA[63:32] = PA[63:32] + carry
>>
>> PA[63:0] = 0x0 80000000
>>
>> 2) PA[63:32] = 0x1
>> PA[31:0] = VA + 0xC0000000 --> results in a carry
>> PA[63:32] = PA[63:32] + carry
>>
>> PA[63:0] = 0x2 80000000
>>
>> The above ideas were suggested by Nicolas Pitre as part of the review
>> of the first and second versions of the subject patch.
>>
>> There is no corresponding change on the phys_to_virt() side, because
>> computations on the upper 32-bits would be discarded anyway.
>>
>> Cc: Nicolas Pitre
>> Cc: Russell King
>>
>> Signed-off-by: Sricharan R
>> Signed-off-by: Santosh Shilimkar
> Almost there ...
>
>> ---
>>  arch/arm/include/asm/memory.h |   35 +++++++++++++++++++++---
>>  arch/arm/kernel/armksyms.c    |    1 +
>>  arch/arm/kernel/head.S        |   60 ++++++++++++++++++++++++++---------------
>>  arch/arm/kernel/patch.c       |    3 +++
>>  4 files changed, 75 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
>> index d9b96c65..942ad84 100644
>> --- a/arch/arm/include/asm/memory.h
>> +++ b/arch/arm/include/asm/memory.h
>> @@ -172,9 +172,12 @@
>>   * so that all we need to do is modify the 8-bit constant field.
>>   */
>>  #define __PV_BITS_31_24	0x81000000
>> +#define __PV_BITS_7_0	0x81
>>
>>  extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
>> -extern unsigned long __pv_phys_offset;
>> +extern u64 __pv_phys_offset;
>> +extern u64 __pv_offset;
>> +
>>  #define PHYS_OFFSET __pv_phys_offset
>>
>>  #define __pv_stub(from,to,instr,type)			\
>> @@ -186,10 +189,36 @@ extern unsigned long __pv_phys_offset;
>>  	: "=r" (to)					\
>>  	: "r" (from), "I" (type))
>>
>> +#define __pv_stub_mov_hi(t)				\
>> +	__asm__ volatile("@ __pv_stub_mov\n"		\
>> +	"1:	mov	%R0, %1\n"			\
>> +	"	.pushsection .pv_table,\"a\"\n"		\
>> +	"	.long	1b\n"				\
>> +	"	.popsection\n"				\
>> +	: "=r" (t)					\
>> +	: "I" (__PV_BITS_7_0))
>> +
>> +#define __pv_add_carry_stub(x, y)			\
>> +	__asm__ volatile("@ __pv_add_carry_stub\n"	\
>> +	"1:	adds	%Q0, %1, %2\n"			\
>> +	"	adc	%R0, %R0, #0\n"			\
>> +	"	.pushsection .pv_table,\"a\"\n"		\
>> +	"	.long	1b\n"				\
>> +	"	.popsection\n"				\
>> +	: "+r" (y)					\
>> +	: "r" (x), "I" (__PV_BITS_31_24)		\
> The third operand i.e. __PV_BITS_31_24 is useless here.

This is used to encode the correct rotation, and we use it in the
patching code to identify the offset to be patched.

>> +	: "cc")
>> +
>>  static inline phys_addr_t __virt_to_phys(unsigned long x)
>>  {
>> -	unsigned long t;
>> -	__pv_stub(x, t, "add", __PV_BITS_31_24);
>> +	phys_addr_t t;
>> +
>> +	if (sizeof(phys_addr_t) == 4) {
>> +		__pv_stub(x, t, "add", __PV_BITS_31_24);
>> +	} else {
>> +		__pv_stub_mov_hi(t);
>> +		__pv_add_carry_stub(x, t);
>> +	}
>>  	return t;
>>  }
>>
>> diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
>> index 60d3b73..1f031dd 100644
>> --- a/arch/arm/kernel/armksyms.c
>> +++ b/arch/arm/kernel/armksyms.c
>> @@ -155,4 +155,5 @@ EXPORT_SYMBOL(__gnu_mcount_nc);
>>
>>  #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
>>  EXPORT_SYMBOL(__pv_phys_offset);
>> +EXPORT_SYMBOL(__pv_offset);
>>  #endif
>> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
>> index 2c7cc1e..90d04d7 100644
>> --- a/arch/arm/kernel/head.S
>> +++ b/arch/arm/kernel/head.S
>> @@ -536,6 +536,14 @@ ENTRY(fixup_smp)
>>  	ldmfd	sp!, {r4 - r6, pc}
>>  ENDPROC(fixup_smp)
>>
>> +#ifdef __ARMEB__
>> +#define LOW_OFFSET	0x4
>> +#define HIGH_OFFSET	0x0
>> +#else
>> +#define LOW_OFFSET	0x0
>> +#define HIGH_OFFSET	0x4
>> +#endif
>> +
>>  #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
>>
>>  /* __fixup_pv_table - patch the stub instructions with the delta between
>> @@ -546,17 +554,20 @@ ENDPROC(fixup_smp)
>>  	__HEAD
>>  __fixup_pv_table:
>>  	adr	r0, 1f
>> -	ldmia	r0, {r3-r5, r7}
>> -	sub	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
>> +	ldmia	r0, {r3-r7}
>> +	mvn	ip, #0
>> +	subs	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
>>  	add	r4, r4, r3	@ adjust table start address
>>  	add	r5, r5, r3	@ adjust table end address
>> -	add	r7, r7, r3	@ adjust __pv_phys_offset address
>> -	str	r8, [r7]	@ save computed PHYS_OFFSET to __pv_phys_offset
>> +	add	r6, r6, r3	@ adjust __pv_phys_offset address
>> +	add	r7, r7, r3	@ adjust __pv_offset address
>> +	str	r8, [r6, #LOW_OFFSET]	@ save computed PHYS_OFFSET to __pv_phys_offset
>> +	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
>>  	mov	r6, r3, lsr #24	@ constant for add/sub instructions
>>  	teq	r3, r6, lsl #24	@ must be 16MiB aligned
>>  THUMB(	it	ne		@ cross section branch )
>>  	bne	__error
>> -	str	r6, [r7, #4]	@ save to __pv_offset
>> +	str	r3, [r7, #LOW_OFFSET]	@ save to __pv_offset low bits
>>  	b	__fixup_a_pv_table
>>  ENDPROC(__fixup_pv_table)
>>
>> @@ -565,9 +576,18 @@ ENDPROC(__fixup_pv_table)
>>  	.long	__pv_table_begin
>>  	.long	__pv_table_end
>>  2:	.long	__pv_phys_offset
>> +	.long	__pv_offset
>>
>>  	.text
>>  __fixup_a_pv_table:
>> +	adr	r0, 3f
>> +	ldr	r6, [r0]
>> +	add	r6, r6, r3
>> +	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
>> +	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
>> +	mov	r6, r6, lsr #24
>> +	cmn	r0, #1
>> +	moveq	r0, #0x400000	@ set bit 22, mov to mvn instruction
>>  #ifdef CONFIG_THUMB2_KERNEL
>>  	lsls	r6, #24
>>  	beq	2f
>> @@ -582,9 +602,15 @@ __fixup_a_pv_table:
>>  	b	2f
>>  1:	add	r7, r3
>>  	ldrh	ip, [r7, #2]
>> -	and	ip, 0x8f00
>> -	orr	ip, r6	@ mask in offset bits 31-24
>> +	tst	ip, #0x4000
>> +	and	ip, #0x8f00
>> +	orrne	ip, r6	@ mask in offset bits 31-24
>> +	orreq	ip, r0	@ mask in offset bits 7-0
>>  	strh	ip, [r7, #2]
>> +	ldrheq	ip, [r7]
>> +	biceq	ip, #0x20
>> +	orreq	ip, ip, r0, lsr #16
>> +	strheq	ip, [r7]
>>  2:	cmp	r4, r5
>>  	ldrcc	r7, [r4], #4	@ use branch for delay slot
>>  	bcc	1b
>> @@ -593,7 +619,10 @@ __fixup_a_pv_table:
>>  	b	2f
>>  1:	ldr	ip, [r7, r3]
>>  	bic	ip, ip, #0x000000ff
>> -	orr	ip, ip, r6	@ mask in offset bits 31-24
>> +	tst	ip, #0xf00	@ check the rotation field
>> +	orrne	ip, ip, r6	@ mask in offset bits 31-24
>> +	biceq	ip, ip, #0x400000	@ clear bit 22
>> +	orreq	ip, ip, r0	@ mask in offset bits 7-0
>>  	str	ip, [r7, r3]
>>  2:	cmp	r4, r5
>>  	ldrcc	r7, [r4], #4	@ use branch for delay slot
>> @@ -602,28 +631,17 @@ __fixup_a_pv_table:
>>  #endif
>>  ENDPROC(__fixup_a_pv_table)
>>
>> +3:	.long	__pv_offset
>> +
>>  ENTRY(fixup_pv_table)
>>  	stmfd	sp!, {r4 - r7, lr}
>> -	ldr	r2, 2f			@ get address of __pv_phys_offset
>>  	mov	r3, #0			@ no offset
>>  	mov	r4, r0			@ r0 = table start
>>  	add	r5, r0, r1		@ r1 = table size
>> -	ldr	r6, [r2, #4]		@ get __pv_offset
>>  	bl	__fixup_a_pv_table
>>  	ldmfd	sp!, {r4 - r7, pc}
>>  ENDPROC(fixup_pv_table)
>>
>> -	.align
>> -2:	.long	__pv_phys_offset
>> -
>> -	.data
>> -	.globl	__pv_phys_offset
>> -	.type	__pv_phys_offset, %object
>> -__pv_phys_offset:
>> -	.long	0
>> -	.size	__pv_phys_offset, . - __pv_phys_offset
>> -__pv_offset:
>> -	.long	0
>>  #endif
>>
>>  #include "head-common.S"
>> diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
>> index 07314af..8356312 100644
>> --- a/arch/arm/kernel/patch.c
>> +++ b/arch/arm/kernel/patch.c
>> @@ -8,6 +8,9 @@
>>
>>  #include "patch.h"
>>
>> +u64 __pv_phys_offset __attribute__((section(".data")));
>> +u64 __pv_offset __attribute__((section(".data")));
> Please add a comment explaining why you force those variables out of the
> .bss section. This is unlikely to be obvious to people.
>
> In fact, is there a reason why you moved those out of head.S? You only
> needed to replace the .long with .quad to match the u64 type.
>
> I think I might have suggested moving them out if they were to be typed
> with phys_addr_t, but using a fixed u64 is simpler.

Yes, I moved it here after your comments :-). Since it is always u64 I can
move it to head.S with .quad as well.
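
To make the carry handling above concrete, the patched v2p sequence behaves
roughly like the following C model. This is only an illustration of the
arithmetic (model_virt_to_phys() and the standalone program are made up for
the example); the kernel does this with the patched mov/mvn, adds and adc
instructions, not in C:

	#include <stdint.h>
	#include <stdio.h>

	/* Rough model of the patched 64-bit __virt_to_phys() sequence:
	 * the high word comes from the patched mov/mvn, the low word
	 * from the patched adds, and adc folds the carry bit into the
	 * high word. */
	static uint64_t model_virt_to_phys(uint32_t va, uint64_t pv_offset)
	{
		uint32_t lo = va + (uint32_t)pv_offset;	/* patched 'adds' */
		uint32_t carry = lo < va;		/* carry out of bit 31 */
		uint32_t hi = (uint32_t)(pv_offset >> 32) + carry; /* 'adc' */

		return ((uint64_t)hi << 32) | lo;
	}

	int main(void)
	{
		/* Case 1: __pv_offset = 0xffffffff c0000000 -> prints 80000000 */
		printf("%llx\n", (unsigned long long)
		       model_virt_to_phys(0xC0000000, 0xffffffffc0000000ULL));
		/* Case 2: __pv_offset = 0x1 c0000000 -> prints 280000000 */
		printf("%llx\n", (unsigned long long)
		       model_virt_to_phys(0xC0000000, 0x1c0000000ULL));
		return 0;
	}

In case (1) the false carry wraps the 0xffffffff high word back to zero, and
in case (2) the true carry bumps 0x1 to 0x2, matching the two worked examples
in the commit message.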
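Likewise, the rotation-field trick in the ARM path of __fixup_a_pv_table (the
'tst ip, #0xf00' above) can be modelled roughly as below. Again, this is only
a sketch with a made-up name (model_patch), not the kernel code:

	#include <stdint.h>

	/* A nonzero rotation field marks the add/sub stubs built with
	 * __PV_BITS_31_24; a zero rotation marks the mov stub built with
	 * __PV_BITS_7_0.  'pv_offset' is the 64-bit __pv_offset. */
	static uint32_t model_patch(uint32_t insn, uint64_t pv_offset)
	{
		uint32_t hi = (uint32_t)(pv_offset >> 32);

		insn &= ~0xffU;			/* clear the 8-bit immediate */
		if (insn & 0xf00) {		/* rotation != 0: add/sub stub */
			insn |= (uint32_t)pv_offset >> 24; /* offset bits 31-24 */
		} else {			/* rotation == 0: mov stub */
			insn &= ~0x400000U;	/* bit 22 clear = mov */
			if (hi == 0xffffffffU)
				insn |= 0x400000U; /* negative: mov -> mvn #0 */
			else
				insn |= hi & 0xffU; /* positive: high byte into mov */
		}
		return insn;
	}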
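On the .bss question, a comment along these lines (wording is just a
suggestion) should cover it, since the stores from __fixup_pv_table happen
before the kernel clears .bss:

	/*
	 * These are initialized from assembly (__fixup_pv_table) before
	 * .bss is cleared, so they must be placed in .data; a default
	 * .bss placement would zero the values again during boot.
	 */
	u64 __pv_phys_offset __attribute__((section(".data")));
	u64 __pv_offset __attribute__((section(".data")));

Regards,
Sricharan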