From mboxrd@z Thu Jan 1 00:00:00 1970 From: gregory.clement@free-electrons.com (Gregory CLEMENT) Date: Wed, 05 Aug 2015 17:26:25 +0200 Subject: [PATCH 7/7] ARM: redo TTBR setup code for LPAE In-Reply-To: References: <20150506103000.GF2067@n2100.arm.linux.org.uk> Message-ID: <55C22B21.8000700@free-electrons.com> To: linux-arm-kernel@lists.infradead.org List-Id: linux-arm-kernel.lists.infradead.org Hi Russell, On 06/05/2015 12:30, Russell King wrote: > Re-engineer the LPAE TTBR setup code. Rather than passing some shifted > address in order to fit in a CPU register, pass either a full physical > address (in the case of r4, r5 for TTBR0) or a PFN (for TTBR1). > > This removes the ARCH_PGD_SHIFT hack, and the last dangerous user of > cpu_set_ttbr() in the secondary CPU startup code path (which was there > to re-set TTBR1 to the appropriate high physical address space on > Keystone2. It seems that since the merge of this commit the secondary CPUs on Armada XP failed to come online in Big Endian (my configuration is mvebu_v7_defconfig+CONFIG_CPU_BIG_ENDIAN=y). Once I found that this commit introduced the regression I also tested but with LPAE but I had the same result: only the first CPU boot. I don't know if there is something wrong with this patch or if it reveals something which was wrong in the Armada XP port. At a point I suspected something around the change in the get_arch_pgd function but I didn't find anything useful. Any guidance would be greatly appreciate. Thanks, Gregory > > Signed-off-by: Russell King > --- > arch/arm/include/asm/memory.h | 16 --------------- > arch/arm/include/asm/proc-fns.h | 7 ------- > arch/arm/include/asm/smp.h | 2 +- > arch/arm/kernel/head-nommu.S | 2 +- > arch/arm/kernel/head.S | 42 +++++++++++++++++++++++++++++----------- > arch/arm/kernel/smp.c | 10 ++++++---- > arch/arm/mach-keystone/platsmp.c | 13 ------------- > arch/arm/mm/proc-v7-2level.S | 6 +++--- > arch/arm/mm/proc-v7-3level.S | 14 +++++--------- > arch/arm/mm/proc-v7.S | 26 ++++++++++++------------- > 10 files changed, 60 insertions(+), 78 deletions(-) > > diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h > index 184def0e1652..3a72d69b3255 100644 > --- a/arch/arm/include/asm/memory.h > +++ b/arch/arm/include/asm/memory.h > @@ -18,8 +18,6 @@ > #include > #include > > -#include > - > #ifdef CONFIG_NEED_MACH_MEMORY_H > #include > #endif > @@ -133,20 +131,6 @@ > #define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) > > /* > - * Minimum guaranted alignment in pgd_alloc(). The page table pointers passed > - * around in head.S and proc-*.S are shifted by this amount, in order to > - * leave spare high bits for systems with physical address extension. This > - * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but > - * gives us about 38-bits or so. > - */ > -#ifdef CONFIG_ARM_LPAE > -#define ARCH_PGD_SHIFT L1_CACHE_SHIFT > -#else > -#define ARCH_PGD_SHIFT 0 > -#endif > -#define ARCH_PGD_MASK ((1 << ARCH_PGD_SHIFT) - 1) > - > -/* > * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical > * memory. This is used for XIP and NoMMU kernels, and on platforms that don't > * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use > diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h > index 5324c1112f3a..8877ad5ffe10 100644 > --- a/arch/arm/include/asm/proc-fns.h > +++ b/arch/arm/include/asm/proc-fns.h > @@ -125,13 +125,6 @@ extern void cpu_resume(void); > ttbr; \ > }) > > -#define cpu_set_ttbr(nr, val) \ > - do { \ > - u64 ttbr = val; \ > - __asm__("mcrr p15, " #nr ", %Q0, %R0, c2" \ > - : : "r" (ttbr)); \ > - } while (0) > - > #define cpu_get_pgd() \ > ({ \ > u64 pg = cpu_get_ttbr(0); \ > diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h > index 18f5a554134f..487aa08f31ee 100644 > --- a/arch/arm/include/asm/smp.h > +++ b/arch/arm/include/asm/smp.h > @@ -61,7 +61,7 @@ asmlinkage void secondary_start_kernel(void); > struct secondary_data { > union { > unsigned long mpu_rgn_szr; > - unsigned long pgdir; > + u64 pgdir; > }; > unsigned long swapper_pg_dir; > void *stack; > diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S > index aebfbf79a1a3..84da14b7cd04 100644 > --- a/arch/arm/kernel/head-nommu.S > +++ b/arch/arm/kernel/head-nommu.S > @@ -123,7 +123,7 @@ ENTRY(secondary_startup) > ENDPROC(secondary_startup) > > ENTRY(__secondary_switched) > - ldr sp, [r7, #8] @ set up the stack pointer > + ldr sp, [r7, #12] @ set up the stack pointer > mov fp, #0 > b secondary_start_kernel > ENDPROC(__secondary_switched) > diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S > index 3637973a9708..7304b4c44b52 100644 > --- a/arch/arm/kernel/head.S > +++ b/arch/arm/kernel/head.S > @@ -131,13 +131,30 @@ ENTRY(stext) > * The following calls CPU specific code in a position independent > * manner. See arch/arm/mm/proc-*.S for details. r10 = base of > * xxx_proc_info structure selected by __lookup_processor_type > - * above. On return, the CPU will be ready for the MMU to be > - * turned on, and r0 will hold the CPU control register value. > + * above. > + * > + * The processor init function will be called with: > + * r1 - machine type > + * r2 - boot data (atags/dt) pointer > + * r4 - translation table base (low word) > + * r5 - translation table base (high word, if LPAE) > + * r8 - translation table base 1 (pfn if LPAE) > + * r9 - cpuid > + * r13 - virtual address for __enable_mmu -> __turn_mmu_on > + * > + * On return, the CPU will be ready for the MMU to be turned on, > + * r0 will hold the CPU control register value, r1, r2, r4, and > + * r9 will be preserved. r5 will also be preserved if LPAE. > */ > ldr r13, =__mmap_switched @ address to jump to after > @ mmu has been enabled > adr lr, BSYM(1f) @ return (PIC) address > +#ifdef CONFIG_ARM_LPAE > + mov r5, #0 @ high TTBR0 > + mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn > +#else > mov r8, r4 @ set TTBR1 to swapper_pg_dir > +#endif > ldr r12, [r10, #PROCINFO_INITFUNC] > add r12, r12, r10 > ret r12 > @@ -158,7 +175,7 @@ ENDPROC(stext) > * > * Returns: > * r0, r3, r5-r7 corrupted > - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) > + * r4 = physical page table address > */ > __create_page_tables: > pgtbl r4, r8 @ page table address > @@ -333,7 +350,6 @@ __create_page_tables: > #endif > #ifdef CONFIG_ARM_LPAE > sub r4, r4, #0x1000 @ point to the PGD table > - mov r4, r4, lsr #ARCH_PGD_SHIFT > #endif > ret lr > ENDPROC(__create_page_tables) > @@ -381,9 +397,9 @@ ENTRY(secondary_startup) > adr r4, __secondary_data > ldmia r4, {r5, r7, r12} @ address to jump to after > sub lr, r4, r5 @ mmu has been enabled > - ldr r4, [r7, lr] @ get secondary_data.pgdir > - add r7, r7, #4 > - ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir > + add r3, r7, lr > + ldrd r4, [r3, #0] @ get secondary_data.pgdir > + ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir > adr lr, BSYM(__enable_mmu) @ return address > mov r13, r12 @ __secondary_switched address > ldr r12, [r10, #PROCINFO_INITFUNC] > @@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm) > * r6 = &secondary_data > */ > ENTRY(__secondary_switched) > - ldr sp, [r7, #4] @ get secondary_data.stack > + ldr sp, [r7, #12] @ get secondary_data.stack > mov fp, #0 > b secondary_start_kernel > ENDPROC(__secondary_switched) > @@ -416,12 +432,14 @@ __secondary_data: > /* > * Setup common bits before finally enabling the MMU. Essentially > * this is just loading the page table pointer and domain access > - * registers. > + * registers. All these registers need to be preserved by the > + * processor setup function (or set in the case of r0) > * > * r0 = cp#15 control register > * r1 = machine ID > * r2 = atags or dtb pointer > - * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) > + * r4 = TTBR pointer (low word) > + * r5 = TTBR pointer (high word if LPAE) > * r9 = processor ID > * r13 = *virtual* address to jump to upon completion > */ > @@ -440,7 +458,9 @@ __enable_mmu: > #ifdef CONFIG_CPU_ICACHE_DISABLE > bic r0, r0, #CR_I > #endif > -#ifndef CONFIG_ARM_LPAE > +#ifdef CONFIG_ARM_LPAE > + mcrr p15, 0, r4, r5, c2 @ load TTBR0 > +#else > mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ > domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ > domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ > diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c > index cca5b8758185..90dfbedfbfb8 100644 > --- a/arch/arm/kernel/smp.c > +++ b/arch/arm/kernel/smp.c > @@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops) > > static unsigned long get_arch_pgd(pgd_t *pgd) > { > - phys_addr_t pgdir = virt_to_idmap(pgd); > - BUG_ON(pgdir & ARCH_PGD_MASK); > - return pgdir >> ARCH_PGD_SHIFT; > +#ifdef CONFIG_ARM_LPAE > + return __phys_to_pfn(virt_to_phys(pgd)); > +#else > + return virt_to_phys(pgd); > +#endif > } > > int __cpu_up(unsigned int cpu, struct task_struct *idle) > @@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) > #endif > > #ifdef CONFIG_MMU > - secondary_data.pgdir = get_arch_pgd(idmap_pgd); > + secondary_data.pgdir = virt_to_phys(idmap_pgd); > secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); > #endif > sync_cache_w(&secondary_data); > diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c > index 5f46a7cf907b..4bbb18463bfd 100644 > --- a/arch/arm/mach-keystone/platsmp.c > +++ b/arch/arm/mach-keystone/platsmp.c > @@ -39,19 +39,6 @@ static int keystone_smp_boot_secondary(unsigned int cpu, > return error; > } > > -#ifdef CONFIG_ARM_LPAE > -static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu) > -{ > - pgd_t *pgd0 = pgd_offset_k(0); > - cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); > - local_flush_tlb_all(); > -} > -#else > -static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu) > -{} > -#endif > - > struct smp_operations keystone_smp_ops __initdata = { > .smp_boot_secondary = keystone_smp_boot_secondary, > - .smp_secondary_init = keystone_smp_secondary_initmem, > }; > diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S > index 10405b8d31af..fa385140715f 100644 > --- a/arch/arm/mm/proc-v7-2level.S > +++ b/arch/arm/mm/proc-v7-2level.S > @@ -148,10 +148,10 @@ ENDPROC(cpu_v7_set_pte_ext) > * Macro for setting up the TTBRx and TTBCR registers. > * - \ttb0 and \ttb1 updated with the corresponding flags. > */ > - .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp > + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp > mcr p15, 0, \zero, c2, c0, 2 @ TTB control register > - ALT_SMP(orr \ttbr0, \ttbr0, #TTB_FLAGS_SMP) > - ALT_UP(orr \ttbr0, \ttbr0, #TTB_FLAGS_UP) > + ALT_SMP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_SMP) > + ALT_UP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_UP) > ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) > ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) > mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 > diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S > index d3daed0ae0ad..5e5720e8bc5f 100644 > --- a/arch/arm/mm/proc-v7-3level.S > +++ b/arch/arm/mm/proc-v7-3level.S > @@ -126,11 +126,10 @@ ENDPROC(cpu_v7_set_pte_ext) > * Macro for setting up the TTBRx and TTBCR registers. > * - \ttbr1 updated. > */ > - .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp > + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp > ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address > - mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT > - cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? > - mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register > + cmp \ttbr1, \tmp, lsr #12 @ PHYS_OFFSET > PAGE_OFFSET? > + mrc p15, 0, \tmp, c2, c0, 2 @ TTB control egister > orr \tmp, \tmp, #TTB_EAE > ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) > ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP) > @@ -143,13 +142,10 @@ ENDPROC(cpu_v7_set_pte_ext) > */ > orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ > mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR > - mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits > - mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits > + mov \tmp, \ttbr1, lsr #20 > + mov \ttbr1, \ttbr1, lsl #12 > addls \ttbr1, \ttbr1, #TTBR1_OFFSET > mcrr p15, 1, \ttbr1, \tmp, c2 @ load TTBR1 > - mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits > - mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits > - mcrr p15, 0, \ttbr0, \tmp, c2 @ load TTBR0 > .endm > > /* > diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S > index 3d1054f11a8a..873230912894 100644 > --- a/arch/arm/mm/proc-v7.S > +++ b/arch/arm/mm/proc-v7.S > @@ -343,9 +343,9 @@ __v7_setup: > and r10, r0, #0xff000000 @ ARM? > teq r10, #0x41000000 > bne 3f > - and r5, r0, #0x00f00000 @ variant > + and r3, r0, #0x00f00000 @ variant > and r6, r0, #0x0000000f @ revision > - orr r6, r6, r5, lsr #20-4 @ combine variant and revision > + orr r6, r6, r3, lsr #20-4 @ combine variant and revision > ubfx r0, r0, #4, #12 @ primary part number > > /* Cortex-A8 Errata */ > @@ -354,7 +354,7 @@ __v7_setup: > bne 2f > #if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) > > - teq r5, #0x00100000 @ only present in r1p* > + teq r3, #0x00100000 @ only present in r1p* > mrceq p15, 0, r10, c1, c0, 1 @ read aux control register > orreq r10, r10, #(1 << 6) @ set IBE to 1 > mcreq p15, 0, r10, c1, c0, 1 @ write aux control register > @@ -395,7 +395,7 @@ __v7_setup: > mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register > #endif > #ifdef CONFIG_ARM_ERRATA_743622 > - teq r5, #0x00200000 @ only present in r2p* > + teq r3, #0x00200000 @ only present in r2p* > mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register > orreq r10, r10, #1 << 6 @ set bit #6 > mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register > @@ -425,10 +425,10 @@ __v7_setup: > mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate > #ifdef CONFIG_MMU > mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs > - v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup > - ldr r5, =PRRR @ PRRR > + v7_ttb_setup r10, r4, r5, r8, r3 @ TTBCR, TTBRx setup > + ldr r3, =PRRR @ PRRR > ldr r6, =NMRR @ NMRR > - mcr p15, 0, r5, c10, c2, 0 @ write PRRR > + mcr p15, 0, r3, c10, c2, 0 @ write PRRR > mcr p15, 0, r6, c10, c2, 1 @ write NMRR > #endif > dsb @ Complete invalidations > @@ -437,22 +437,22 @@ __v7_setup: > and r0, r0, #(0xf << 12) @ ThumbEE enabled field > teq r0, #(1 << 12) @ check if ThumbEE is present > bne 1f > - mov r5, #0 > - mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0 > + mov r3, #0 > + mcr p14, 6, r3, c1, c0, 0 @ Initialize TEEHBR to 0 > mrc p14, 6, r0, c0, c0, 0 @ load TEECR > orr r0, r0, #1 @ set the 1st bit in order to > mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access > 1: > #endif > - adr r5, v7_crval > - ldmia r5, {r5, r6} > + adr r3, v7_crval > + ldmia r3, {r3, r6} > ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables > #ifdef CONFIG_SWP_EMULATE > - orr r5, r5, #(1 << 10) @ set SW bit in "clear" > + orr r3, r3, #(1 << 10) @ set SW bit in "clear" > bic r6, r6, #(1 << 10) @ clear it in "mmuset" > #endif > mrc p15, 0, r0, c1, c0, 0 @ read control register > - bic r0, r0, r5 @ clear bits them > + bic r0, r0, r3 @ clear bits them > orr r0, r0, r6 @ set them > THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions > ret lr @ return to head.S:__ret > -- Gregory Clement, Free Electrons Kernel, drivers, real-time and embedded Linux development, consulting, training and support. http://free-electrons.com