From: nicolas.pitre@linaro.org (Nicolas Pitre)
Date: Wed, 09 Jan 2013 19:20:40 -0500
Subject: [PATCH 05/16] ARM: bL_head: vlock-based first man election
In-Reply-To: <1357777251-13541-1-git-send-email-nicolas.pitre@linaro.org>
References: <1357777251-13541-1-git-send-email-nicolas.pitre@linaro.org>
Message-ID: <1357777251-13541-6-git-send-email-nicolas.pitre@linaro.org>
To: linux-arm-kernel@lists.infradead.org
List-Id: linux-arm-kernel.lists.infradead.org

From: Dave Martin <dave.martin@linaro.org>

Instead of requiring the first man to be elected in advance (which
can be suboptimal in some situations), this patch uses a per-cluster
mutex to co-ordinate selection of the first man.

This should also make it more feasible to reuse this code path for
asynchronous cluster resume (as in CPUidle scenarios).

Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
 arch/arm/common/Makefile  |  2 +-
 arch/arm/common/bL_head.S | 91 ++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 50880c494f..894c2ddf9b 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -15,4 +15,4 @@ obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o
 obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o
 obj-$(CONFIG_FIQ_GLUE) += fiq_glue.o fiq_glue_setup.o
 obj-$(CONFIG_FIQ_DEBUGGER) += fiq_debugger.o
-obj-$(CONFIG_BIG_LITTLE) += bL_head.o bL_entry.o
+obj-$(CONFIG_BIG_LITTLE) += bL_head.o bL_entry.o vlock.o

diff --git a/arch/arm/common/bL_head.S b/arch/arm/common/bL_head.S
index f7a64ac127..e70dd432e8 100644
--- a/arch/arm/common/bL_head.S
+++ b/arch/arm/common/bL_head.S
@@ -16,6 +16,8 @@
 #include <linux/linkage.h>
 #include <asm/bL_entry.h>
 
+#include "vlock.h"
+
 .if BL_SYNC_CLUSTER_CPUS
 .error "cpus must be the first member of struct bL_cluster_sync_struct"
 .endif
@@ -64,10 +66,11 @@ ENTRY(bL_entry_point)
 * position independent way.
 */
 adr r5, 3f
- ldmia r5, {r6, r7, r8}
+ ldmia r5, {r6, r7, r8, r11}
 add r6, r5, r6 @ r6 = bL_entry_vectors
 ldr r7, [r5, r7] @ r7 = bL_power_up_setup_phys
 add r8, r5, r8 @ r8 = bL_sync
+ add r11, r5, r11 @ r11 = first_man_locks
 
 mov r0, #BL_SYNC_CLUSTER_SIZE
 mla r8, r0, r10, r8 @ r8 = bL_sync cluster base
@@ -83,11 +86,25 @@
 @ At this point, the cluster cannot unexpectedly enter the GOING_DOWN
 @ state, because there is at least one active CPU (this CPU).
 
- @ Check if the cluster has been set up yet:
+ mov r0, #.Lvlock_size
+ mla r11, r0, r10, r11 @ r11 = cluster first man lock
+ mov r0, r11
+ mov r1, r9 @ cpu
+ bl vlock_trylock
+
+ cmp r0, #0 @ failed to get the lock?
+ bne cluster_setup_wait @ wait for cluster setup if so
+
 ldrb r0, [r8, #BL_SYNC_CLUSTER_CLUSTER]
- cmp r0, #CLUSTER_UP
- beq cluster_already_up
+ cmp r0, #CLUSTER_UP @ cluster already up?
+ bne cluster_setup @ if not, set up the cluster + + @ Otherwise, release the first man lock and skip setup: + mov r0, r11 + bl vlock_unlock + b cluster_setup_complete +cluster_setup: @ Signal that the cluster is being brought up: mov r0, #INBOUND_COMING_UP strb r0, [r8, #BL_SYNC_CLUSTER_INBOUND] @@ -102,26 +119,47 @@ ENTRY(bL_entry_point) cluster_teardown_wait: ldrb r0, [r8, #BL_SYNC_CLUSTER_CLUSTER] cmp r0, #CLUSTER_GOING_DOWN - wfeeq - beq cluster_teardown_wait + bne first_man_setup + wfe + b cluster_teardown_wait + +first_man_setup: + @ If the outbound gave up before teardown started, skip cluster setup: - @ power_up_setup is responsible for setting up the cluster: + cmp r0, #CLUSTER_UP + beq cluster_setup_leave + + @ power_up_setup is now responsible for setting up the cluster: cmp r7, #0 mov r0, #1 @ second (cluster) affinity level blxne r7 @ Call power_up_setup if defined + dsb + mov r0, #CLUSTER_UP + strb r0, [r8, #BL_SYNC_CLUSTER_CLUSTER] + +cluster_setup_leave: @ Leave the cluster setup critical section: - dsb mov r0, #INBOUND_NOT_COMING_UP strb r0, [r8, #BL_SYNC_CLUSTER_INBOUND] - mov r0, #CLUSTER_UP - strb r0, [r8, #BL_SYNC_CLUSTER_CLUSTER] dsb sev -cluster_already_up: + mov r0, r11 + bl vlock_unlock + b cluster_setup_complete + + @ In the contended case, non-first men wait here for cluster setup + @ to complete: +cluster_setup_wait: + ldrb r0, [r8, #BL_SYNC_CLUSTER_CLUSTER] + cmp r0, #CLUSTER_UP + wfene + bne cluster_setup_wait + +cluster_setup_complete: @ If a platform-specific CPU setup hook is needed, it is @ called from here. @@ -150,11 +188,40 @@ bL_entry_gated: 3: .word bL_entry_vectors - . .word bL_power_up_setup_phys - 3b .word bL_sync - 3b + .word first_man_locks - 3b ENDPROC(bL_entry_point) .bss - .align 5 + + @ Magic to size and align the first-man vlock structures + @ so that each does not cross a 1KB boundary. + @ We also must ensure that none of these shares a cacheline with + @ any data which might be accessed through the cache. + + .equ .Log2, 0 + .rept 11 + .if (1 << .Log2) < VLOCK_SIZE + .equ .Log2, .Log2 + 1 + .endif + .endr + .if .Log2 > 10 + .error "vlock struct is too large for guaranteed barrierless access ordering" + .endif + .equ .Lvlock_size, 1 << .Log2 + + @ The presence of two .align directives here is deliberate: we must + @ align to whichever of the two boundaries is larger: + .align __CACHE_WRITEBACK_ORDER + .align .Log2 +first_man_locks: + .rept BL_NR_CLUSTERS + .space .Lvlock_size + .endr + .size first_man_locks, . - first_man_locks + .type first_man_locks, #object + + .align __CACHE_WRITEBACK_ORDER .type bL_entry_vectors, #object ENTRY(bL_entry_vectors) -- 1.8.0
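
For readers unfamiliar with the voting mutex ("vlock", from vlock.h/vlock.o
added earlier in this series) taken by vlock_trylock above, the election
works as follows: each CPU racing into the cluster announces that it is
voting, proposes itself as the winner unless someone else already has, then
waits for all outstanding votes to settle; the last proposal to land wins.
Below is a minimal C sketch of that algorithm. It is illustrative only:
the names (vlock_trylock_c, vlock_unlock_c), the NR_CPUS value and the use
of C11 atomics are assumptions of the sketch, not part of this patch; the
real vlock code is hand-written assembly that must run with caches and
coherency disabled, so it relies on plain loads/stores, explicit barriers,
and the sizing/alignment magic seen in the .bss section above.

/*
 * Illustrative sketch only -- NOT part of this patch.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define NR_CPUS		4	/* assumed for illustration */
#define VLOCK_NO_OWNER	(-1)

struct vlock {
	atomic_int currently_voting[NR_CPUS];	/* 1 while a CPU's vote is in flight */
	atomic_int owner;			/* last proposed winner, or VLOCK_NO_OWNER */
};

static bool vlock_trylock_c(struct vlock *v, int this_cpu)
{
	int i;

	/* Signal our desire to vote: */
	atomic_store(&v->currently_voting[this_cpu], 1);

	/* If someone has already proposed a winner, back off: */
	if (atomic_load(&v->owner) != VLOCK_NO_OWNER) {
		atomic_store(&v->currently_voting[this_cpu], 0);
		return false;
	}

	/* Propose ourself as the winner: */
	atomic_store(&v->owner, this_cpu);
	atomic_store(&v->currently_voting[this_cpu], 0);

	/* Wait until every CPU has finished voting: */
	for (i = 0; i < NR_CPUS; i++)
		while (atomic_load(&v->currently_voting[i]))
			;	/* spin */

	/* The last proposal to land wins the election: */
	return atomic_load(&v->owner) == this_cpu;
}

static void vlock_unlock_c(struct vlock *v)
{
	atomic_store(&v->owner, VLOCK_NO_OWNER);
}

Note the inverted convention in the assembly above: there, a zero return
from vlock_trylock means the lock was won. A loser does not retry: it
branches to cluster_setup_wait and simply spins until the winner marks the
cluster CLUSTER_UP, at which point it falls through to
cluster_setup_complete and rejoins the uncontended path.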