From: fred.konrad@greensocs.com
Date: Fri, 5 Jun 2015 16:31:18 +0200
Message-Id: <1433514678-4464-1-git-send-email-fred.konrad@greensocs.com>
Subject: [Qemu-devel] [RFC PATCH] Use atomic cmpxchg to atomically check
 the exclusive value in a STREX
To: qemu-devel@nongnu.org, mttcg@listserver.greensocs.com
Cc: peter.maydell@linaro.org, mark.burton@greensocs.com, agraf@suse.de,
 guillaume.delbergue@greensocs.com, pbonzini@redhat.com,
 alex.bennee@linaro.org, fred.konrad@greensocs.com

From: KONRAD Frederic <fred.konrad@greensocs.com>

This mechanism replaces the existing load/store exclusive mechanism, which
seems to be broken for multithreaded execution.

It follows the intention of the existing mechanism: it stores the target
address and data values during a load-exclusive operation and checks that
they remain unchanged before the corresponding store-exclusive.

In common with the older approach, this provides weaker semantics than
architecturally required: a different processor could write the same value
back with a non-exclusive store, and the exclusive store would still
succeed. In practice this seems to be irrelevant.

The old implementation didn't correctly store its values as globals, but
rather kept a local copy per CPU.

This new mechanism stores the values globally and also uses the atomic
cmpxchg macros to ensure atomicity - it is therefore efficient and
thread-safe.
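[Editor's note - illustration only, not part of the patch: a minimal
user-space sketch of the approach described above, assuming GCC's
__sync_val_compare_and_swap builtin stands in for QEMU's atomic_cmpxchg
macro; the ExclusiveMonitor structure and function names are hypothetical.]

    #include <stdint.h>

    typedef struct {
        uintptr_t addr; /* address tagged by the last load-exclusive, -1 if none */
        uint32_t val;   /* data value observed by that load-exclusive */
    } ExclusiveMonitor;

    static void load_exclusive(ExclusiveMonitor *m, uint32_t *p)
    {
        /* Remember both the address and the value, as the patch does. */
        m->addr = (uintptr_t)p;
        m->val = *p;
    }

    /* Returns 0 on success and 1 on failure, mirroring STREX's Rd result. */
    static int store_exclusive(ExclusiveMonitor *m, uint32_t *p, uint32_t newval)
    {
        if (m->addr != (uintptr_t)p) {
            return 1; /* monitor lost or never taken: fail */
        }
        m->addr = (uintptr_t)-1;
        /* Store only if memory still holds the value seen at load time.
         * Note the same ABA weakness the cover text mentions: a
         * non-exclusive write of the identical value is not detected. */
        return __sync_val_compare_and_swap(p, m->val, newval) == m->val ? 0 : 1;
    }

The real patch additionally serialises these steps with a global lock and
performs the compare-and-swap on the guest's physical memory, as shown in
the diff below.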
Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
---
 target-arm/cpu.c       |  21 +++++++
 target-arm/cpu.h       |   6 ++
 target-arm/helper.c    |  12 ++++
 target-arm/helper.h    |   6 ++
 target-arm/op_helper.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++-
 target-arm/translate.c |  98 ++++++---------------------------
 6 files changed, 207 insertions(+), 82 deletions(-)

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 4a888ab..0400061 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -31,6 +31,26 @@
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
 
+/* Protect the cpu_exclusive_* variables. */
+__thread bool cpu_have_exclusive_lock;
+QemuMutex cpu_exclusive_lock;
+
+inline void arm_exclusive_lock(void)
+{
+    if (!cpu_have_exclusive_lock) {
+        qemu_mutex_lock(&cpu_exclusive_lock);
+        cpu_have_exclusive_lock = true;
+    }
+}
+
+inline void arm_exclusive_unlock(void)
+{
+    if (cpu_have_exclusive_lock) {
+        cpu_have_exclusive_lock = false;
+        qemu_mutex_unlock(&cpu_exclusive_lock);
+    }
+}
+
 static void arm_cpu_set_pc(CPUState *cs, vaddr value)
 {
     ARMCPU *cpu = ARM_CPU(cs);
@@ -425,6 +445,7 @@ static void arm_cpu_initfn(Object *obj)
         cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
         if (!inited) {
             inited = true;
+            qemu_mutex_init(&cpu_exclusive_lock);
             arm_translate_init();
         }
     }
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 21b5b8e..257ed05 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -506,6 +506,9 @@ static inline bool is_a64(CPUARMState *env)
 int cpu_arm_signal_handler(int host_signum, void *pinfo, void *puc);
 
+int arm_get_phys_addr(CPUARMState *env, target_ulong address, int access_type,
+                      hwaddr *phys_ptr, int *prot, target_ulong *page_size);
+
 /**
  * pmccntr_sync
  * @env: CPUARMState
@@ -1922,4 +1925,7 @@ enum {
     QEMU_PSCI_CONDUIT_HVC = 2,
 };
 
+void arm_exclusive_lock(void);
+void arm_exclusive_unlock(void);
+
 #endif
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 3da0c05..31ee1e5 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -23,6 +23,14 @@ static inline int get_phys_addr(CPUARMState *env, target_ulong address,
 #define PMCRE 0x1
 #endif
 
+int arm_get_phys_addr(CPUARMState *env, target_ulong address, int access_type,
+                      hwaddr *phys_ptr, int *prot, target_ulong *page_size)
+{
+    MemTxAttrs attrs = {};
+    return get_phys_addr(env, address, access_type, cpu_mmu_index(env),
+                         phys_ptr, &attrs, prot, page_size);
+}
+
 static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
 {
     int nregs;
@@ -4731,6 +4739,10 @@ void arm_cpu_do_interrupt(CPUState *cs)
 
     arm_log_exception(cs->exception_index);
 
+    arm_exclusive_lock();
+    env->exclusive_addr = -1;
+    arm_exclusive_unlock();
+
     if (arm_is_psci_call(cpu, cs->exception_index)) {
         arm_handle_psci_call(cpu);
         qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
diff --git a/target-arm/helper.h b/target-arm/helper.h
index fc885de..63c2809 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -529,6 +529,12 @@ DEF_HELPER_2(dc_zva, void, env, i64)
 DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
+DEF_HELPER_4(atomic_cmpxchg64, i32, env, i32, i64, i32)
+DEF_HELPER_2(atomic_check, i32, env, i32)
+DEF_HELPER_1(atomic_release, void, env)
+DEF_HELPER_1(atomic_clear, void, env)
+DEF_HELPER_3(atomic_claim, void, env, i32, i64)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #endif
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 7583ae7..81dd403 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -30,12 +30,157 @@ static void raise_exception(CPUARMState *env, uint32_t excp,
     CPUState *cs = CPU(arm_env_get_cpu(env));
 
     assert(!excp_is_internal(excp));
+    arm_exclusive_lock();
     cs->exception_index = excp;
     env->exception.syndrome = syndrome;
     env->exception.target_el = target_el;
+    /*
+     * We MAY already have the lock - in which case we are exiting the
+     * instruction due to an exception. Otherwise we had better make sure
+     * we are not about to enter a STREX anyway.
+     */
+    env->exclusive_addr = -1;
+    arm_exclusive_unlock();
     cpu_loop_exit(cs);
 }
 
+/* NB: returns 1 for fail, 0 for pass. */
+uint32_t HELPER(atomic_cmpxchg64)(CPUARMState *env, uint32_t addr,
+                                  uint64_t newval, uint32_t size)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
+
+    bool result = false;
+    hwaddr len = 8 << size;
+
+    hwaddr paddr;
+    target_ulong page_size;
+    int prot;
+
+    arm_exclusive_lock();
+
+    if (env->exclusive_addr != addr) {
+        arm_exclusive_unlock();
+        return 1;
+    }
+
+    if (arm_get_phys_addr(env, addr, 1, &paddr, &prot, &page_size) != 0) {
+        tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, cpu_mmu_index(env), 0);
+        if (arm_get_phys_addr(env, addr, 1, &paddr, &prot, &page_size) != 0) {
+            arm_exclusive_unlock();
+            return 1;
+        }
+    }
+
+    switch (size) {
+    case 0:
+    {
+        uint8_t oldval, *p;
+        p = address_space_map(cs->as, paddr, &len, true);
+        if (len == 8 << size) {
+            oldval = (uint8_t)env->exclusive_val;
+            result = (atomic_cmpxchg(p, oldval, (uint8_t)newval) == oldval);
+        }
+        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
+    }
+    break;
+    case 1:
+    {
+        uint16_t oldval, *p;
+        p = address_space_map(cs->as, paddr, &len, true);
+        if (len == 8 << size) {
+            oldval = (uint16_t)env->exclusive_val;
+            result = (atomic_cmpxchg(p, oldval, (uint16_t)newval) == oldval);
+        }
+        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
+    }
+    break;
+    case 2:
+    {
+        uint32_t oldval, *p;
+        p = address_space_map(cs->as, paddr, &len, true);
+        if (len == 8 << size) {
+            oldval = (uint32_t)env->exclusive_val;
+            result = (atomic_cmpxchg(p, oldval, (uint32_t)newval) == oldval);
+        }
+        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
+    }
+    break;
+    case 3:
+    {
+        uint64_t oldval, *p;
+        p = address_space_map(cs->as, paddr, &len, true);
+        if (len == 8 << size) {
+            oldval = (uint64_t)env->exclusive_val;
+            result = (atomic_cmpxchg(p, oldval, (uint64_t)newval) == oldval);
+        }
+        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
+    }
+    break;
+    default:
+        abort();
+        break;
+    }
+
+    env->exclusive_addr = -1;
+    arm_exclusive_unlock();
+    if (result) {
+        return 0;
+    } else {
+        return 1;
+    }
+}
+
+
+uint32_t HELPER(atomic_check)(CPUARMState *env, uint32_t addr)
+{
+    arm_exclusive_lock();
+    if (env->exclusive_addr == addr) {
+        return true;
+    }
+    /* We failed to keep the address tagged, so we fail. */
+    env->exclusive_addr = -1; /* for efficiency */
+    arm_exclusive_unlock();
+    return false;
+}
+
+void HELPER(atomic_release)(CPUARMState *env)
+{
+    env->exclusive_addr = -1;
+    arm_exclusive_unlock();
+}
+
+void HELPER(atomic_clear)(CPUARMState *env)
+{
+    /* Make sure no STREX is about to start. */
+    arm_exclusive_lock();
+    env->exclusive_addr = -1;
+    arm_exclusive_unlock();
+}
+
+
+void HELPER(atomic_claim)(CPUARMState *env, uint32_t addr, uint64_t val)
+{
+    CPUState *cpu;
+    CPUARMState *current_cpu;
+
+    /* Ensure that there are no STREXs executing. */
+    arm_exclusive_lock();
+
+    CPU_FOREACH(cpu) {
+        current_cpu = &ARM_CPU(cpu)->env;
+        if (current_cpu->exclusive_addr == addr) {
+            /* We steal the atomic of this CPU. */
+            current_cpu->exclusive_addr = -1;
+        }
+    }
+
+    env->exclusive_val = val;
+    env->exclusive_addr = addr;
+    arm_exclusive_unlock();
+}
+
 static int exception_target_el(CPUARMState *env)
 {
     int target_el = MAX(1, arm_current_el(env));
@@ -582,7 +727,6 @@ void HELPER(exception_return)(CPUARMState *env)
 
     aarch64_save_sp(env, cur_el);
 
-    env->exclusive_addr = -1;
 
     /* We must squash the PSTATE.SS bit to zero unless both of the
      * following hold:
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 39692d7..ba4eb65 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -65,8 +65,8 @@ TCGv_ptr cpu_env;
 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 static TCGv_i32 cpu_R[16];
 static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
-static TCGv_i64 cpu_exclusive_addr;
 static TCGv_i64 cpu_exclusive_val;
+static TCGv_i64 cpu_exclusive_addr;
 #ifdef CONFIG_USER_ONLY
 static TCGv_i64 cpu_exclusive_test;
 static TCGv_i32 cpu_exclusive_info;
@@ -7391,6 +7391,7 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                                TCGv_i32 addr, int size)
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
+    TCGv_i64 val = tcg_temp_new_i64();
 
     s->is_ldex = true;
 
@@ -7415,20 +7416,20 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
 
         tcg_gen_addi_i32(tmp2, addr, 4);
         gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
+        tcg_gen_concat_i32_i64(val, tmp, tmp3);
         tcg_temp_free_i32(tmp2);
-        tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
         store_reg(s, rt2, tmp3);
     } else {
-        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
+        tcg_gen_extu_i32_i64(val, tmp);
     }
-
+    gen_helper_atomic_claim(cpu_env, addr, val);
+    tcg_temp_free_i64(val);
     store_reg(s, rt, tmp);
-    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
 }
 
 static void gen_clrex(DisasContext *s)
 {
-    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
+    gen_helper_atomic_clear(cpu_env);
 }
 
 #ifdef CONFIG_USER_ONLY
@@ -7445,84 +7446,19 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                 TCGv_i32 addr, int size)
 {
     TCGv_i32 tmp;
-    TCGv_i64 val64, extaddr;
-    TCGLabel *done_label;
-    TCGLabel *fail_label;
-
-    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
-         [addr] = {Rt};
-         {Rd} = 0;
-       } else {
-         {Rd} = 1;
-       } */
-    fail_label = gen_new_label();
-    done_label = gen_new_label();
-    extaddr = tcg_temp_new_i64();
-    tcg_gen_extu_i32_i64(extaddr, addr);
-    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
-    tcg_temp_free_i64(extaddr);
-
-    tmp = tcg_temp_new_i32();
-    switch (size) {
-    case 0:
-        gen_aa32_ld8u(tmp, addr, get_mem_index(s));
-        break;
-    case 1:
-        gen_aa32_ld16u(tmp, addr, get_mem_index(s));
-        break;
-    case 2:
-    case 3:
-        gen_aa32_ld32u(tmp, addr, get_mem_index(s));
-        break;
-    default:
-        abort();
-    }
-
-    val64 = tcg_temp_new_i64();
-    if (size == 3) {
-        TCGv_i32 tmp2 = tcg_temp_new_i32();
-        TCGv_i32 tmp3 = tcg_temp_new_i32();
-        tcg_gen_addi_i32(tmp2, addr, 4);
-        gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
-        tcg_temp_free_i32(tmp2);
-        tcg_gen_concat_i32_i64(val64, tmp, tmp3);
-        tcg_temp_free_i32(tmp3);
-    } else {
-        tcg_gen_extu_i32_i64(val64, tmp);
-    }
-    tcg_temp_free_i32(tmp);
-
-    tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
-    tcg_temp_free_i64(val64);
+    TCGv_i32 tmp2;
+    TCGv_i64 val = tcg_temp_new_i64();
+    TCGv_i32 tmp_size = tcg_const_i32(size);
 
     tmp = load_reg(s, rt);
-    switch (size) {
-    case 0:
-        gen_aa32_st8(tmp, addr, get_mem_index(s));
-        break;
-    case 1:
-        gen_aa32_st16(tmp, addr, get_mem_index(s));
-        break;
-    case 2:
-    case 3:
-        gen_aa32_st32(tmp, addr, get_mem_index(s));
-        break;
-    default:
-        abort();
-    }
+    tmp2 = load_reg(s, rt2);
+    tcg_gen_concat_i32_i64(val, tmp, tmp2);
     tcg_temp_free_i32(tmp);
-    if (size == 3) {
-        tcg_gen_addi_i32(addr, addr, 4);
-        tmp = load_reg(s, rt2);
-        gen_aa32_st32(tmp, addr, get_mem_index(s));
-        tcg_temp_free_i32(tmp);
-    }
-    tcg_gen_movi_i32(cpu_R[rd], 0);
-    tcg_gen_br(done_label);
-    gen_set_label(fail_label);
-    tcg_gen_movi_i32(cpu_R[rd], 1);
-    gen_set_label(done_label);
-    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
+    tcg_temp_free_i32(tmp2);
+
+    gen_helper_atomic_cmpxchg64(cpu_R[rd], cpu_env, addr, val, tmp_size);
+    tcg_temp_free_i64(val);
+    tcg_temp_free_i32(tmp_size);
 }
 #endif
 
-- 
1.9.0
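[Editor's note - for reference, a standalone sketch of the
recursion-tolerant locking pattern that arm_exclusive_lock() /
arm_exclusive_unlock() implement in the cpu.c hunk above, rewritten with
plain pthreads; the excl_* names are illustrative, not QEMU API. The
per-thread "already held" flag makes re-locking a no-op, which is why
raise_exception() can take the lock even when a helper such as
atomic_check() already holds it, and why atomic_check() can return with
the lock still held until the matching atomic_release().]

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t excl_mutex = PTHREAD_MUTEX_INITIALIZER;
    static __thread bool excl_held; /* does *this* thread hold the mutex? */

    static void excl_lock(void)
    {
        if (!excl_held) { /* no-op if already held by this thread */
            pthread_mutex_lock(&excl_mutex);
            excl_held = true;
        }
    }

    static void excl_unlock(void)
    {
        if (excl_held) { /* no-op if this thread never took the lock */
            excl_held = false;
            pthread_mutex_unlock(&excl_mutex);
        }
    }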