* [Qemu-devel] Fwd: [RFC] Adding multithreads to LDREX/STREX.
[not found] <1425400476-24047-1-git-send-email-fred.konrad@greensocs.com>
@ 2015-03-03 16:47 ` Mark Burton
2015-03-03 17:09 ` Paolo Bonzini
0 siblings, 1 reply; 3+ messages in thread
From: Mark Burton @ 2015-03-03 16:47 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: mttcg, QEMU Developers
[-- Attachment #1: Type: text/plain, Size: 11851 bytes --]
Paolo - here is a partially cleaned up patch - it’s still not quite right - but maybe it’s enough so that you can see what we’re doing.
There are changes in here that wouldn’t be sensible to upstream - like changing the address from a 64 to a 32 bit and needlessly moving it about - that’s just because we’ve been trying to play with things to get them working.
Hopefully you can see the wood for the trees
Cheers
Mark.
> Begin forwarded message:
>
> From: fred.konrad@greensocs.com
> To: mark.burton@greensocs.com
> Cc: fred.konrad@greensocs.com
> Subject: [RFC] Adding multithreads to LDREX/STREX.
> Date: 3 March 2015 17:34:36 CET
>
> From: KONRAD Frederic <fred.konrad@greensocs.com>
>
> ---
> target-arm/cpu.c | 21 +++++++++++++++
> target-arm/cpu.h | 7 ++++-
> target-arm/helper.c | 4 +++
> target-arm/helper.h | 7 +++++
> target-arm/machine.c | 2 +-
> target-arm/op_helper.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++-
> target-arm/translate.c | 55 +++++++++++++++++++--------------------
> 7 files changed, 135 insertions(+), 31 deletions(-)
>
> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
> index 285947f..e08c486 100644
> --- a/target-arm/cpu.c
> +++ b/target-arm/cpu.c
> @@ -31,6 +31,26 @@
> #include "sysemu/kvm.h"
> #include "kvm_arm.h"
>
> +/* Protect cpu_exclusive_* variable .*/
> +__thread bool cpu_have_exclusive_lock = false;
> +QemuMutex cpu_exclusive_lock;
> +
> +inline void arm_exclusive_lock(void)
> +{
> + if (!cpu_have_exclusive_lock) {
> + qemu_mutex_lock(&cpu_exclusive_lock);
> + cpu_have_exclusive_lock = true;
> + }
> +}
> +
> +inline void arm_exclusive_unlock(void)
> +{
> + if (cpu_have_exclusive_lock) {
> + cpu_have_exclusive_lock = false;
> + qemu_mutex_unlock(&cpu_exclusive_lock);
> + }
> +}
> +
> static void arm_cpu_set_pc(CPUState *cs, vaddr value)
> {
> ARMCPU *cpu = ARM_CPU(cs);
> @@ -374,6 +394,7 @@ static void arm_cpu_initfn(Object *obj)
> cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
> if (!inited) {
> inited = true;
> + qemu_mutex_init(&cpu_exclusive_lock);
> arm_translate_init();
> }
> }
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index cd7a9e8..6b4c38e 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -450,8 +450,10 @@ typedef struct CPUARMState {
> float_status fp_status;
> float_status standard_fp_status;
> } vfp;
> - uint64_t exclusive_addr;
> + uint32_t exclusive_addr;
> uint64_t exclusive_val;
> + bool exclusive_in_store;
> + bool exclusive_store_fail;
> uint64_t exclusive_high;
> #if defined(CONFIG_USER_ONLY)
> uint64_t exclusive_test;
> @@ -1819,4 +1821,7 @@ enum {
> QEMU_PSCI_CONDUIT_HVC = 2,
> };
>
> +void arm_exclusive_lock(void);
> +void arm_exclusive_unlock(void);
> +
> #endif
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 151f96b..11a8d9a 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -4256,6 +4256,10 @@ void arm_cpu_do_interrupt(CPUState *cs)
>
> arm_log_exception(cs->exception_index);
>
> + arm_exclusive_lock();
> + env->exclusive_addr = -1;
> + arm_exclusive_unlock();
> +
> if (arm_is_psci_call(cpu, cs->exception_index)) {
> arm_handle_psci_call(cpu);
> qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
> diff --git a/target-arm/helper.h b/target-arm/helper.h
> index dec3728..fd8eb71 100644
> --- a/target-arm/helper.h
> +++ b/target-arm/helper.h
> @@ -529,6 +529,13 @@ DEF_HELPER_2(dc_zva, void, env, i64)
> DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>
> +DEF_HELPER_0(exclusive_lock, void)
> +DEF_HELPER_0(exclusive_unlock, void)
> +DEF_HELPER_2(atomic_check, i32, env, i32)
> +DEF_HELPER_1(atomic_release, void, env)
> +DEF_HELPER_1(atomic_clear, void, env)
> +DEF_HELPER_2(atomic_claim, void, env, i32)
> +
> #ifdef TARGET_AARCH64
> #include "helper-a64.h"
> #endif
> diff --git a/target-arm/machine.c b/target-arm/machine.c
> index c29e7a2..3f9c9c8 100644
> --- a/target-arm/machine.c
> +++ b/target-arm/machine.c
> @@ -270,7 +270,7 @@ const VMStateDescription vmstate_arm_cpu = {
> VMSTATE_VARRAY_INT32(cpreg_vmstate_values, ARMCPU,
> cpreg_vmstate_array_len,
> 0, vmstate_info_uint64, uint64_t),
> - VMSTATE_UINT64(env.exclusive_addr, ARMCPU),
> +// VMSTATE_UINT64(env.exclusive_addr, ARMCPU),
> VMSTATE_UINT64(env.exclusive_val, ARMCPU),
> VMSTATE_UINT64(env.exclusive_high, ARMCPU),
> VMSTATE_UINT64(env.features, ARMCPU),
> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
> index 2bed914..4f0fcc1 100644
> --- a/target-arm/op_helper.c
> +++ b/target-arm/op_helper.c
> @@ -29,10 +29,78 @@ static void raise_exception(CPUARMState *env, int tt)
> ARMCPU *cpu = arm_env_get_cpu(env);
> CPUState *cs = CPU(cpu);
>
> + arm_exclusive_lock();
> + // we MAY already have the lock - in which case we are exiting the
> + // instruction due to an exception. Otherwise we better make sure we are not
> + // about to enter a STREX anyway
> + env->exclusive_addr=-1;
> + arm_exclusive_unlock();
> +
> cs->exception_index = tt;
> cpu_loop_exit(cs);
> }
>
> +void HELPER(exclusive_lock)(void)
> +{
> + arm_exclusive_lock();
> +}
> +
> +
> +void HELPER(exclusive_unlock)(void)
> +{
> + arm_exclusive_unlock();
> +}
> +
> +
> +uint32_t HELPER(atomic_check)(CPUARMState *env, uint32_t addr)
> +{
> + arm_exclusive_lock();
> +
> + if (env->exclusive_addr == addr) {
> + return true;
> + }
> + /* we failed to keep the address tagged, so we fail */
> + env->exclusive_addr = -1; // for efficiency
> + arm_exclusive_unlock();
> + return false;
> +}
> +
> +void HELPER(atomic_release)(CPUARMState *env)
> +{
> + env->exclusive_addr = -1;
> + env->exclusive_in_store = false;
> + arm_exclusive_unlock();
> +}
> +
> +void HELPER(atomic_clear)(CPUARMState *env)
> +{
> + /* make sure no STREX is about to start */
> + arm_exclusive_lock();
> + env->exclusive_in_store = false;
> + env->exclusive_addr = -1;
> + arm_exclusive_unlock();
> +}
> +
> +
> +void HELPER(atomic_claim)(CPUARMState *env, uint32_t addr)
> +{
> + CPUState *cpu;
> + CPUARMState *current_cpu;
> +
> + /* ensure that there are no STREX's executing */
> + arm_exclusive_lock();
> +
> + CPU_FOREACH(cpu) {
> + current_cpu = &ARM_CPU(cpu)->env;
> + if (current_cpu->exclusive_addr == addr) {
> + // We steal the atomic of this CPU.
> + current_cpu->exclusive_addr = -1;
> + }
> + }
> +
> + env->exclusive_addr = addr;
> +}
> +
> uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
> uint32_t rn, uint32_t maxindex)
> {
> @@ -469,7 +537,7 @@ void HELPER(exception_return)(CPUARMState *env)
>
> aarch64_save_sp(env, cur_el);
>
> - env->exclusive_addr = -1;
> + env->exclusive_store_fail = true;
>
> /* We must squash the PSTATE.SS bit to zero unless both of the
> * following hold:
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 04657c1..13c2599 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -7364,7 +7364,9 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
> {
> TCGv_i32 tmp = tcg_temp_new_i32();
>
> + /* FIXME: We need to make sure nobody has writen the exclusive address! */
> s->is_ldex = true;
> + gen_helper_atomic_claim(cpu_env, addr);
>
> switch (size) {
> case 0:
> @@ -7387,20 +7389,20 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>
> tcg_gen_addi_i32(tmp2, addr, 4);
> gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
> - tcg_temp_free_i32(tmp2);
> tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
> + tcg_temp_free_i32(tmp2);
> store_reg(s, rt2, tmp3);
> } else {
> - tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
> + tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
> }
>
> + gen_helper_exclusive_unlock();
> store_reg(s, rt, tmp);
> - tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
> }
>
> static void gen_clrex(DisasContext *s)
> {
> - tcg_gen_movi_i64(cpu_exclusive_addr, -1);
> + gen_helper_atomic_clear(cpu_env);
> }
>
> #ifdef CONFIG_USER_ONLY
> @@ -7417,24 +7419,19 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
> TCGv_i32 addr, int size)
> {
> TCGv_i32 tmp;
> - TCGv_i64 val64, extaddr;
> - int done_label;
> - int fail_label;
> -
> - /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
> - [addr] = {Rt};
> - {Rd} = 0;
> - } else {
> - {Rd} = 1;
> - } */
> - fail_label = gen_new_label();
> - done_label = gen_new_label();
> - extaddr = tcg_temp_new_i64();
> - tcg_gen_extu_i32_i64(extaddr, addr);
> - tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
> - tcg_temp_free_i64(extaddr);
> + int done_label = gen_new_label();
> + int fail_label = gen_new_label();
>
> - tmp = tcg_temp_new_i32();
> + tcg_gen_movi_i32(cpu_R[rd], 1);
> +
> + TCGv_i32 success = tcg_temp_new_i32();
> + gen_helper_atomic_check(success, cpu_env, addr);
> + tcg_gen_brcondi_i32(TCG_COND_EQ, success, 0, done_label);
> + tcg_temp_free_i32(success);
> +
> + /* Store shoudl be OK lets check the value */
> + tmp = load_reg(s, rt);
> + TCGv_i64 val64=tcg_temp_new_i64();
> switch (size) {
> case 0:
> gen_aa32_ld8u(tmp, addr, get_mem_index(s));
> @@ -7450,21 +7447,19 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
> abort();
> }
>
> - val64 = tcg_temp_new_i64();
> if (size == 3) {
> TCGv_i32 tmp2 = tcg_temp_new_i32();
> TCGv_i32 tmp3 = tcg_temp_new_i32();
> +
> tcg_gen_addi_i32(tmp2, addr, 4);
> gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
> - tcg_temp_free_i32(tmp2);
> tcg_gen_concat_i32_i64(val64, tmp, tmp3);
> - tcg_temp_free_i32(tmp3);
> + tcg_temp_free_i32(tmp2);
> } else {
> - tcg_gen_extu_i32_i64(val64, tmp);
> + tcg_gen_extu_i32_i64(val64, tmp);
> }
> tcg_temp_free_i32(tmp);
> -
> - tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
> + tcg_gen_brcond_i64(TCG_COND_NE, cpu_exclusive_val, val64, fail_label);
> tcg_temp_free_i64(val64);
>
> tmp = load_reg(s, rt);
> @@ -7489,12 +7484,16 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
> gen_aa32_st32(tmp, addr, get_mem_index(s));
> tcg_temp_free_i32(tmp);
> }
> +
> + gen_helper_atomic_release(cpu_env);
> tcg_gen_movi_i32(cpu_R[rd], 0);
> tcg_gen_br(done_label);
> +
> gen_set_label(fail_label);
> + gen_helper_atomic_release(cpu_env);
> tcg_gen_movi_i32(cpu_R[rd], 1);
> +
> gen_set_label(done_label);
> - tcg_gen_movi_i64(cpu_exclusive_addr, -1);
> }
> #endif
>
> --
> 1.9.0
>
+44 (0)20 7100 3485 x 210
+33 (0)5 33 52 01 77x 210
+33 (0)603762104
mark.burton
<applewebdata://5E91396D-5670-4786-8646-6093B2F6D49B/www.greensocs.com>
[-- Attachment #2: Type: text/html, Size: 23335 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Qemu-devel] Fwd: [RFC] Adding multithreads to LDREX/STREX.
2015-03-03 16:47 ` [Qemu-devel] Fwd: [RFC] Adding multithreads to LDREX/STREX Mark Burton
@ 2015-03-03 17:09 ` Paolo Bonzini
2015-03-03 17:22 ` [Qemu-devel] " Mark Burton
0 siblings, 1 reply; 3+ messages in thread
From: Paolo Bonzini @ 2015-03-03 17:09 UTC (permalink / raw)
To: Mark Burton; +Cc: mttcg, QEMU Developers
On 03/03/2015 17:47, Mark Burton wrote:
> +inline void arm_exclusive_lock(void)
> +{
> + if (!cpu_have_exclusive_lock) {
> + qemu_mutex_lock(&cpu_exclusive_lock);
> + cpu_have_exclusive_lock = true;
> + }
> +}
> +
> +inline void arm_exclusive_unlock(void)
> +{
> + if (cpu_have_exclusive_lock) {
> + cpu_have_exclusive_lock = false;
> + qemu_mutex_unlock(&cpu_exclusive_lock);
> + }
> +}
> +
This is almost but not quite a recursive mutex. What about changing it
like this:
- arm_exclusive_lock just takes the lock and sets the flag; no "if"
- arm_exclusive_unlock does the opposite, again no "if"
- raise_exception checks the flag and skips "arm_exclusive_lock()" if
already set
The only other question I have is this:
> + gen_helper_atomic_check(success, cpu_env, addr);
> + tcg_gen_brcondi_i32(TCG_COND_EQ, success, 0, done_label);
Are you setting cpu_R[rd] correctly in this case? That is, should you
be jumping to fail_label instead? That could cause a failure to be
reported as a success.
Paolo
> + tcg_temp_free_i32(success);
> +
> + /* Store shoudl be OK lets check the value */
> + tmp = load_reg(s, rt);
> + TCGv_i64 val64=tcg_temp_new_i64();
> switch (size) {
> case 0:
> gen_aa32_ld8u(tmp, addr, get_mem_index(s));
> @@ -7450,21 +7447,19 @@ static void gen_store_exclusive(DisasContext *s,
> int rd, int rt, int rt2,
> abort();
> }
>
> - val64 = tcg_temp_new_i64();
> if (size == 3) {
> TCGv_i32 tmp2 = tcg_temp_new_i32();
> TCGv_i32 tmp3 = tcg_temp_new_i32();
> +
> tcg_gen_addi_i32(tmp2, addr, 4);
> gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
> - tcg_temp_free_i32(tmp2);
> tcg_gen_concat_i32_i64(val64, tmp, tmp3);
> - tcg_temp_free_i32(tmp3);
> + tcg_temp_free_i32(tmp2);
> } else {
> - tcg_gen_extu_i32_i64(val64, tmp);
> + tcg_gen_extu_i32_i64(val64, tmp);
> }
> tcg_temp_free_i32(tmp);
> -
> - tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
> + tcg_gen_brcond_i64(TCG_COND_NE, cpu_exclusive_val, val64, fail_label);
> tcg_temp_free_i64(val64);
>
> tmp = load_reg(s, rt);
> @@ -7489,12 +7484,16 @@ static void gen_store_exclusive(DisasContext *s,
> int rd, int rt, int rt2,
> gen_aa32_st32(tmp, addr, get_mem_index(s));
> tcg_temp_free_i32(tmp);
> }
> +
> + gen_helper_atomic_release(cpu_env);
> tcg_gen_movi_i32(cpu_R[rd], 0);
> tcg_gen_br(done_label);
> +
> gen_set_label(fail_label);
> + gen_helper_atomic_release(cpu_env);
> tcg_gen_movi_i32(cpu_R[rd], 1);
> +
> gen_set_label(done_label);
> - tcg_gen_movi_i64(cpu_exclusive_addr, -1);
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Qemu-devel] [RFC] Adding multithreads to LDREX/STREX.
2015-03-03 17:09 ` Paolo Bonzini
@ 2015-03-03 17:22 ` Mark Burton
0 siblings, 0 replies; 3+ messages in thread
From: Mark Burton @ 2015-03-03 17:22 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: mttcg, QEMU Developers
> On 3 Mar 2015, at 18:09, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
>
> On 03/03/2015 17:47, Mark Burton wrote:
>> +inline void arm_exclusive_lock(void)
>> +{
>> + if (!cpu_have_exclusive_lock) {
>> + qemu_mutex_lock(&cpu_exclusive_lock);
>> + cpu_have_exclusive_lock = true;
>> + }
>> +}
>> +
>> +inline void arm_exclusive_unlock(void)
>> +{
>> + if (cpu_have_exclusive_lock) {
>> + cpu_have_exclusive_lock = false;
>> + qemu_mutex_unlock(&cpu_exclusive_lock);
>> + }
>> +}
>> +
>
> This is almost but not quite a recursive mutex. What about changing it
> like this:
>
> - arm_exclusive_lock just takes the lock and sets the flag; no "if"
>
> - arm_exclusive_unlock does the opposite, again no "if"
>
> - raise_exception checks the flag and skips "arm_exclusive_lock()" if
> already set
>
yes - that’s better - though I would rather live without the if all-together. There are only a couple of places where they are ‘needed’ and I should have checked explicitly there — call it laziness ;-)
> The only other question I have is this:
>
>> + gen_helper_atomic_check(success, cpu_env, addr);
>> + tcg_gen_brcondi_i32(TCG_COND_EQ, success, 0, done_label);
>
> Are you setting cpu_R[rd] correctly in this case? That is, should you
> be jumping to fail_label instead? That could case a failure to be
> reported as a success.
I almost jumped on a plane and came and kissed you —— almost….
but - actually we set R[rd] to 1 above the branch and only reset it to 0 afterwards… so I think the functionality is correct… :-(((
thanks though - your help is much appreciated.
Cheers
Mark.
>
> Paolo
>
>> + tcg_temp_free_i32(success);
>> +
>> + /* Store shoudl be OK lets check the value */
>> + tmp = load_reg(s, rt);
>> + TCGv_i64 val64=tcg_temp_new_i64();
>> switch (size) {
>> case 0:
>> gen_aa32_ld8u(tmp, addr, get_mem_index(s));
>> @@ -7450,21 +7447,19 @@ static void gen_store_exclusive(DisasContext *s,
>> int rd, int rt, int rt2,
>> abort();
>> }
>>
>> - val64 = tcg_temp_new_i64();
>> if (size == 3) {
>> TCGv_i32 tmp2 = tcg_temp_new_i32();
>> TCGv_i32 tmp3 = tcg_temp_new_i32();
>> +
>> tcg_gen_addi_i32(tmp2, addr, 4);
>> gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
>> - tcg_temp_free_i32(tmp2);
>> tcg_gen_concat_i32_i64(val64, tmp, tmp3);
>> - tcg_temp_free_i32(tmp3);
>> + tcg_temp_free_i32(tmp2);
>> } else {
>> - tcg_gen_extu_i32_i64(val64, tmp);
>> + tcg_gen_extu_i32_i64(val64, tmp);
>> }
>> tcg_temp_free_i32(tmp);
>> -
>> - tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
>> + tcg_gen_brcond_i64(TCG_COND_NE, cpu_exclusive_val, val64, fail_label);
>> tcg_temp_free_i64(val64);
>>
>> tmp = load_reg(s, rt);
>> @@ -7489,12 +7484,16 @@ static void gen_store_exclusive(DisasContext *s,
>> int rd, int rt, int rt2,
>> gen_aa32_st32(tmp, addr, get_mem_index(s));
>> tcg_temp_free_i32(tmp);
>> }
>> +
>> + gen_helper_atomic_release(cpu_env);
>> tcg_gen_movi_i32(cpu_R[rd], 0);
>> tcg_gen_br(done_label);
>> +
>> gen_set_label(fail_label);
>> + gen_helper_atomic_release(cpu_env);
>> tcg_gen_movi_i32(cpu_R[rd], 1);
>> +
>> gen_set_label(done_label);
>> - tcg_gen_movi_i64(cpu_exclusive_addr, -1);
+44 (0)20 7100 3485 x 210
+33 (0)5 33 52 01 77x 210
+33 (0)603762104
mark.burton
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2015-03-03 17:22 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <1425400476-24047-1-git-send-email-fred.konrad@greensocs.com>
2015-03-03 16:47 ` [Qemu-devel] Fwd: [RFC] Adding multithreads to LDREX/STREX Mark Burton
2015-03-03 17:09 ` Paolo Bonzini
2015-03-03 17:22 ` [Qemu-devel] " Mark Burton
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).