From: "Alex Bennée" <alex.bennee@linaro.org>
To: fred.konrad@greensocs.com
Cc: mttcg@listserver.greensocs.com, peter.maydell@linaro.org,
	mark.burton@greensocs.com, qemu-devel@nongnu.org, agraf@suse.de,
	guillaume.delbergue@greensocs.com, pbonzini@redhat.com
Subject: Re: [Qemu-devel] [RFC PATCH] Use atomic cmpxchg to atomically check the exclusive value in a STREX
Date: Tue, 09 Jun 2015 10:12:56 +0100	[thread overview]
Message-ID: <871thl8ctj.fsf@linaro.org> (raw)
In-Reply-To: <1433514678-4464-1-git-send-email-fred.konrad@greensocs.com>


fred.konrad@greensocs.com writes:

> From: KONRAD Frederic <fred.konrad@greensocs.com>
>
> This mechanism replaces the existing load/store exclusive mechanism, which seems
> to be broken for multithreaded use.
> It follows the intention of the existing mechanism: it stores the target address
> and data values during a load operation and checks that they remain unchanged
> before a store.
>
> In common with the older approach, this provides weaker semantics than required,
> in that a different processor could write the same value with a non-exclusive
> store and the exclusive store would still succeed; in practice this seems to be
> irrelevant.

You can WFE on the global monitor and use it for a lockless sleep on a
flag value. I don't think the Linux kernel does it, but certainly
anything trying to be power efficient may use it.
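
Roughly the sort of pattern I mean - a minimal sketch with GCC-style
inline asm for ARMv7, not lifted from any real code base:

  /* Sleep on a flag without spinning: LDREX marks the address in the
   * exclusive monitor, and another core's write to it generates the
   * wake-up event that WFE is waiting for. */
  static inline void wait_for_flag(volatile unsigned int *flag)
  {
      unsigned int val;
      do {
          __asm__ volatile("ldrex %0, [%1]"
                           : "=r" (val) : "r" (flag) : "memory");
          if (!val) {
              __asm__ volatile("wfe" ::: "memory");
          }
      } while (!val);
      __asm__ volatile("clrex" ::: "memory");
  }

An emulation that only compares values at store time has no way to model
that wake-up.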

>
> The old implementation didn't correctly store its values as globals, but rather
> kept a local copy per CPU.
>
> This new mechanism stores the values globally and also uses the atomic cmpxchg
> macros to ensure atomicity - it is therefore very efficient and threadsafe.
>
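
(For anyone following along: QEMU's atomic_cmpxchg() wraps the host
compare-and-swap; as of today's include/qemu/atomic.h it is, roughly:

  /* Writes new to *ptr iff *ptr == old; returns the value actually
   * observed, so equality with old means the swap happened. */
  #define atomic_cmpxchg(ptr, old, new) \
          __sync_val_compare_and_swap(ptr, old, new)

so the check-and-update of guest memory really is a single atomic
operation on the host.)
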
> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
> ---
>  target-arm/cpu.c       |  21 +++++++
>  target-arm/cpu.h       |   6 ++
>  target-arm/helper.c    |  12 ++++
>  target-arm/helper.h    |   6 ++
>  target-arm/op_helper.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  target-arm/translate.c |  98 ++++++---------------------------
>  6 files changed, 207 insertions(+), 82 deletions(-)
>
> diff --git a/target-arm/cpu.c b/target-arm/cpu.c
> index 4a888ab..0400061 100644
> --- a/target-arm/cpu.c
> +++ b/target-arm/cpu.c
> @@ -31,6 +31,26 @@
>  #include "sysemu/kvm.h"
>  #include "kvm_arm.h"
>  
> +/* Protect the cpu_exclusive_* variables. */
> +__thread bool cpu_have_exclusive_lock;
> +QemuMutex cpu_exclusive_lock;
> +
> +inline void arm_exclusive_lock(void)
> +{
> +    if (!cpu_have_exclusive_lock) {
> +        qemu_mutex_lock(&cpu_exclusive_lock);
> +        cpu_have_exclusive_lock = true;
> +    }
> +}
> +
> +inline void arm_exclusive_unlock(void)
> +{
> +    if (cpu_have_exclusive_lock) {
> +        cpu_have_exclusive_lock = false;
> +        qemu_mutex_unlock(&cpu_exclusive_lock);
> +    }
> +}

I don't quite follow. If these locks are meant to be protecting access
to variables then how do they do that? The lock won't block if another
thread is currently messing with the protected values.
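
As far as I can tell what you have is a recursive-style guard keyed on
a per-thread flag, i.e. (my reading, untested):

  arm_exclusive_lock();   /* takes the mutex, sets the flag        */
  arm_exclusive_lock();   /* flag already set: returns immediately */
  arm_exclusive_unlock(); /* clears the flag, drops the mutex      */
  arm_exclusive_unlock(); /* flag already clear: no-op             */

which is only safe if the flag can never be true without this thread
actually holding the mutex. That invariant deserves a comment at the
very least.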

> +
>  static void arm_cpu_set_pc(CPUState *cs, vaddr value)
>  {
>      ARMCPU *cpu = ARM_CPU(cs);
> @@ -425,6 +445,7 @@ static void arm_cpu_initfn(Object *obj)
>          cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
>          if (!inited) {
>              inited = true;
> +            qemu_mutex_init(&cpu_exclusive_lock);
>              arm_translate_init();
>          }
>      }
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index 21b5b8e..257ed05 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -506,6 +506,9 @@ static inline bool is_a64(CPUARMState *env)
>  int cpu_arm_signal_handler(int host_signum, void *pinfo,
>                             void *puc);
>  
> +int arm_get_phys_addr(CPUARMState *env, target_ulong address, int access_type,
> +                      hwaddr *phys_ptr, int *prot, target_ulong *page_size);
> +
>  /**
>   * pmccntr_sync
>   * @env: CPUARMState
> @@ -1922,4 +1925,7 @@ enum {
>      QEMU_PSCI_CONDUIT_HVC = 2,
>  };
>  
> +void arm_exclusive_lock(void);
> +void arm_exclusive_unlock(void);
> +
>  #endif
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 3da0c05..31ee1e5 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -23,6 +23,14 @@ static inline int get_phys_addr(CPUARMState *env, target_ulong address,
>  #define PMCRE   0x1
>  #endif
>  
> +int arm_get_phys_addr(CPUARMState *env, target_ulong address, int access_type,
> +                      hwaddr *phys_ptr, int *prot, target_ulong *page_size)
> +{
> +    MemTxAttrs attrs = {};
> +    return get_phys_addr(env, address, access_type, cpu_mmu_index(env),
> +                         phys_ptr, &attrs, prot, page_size);
> +}
> +
>  static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
>  {
>      int nregs;
> @@ -4731,6 +4739,10 @@ void arm_cpu_do_interrupt(CPUState *cs)
>  
>      arm_log_exception(cs->exception_index);
>  
> +    arm_exclusive_lock();
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +
>      if (arm_is_psci_call(cpu, cs->exception_index)) {
>          arm_handle_psci_call(cpu);
>          qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
> diff --git a/target-arm/helper.h b/target-arm/helper.h
> index fc885de..63c2809 100644
> --- a/target-arm/helper.h
> +++ b/target-arm/helper.h
> @@ -529,6 +529,12 @@ DEF_HELPER_2(dc_zva, void, env, i64)
>  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>  
> +DEF_HELPER_4(atomic_cmpxchg64, i32, env, i32, i64, i32)
> +DEF_HELPER_2(atomic_check, i32, env, i32)
> +DEF_HELPER_1(atomic_release, void, env)
> +DEF_HELPER_1(atomic_clear, void, env)
> +DEF_HELPER_3(atomic_claim, void, env, i32, i64)
> +
>  #ifdef TARGET_AARCH64
>  #include "helper-a64.h"
>  #endif
> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
> index 7583ae7..81dd403 100644
> --- a/target-arm/op_helper.c
> +++ b/target-arm/op_helper.c
> @@ -30,12 +30,157 @@ static void raise_exception(CPUARMState *env, uint32_t excp,
>      CPUState *cs = CPU(arm_env_get_cpu(env));
>  
>      assert(!excp_is_internal(excp));
> +    arm_exclusive_lock();
>      cs->exception_index = excp;
>      env->exception.syndrome = syndrome;
>      env->exception.target_el = target_el;
> +    /*
> +     * We MAY already have the lock - in which case we are exiting the
> +     * instruction due to an exception. Otherwise we had better make sure we
> +     * are not about to enter a STREX anyway.
> +     */
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
>      cpu_loop_exit(cs);
>  }
>  
> +/* NB return 1 for fail, 0 for pass */
> +uint32_t HELPER(atomic_cmpxchg64)(CPUARMState *env, uint32_t addr,
> +                                  uint64_t newval, uint32_t size)
> +{
> +    ARMCPU *cpu = arm_env_get_cpu(env);
> +    CPUState *cs = CPU(cpu);
> +
> +    bool result = false;
> +    hwaddr len = 8 << size;
> +
> +    hwaddr paddr;
> +    target_ulong page_size;
> +    int prot;
> +
> +    arm_exclusive_lock();
> +
> +    if (env->exclusive_addr != addr) {
> +        arm_exclusive_unlock();
> +        return 1;
> +    }
> +
> +    if (arm_get_phys_addr(env, addr, 1, &paddr, &prot, &page_size) != 0) {
> +        tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, cpu_mmu_index(env), 0);
> +        if  (arm_get_phys_addr(env, addr, 1, &paddr, &prot, &page_size) != 0) {
> +            arm_exclusive_unlock();
> +            return 1;
> +        }
> +    }

Some comments explaining what we are doing here would be useful.
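
Something along these lines, perhaps (assuming I've read the intent
correctly):

  /*
   * Resolve the guest address to a physical one so we can map and
   * cmpxchg the backing host memory directly. If the first lookup
   * fails, fault the page in via tlb_fill() and retry; if it still
   * fails the exclusive store cannot succeed, so report failure.
   */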

> +
> +    switch (size) {
> +    case 0:
> +    {
> +        uint8_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint8_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint8_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    case 1:
> +    {
> +        uint16_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint16_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint16_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    case 2:
> +    {
> +        uint32_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint32_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint32_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    case 3:
> +    {
> +        uint64_t oldval, *p;
> +        p = address_space_map(cs->as, paddr, &len, true);
> +        if (len == 8 << size) {
> +            oldval = (uint64_t)env->exclusive_val;
> +            result = (atomic_cmpxchg(p, oldval, (uint64_t)newval) == oldval);
> +        }
> +        address_space_unmap(cs->as, p, len, true, result ? 8 : 0);
> +    }
> +    break;
> +    default:
> +        abort();
> +    break;
> +    }
> +
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +    if (result) {
> +        return 0;
> +    } else {
> +        return 1;
> +    }
> +}
> +
> +
> +uint32_t HELPER(atomic_check)(CPUARMState *env, uint32_t addr)
> +{
> +      arm_exclusive_lock();
> +      if (env->exclusive_addr == addr) {
> +        return true;
> +      }
> +      /* we failed to keep the address tagged, so we fail */
> +      env->exclusive_addr = -1;  /* for efficiency */
> +      arm_exclusive_unlock();
> +      return false;
> +}
> +
> +void HELPER(atomic_release)(CPUARMState *env)
> +{
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +}
> +
> +void HELPER(atomic_clear)(CPUARMState *env)
> +{
> +    /* make sure no STREX is about to start */
> +    arm_exclusive_lock();
> +    env->exclusive_addr = -1;
> +    arm_exclusive_unlock();
> +}
> +
> +
> +void HELPER(atomic_claim)(CPUARMState *env, uint32_t addr, uint64_t val)
> +{
> +    CPUState *cpu;
> +    CPUARMState *current_cpu;
> +
> +    /* ensure that there are no STREX's executing */
> +    arm_exclusive_lock();
> +
> +    CPU_FOREACH(cpu) {
> +        current_cpu = &ARM_CPU(cpu)->env;
> +        if (current_cpu->exclusive_addr  == addr) {
> +            /* We steal the atomic of this CPU. */
> +            current_cpu->exclusive_addr = -1;
> +        }
> +    }
> +
> +    env->exclusive_val = val;
> +    env->exclusive_addr = addr;
> +    arm_exclusive_unlock();
> +}
> +
>  static int exception_target_el(CPUARMState *env)
>  {
>      int target_el = MAX(1, arm_current_el(env));
> @@ -582,7 +727,6 @@ void HELPER(exception_return)(CPUARMState *env)
>  
>      aarch64_save_sp(env, cur_el);
>  
> -    env->exclusive_addr = -1;
>  
>      /* We must squash the PSTATE.SS bit to zero unless both of the
>       * following hold:
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 39692d7..ba4eb65 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -65,8 +65,8 @@ TCGv_ptr cpu_env;
>  static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
>  static TCGv_i32 cpu_R[16];
>  static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
> -static TCGv_i64 cpu_exclusive_addr;
>  static TCGv_i64 cpu_exclusive_val;
> +static TCGv_i64 cpu_exclusive_addr;
>  #ifdef CONFIG_USER_ONLY
>  static TCGv_i64 cpu_exclusive_test;
>  static TCGv_i32 cpu_exclusive_info;
> @@ -7391,6 +7391,7 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>                                 TCGv_i32 addr, int size)
>  {
>      TCGv_i32 tmp = tcg_temp_new_i32();
> +    TCGv_i64 val = tcg_temp_new_i64();
>  
>      s->is_ldex = true;
>  
> @@ -7415,20 +7416,20 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>  
>          tcg_gen_addi_i32(tmp2, addr, 4);
>          gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
> +        tcg_gen_concat_i32_i64(val, tmp, tmp3);
>          tcg_temp_free_i32(tmp2);
> -        tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
>          store_reg(s, rt2, tmp3);
>      } else {
> -        tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
> +        tcg_gen_extu_i32_i64(val, tmp);
>      }
> -
> +    gen_helper_atomic_claim(cpu_env, addr, val);
> +    tcg_temp_free_i64(val);
>      store_reg(s, rt, tmp);
> -    tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
>  }
>  
>  static void gen_clrex(DisasContext *s)
>  {
> -    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
> +    gen_helper_atomic_clear(cpu_env);
>  }
>  
>  #ifdef CONFIG_USER_ONLY
> @@ -7445,84 +7446,19 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>                                  TCGv_i32 addr, int size)
>  {
>      TCGv_i32 tmp;
> -    TCGv_i64 val64, extaddr;
> -    TCGLabel *done_label;
> -    TCGLabel *fail_label;
> -
> -    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
> -         [addr] = {Rt};
> -         {Rd} = 0;
> -       } else {
> -         {Rd} = 1;
> -       } */
> -    fail_label = gen_new_label();
> -    done_label = gen_new_label();
> -    extaddr = tcg_temp_new_i64();
> -    tcg_gen_extu_i32_i64(extaddr, addr);
> -    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
> -    tcg_temp_free_i64(extaddr);
> -
> -    tmp = tcg_temp_new_i32();
> -    switch (size) {
> -    case 0:
> -        gen_aa32_ld8u(tmp, addr, get_mem_index(s));
> -        break;
> -    case 1:
> -        gen_aa32_ld16u(tmp, addr, get_mem_index(s));
> -        break;
> -    case 2:
> -    case 3:
> -        gen_aa32_ld32u(tmp, addr, get_mem_index(s));
> -        break;
> -    default:
> -        abort();
> -    }
> -
> -    val64 = tcg_temp_new_i64();
> -    if (size == 3) {
> -        TCGv_i32 tmp2 = tcg_temp_new_i32();
> -        TCGv_i32 tmp3 = tcg_temp_new_i32();
> -        tcg_gen_addi_i32(tmp2, addr, 4);
> -        gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
> -        tcg_temp_free_i32(tmp2);
> -        tcg_gen_concat_i32_i64(val64, tmp, tmp3);
> -        tcg_temp_free_i32(tmp3);
> -    } else {
> -        tcg_gen_extu_i32_i64(val64, tmp);
> -    }
> -    tcg_temp_free_i32(tmp);
> -
> -    tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
> -    tcg_temp_free_i64(val64);
> +    TCGv_i32 tmp2;
> +    TCGv_i64 val = tcg_temp_new_i64();
> +    TCGv_i32 tmp_size = tcg_const_i32(size);
>  
>      tmp = load_reg(s, rt);
> -    switch (size) {
> -    case 0:
> -        gen_aa32_st8(tmp, addr, get_mem_index(s));
> -        break;
> -    case 1:
> -        gen_aa32_st16(tmp, addr, get_mem_index(s));
> -        break;
> -    case 2:
> -    case 3:
> -        gen_aa32_st32(tmp, addr, get_mem_index(s));
> -        break;
> -    default:
> -        abort();
> -    }
> +    tmp2 = load_reg(s, rt2);
> +    tcg_gen_concat_i32_i64(val, tmp, tmp2);
>      tcg_temp_free_i32(tmp);
> -    if (size == 3) {
> -        tcg_gen_addi_i32(addr, addr, 4);
> -        tmp = load_reg(s, rt2);
> -        gen_aa32_st32(tmp, addr, get_mem_index(s));
> -        tcg_temp_free_i32(tmp);
> -    }
> -    tcg_gen_movi_i32(cpu_R[rd], 0);
> -    tcg_gen_br(done_label);
> -    gen_set_label(fail_label);
> -    tcg_gen_movi_i32(cpu_R[rd], 1);
> -    gen_set_label(done_label);
> -    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
> +    tcg_temp_free_i32(tmp2);
> +
> +    gen_helper_atomic_cmpxchg64(cpu_R[rd], cpu_env, addr, val, tmp_size);
> +    tcg_temp_free_i64(val);
> +    tcg_temp_free_i32(tmp_size);
>  }
>  #endif

-- 
Alex Bennée
