From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:49887) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZQHHi-0003wX-MR for qemu-devel@nongnu.org; Fri, 14 Aug 2015 11:52:51 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZQHHh-0003WR-Bf for qemu-devel@nongnu.org; Fri, 14 Aug 2015 11:52:46 -0400 Received: from mail-wi0-f181.google.com ([209.85.212.181]:36492) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZQHHh-0003WI-1o for qemu-devel@nongnu.org; Fri, 14 Aug 2015 11:52:45 -0400 Received: by wicja10 with SMTP id ja10so22214772wic.1 for ; Fri, 14 Aug 2015 08:52:44 -0700 (PDT) From: Alvise Rigo Date: Fri, 14 Aug 2015 17:55:30 +0200 Message-Id: <1439567732-14118-5-git-send-email-a.rigo@virtualopensystems.com> In-Reply-To: <1439567732-14118-1-git-send-email-a.rigo@virtualopensystems.com> References: <1439567732-14118-1-git-send-email-a.rigo@virtualopensystems.com> Subject: [Qemu-devel] [mttcg RFC v4 4/6] softmmu_llsc_template.h: move to multithreading List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org, mttcg@listserver.greensocs.com Cc: claudio.fontana@huawei.com, pbonzini@redhat.com, jani.kokkonen@huawei.com, tech@virtualopensystems.com, alex.bennee@linaro.org, aurelien@aurel32.net Update the TCG LL/SC helpers to work in a multi-threaded environment. The basic idea remains untouched, but the whole mechanism now takes into account the concurrent execution of multiple vCPUs. In essence, when a vCPU performs an LL, it looks for the vCPUs that do not have the excl bit set for the accessed page. For each of those vCPUs it then: - sets the excl bit - requests a TLB flush This way, we make sure that all the vCPUs will have the EXCL flag in the TLB entry for that specific page *before* entering the next TB. Changes from v3: - The rendezvous mechanism has been removed, since the reworked TLB flush request serves the same purpose. 
Suggested-by: Jani Kokkonen Suggested-by: Claudio Fontana Signed-off-by: Alvise Rigo --- cputlb.c | 4 ++++ softmmu_llsc_template.h | 59 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/cputlb.c b/cputlb.c index 7cbaaca..08949df 100644 --- a/cputlb.c +++ b/cputlb.c @@ -36,6 +36,10 @@ /* statistics */ int tlb_flush_count; +/* For atomic instruction handling. */ +int exit_flush_request = 0; +QemuMutex tcg_excl_access_lock; + /* NOTE: * If flush_global is true (the usual case), flush all tlb entries. * If flush_global is false, flush (at least) all tlb entries not diff --git a/softmmu_llsc_template.h b/softmmu_llsc_template.h index d2e92b4..9486385 100644 --- a/softmmu_llsc_template.h +++ b/softmmu_llsc_template.h @@ -33,25 +33,39 @@ #define helper_ldlink_name glue(glue(helper_be_ldlink, USUFFIX), MMUSUFFIX) #define helper_stcond_name glue(glue(helper_be_stcond, SUFFIX), MMUSUFFIX) -#define helper_ld_legacy glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX) -#define helper_st_legacy glue(glue(helper_be_st, SUFFIX), MMUSUFFIX) +#define helper_ld glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX) +#define helper_st glue(glue(helper_be_st, SUFFIX), MMUSUFFIX) #else /* LE helpers + 8bit helpers (generated only once for both LE end BE) */ #if DATA_SIZE > 1 #define helper_ldlink_name glue(glue(helper_le_ldlink, USUFFIX), MMUSUFFIX) #define helper_stcond_name glue(glue(helper_le_stcond, SUFFIX), MMUSUFFIX) -#define helper_ld_legacy glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX) -#define helper_st_legacy glue(glue(helper_le_st, SUFFIX), MMUSUFFIX) +#define helper_ld glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX) +#define helper_st glue(glue(helper_le_st, SUFFIX), MMUSUFFIX) #else /* DATA_SIZE <= 1 */ #define helper_ldlink_name glue(glue(helper_ret_ldlink, USUFFIX), MMUSUFFIX) #define helper_stcond_name glue(glue(helper_ret_stcond, SUFFIX), MMUSUFFIX) -#define helper_ld_legacy glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) 
-#define helper_st_legacy glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX) +#define helper_ld glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) +#define helper_st glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX) #endif #endif +#define is_read_tlb_entry_set(env, page, index) \ +({ \ + (addr & TARGET_PAGE_MASK) \ + == ((env->tlb_table[mmu_idx][index].addr_read) & \ + (TARGET_PAGE_MASK | TLB_INVALID_MASK)); \ +}) +/* Whenever a SC operation fails, we add a small delay to reduce the + * concurrency among the atomic instruction emulation code. Without this delay, + * in very congested situation where plain stores make all the pending LLs + * fail, the code could reach a stalling situation in which all the SCs happen + * to fail. + * */ +#define TCG_ATOMIC_INSN_EMUL_DELAY 100 + WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) { @@ -61,11 +75,13 @@ WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr, hwaddr hw_addr; unsigned mmu_idx = get_mmuidx(oi); - /* Use the proper load helper from cpu_ldst.h */ - ret = helper_ld_legacy(env, addr, mmu_idx, retaddr); - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + if (!is_read_tlb_entry_set(env, addr, index) || + !VICTIM_TLB_HIT(addr_read)) { + tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, mmu_idx, retaddr); + } + /* hw_addr = hwaddr of the page (i.e. section->mr->ram_addr + xlat) * plus the offset (i.e. addr & ~TARGET_PAGE_MASK) */ hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) + addr; @@ -73,22 +89,34 @@ WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr, cpu_physical_memory_clear_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index); /* If all the vCPUs have the EXCL bit set for this page there is no need * to request any flush. 
*/ - if (cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus)) { + if (unlikely(!atomic_xchg(&exit_flush_request, 1) && + cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus))) { CPU_FOREACH(cpu) { - if (current_cpu != cpu) { + if (cpu->thread_id != qemu_get_thread_id()) { if (cpu_physical_memory_excl_is_dirty(hw_addr, cpu->cpu_index)) { cpu_physical_memory_clear_excl_dirty(hw_addr, cpu->cpu_index); - tlb_flush(cpu, 1); + tlb_query_flush_cpu(cpu, 1); } } } + + atomic_set(&exit_flush_request, 0); } + env->ll_sc_context = true; + + qemu_mutex_lock(&tcg_excl_access_lock); + + /* Use the proper load helper from cpu_ldst.h */ + ret = helper_ld(env, addr, mmu_idx, retaddr); + env->excl_protected_range.begin = hw_addr; env->excl_protected_range.end = hw_addr + DATA_SIZE; + qemu_mutex_unlock(&tcg_excl_access_lock); + /* For this vCPU, just update the TLB entry, no need to flush. */ env->tlb_table[mmu_idx][index].addr_write |= TLB_EXCL; @@ -106,12 +134,13 @@ WORD_TYPE helper_stcond_name(CPUArchState *env, target_ulong addr, * access as one made by the store conditional wrapper. If the store * conditional does not succeed, the value will be set to 0.*/ env->excl_succeeded = 1; - helper_st_legacy(env, addr, val, mmu_idx, retaddr); + helper_st(env, addr, val, mmu_idx, retaddr); if (env->excl_succeeded) { env->excl_succeeded = 0; ret = 0; } else { + g_usleep(TCG_ATOMIC_INSN_EMUL_DELAY); ret = 1; } @@ -120,5 +149,5 @@ WORD_TYPE helper_stcond_name(CPUArchState *env, target_ulong addr, #undef helper_ldlink_name #undef helper_stcond_name -#undef helper_ld_legacy -#undef helper_st_legacy +#undef helper_ld +#undef helper_st -- 2.5.0