From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:55475) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZG5Bb-0003Ed-Ep for qemu-devel@nongnu.org; Fri, 17 Jul 2015 08:56:21 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZG5BV-0005q8-IE for qemu-devel@nongnu.org; Fri, 17 Jul 2015 08:56:19 -0400 Received: from mail-wg0-f42.google.com ([74.125.82.42]:35852) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZG5BV-0005pu-A4 for qemu-devel@nongnu.org; Fri, 17 Jul 2015 08:56:13 -0400 Received: by wgxm20 with SMTP id m20so81611383wgx.3 for ; Fri, 17 Jul 2015 05:56:12 -0700 (PDT) References: <1436516626-8322-1-git-send-email-a.rigo@virtualopensystems.com> <1436516626-8322-7-git-send-email-a.rigo@virtualopensystems.com> From: Alex =?utf-8?Q?Benn=C3=A9e?= In-reply-to: <1436516626-8322-7-git-send-email-a.rigo@virtualopensystems.com> Date: Fri, 17 Jul 2015 13:56:10 +0100 Message-ID: <87bnfb54et.fsf@linaro.org> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Subject: Re: [Qemu-devel] [RFC v3 06/13] target-i386: translate: implement qemu_ldlink and qemu_stcond ops List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Alvise Rigo Cc: mttcg@listserver.greensocs.com, claudio.fontana@huawei.com, qemu-devel@nongnu.org, pbonzini@redhat.com, jani.kokkonen@huawei.com, tech@virtualopensystems.com Alvise Rigo writes: > Implement strex and ldrex instruction relying on TCG's qemu_ldlink and > qemu_stcond. For the time being only 32bit configurations are supported. 
> > Suggested-by: Jani Kokkonen > Suggested-by: Claudio Fontana > Signed-off-by: Alvise Rigo > --- > tcg/i386/tcg-target.c | 136 ++++++++++++++++++++++++++++++++++++++++++-------- > 1 file changed, 114 insertions(+), 22 deletions(-) > > diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c > index 0d7c99c..d8250a9 100644 > --- a/tcg/i386/tcg-target.c > +++ b/tcg/i386/tcg-target.c > @@ -1141,6 +1141,17 @@ static void * const qemu_ld_helpers[16] = { > [MO_BEQ] = helper_be_ldq_mmu, > }; > > +/* LoadLink helpers, only unsigned. Use the macro below to access them. */ > +static void * const qemu_ldex_helpers[16] = { > + [MO_LEUL] = helper_le_ldlinkul_mmu, > +}; > + > +#define LDEX_HELPER(mem_op) \ > +({ \ > + assert(mem_op & MO_EXCL); \ > + qemu_ldex_helpers[((int)mem_op - MO_EXCL)]; \ > +}) > + > /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, > * uintxx_t val, int mmu_idx, uintptr_t ra) > */ > @@ -1154,6 +1165,17 @@ static void * const qemu_st_helpers[16] = { > [MO_BEQ] = helper_be_stq_mmu, > }; > > +/* StoreConditional helpers. Use the macro below to access them. */ > +static void * const qemu_stex_helpers[16] = { > + [MO_LEUL] = helper_le_stcondl_mmu, > +}; > + > +#define STEX_HELPER(mem_op) \ > +({ \ > + assert(mem_op & MO_EXCL); \ > + qemu_stex_helpers[(int)mem_op - MO_EXCL]; \ > +}) > + Same comments as for target-arm. Do we need to protect backends with HAS_LDST_EXCL defines or some such macro hackery? What currently happens if the new TCG ops are used when the backend doesn't support them? Is supporting all backends a prerequisite for the series? > /* Perform the TLB load and compare. 
> > Inputs: > @@ -1249,6 +1271,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, > * for a load or store, so that we can later generate the correct helper code > */ > static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, > + TCGReg llsc_success, > TCGReg datalo, TCGReg datahi, > TCGReg addrlo, TCGReg addrhi, > tcg_insn_unit *raddr, > @@ -1257,6 +1280,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, > TCGLabelQemuLdst *label = new_ldst_label(s); > > label->is_ld = is_ld; > + label->llsc_success = llsc_success; > label->oi = oi; > label->datalo_reg = datalo; > label->datahi_reg = datahi; > @@ -1311,7 +1335,11 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) > (uintptr_t)l->raddr); > } > > - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); > + if (opc & MO_EXCL) { > + tcg_out_call(s, LDEX_HELPER(opc)); > + } else { > + tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); > + } > > data_reg = l->datalo_reg; > switch (opc & MO_SSIZE) { > @@ -1415,9 +1443,16 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) > } > } > > - /* "Tail call" to the helper, with the return address back inline. */ > - tcg_out_push(s, retaddr); > - tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); > + if (opc & MO_EXCL) { > + tcg_out_call(s, STEX_HELPER(opc)); > + /* Save the output of the StoreConditional */ > + tcg_out_mov(s, TCG_TYPE_I32, l->llsc_success, TCG_REG_EAX); > + tcg_out_jmp(s, l->raddr); > + } else { > + /* "Tail call" to the helper, with the return address back inline. */ > + tcg_out_push(s, retaddr); > + tcg_out_jmp(s, qemu_st_helpers[opc]); > + } > } > #elif defined(__x86_64__) && defined(__linux__) > # include > @@ -1530,7 +1565,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, > /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and > EAX. 
It will be useful once fixed registers globals are less > common. */ > -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) > +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64, > + bool isLoadLink) > { > TCGReg datalo, datahi, addrlo; > TCGReg addrhi __attribute__((unused)); > @@ -1553,14 +1589,34 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) > mem_index = get_mmuidx(oi); > s_bits = opc & MO_SIZE; > > - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, > - label_ptr, offsetof(CPUTLBEntry, addr_read)); > + if (isLoadLink) { > + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ? > + TCG_TYPE_I64 : TCG_TYPE_I32; > + /* The JMP address will be patched afterwards, > + * in tcg_out_qemu_ld_slow_path (two times when > + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */ > + tcg_out_mov(s, t, TCG_REG_L1, addrlo); > + > + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { > + /* Store the second part of the address. */ > + tcg_out_mov(s, t, TCG_REG_L0, addrhi); > + /* We add 4 to include the jmp that follows. */ > + label_ptr[1] = s->code_ptr + 4; > + } > > - /* TLB Hit. */ > - tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); > + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0); > + label_ptr[0] = s->code_ptr; > + s->code_ptr += 4; > + } else { > + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, > + label_ptr, offsetof(CPUTLBEntry, addr_read)); > + > + /* TLB Hit. 
*/ > + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); > + } > > /* Record the current context of a load into ldst label */ > - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, > + add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi, > s->code_ptr, label_ptr); > #else > { > @@ -1663,9 +1719,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, > } > } > > -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) > +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64, > + bool isStoreCond) > { > - TCGReg datalo, datahi, addrlo; > + TCGReg datalo, datahi, addrlo, llsc_success; > TCGReg addrhi __attribute__((unused)); > TCGMemOpIdx oi; > TCGMemOp opc; > @@ -1675,6 +1732,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) > tcg_insn_unit *label_ptr[2]; > #endif > > + /* The stcond variant has one more param */ > + llsc_success = (isStoreCond ? *args++ : 0); > + > datalo = *args++; > datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); > addrlo = *args++; > @@ -1686,15 +1746,35 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) > mem_index = get_mmuidx(oi); > s_bits = opc & MO_SIZE; > > - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, > - label_ptr, offsetof(CPUTLBEntry, addr_write)); > + if (isStoreCond) { > + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ? > + TCG_TYPE_I64 : TCG_TYPE_I32; > + /* The JMP address will be filled afterwards, > + * in tcg_out_qemu_ld_slow_path (two times when > + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */ > + tcg_out_mov(s, t, TCG_REG_L1, addrlo); > + > + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { > + /* Store the second part of the address. */ > + tcg_out_mov(s, t, TCG_REG_L0, addrhi); > + /* We add 4 to include the jmp that follows. */ > + label_ptr[1] = s->code_ptr + 4; > + } > > - /* TLB Hit. 
*/ > - tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); > + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0); > + label_ptr[0] = s->code_ptr; > + s->code_ptr += 4; > + } else { > + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, > + label_ptr, offsetof(CPUTLBEntry, addr_write)); > + > + /* TLB Hit. */ > + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); > + } > > /* Record the current context of a store into ldst label */ > - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, > - s->code_ptr, label_ptr); > + add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo, > + addrhi, s->code_ptr, label_ptr); > #else > { > int32_t offset = GUEST_BASE; > @@ -1955,16 +2035,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, > break; > > case INDEX_op_qemu_ld_i32: > - tcg_out_qemu_ld(s, args, 0); > + tcg_out_qemu_ld(s, args, 0, 0); > + break; > + case INDEX_op_qemu_ldlink_i32: > + tcg_out_qemu_ld(s, args, 0, 1); > break; > case INDEX_op_qemu_ld_i64: > - tcg_out_qemu_ld(s, args, 1); > + tcg_out_qemu_ld(s, args, 1, 0); > break; > case INDEX_op_qemu_st_i32: > - tcg_out_qemu_st(s, args, 0); > + tcg_out_qemu_st(s, args, 0, 0); > + break; > + case INDEX_op_qemu_stcond_i32: > + tcg_out_qemu_st(s, args, 0, 1); > break; > case INDEX_op_qemu_st_i64: > - tcg_out_qemu_st(s, args, 1); > + tcg_out_qemu_st(s, args, 1, 0); > break; > > OP_32_64(mulu2): > @@ -2186,17 +2272,23 @@ static const TCGTargetOpDef x86_op_defs[] = { > > #if TCG_TARGET_REG_BITS == 64 > { INDEX_op_qemu_ld_i32, { "r", "L" } }, > + { INDEX_op_qemu_ldlink_i32, { "r", "L" } }, > { INDEX_op_qemu_st_i32, { "L", "L" } }, > + { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } }, > { INDEX_op_qemu_ld_i64, { "r", "L" } }, > { INDEX_op_qemu_st_i64, { "L", "L" } }, > #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS > { INDEX_op_qemu_ld_i32, { "r", "L" } }, > + { INDEX_op_qemu_ldlink_i32, { "r", "L" } }, > { INDEX_op_qemu_st_i32, { "L", "L" } }, > + { 
INDEX_op_qemu_stcond_i32, { "r", "L", "L" } }, > { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, > { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, > #else > { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, > + { INDEX_op_qemu_ldlink_i32, { "r", "L", "L" } }, > { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, > + { INDEX_op_qemu_stcond_i32, { "r", "L", "L", "L" } }, > { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, > { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, > #endif -- Alex Bennée