From: "Alex Bennée" <alex.bennee@linaro.org>
To: Alvise Rigo <a.rigo@virtualopensystems.com>
Cc: mttcg@listserver.greensocs.com, claudio.fontana@huawei.com,
qemu-devel@nongnu.org, pbonzini@redhat.com,
jani.kokkonen@huawei.com, tech@virtualopensystems.com
Subject: Re: [Qemu-devel] [RFC v3 06/13] target-i386: translate: implement qemu_ldlink and qemu_stcond ops
Date: Fri, 17 Jul 2015 13:56:10 +0100 [thread overview]
Message-ID: <87bnfb54et.fsf@linaro.org> (raw)
In-Reply-To: <1436516626-8322-7-git-send-email-a.rigo@virtualopensystems.com>
Alvise Rigo <a.rigo@virtualopensystems.com> writes:
> Implement strex and ldrex instruction relying on TCG's qemu_ldlink and
> qemu_stcond. For the time being only 32bit configurations are supported.
>
> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
> ---
> tcg/i386/tcg-target.c | 136 ++++++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 114 insertions(+), 22 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 0d7c99c..d8250a9 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1141,6 +1141,17 @@ static void * const qemu_ld_helpers[16] = {
> [MO_BEQ] = helper_be_ldq_mmu,
> };
>
> +/* LoadLink helpers, only unsigned. Use the macro below to access them. */
> +static void * const qemu_ldex_helpers[16] = {
> + [MO_LEUL] = helper_le_ldlinkul_mmu,
> +};
> +
> +#define LDEX_HELPER(mem_op) \
> +({ \
> + assert(mem_op & MO_EXCL); \
> + qemu_ldex_helpers[((int)mem_op - MO_EXCL)]; \
> +})
> +
> /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
> * uintxx_t val, int mmu_idx, uintptr_t ra)
> */
> @@ -1154,6 +1165,17 @@ static void * const qemu_st_helpers[16] = {
> [MO_BEQ] = helper_be_stq_mmu,
> };
>
> +/* StoreConditional helpers. Use the macro below to access them. */
> +static void * const qemu_stex_helpers[16] = {
> + [MO_LEUL] = helper_le_stcondl_mmu,
> +};
> +
> +#define STEX_HELPER(mem_op) \
> +({ \
> + assert(mem_op & MO_EXCL); \
> + qemu_stex_helpers[(int)mem_op - MO_EXCL]; \
> +})
> +
Same comments as for target-arm.
Do we need to be protecting backends with HAS_LDST_EXCL defines or some
such macro hackery? What currently happens if you use the new TCG ops
when the backend doesn't support them? Is supporting all backends a
prerequisite for the series?
> /* Perform the TLB load and compare.
>
> Inputs:
> @@ -1249,6 +1271,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
> * for a load or store, so that we can later generate the correct helper code
> */
> static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
> + TCGReg llsc_success,
> TCGReg datalo, TCGReg datahi,
> TCGReg addrlo, TCGReg addrhi,
> tcg_insn_unit *raddr,
> @@ -1257,6 +1280,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
> TCGLabelQemuLdst *label = new_ldst_label(s);
>
> label->is_ld = is_ld;
> + label->llsc_success = llsc_success;
> label->oi = oi;
> label->datalo_reg = datalo;
> label->datahi_reg = datahi;
> @@ -1311,7 +1335,11 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
> (uintptr_t)l->raddr);
> }
>
> - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
> + if (opc & MO_EXCL) {
> + tcg_out_call(s, LDEX_HELPER(opc));
> + } else {
> + tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
> + }
>
> data_reg = l->datalo_reg;
> switch (opc & MO_SSIZE) {
> @@ -1415,9 +1443,16 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
> }
> }
>
> - /* "Tail call" to the helper, with the return address back inline. */
> - tcg_out_push(s, retaddr);
> - tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
> + if (opc & MO_EXCL) {
> + tcg_out_call(s, STEX_HELPER(opc));
> + /* Save the output of the StoreConditional */
> + tcg_out_mov(s, TCG_TYPE_I32, l->llsc_success, TCG_REG_EAX);
> + tcg_out_jmp(s, l->raddr);
> + } else {
> + /* "Tail call" to the helper, with the return address back inline. */
> + tcg_out_push(s, retaddr);
> + tcg_out_jmp(s, qemu_st_helpers[opc]);
> + }
> }
> #elif defined(__x86_64__) && defined(__linux__)
> # include <asm/prctl.h>
> @@ -1530,7 +1565,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
> /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
> EAX. It will be useful once fixed registers globals are less
> common. */
> -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
> +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64,
> + bool isLoadLink)
> {
> TCGReg datalo, datahi, addrlo;
> TCGReg addrhi __attribute__((unused));
> @@ -1553,14 +1589,34 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
> mem_index = get_mmuidx(oi);
> s_bits = opc & MO_SIZE;
>
> - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> - label_ptr, offsetof(CPUTLBEntry, addr_read));
> + if (isLoadLink) {
> + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
> + TCG_TYPE_I64 : TCG_TYPE_I32;
> + /* The JMP address will be patched afterwards,
> + * in tcg_out_qemu_ld_slow_path (two times when
> + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
> + tcg_out_mov(s, t, TCG_REG_L1, addrlo);
> +
> + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
> + /* Store the second part of the address. */
> + tcg_out_mov(s, t, TCG_REG_L0, addrhi);
> + /* We add 4 to include the jmp that follows. */
> + label_ptr[1] = s->code_ptr + 4;
> + }
>
> - /* TLB Hit. */
> - tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
> + label_ptr[0] = s->code_ptr;
> + s->code_ptr += 4;
> + } else {
> + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> + label_ptr, offsetof(CPUTLBEntry, addr_read));
> +
> + /* TLB Hit. */
> + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> + }
>
> /* Record the current context of a load into ldst label */
> - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
> + add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi,
> s->code_ptr, label_ptr);
> #else
> {
> @@ -1663,9 +1719,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
> }
> }
>
> -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
> +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64,
> + bool isStoreCond)
> {
> - TCGReg datalo, datahi, addrlo;
> + TCGReg datalo, datahi, addrlo, llsc_success;
> TCGReg addrhi __attribute__((unused));
> TCGMemOpIdx oi;
> TCGMemOp opc;
> @@ -1675,6 +1732,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
> tcg_insn_unit *label_ptr[2];
> #endif
>
> + /* The stcond variant has one more param */
> + llsc_success = (isStoreCond ? *args++ : 0);
> +
> datalo = *args++;
> datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
> addrlo = *args++;
> @@ -1686,15 +1746,35 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
> mem_index = get_mmuidx(oi);
> s_bits = opc & MO_SIZE;
>
> - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> - label_ptr, offsetof(CPUTLBEntry, addr_write));
> + if (isStoreCond) {
> + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
> + TCG_TYPE_I64 : TCG_TYPE_I32;
> + /* The JMP address will be filled afterwards,
> + * in tcg_out_qemu_ld_slow_path (two times when
> + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
> + tcg_out_mov(s, t, TCG_REG_L1, addrlo);
> +
> + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
> + /* Store the second part of the address. */
> + tcg_out_mov(s, t, TCG_REG_L0, addrhi);
> + /* We add 4 to include the jmp that follows. */
> + label_ptr[1] = s->code_ptr + 4;
> + }
>
> - /* TLB Hit. */
> - tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
> + label_ptr[0] = s->code_ptr;
> + s->code_ptr += 4;
> + } else {
> + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> + label_ptr, offsetof(CPUTLBEntry, addr_write));
> +
> + /* TLB Hit. */
> + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> + }
>
> /* Record the current context of a store into ldst label */
> - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
> - s->code_ptr, label_ptr);
> + add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo,
> + addrhi, s->code_ptr, label_ptr);
> #else
> {
> int32_t offset = GUEST_BASE;
> @@ -1955,16 +2035,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
> break;
>
> case INDEX_op_qemu_ld_i32:
> - tcg_out_qemu_ld(s, args, 0);
> + tcg_out_qemu_ld(s, args, 0, 0);
> + break;
> + case INDEX_op_qemu_ldlink_i32:
> + tcg_out_qemu_ld(s, args, 0, 1);
> break;
> case INDEX_op_qemu_ld_i64:
> - tcg_out_qemu_ld(s, args, 1);
> + tcg_out_qemu_ld(s, args, 1, 0);
> break;
> case INDEX_op_qemu_st_i32:
> - tcg_out_qemu_st(s, args, 0);
> + tcg_out_qemu_st(s, args, 0, 0);
> + break;
> + case INDEX_op_qemu_stcond_i32:
> + tcg_out_qemu_st(s, args, 0, 1);
> break;
> case INDEX_op_qemu_st_i64:
> - tcg_out_qemu_st(s, args, 1);
> + tcg_out_qemu_st(s, args, 1, 0);
> break;
>
> OP_32_64(mulu2):
> @@ -2186,17 +2272,23 @@ static const TCGTargetOpDef x86_op_defs[] = {
>
> #if TCG_TARGET_REG_BITS == 64
> { INDEX_op_qemu_ld_i32, { "r", "L" } },
> + { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
> { INDEX_op_qemu_st_i32, { "L", "L" } },
> + { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
> { INDEX_op_qemu_ld_i64, { "r", "L" } },
> { INDEX_op_qemu_st_i64, { "L", "L" } },
> #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
> { INDEX_op_qemu_ld_i32, { "r", "L" } },
> + { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
> { INDEX_op_qemu_st_i32, { "L", "L" } },
> + { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
> { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
> { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
> #else
> { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
> + { INDEX_op_qemu_ldlink_i32, { "r", "L", "L" } },
> { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
> + { INDEX_op_qemu_stcond_i32, { "r", "L", "L", "L" } },
> { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
> { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
> #endif
--
Alex Bennée
next prev parent reply other threads:[~2015-07-17 12:56 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-10 8:23 [Qemu-devel] [RFC v3 00/13] Slow-path for atomic instruction translation Alvise Rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 01/13] exec: Add new exclusive bitmap to ram_list Alvise Rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 02/13] cputlb: Add new TLB_EXCL flag Alvise Rigo
2015-07-16 14:32 ` Alex Bennée
2015-07-16 15:04 ` alvise rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 03/13] softmmu: Add helpers for a new slow-path Alvise Rigo
2015-07-16 14:53 ` Alex Bennée
2015-07-16 15:15 ` alvise rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 04/13] tcg-op: create new TCG qemu_ldlink and qemu_stcond instructions Alvise Rigo
2015-07-17 9:49 ` Alex Bennée
2015-07-17 10:05 ` alvise rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 05/13] target-arm: translate: implement qemu_ldlink and qemu_stcond ops Alvise Rigo
2015-07-17 12:51 ` Alex Bennée
2015-07-17 13:01 ` alvise rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 06/13] target-i386: " Alvise Rigo
2015-07-17 12:56 ` Alex Bennée [this message]
2015-07-17 13:27 ` alvise rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 07/13] ram_addr.h: Make exclusive bitmap accessors atomic Alvise Rigo
2015-07-17 13:32 ` Alex Bennée
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 08/13] exec.c: introduce a simple rendezvous support Alvise Rigo
2015-07-17 13:45 ` Alex Bennée
2015-07-17 13:54 ` alvise rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 09/13] cpus.c: introduce simple callback support Alvise Rigo
2015-07-10 9:36 ` Paolo Bonzini
2015-07-10 9:47 ` alvise rigo
2015-07-10 9:53 ` Frederic Konrad
2015-07-10 10:06 ` alvise rigo
2015-07-10 10:24 ` Paolo Bonzini
2015-07-10 12:16 ` Frederic Konrad
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 10/13] Simple TLB flush wrap to use as exit callback Alvise Rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 11/13] Introduce exit_flush_req and tcg_excl_access_lock Alvise Rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 12/13] softmmu_llsc_template.h: move to multithreading Alvise Rigo
2015-07-17 15:27 ` Alex Bennée
2015-07-17 15:31 ` alvise rigo
2015-07-10 8:23 ` [Qemu-devel] [RFC v3 13/13] softmmu_template.h: " Alvise Rigo
2015-07-17 15:57 ` Alex Bennée
2015-07-17 16:19 ` alvise rigo
2015-07-10 8:31 ` [Qemu-devel] [RFC v3 00/13] Slow-path for atomic instruction translation Mark Burton
2015-07-10 8:58 ` alvise rigo
2015-07-10 8:39 ` Frederic Konrad
2015-07-10 9:04 ` alvise rigo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87bnfb54et.fsf@linaro.org \
--to=alex.bennee@linaro.org \
--cc=a.rigo@virtualopensystems.com \
--cc=claudio.fontana@huawei.com \
--cc=jani.kokkonen@huawei.com \
--cc=mttcg@listserver.greensocs.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=tech@virtualopensystems.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).