qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: "Chen Gang" <xili_gchen_5257@hotmail.com>,
	"Peter Maydell" <peter.maydell@linaro.org>,
	"Andreas Färber" <afaerber@suse.de>,
	"Chris Metcalf" <cmetcalf@ezchip.com>
Cc: "walt@tilera.com" <walt@tilera.com>,
	Riku Voipio <riku.voipio@iki.fi>,
	qemu-devel <qemu-devel@nongnu.org>
Subject: Re: [Qemu-devel] [PATCH 09/10 v10] target-tilegx: Generate tcg instructions to execute to _init_malloc in glib
Date: Mon, 11 May 2015 09:55:27 -0700	[thread overview]
Message-ID: <5550DEFF.8050204@twiddle.net> (raw)
In-Reply-To: <BLU436-SMTP63FD3A6167A47FDC4A3D91B9DC0@phx.gbl>

On 05/10/2015 03:45 PM, Chen Gang wrote:
> +static void gen_cmpltsi(struct DisasContext *dc,
> +                        uint8_t rdst, uint8_t rsrc, int8_t imm8)
> +{
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpltsi r%d, r%d, %d\n",
> +                  rdst, rsrc, imm8);
> +    tcg_gen_setcondi_i64(TCG_COND_LTU, dest_gr(dc, rdst), load_gr(dc, rsrc),
> +                         (int64_t)imm8);
> +}
> +
> +static void gen_cmpltui(struct DisasContext *dc,
> +                        uint8_t rdst, uint8_t rsrc, uint8_t imm8)
> +{
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpltui r%d, r%d, %d\n",
> +                  rdst, rsrc, imm8);
> +    tcg_gen_setcondi_i64(TCG_COND_LTU,
> +                         dest_gr(dc, rdst), load_gr(dc, rsrc), imm8);
> +}
> +
> +static void gen_cmpeqi(struct DisasContext *dc,
> +                       uint8_t rdst, uint8_t rsrc, uint8_t imm8)
> +{
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpeqi r%d, r%d, %d\n", rdst, rsrc, imm8);
> +    tcg_gen_setcondi_i64(TCG_COND_EQ,
> +                         dest_gr(dc, rdst), load_gr(dc, rsrc), imm8);
> +}

Merge these.

> +
> +static void gen_cmp(struct DisasContext *dc,
> +                    uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, TCGCond cond)
> +{
> +    const char *prefix;
> +
> +    switch (cond) {
> +    case TCG_COND_EQ:
> +        prefix = "eq";
> +        break;
> +    case TCG_COND_LE:
> +        prefix = "les";
> +        break;
> +    case TCG_COND_LEU:
> +        prefix = "leu";
> +        break;
> +    case TCG_COND_LT:
> +        prefix = "lts";
> +        break;
> +    case TCG_COND_LTU:
> +        prefix = "ltu";
> +        break;
> +    case TCG_COND_NE:
> +        prefix = "ne";
> +        break;
> +    default:
> +        dc->exception = TILEGX_EXCP_OPCODE_UNKNOWN;
> +        return;
> +    }
> +
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmp%s r%d, r%d, r%d\n",
> +                  prefix, rdst, rsrc, rsrcb);

Better to just pass down the opcode name with the TCGCond rather than trying to
recreate it.  Then there's no need for a switch, nor a need for a confusing
TILEGX_EXCP_OPCODE_UNKNOWN path.

> +static void gen_exch(struct DisasContext *dc,
> +                     uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int excp)
> +{
> +    const char *prefix, *width;
> +
> +    switch (excp) {
> +    case TILEGX_EXCP_OPCODE_EXCH4:
> +        prefix = "";
> +        width = "4";
> +        break;
> +    case TILEGX_EXCP_OPCODE_EXCH:
> +        prefix = "";
> +        width = "";
> +        break;
> +    case TILEGX_EXCP_OPCODE_CMPEXCH4:
> +        prefix = "cmp";
> +        width = "4";
> +        break;
> +    case TILEGX_EXCP_OPCODE_CMPEXCH:
> +        prefix = "cmp";
> +        width = "";
> +        break;
> +    default:
> +        dc->exception = TILEGX_EXCP_OPCODE_UNKNOWN;
> +        return;
> +    }

Likewise.

> +static void gen_v1cmpeqi(struct DisasContext *dc,
> +                         uint8_t rdst, uint8_t rsrc, uint8_t imm8)
> +{
> +    int count;
> +    TCGv vdst = dest_gr(dc, rdst);
> +    TCGv tmp = tcg_temp_new_i64();
> +
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1cmpeqi r%d, r%d, %d\n",
> +                  rdst, rsrc, imm8);
> +
> +    tcg_gen_movi_i64(vdst, 0);
> +
> +    for (count = 0; count < 8; count++) {
> +        tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), (8 - count - 1) * 8);
> +        tcg_gen_andi_i64(tmp, tmp, 0xff);
> +        tcg_gen_setcondi_i64(TCG_COND_EQ, tmp, tmp, imm8);
> +        tcg_gen_or_i64(vdst, vdst, tmp);
> +        tcg_gen_shli_i64(vdst, vdst, 8);

For all of these vector instructions, I would encourage you to use helpers to
extract and insert values.  Extraction has little choice but to use a shift and
a mask, as you use here.  But insertion can use tcg_gen_deposit_i64.  I think
that is a lot easier to reason with than your construction here which
sequentially shifts vdst.

E.g.

static inline void
extract_v1(TCGv out, TCGv in, unsigned byte)
{
  tcg_gen_shri_i64(out, in, byte * 8);
  tcg_gen_ext8u_i64(out, out);
}

static inline void
insert_v1(TCGv out, TCGv in, unsigned byte)
{
  tcg_gen_deposit_i64(out, out, in, byte * 8, 8);
}


This loop then becomes

	TCGv vsrc = load_gr(dc, src);
	for (count = 0; count < 8; ++count) {
	    extract_v1(tmp, vsrc, count);
	    tcg_gen_setcondi_i64(TCG_COND_EQ, tmp, tmp, imm8);
	    insert_v1(vdst, tmp, count);
	}

> +static void gen_v1int_l(struct DisasContext *dc,
> +                        uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)
> +{
> +    int count;
> +    TCGv vdst = dest_gr(dc, rdst);
> +    TCGv tmp = tcg_temp_new_i64();
> +
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1int_l r%d, r%d, r%d\n",
> +                  rdst, rsrc, rsrcb);
> +
> +    tcg_gen_movi_i64(vdst, 0);
> +    for (count = 0; count < 4; count++) {
> +
> +        tcg_gen_shli_i64(vdst, vdst, 8);
> +
> +        tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), (4 - count - 1) * 8);
> +        tcg_gen_andi_i64(tmp, tmp, 0xff);
> +        tcg_gen_or_i64(vdst, vdst, tmp);
> +        tcg_gen_shli_i64(vdst, vdst, 8);
> +
> +        tcg_gen_shri_i64(tmp, load_gr(dc, rsrcb), (4 - count - 1) * 8);
> +        tcg_gen_andi_i64(tmp, tmp, 0xff);
> +        tcg_gen_or_i64(vdst, vdst, tmp);
> +    }
> +    tcg_temp_free_i64(tmp);

	TCGv vsrc = load_gr(dc, rsrc);
	TCGv vsrcb = load_gr(dc, rsrcb);

	for (count = 0; count < 4; ++count) {
	    extract_v1(tmp, vsrc, count);
	    insert_v1(vdst, tmp, count * 2);
	    extract_v1(tmp, vsrcb, count);
	    insert_v1(vdst, tmp, count * 2 + 1);
	}


> +}
> +
> +/*
> + * Functional Description
> + *
> + *        uint64_t output = 0;
> + *        uint32_t counter;
> + *        for (counter = 0; counter < (WORD_SIZE / 32); counter++)
> + *        {
> + *            bool asel = ((counter & 1) == 1);
> + *            int in_sel = 0 + counter / 2;
> + *            int32_t srca = get4Byte (rf[SrcA], in_sel);
> + *            int32_t srcb = get4Byte (rf[SrcB], in_sel);
> + *            output = set4Byte (output, counter, (asel ? srca : srcb));
> + *        }
> + *        rf[Dest] = output;
> +*/
> +
> +static void gen_v4int_l(struct DisasContext *dc,
> +                        uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)
> +{
> +    TCGv vdst = dest_gr(dc, rdst);
> +    TCGv tmp = tcg_temp_new_i64();
> +
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "v4int_l r%d, r%d, r%d\n",
> +                  rdst, rsrc, rsrcb);
> +
> +    tcg_gen_andi_i64(vdst, load_gr(dc, rsrc), 0xffffffff);
> +    tcg_gen_shli_i64(vdst, vdst, 8);
> +    tcg_gen_andi_i64(tmp, load_gr(dc, rsrcb), 0xffffffff);
> +    tcg_gen_or_i64(vdst, vdst, tmp);

And herein is a bug, that I'd hope using the helper functions would avoid: you
shift by 8 instead of 32.  This function simplifies to

	tcg_gen_deposit_i64(vdst, load_gr(dc, rsrc), load_gr(dc, rsrcb),
                            32, 32);

> +/*
> + * uint64_t mask = 0;
> + * int64_t background = ((rf[SrcA] >> BFEnd) & 1) ? -1ULL : 0ULL;
> + * mask = ((-1ULL) ^ ((-1ULL << ((BFEnd - BFStart) & 63)) << 1));
> + * uint64_t rot_src = (((uint64_t) rf[SrcA]) >> BFStart)
> + *                    | (rf[SrcA] << (64 - BFStart));
> + * rf[Dest] = (rot_src & mask) | (background & ~mask);
> + */
> +static void gen_bfexts(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc,
> +                       uint8_t start, uint8_t end)
> +{
> +    uint64_t mask = (-1ULL) ^ ((-1ULL << ((end - start) & 63)) << 1);
> +    TCGv vldst = tcg_temp_local_new_i64();
> +    TCGv tmp = tcg_temp_local_new_i64();
> +    TCGv pmsk = tcg_const_i64(-1ULL);

Why the local temps?

> +
> +    qemu_log_mask(CPU_LOG_TB_IN_ASM, "bfexts r%d, r%d, %d, %d\n",
> +                  rdst, rsrc, start, end);
> +
> +    tcg_gen_rotri_i64(vldst, load_gr(dc, rsrc), start);
> +    tcg_gen_andi_i64(vldst, vldst, mask);
> +
> +    tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), end);
> +    tcg_gen_andi_i64(tmp, tmp, 1);
> +    tcg_gen_movcond_i64(TCG_COND_EQ, tmp, tmp, load_zero(dc),
> +                        load_zero(dc), pmsk);

This movcond is equivalent to negation.

> +/*
> +   Functional Description
> +       uint64_t a = rf[SrcA];
> +       uint64_t b = rf[SrcB];
> +       uint64_t d = rf[Dest];
> +       uint64_t output = 0;
> +       unsigned int counter;
> +       for (counter = 0; counter < (WORD_SIZE / BYTE_SIZE); counter++)
> +       {
> +           int sel = getByte (b, counter) & 0xf;
> +           uint8_t byte = (sel < 8) ? getByte (d, sel) : getByte (a, (sel - 8));
> +           output = setByte (output, counter, byte);
> +       }
> +       rf[Dest] = output;
> + */
> +static void gen_shufflebytes(struct DisasContext *dc,
> +                             uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)

I strongly suggest this be moved to op_helper.c.  It's too big.

> +/* FIXME: At present, skip unalignment checking */
> +static void gen_ld(struct DisasContext *dc,
> +                   uint8_t rdst, uint8_t rsrc, TCGMemOp ops)

Alignment checks would never be done here anyway.
Again, pass down the opcode string rather than rebuild.


r~

  reply	other threads:[~2015-05-11 16:55 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-10 22:36 [Qemu-devel] [PATCH 00/10 v10] tilegx: Firstly add tilegx target for linux-user Chen Gang
2015-05-10 22:38 ` [Qemu-devel] [PATCH 01/10 v10] linux-user: tilegx: Firstly add architecture related features Chen Gang
2015-05-10 22:39 ` [Qemu-devel] [PATCH 02/10 v10] linux-user: Support tilegx architecture in linux-user Chen Gang
2015-05-10 22:40 ` [Qemu-devel] [PATCH 03/10 v10] linux-user/syscall.c: Conditionalize syscalls which are not defined in tilegx Chen Gang
2015-05-10 22:41 ` [Qemu-devel] [PATCH 04/10 v10] target-tilegx: Add opcode basic implementation from Tilera Corporation Chen Gang
2015-05-10 22:43 ` [Qemu-devel] [PATCH 06/10 v10] target-tilegx: Add special register information " Chen Gang
2015-05-10 22:44 ` [Qemu-devel] [PATCH 07/10 v10] target-tilegx: Add cpu basic features for linux-user Chen Gang
2015-05-10 22:44 ` [Qemu-devel] [PATCH 08/10 v10] target-tilegx: Add helper " Chen Gang
2015-05-10 22:45 ` [Qemu-devel] [PATCH 09/10 v10] target-tilegx: Generate tcg instructions to execute to _init_malloc in glib Chen Gang
2015-05-11 16:55   ` Richard Henderson [this message]
2015-05-11 21:26     ` Chen Gang
2015-05-26 21:39       ` Chen Gang
2015-06-01 20:58         ` Chen Gang
2015-05-14 14:56     ` Chen Gang
2015-05-14 15:08     ` Chen Gang
2015-05-14 16:05     ` Chen Gang
2015-05-15  2:31       ` Chen Gang
2015-05-29 19:29     ` Chen Gang
2015-05-10 22:46 ` [Qemu-devel] [PATCH 10/10 v10] target-tilegx: Add TILE-Gx building files Chen Gang
     [not found] ` <BLU437-SMTP59B35F884A72A991334DBB9DC0@phx.gbl>
2015-05-11 16:01   ` [Qemu-devel] [PATCH 05/10 v10] target-tilegx/opcode_tilegx.h: Modify it to fit qemu using Richard Henderson
2015-05-11 21:06     ` Chen Gang
2015-05-11 22:06       ` Richard Henderson
2015-05-12  0:43         ` gchen gchen
2015-05-12 10:56           ` Chen Gang
2015-05-12 11:08             ` Peter Maydell
2015-05-12 11:16               ` Chen Gang
2015-05-19  2:47                 ` Chen Gang
2015-05-21 20:59                   ` Chen Gang
2015-05-21 23:40                     ` Chris Metcalf
2015-05-22  1:48                       ` Chen Gang
2015-05-24 22:03                         ` Chen Gang
2015-05-25 15:13                           ` Chen Gang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5550DEFF.8050204@twiddle.net \
    --to=rth@twiddle.net \
    --cc=afaerber@suse.de \
    --cc=cmetcalf@ezchip.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=riku.voipio@iki.fi \
    --cc=walt@tilera.com \
    --cc=xili_gchen_5257@hotmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).