Re: [PATCH v4 12/17] target/riscv: support for 128-bit arithmetic instructions

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Richard Henderson <richard.henderson@linaro.org>
To: "Frédéric Pétrot" <frederic.petrot@univ-grenoble-alpes.fr>,
	qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: philmd@redhat.com, bin.meng@windriver.com,
	alistair.francis@wdc.com, palmer@dabbelt.com,
	fabien.portas@grenoble-inp.org
Subject: Re: [PATCH v4 12/17] target/riscv: support for 128-bit arithmetic instructions
Date: Tue, 2 Nov 2021 08:43:28 -0400	[thread overview]
Message-ID: <c010789f-de86-f71f-2f4d-34ba4a6aa1b7@linaro.org> (raw)
In-Reply-To: <20211025122818.168890-13-frederic.petrot@univ-grenoble-alpes.fr>

On 10/25/21 5:28 AM, Frédéric Pétrot wrote:
> +    if (get_xl(ctx) < MXL_RV128 || get_ol(ctx) < MXL_RV128) {

Only the second test.  Two times.

> +static bool gen_setcond_i128(TCGv rl, TCGv rh,
> +                             TCGv al, TCGv ah,
> +                             TCGv bl, TCGv bh,
> +                             TCGCond cond)
> +{
> +    switch (cond) {
> +    case TCG_COND_EQ:
> +        tcg_gen_xor_tl(rl, al, bl);
> +        tcg_gen_xor_tl(rh, ah, bh);
> +        tcg_gen_or_tl(rh, rl, rh);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, rl, rh, 0);
> +    break;

Indentation.

> +
> +    case TCG_COND_NE:
> +        tcg_gen_xor_tl(rl, al, bl);
> +        tcg_gen_xor_tl(rh, ah, bh);
> +        tcg_gen_or_tl(rh, rl, rh);
> +        tcg_gen_setcondi_tl(TCG_COND_NE, rl, rh, 0);
> +        break;
> +
> +    case TCG_COND_LT:
> +    {
> +        TCGv tmp1 = tcg_temp_new();
> +        TCGv tmp2 = tcg_temp_new();
> +
> +        tcg_gen_sub2_tl(rl, rh, al, ah, bl, bh);
> +        tcg_gen_xor_tl(tmp1, rh, ah);
> +        tcg_gen_xor_tl(tmp2, ah, bh);
> +        tcg_gen_and_tl(tmp1, tmp1, tmp2);
> +        tcg_gen_xor_tl(tmp1, rh, tmp1);
> +        tcg_gen_shri_tl(rl, tmp1, 63);
> +
> +        tcg_temp_free(tmp1);
> +        tcg_temp_free(tmp2);
> +        break;
> +    }
> +
> +    case TCG_COND_GE:
> +        /* Invert result of TCG_COND_LT */
> +        gen_setcond_i128(rl, rh, al, ah, bl, bh, TCG_COND_LT);
> +        tcg_gen_xori_tl(rl, rl, 1);
> +        break;
> +
> +    case TCG_COND_LTU:
> +        gen_setcond_i128(rl, rh, al, ah, bl, bh, TCG_COND_GEU);
> +        tcg_gen_xori_tl(rl, rl, 1);
> +        break;
> +
> +    case TCG_COND_GEU:
> +    {
> +        TCGv tmp1 = tcg_temp_new();
> +        TCGv tmp2 = tcg_temp_new();
> +        TCGv zero = tcg_constant_tl(0);
> +        TCGv one = tcg_constant_tl(1);
> +        /* borrow in to second word */
> +        tcg_gen_setcond_tl(TCG_COND_LTU, tmp1, al, bl);
> +        /* seed third word with 1, which will be result */
> +        tcg_gen_sub2_tl(tmp1, tmp2, ah, one, tmp1, zero);
> +        tcg_gen_sub2_tl(tmp1, rl, tmp1, tmp2, bh, zero);
> +
> +        tcg_temp_free(tmp1);
> +        tcg_temp_free(tmp2);
> +        break;
> +    }
> +
> +    default:
> +        return false;

This should be g_assert_not_reached(), not return false.

> +    }
> +    tcg_gen_movi_tl(rh, 0);
> +    return true;
> +}

Which begs the question of what the return value is for when you don't even use it below.

I think we should treat this as more of a subroutine, and return the final TCGCond 
required to get the correct result, *without* actually computing the final setcond.

static TCGCond compare_128i(TCGv rl, int rs1, int rs2, TCGCond cond)
{
     TCGv al = get_gpr(ctx, rs1, EXT_SIGN);
     TCGv bl = get_gpr(ctx, rs2, EXT_SIGN);
     TCGv ah = get_gprh(ctx, a->rs1);
     TCGv bh = get_gprh(ctx, a->rs2);
     TCGv rh = tcg_temp_new();
     bool invert = false;

     switch (cond) {
     case TCG_COND_EQ:
     case TCG_COND_NE:
         if (rs2 == 0) {
             tcg_gen_or_tl(rl, al, ah);
         } else {
             tcg_gen_xor_tl(rl, al, bl);
             tcg_gen_xor_tl(rh, ah, bh);
             tcg_gen_or_tl(rl, rl, rh);
         }
         break;

     case TCG_COND_GE:
         invert = true;
         cond = TCG_COND_LT;
         /* fall through */
     case TCG_COND_LT:
         if (rs2 == 0) {
             tcg_gen_mov_tl(rl, ah);
         } else {
             TCGv tmp1 = tcg_temp_new();
             TCGv tmp2 = tcg_temp_new();
             tcg_gen_sub2_tl(rl, rh, al, ah, bl, bh);
             tcg_gen_xor_tl(tmp1, rh, ah);
             tcg_gen_xor_tl(tmp2, ah, bh);
             tcg_gen_and_tl(tmp1, tmp1, tmp2);
             tcg_gen_xor_tl(rl, rh, tmp1);
             tcg_temp_free(tmp1);
             tcg_temp_free(tmp2);
         }
         break;
     ...
     }
     tcg_temp_free(rh);

     if (invert) {
         cond = tcg_invert_cond(cond);
     }
     return cond;
}

static void setcond_128i(...)
{
     cond = compare_128i(rl, rh, al, ah, bl, bh, cond);
     tcg_gen_setcondi_tl(cond, rl, rl, 0);
     tcg_gen_movi_tl(rh, 0);
}

static void branch_128i(...)
{
     TCGv tmpl = tcg_temp_new();
     TCGv tmph = tcg_temp_new();

     cond = compare_128i(tmpl, tmph, al, ah, bl, bh, cond);
     tcg_gen_brcondi_tl(cond, tmpl, 0, label);

     tcg_temp_free(tmpl);
     tcg_temp_free(tmph);
}

> +
>   static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
>   {
>       TCGLabel *l = gen_new_label();
>       TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN);
>       TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN);
>   
> -    tcg_gen_brcond_tl(cond, src1, src2, l);
> +    if (get_xl(ctx) == MXL_RV128) {
> +        TCGv src1h = get_gprh(ctx, a->rs1);
> +        TCGv src2h = get_gprh(ctx, a->rs2);
> +        TCGv tmpl = tcg_temp_new();
> +        TCGv tmph = tcg_temp_new();
> +
> +        /*
> +         * Do not use gen_setcond_i128 for EQ and NE as these conditions are
> +         * often met and can be more efficiently implemented.
> +         * Some comparisons with zero are also simpler, let's do them.
> +         */
> +        if (cond == TCG_COND_EQ || cond == TCG_COND_NE) {
> +            /*
> +             * bnez and beqz being used quite often too, lets optimize them,
> +             * although QEMU's tcg optimizer handles these cases nicely
> +             */
> +            if (a->rs2 == 0) {
> +                tcg_gen_or_tl(tmpl, src1, src1h);
> +                tcg_gen_brcondi_tl(cond, tmpl, 0, l);
> +            } else {
> +                tcg_gen_xor_tl(tmpl, src1, src2);
> +                tcg_gen_xor_tl(tmph, src1h, src2h);
> +                tcg_gen_or_tl(tmpl, tmpl, tmph);
> +                tcg_gen_brcondi_tl(cond, tmpl, 0, l);
> +            }
> +        } else if (a->rs2 == 0
> +                   && (cond == TCG_COND_LT || cond == TCG_COND_GE)) {
> +            tcg_gen_shri_tl(tmpl, src1h, 63);
> +            /* hack: transform LT in EQ and GE in NE */
> +            tcg_gen_brcondi_tl((cond & 13) | 8, tmpl, 1, l);
> +        } else {
> +            if (cond == TCG_COND_GE || cond == TCG_COND_LTU) {
> +                gen_setcond_i128(tmpl, tmph, src1, src1h, src2, src2h,
> +                                 tcg_invert_cond(cond));
> +                tcg_gen_brcondi_tl(TCG_COND_EQ, tmpl, 0, l);
> +            } else {
> +                gen_setcond_i128(tmpl, tmph, src1, src1h, src2, src2h, cond);
> +                tcg_gen_brcondi_tl(TCG_COND_NE, tmpl, 0, l);
> +            }
> +        }

Which then takes care of all of this.

BTW, the hack was quite a hack, and quite unnecessary.  You could have dropped the shift 
and performed the LT/GE brcond directly on src1h.


r~

next prev parent reply	other threads:[~2021-11-02 12:55 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-25 12:28 [PATCH v4 00/17] Adding partial support for 128-bit riscv target Frédéric Pétrot
2021-10-25 12:28 ` [PATCH v4 01/17] exec/memop: Rename MO_Q definition as MO_UQ and add MO_UO Frédéric Pétrot
2021-10-25 20:09   ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 02/17] qemu/int128: addition of a few 128-bit operations Frédéric Pétrot
2021-10-25 15:47   ` Philippe Mathieu-Daudé
2021-10-25 20:16     ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 03/17] target/riscv: additional macros to check instruction support Frédéric Pétrot
2021-10-30 23:49   ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 04/17] target/riscv: separation of bitwise logic and aritmetic helpers Frédéric Pétrot
2021-10-25 15:51   ` Philippe Mathieu-Daudé
2021-10-25 19:08   ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 05/17] target/riscv: array for the 64 upper bits of 128-bit registers Frédéric Pétrot
2021-10-25 15:55   ` Philippe Mathieu-Daudé
2021-10-25 19:10   ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 06/17] target/riscv: setup everything so that riscv128-softmmu compiles Frédéric Pétrot
2021-10-30 23:52   ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 07/17] target/riscv: moving some insns close to similar insns Frédéric Pétrot
2021-10-25 15:56   ` Philippe Mathieu-Daudé
2021-10-25 12:28 ` [PATCH v4 08/17] target/riscv: accessors to registers upper part and 128-bit load/store Frédéric Pétrot
2021-10-31  3:41   ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 09/17] target/riscv: support for 128-bit bitwise instructions Frédéric Pétrot
2021-10-31  3:44   ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 10/17] target/riscv: support for 128-bit U-type instructions Frédéric Pétrot
2021-10-31  3:49   ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 11/17] target/riscv: support for 128-bit shift instructions Frédéric Pétrot
2021-10-31  4:03   ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 12/17] target/riscv: support for 128-bit arithmetic instructions Frédéric Pétrot
2021-11-02 12:43   ` Richard Henderson [this message]
2021-10-25 12:28 ` [PATCH v4 13/17] target/riscv: support for 128-bit M extension Frédéric Pétrot
2021-11-02 13:05   ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 14/17] target/riscv: adding high part of some csrs Frédéric Pétrot
2021-10-25 12:28 ` [PATCH v4 15/17] target/riscv: helper functions to wrap calls to 128-bit csr insns Frédéric Pétrot
2021-10-25 12:28 ` [PATCH v4 16/17] target/riscv: modification of the trans_csrxx for 128-bit support Frédéric Pétrot
2021-10-25 12:28 ` [PATCH v4 17/17] target/riscv: actual functions to realize crs 128-bit insns Frédéric Pétrot
2021-11-02 13:22   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c010789f-de86-f71f-2f4d-34ba4a6aa1b7@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=alistair.francis@wdc.com \
    --cc=bin.meng@windriver.com \
    --cc=fabien.portas@grenoble-inp.org \
    --cc=frederic.petrot@univ-grenoble-alpes.fr \
    --cc=palmer@dabbelt.com \
    --cc=philmd@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).