From: Richard Henderson <richard.henderson@linaro.org>
To: "Frédéric Pétrot" <frederic.petrot@univ-grenoble-alpes.fr>,
qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: philmd@redhat.com, bin.meng@windriver.com,
alistair.francis@wdc.com, palmer@dabbelt.com,
fabien.portas@grenoble-inp.org
Subject: Re: [PATCH v4 12/17] target/riscv: support for 128-bit arithmetic instructions
Date: Tue, 2 Nov 2021 08:43:28 -0400 [thread overview]
Message-ID: <c010789f-de86-f71f-2f4d-34ba4a6aa1b7@linaro.org> (raw)
In-Reply-To: <20211025122818.168890-13-frederic.petrot@univ-grenoble-alpes.fr>
On 10/25/21 5:28 AM, Frédéric Pétrot wrote:
> + if (get_xl(ctx) < MXL_RV128 || get_ol(ctx) < MXL_RV128) {
Only the second test. Two times.
> +static bool gen_setcond_i128(TCGv rl, TCGv rh,
> + TCGv al, TCGv ah,
> + TCGv bl, TCGv bh,
> + TCGCond cond)
> +{
> + switch (cond) {
> + case TCG_COND_EQ:
> + tcg_gen_xor_tl(rl, al, bl);
> + tcg_gen_xor_tl(rh, ah, bh);
> + tcg_gen_or_tl(rh, rl, rh);
> + tcg_gen_setcondi_tl(TCG_COND_EQ, rl, rh, 0);
> + break;
Indentation.
> +
> + case TCG_COND_NE:
> + tcg_gen_xor_tl(rl, al, bl);
> + tcg_gen_xor_tl(rh, ah, bh);
> + tcg_gen_or_tl(rh, rl, rh);
> + tcg_gen_setcondi_tl(TCG_COND_NE, rl, rh, 0);
> + break;
> +
> + case TCG_COND_LT:
> + {
> + TCGv tmp1 = tcg_temp_new();
> + TCGv tmp2 = tcg_temp_new();
> +
> + tcg_gen_sub2_tl(rl, rh, al, ah, bl, bh);
> + tcg_gen_xor_tl(tmp1, rh, ah);
> + tcg_gen_xor_tl(tmp2, ah, bh);
> + tcg_gen_and_tl(tmp1, tmp1, tmp2);
> + tcg_gen_xor_tl(tmp1, rh, tmp1);
> + tcg_gen_shri_tl(rl, tmp1, 63);
> +
> + tcg_temp_free(tmp1);
> + tcg_temp_free(tmp2);
> + break;
> + }
> +
> + case TCG_COND_GE:
> + /* Invert result of TCG_COND_LT */
> + gen_setcond_i128(rl, rh, al, ah, bl, bh, TCG_COND_LT);
> + tcg_gen_xori_tl(rl, rl, 1);
> + break;
> +
> + case TCG_COND_LTU:
> + gen_setcond_i128(rl, rh, al, ah, bl, bh, TCG_COND_GEU);
> + tcg_gen_xori_tl(rl, rl, 1);
> + break;
> +
> + case TCG_COND_GEU:
> + {
> + TCGv tmp1 = tcg_temp_new();
> + TCGv tmp2 = tcg_temp_new();
> + TCGv zero = tcg_constant_tl(0);
> + TCGv one = tcg_constant_tl(1);
> + /* borrow in to second word */
> + tcg_gen_setcond_tl(TCG_COND_LTU, tmp1, al, bl);
> + /* seed third word with 1, which will be result */
> + tcg_gen_sub2_tl(tmp1, tmp2, ah, one, tmp1, zero);
> + tcg_gen_sub2_tl(tmp1, rl, tmp1, tmp2, bh, zero);
> +
> + tcg_temp_free(tmp1);
> + tcg_temp_free(tmp2);
> + break;
> + }
> +
> + default:
> + return false;
This should be g_assert_not_reached(), not return false.
> + }
> + tcg_gen_movi_tl(rh, 0);
> + return true;
> +}
Which begs the question of what the return value is for when you don't even use it below.
I think we should treat this as more of a subroutine, and return the final TCGCond
required to get the correct result, *without* actually computing the final setcond.
static TCGCond compare_128i(TCGv rl, int rs1, int rs2, TCGCond cond)
{
TCGv al = get_gpr(ctx, rs1, EXT_SIGN);
TCGv bl = get_gpr(ctx, rs2, EXT_SIGN);
TCGv ah = get_gprh(ctx, a->rs1);
TCGv bh = get_gprh(ctx, a->rs2);
TCGv rh = tcg_temp_new();
bool invert = false;
switch (cond) {
case TCG_COND_EQ:
case TCG_COND_NE:
if (rs2 == 0) {
tcg_gen_or_tl(rl, al, ah);
} else {
tcg_gen_xor_tl(rl, al, bl);
tcg_gen_xor_tl(rh, ah, bh);
tcg_gen_or_tl(rl, rl, rh);
}
break;
case TCG_COND_GE:
invert = true;
cond = TCG_COND_LT;
/* fall through */
case TCG_COND_LT:
if (rs2 == 0) {
tcg_gen_mov_tl(rl, ah);
} else {
TCGv tmp1 = tcg_temp_new();
TCGv tmp2 = tcg_temp_new();
tcg_gen_sub2_tl(rl, rh, al, ah, bl, bh);
tcg_gen_xor_tl(tmp1, rh, ah);
tcg_gen_xor_tl(tmp2, ah, bh);
tcg_gen_and_tl(tmp1, tmp1, tmp2);
tcg_gen_xor_tl(rl, rh, tmp1);
tcg_temp_free(tmp1);
tcg_temp_free(tmp2);
}
break;
...
}
tcg_temp_free(rh);
if (invert) {
cond = tcg_invert_cond(cond);
}
return cond;
}
static void setcond_128i(...)
{
cond = compare_128i(rl, rh, al, ah, bl, bh, cond);
tcg_gen_setcondi_tl(cond, rl, rl, 0);
tcg_gen_movi_tl(rh, 0);
}
static void branch_128i(...)
{
TCGv tmpl = tcg_temp_new();
TCGv tmph = tcg_temp_new();
cond = compare_128i(tmpl, tmph, al, ah, bl, bh, cond);
tcg_gen_brcondi_tl(cond, tmpl, 0, label);
tcg_temp_free(tmpl);
tcg_temp_free(tmph);
}
> +
> static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
> {
> TCGLabel *l = gen_new_label();
> TCGv src1 = get_gpr(ctx, a->rs1, EXT_SIGN);
> TCGv src2 = get_gpr(ctx, a->rs2, EXT_SIGN);
>
> - tcg_gen_brcond_tl(cond, src1, src2, l);
> + if (get_xl(ctx) == MXL_RV128) {
> + TCGv src1h = get_gprh(ctx, a->rs1);
> + TCGv src2h = get_gprh(ctx, a->rs2);
> + TCGv tmpl = tcg_temp_new();
> + TCGv tmph = tcg_temp_new();
> +
> + /*
> + * Do not use gen_setcond_i128 for EQ and NE as these conditions are
> + * often met and can be more efficiently implemented.
> + * Some comparisons with zero are also simpler, let's do them.
> + */
> + if (cond == TCG_COND_EQ || cond == TCG_COND_NE) {
> + /*
> + * bnez and beqz being used quite often too, lets optimize them,
> + * although QEMU's tcg optimizer handles these cases nicely
> + */
> + if (a->rs2 == 0) {
> + tcg_gen_or_tl(tmpl, src1, src1h);
> + tcg_gen_brcondi_tl(cond, tmpl, 0, l);
> + } else {
> + tcg_gen_xor_tl(tmpl, src1, src2);
> + tcg_gen_xor_tl(tmph, src1h, src2h);
> + tcg_gen_or_tl(tmpl, tmpl, tmph);
> + tcg_gen_brcondi_tl(cond, tmpl, 0, l);
> + }
> + } else if (a->rs2 == 0
> + && (cond == TCG_COND_LT || cond == TCG_COND_GE)) {
> + tcg_gen_shri_tl(tmpl, src1h, 63);
> + /* hack: transform LT in EQ and GE in NE */
> + tcg_gen_brcondi_tl((cond & 13) | 8, tmpl, 1, l);
> + } else {
> + if (cond == TCG_COND_GE || cond == TCG_COND_LTU) {
> + gen_setcond_i128(tmpl, tmph, src1, src1h, src2, src2h,
> + tcg_invert_cond(cond));
> + tcg_gen_brcondi_tl(TCG_COND_EQ, tmpl, 0, l);
> + } else {
> + gen_setcond_i128(tmpl, tmph, src1, src1h, src2, src2h, cond);
> + tcg_gen_brcondi_tl(TCG_COND_NE, tmpl, 0, l);
> + }
> + }
Which then takes care of all of this.
BTW, the hack was quite a hack, and quite unnecessary. You could have dropped the shift
and performed the LT/GE brcond directly on src1h.
r~
next prev parent reply other threads:[~2021-11-02 12:55 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-10-25 12:28 [PATCH v4 00/17] Adding partial support for 128-bit riscv target Frédéric Pétrot
2021-10-25 12:28 ` [PATCH v4 01/17] exec/memop: Rename MO_Q definition as MO_UQ and add MO_UO Frédéric Pétrot
2021-10-25 20:09 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 02/17] qemu/int128: addition of a few 128-bit operations Frédéric Pétrot
2021-10-25 15:47 ` Philippe Mathieu-Daudé
2021-10-25 20:16 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 03/17] target/riscv: additional macros to check instruction support Frédéric Pétrot
2021-10-30 23:49 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 04/17] target/riscv: separation of bitwise logic and aritmetic helpers Frédéric Pétrot
2021-10-25 15:51 ` Philippe Mathieu-Daudé
2021-10-25 19:08 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 05/17] target/riscv: array for the 64 upper bits of 128-bit registers Frédéric Pétrot
2021-10-25 15:55 ` Philippe Mathieu-Daudé
2021-10-25 19:10 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 06/17] target/riscv: setup everything so that riscv128-softmmu compiles Frédéric Pétrot
2021-10-30 23:52 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 07/17] target/riscv: moving some insns close to similar insns Frédéric Pétrot
2021-10-25 15:56 ` Philippe Mathieu-Daudé
2021-10-25 12:28 ` [PATCH v4 08/17] target/riscv: accessors to registers upper part and 128-bit load/store Frédéric Pétrot
2021-10-31 3:41 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 09/17] target/riscv: support for 128-bit bitwise instructions Frédéric Pétrot
2021-10-31 3:44 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 10/17] target/riscv: support for 128-bit U-type instructions Frédéric Pétrot
2021-10-31 3:49 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 11/17] target/riscv: support for 128-bit shift instructions Frédéric Pétrot
2021-10-31 4:03 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 12/17] target/riscv: support for 128-bit arithmetic instructions Frédéric Pétrot
2021-11-02 12:43 ` Richard Henderson [this message]
2021-10-25 12:28 ` [PATCH v4 13/17] target/riscv: support for 128-bit M extension Frédéric Pétrot
2021-11-02 13:05 ` Richard Henderson
2021-10-25 12:28 ` [PATCH v4 14/17] target/riscv: adding high part of some csrs Frédéric Pétrot
2021-10-25 12:28 ` [PATCH v4 15/17] target/riscv: helper functions to wrap calls to 128-bit csr insns Frédéric Pétrot
2021-10-25 12:28 ` [PATCH v4 16/17] target/riscv: modification of the trans_csrxx for 128-bit support Frédéric Pétrot
2021-10-25 12:28 ` [PATCH v4 17/17] target/riscv: actual functions to realize crs 128-bit insns Frédéric Pétrot
2021-11-02 13:22 ` Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c010789f-de86-f71f-2f4d-34ba4a6aa1b7@linaro.org \
--to=richard.henderson@linaro.org \
--cc=alistair.francis@wdc.com \
--cc=bin.meng@windriver.com \
--cc=fabien.portas@grenoble-inp.org \
--cc=frederic.petrot@univ-grenoble-alpes.fr \
--cc=palmer@dabbelt.com \
--cc=philmd@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=qemu-riscv@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).