* [PATCH 1/4] target/s390x: Improve cc computation for ADD LOGICAL
2020-10-17 2:28 [PATCH 0/4] target/s390x: Improve carry computation Richard Henderson
@ 2020-10-17 2:28 ` Richard Henderson
2020-10-20 14:08 ` David Hildenbrand
2020-10-17 2:28 ` [PATCH 2/4] target/s390x: Improve ADD LOGICAL WITH CARRY Richard Henderson
` (2 subsequent siblings)
3 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2020-10-17 2:28 UTC (permalink / raw)
To: qemu-devel; +Cc: david
The resulting cc is only dependent on the result and the
carry-out. So save those things rather than the inputs.
Carry-out for 64-bit inputs is had via tcg_gen_add2_i64 directly
into cc_src. Carry-out for 32-bit inputs is had via extraction
from a normal 64-bit add (with zero-extended inputs).
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/internal.h | 4 +-
target/s390x/cc_helper.c | 25 ++++-----
target/s390x/helper.c | 3 +-
target/s390x/translate.c | 103 ++++++++++++++++++++++++-------------
target/s390x/insn-data.def | 36 ++++++-------
5 files changed, 97 insertions(+), 74 deletions(-)
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index 64602660ae..55c5442102 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -160,6 +160,8 @@ enum cc_op {
CC_OP_STATIC, /* CC value is env->cc_op */
CC_OP_NZ, /* env->cc_dst != 0 */
+ CC_OP_ADDU, /* dst != 0, src = carry out (0,1) */
+
CC_OP_LTGT_32, /* signed less/greater than (32bit) */
CC_OP_LTGT_64, /* signed less/greater than (64bit) */
CC_OP_LTUGTU_32, /* unsigned less/greater than (32bit) */
@@ -168,7 +170,6 @@ enum cc_op {
CC_OP_LTGT0_64, /* signed less/greater than 0 (64bit) */
CC_OP_ADD_64, /* overflow on add (64bit) */
- CC_OP_ADDU_64, /* overflow on unsigned add (64bit) */
CC_OP_ADDC_64, /* overflow on unsigned add-carry (64bit) */
CC_OP_SUB_64, /* overflow on subtraction (64bit) */
CC_OP_SUBU_64, /* overflow on unsigned subtraction (64bit) */
@@ -178,7 +179,6 @@ enum cc_op {
CC_OP_MULS_64, /* overflow on signed multiply (64bit) */
CC_OP_ADD_32, /* overflow on add (32bit) */
- CC_OP_ADDU_32, /* overflow on unsigned add (32bit) */
CC_OP_ADDC_32, /* overflow on unsigned add-carry (32bit) */
CC_OP_SUB_32, /* overflow on subtraction (32bit) */
CC_OP_SUBU_32, /* overflow on unsigned subtraction (32bit) */
diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
index 5432aeeed4..59da4d1cc2 100644
--- a/target/s390x/cc_helper.c
+++ b/target/s390x/cc_helper.c
@@ -123,6 +123,12 @@ static uint32_t cc_calc_nz(uint64_t dst)
return !!dst;
}
+static uint32_t cc_calc_addu(uint64_t carry_out, uint64_t result)
+{
+ g_assert(carry_out <= 1);
+ return (result != 0) + 2 * carry_out;
+}
+
static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
{
if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
@@ -138,11 +144,6 @@ static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
}
}
-static uint32_t cc_calc_addu_64(uint64_t a1, uint64_t a2, uint64_t ar)
-{
- return (ar != 0) + 2 * (ar < a1);
-}
-
static uint32_t cc_calc_addc_64(uint64_t a1, uint64_t a2, uint64_t ar)
{
/* Recover a2 + carry_in. */
@@ -239,11 +240,6 @@ static uint32_t cc_calc_add_32(int32_t a1, int32_t a2, int32_t ar)
}
}
-static uint32_t cc_calc_addu_32(uint32_t a1, uint32_t a2, uint32_t ar)
-{
- return (ar != 0) + 2 * (ar < a1);
-}
-
static uint32_t cc_calc_addc_32(uint32_t a1, uint32_t a2, uint32_t ar)
{
/* Recover a2 + carry_in. */
@@ -483,12 +479,12 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_NZ:
r = cc_calc_nz(dst);
break;
+ case CC_OP_ADDU:
+ r = cc_calc_addu(src, dst);
+ break;
case CC_OP_ADD_64:
r = cc_calc_add_64(src, dst, vr);
break;
- case CC_OP_ADDU_64:
- r = cc_calc_addu_64(src, dst, vr);
- break;
case CC_OP_ADDC_64:
r = cc_calc_addc_64(src, dst, vr);
break;
@@ -517,9 +513,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_ADD_32:
r = cc_calc_add_32(src, dst, vr);
break;
- case CC_OP_ADDU_32:
- r = cc_calc_addu_32(src, dst, vr);
- break;
case CC_OP_ADDC_32:
r = cc_calc_addc_32(src, dst, vr);
break;
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index b877690845..db87a62a57 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -395,6 +395,7 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_DYNAMIC] = "CC_OP_DYNAMIC",
[CC_OP_STATIC] = "CC_OP_STATIC",
[CC_OP_NZ] = "CC_OP_NZ",
+ [CC_OP_ADDU] = "CC_OP_ADDU",
[CC_OP_LTGT_32] = "CC_OP_LTGT_32",
[CC_OP_LTGT_64] = "CC_OP_LTGT_64",
[CC_OP_LTUGTU_32] = "CC_OP_LTUGTU_32",
@@ -402,7 +403,6 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_LTGT0_32] = "CC_OP_LTGT0_32",
[CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
[CC_OP_ADD_64] = "CC_OP_ADD_64",
- [CC_OP_ADDU_64] = "CC_OP_ADDU_64",
[CC_OP_ADDC_64] = "CC_OP_ADDC_64",
[CC_OP_SUB_64] = "CC_OP_SUB_64",
[CC_OP_SUBU_64] = "CC_OP_SUBU_64",
@@ -410,7 +410,6 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_ABS_64] = "CC_OP_ABS_64",
[CC_OP_NABS_64] = "CC_OP_NABS_64",
[CC_OP_ADD_32] = "CC_OP_ADD_32",
- [CC_OP_ADDU_32] = "CC_OP_ADDU_32",
[CC_OP_ADDC_32] = "CC_OP_ADDC_32",
[CC_OP_SUB_32] = "CC_OP_SUB_32",
[CC_OP_SUBU_32] = "CC_OP_SUBU_32",
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index ac10f42f10..9bf4c14f66 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -600,13 +600,11 @@ static void gen_op_calc_cc(DisasContext *s)
dummy = tcg_const_i64(0);
/* FALLTHRU */
case CC_OP_ADD_64:
- case CC_OP_ADDU_64:
case CC_OP_ADDC_64:
case CC_OP_SUB_64:
case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
- case CC_OP_ADDU_32:
case CC_OP_ADDC_32:
case CC_OP_SUB_32:
case CC_OP_SUBU_32:
@@ -650,6 +648,7 @@ static void gen_op_calc_cc(DisasContext *s)
/* 1 argument */
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, dummy, cc_dst, dummy);
break;
+ case CC_OP_ADDU:
case CC_OP_ICM:
case CC_OP_LTGT_32:
case CC_OP_LTGT_64:
@@ -666,13 +665,11 @@ static void gen_op_calc_cc(DisasContext *s)
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, dummy);
break;
case CC_OP_ADD_64:
- case CC_OP_ADDU_64:
case CC_OP_ADDC_64:
case CC_OP_SUB_64:
case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
- case CC_OP_ADDU_32:
case CC_OP_ADDC_32:
case CC_OP_SUB_32:
case CC_OP_SUBU_32:
@@ -849,20 +846,19 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
account_inline_branch(s, old_cc_op);
break;
- case CC_OP_ADDU_32:
- case CC_OP_ADDU_64:
+ case CC_OP_ADDU:
switch (mask) {
- case 8 | 2: /* vr == 0 */
+ case 8 | 2: /* result == 0 */
cond = TCG_COND_EQ;
break;
- case 4 | 1: /* vr != 0 */
+ case 4 | 1: /* result != 0 */
cond = TCG_COND_NE;
break;
- case 8 | 4: /* no carry -> vr >= src */
- cond = TCG_COND_GEU;
+ case 8 | 4: /* no carry */
+ cond = TCG_COND_EQ;
break;
- case 2 | 1: /* carry -> vr < src */
- cond = TCG_COND_LTU;
+ case 2 | 1: /* carry */
+ cond = TCG_COND_NE;
break;
default:
goto do_dynamic;
@@ -950,26 +946,21 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
tcg_gen_and_i64(c->u.s64.a, cc_src, cc_dst);
break;
- case CC_OP_ADDU_32:
- c->is_64 = false;
- c->u.s32.a = tcg_temp_new_i32();
- c->u.s32.b = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(c->u.s32.a, cc_vr);
- if (cond == TCG_COND_EQ || cond == TCG_COND_NE) {
- tcg_gen_movi_i32(c->u.s32.b, 0);
- } else {
- tcg_gen_extrl_i64_i32(c->u.s32.b, cc_src);
- }
- break;
-
- case CC_OP_ADDU_64:
- c->u.s64.a = cc_vr;
+ case CC_OP_ADDU:
+ c->is_64 = true;
+ c->u.s64.b = tcg_const_i64(0);
c->g1 = true;
- if (cond == TCG_COND_EQ || cond == TCG_COND_NE) {
- c->u.s64.b = tcg_const_i64(0);
- } else {
- c->u.s64.b = cc_src;
- c->g2 = true;
+ switch (mask) {
+ case 8 | 2:
+ case 4 | 1: /* result */
+ c->u.s64.a = cc_dst;
+ break;
+ case 8 | 4:
+ case 2 | 1: /* carry */
+ c->u.s64.a = cc_src;
+ break;
+ default:
+ g_assert_not_reached();
}
break;
@@ -1444,6 +1435,13 @@ static DisasJumpType op_add(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
+static DisasJumpType op_addu64(DisasContext *s, DisasOps *o)
+{
+ tcg_gen_movi_i64(cc_src, 0);
+ tcg_gen_add2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src);
+ return DISAS_NEXT;
+}
+
static DisasJumpType op_addc(DisasContext *s, DisasOps *o)
{
DisasCompare cmp;
@@ -1473,9 +1471,10 @@ static DisasJumpType op_addc(DisasContext *s, DisasOps *o)
static DisasJumpType op_asi(DisasContext *s, DisasOps *o)
{
- o->in1 = tcg_temp_new_i64();
+ bool non_atomic = !s390_has_feat(S390_FEAT_STFLE_45);
- if (!s390_has_feat(S390_FEAT_STFLE_45)) {
+ o->in1 = tcg_temp_new_i64();
+ if (non_atomic) {
tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data);
} else {
/* Perform the atomic addition in memory. */
@@ -1486,7 +1485,30 @@ static DisasJumpType op_asi(DisasContext *s, DisasOps *o)
/* Recompute also for atomic case: needed for setting CC. */
tcg_gen_add_i64(o->out, o->in1, o->in2);
- if (!s390_has_feat(S390_FEAT_STFLE_45)) {
+ if (non_atomic) {
+ tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data);
+ }
+ return DISAS_NEXT;
+}
+
+static DisasJumpType op_asiu64(DisasContext *s, DisasOps *o)
+{
+ bool non_atomic = !s390_has_feat(S390_FEAT_STFLE_45);
+
+ o->in1 = tcg_temp_new_i64();
+ if (non_atomic) {
+ tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data);
+ } else {
+ /* Perform the atomic addition in memory. */
+ tcg_gen_atomic_fetch_add_i64(o->in1, o->addr1, o->in2, get_mem_index(s),
+ s->insn->data);
+ }
+
+ /* Recompute also for atomic case: needed for setting CC. */
+ tcg_gen_movi_i64(cc_src, 0);
+ tcg_gen_add2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src);
+
+ if (non_atomic) {
tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data);
}
return DISAS_NEXT;
@@ -5184,12 +5206,14 @@ static void cout_adds64(DisasContext *s, DisasOps *o)
static void cout_addu32(DisasContext *s, DisasOps *o)
{
- gen_op_update3_cc_i64(s, CC_OP_ADDU_32, o->in1, o->in2, o->out);
+ tcg_gen_shri_i64(cc_src, o->out, 32);
+ tcg_gen_ext32u_i64(cc_dst, o->out);
+ gen_op_update2_cc_i64(s, CC_OP_ADDU, cc_src, cc_dst);
}
static void cout_addu64(DisasContext *s, DisasOps *o)
{
- gen_op_update3_cc_i64(s, CC_OP_ADDU_64, o->in1, o->in2, o->out);
+ gen_op_update2_cc_i64(s, CC_OP_ADDU, cc_src, o->out);
}
static void cout_addc32(DisasContext *s, DisasOps *o)
@@ -5636,6 +5660,13 @@ static void in1_r2_sr32(DisasContext *s, DisasOps *o)
}
#define SPEC_in1_r2_sr32 0
+static void in1_r2_32u(DisasContext *s, DisasOps *o)
+{
+ o->in1 = tcg_temp_new_i64();
+ tcg_gen_ext32u_i64(o->in1, regs[get_field(s, r2)]);
+}
+#define SPEC_in1_r2_32u 0
+
static void in1_r3(DisasContext *s, DisasOps *o)
{
o->in1 = load_reg(get_field(s, r3));
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index d3bcdfd67b..b9ca9aeff5 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -58,29 +58,29 @@
C(0xa70b, AGHI, RI_a, Z, r1, i2, r1, 0, add, adds64)
/* ADD LOGICAL */
- C(0x1e00, ALR, RR_a, Z, r1, r2, new, r1_32, add, addu32)
- C(0xb9fa, ALRK, RRF_a, DO, r2, r3, new, r1_32, add, addu32)
- C(0x5e00, AL, RX_a, Z, r1, m2_32u, new, r1_32, add, addu32)
- C(0xe35e, ALY, RXY_a, LD, r1, m2_32u, new, r1_32, add, addu32)
- C(0xb90a, ALGR, RRE, Z, r1, r2, r1, 0, add, addu64)
- C(0xb91a, ALGFR, RRE, Z, r1, r2_32u, r1, 0, add, addu64)
- C(0xb9ea, ALGRK, RRF_a, DO, r2, r3, r1, 0, add, addu64)
- C(0xe30a, ALG, RXY_a, Z, r1, m2_64, r1, 0, add, addu64)
- C(0xe31a, ALGF, RXY_a, Z, r1, m2_32u, r1, 0, add, addu64)
+ C(0x1e00, ALR, RR_a, Z, r1_32u, r2_32u, new, r1_32, add, addu32)
+ C(0xb9fa, ALRK, RRF_a, DO, r2_32u, r3_32u, new, r1_32, add, addu32)
+ C(0x5e00, AL, RX_a, Z, r1_32u, m2_32u, new, r1_32, add, addu32)
+ C(0xe35e, ALY, RXY_a, LD, r1_32u, m2_32u, new, r1_32, add, addu32)
+ C(0xb90a, ALGR, RRE, Z, r1, r2, r1, 0, addu64, addu64)
+ C(0xb91a, ALGFR, RRE, Z, r1, r2_32u, r1, 0, addu64, addu64)
+ C(0xb9ea, ALGRK, RRF_a, DO, r2, r3, r1, 0, addu64, addu64)
+ C(0xe30a, ALG, RXY_a, Z, r1, m2_64, r1, 0, addu64, addu64)
+ C(0xe31a, ALGF, RXY_a, Z, r1, m2_32u, r1, 0, addu64, addu64)
/* ADD LOGICAL HIGH */
C(0xb9ca, ALHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, add, addu32)
- C(0xb9da, ALHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, add, addu32)
+ C(0xb9da, ALHHLR, RRF_a, HW, r2_sr32, r3_32u, new, r1_32h, add, addu32)
/* ADD LOGICAL IMMEDIATE */
- C(0xc20b, ALFI, RIL_a, EI, r1, i2_32u, new, r1_32, add, addu32)
- C(0xc20a, ALGFI, RIL_a, EI, r1, i2_32u, r1, 0, add, addu64)
+ C(0xc20b, ALFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, add, addu32)
+ C(0xc20a, ALGFI, RIL_a, EI, r1, i2_32u, r1, 0, addu64, addu64)
/* ADD LOGICAL WITH SIGNED IMMEDIATE */
- D(0xeb6e, ALSI, SIY, GIE, la1, i2, new, 0, asi, addu32, MO_TEUL)
- C(0xecda, ALHSIK, RIE_d, DO, r3, i2, new, r1_32, add, addu32)
- D(0xeb7e, ALGSI, SIY, GIE, la1, i2, new, 0, asi, addu64, MO_TEQ)
- C(0xecdb, ALGHSIK, RIE_d, DO, r3, i2, r1, 0, add, addu64)
+ D(0xeb6e, ALSI, SIY, GIE, la1, i2_32u, new, 0, asi, addu32, MO_TEUL)
+ C(0xecda, ALHSIK, RIE_d, DO, r3_32u, i2_32u, new, r1_32, add, addu32)
+ D(0xeb7e, ALGSI, SIY, GIE, la1, i2, new, 0, asiu64, addu64, MO_TEQ)
+ C(0xecdb, ALGHSIK, RIE_d, DO, r3, i2, r1, 0, addu64, addu64)
/* ADD LOGICAL WITH SIGNED IMMEDIATE HIGH */
- C(0xcc0a, ALSIH, RIL_a, HW, r1_sr32, i2, new, r1_32h, add, addu32)
- C(0xcc0b, ALSIHN, RIL_a, HW, r1_sr32, i2, new, r1_32h, add, 0)
+ C(0xcc0a, ALSIH, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, addu32)
+ C(0xcc0b, ALSIHN, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, 0)
/* ADD LOGICAL WITH CARRY */
C(0xb998, ALCR, RRE, Z, r1, r2, new, r1_32, addc, addc32)
C(0xb988, ALCGR, RRE, Z, r1, r2, r1, 0, addc, addc64)
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 1/4] target/s390x: Improve cc computation for ADD LOGICAL
2020-10-17 2:28 ` [PATCH 1/4] target/s390x: Improve cc computation for ADD LOGICAL Richard Henderson
@ 2020-10-20 14:08 ` David Hildenbrand
0 siblings, 0 replies; 11+ messages in thread
From: David Hildenbrand @ 2020-10-20 14:08 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 17.10.20 04:28, Richard Henderson wrote:
> The resulting cc is only dependent on the result and the
> carry-out. So save those things rather than the inputs.
>
> Carry-out for 64-bit inputs is had via tcg_gen_add2_i64 directly
> into cc_src. Carry-out for 32-bit inputs is had via extraction
> from a normal 64-bit add (with zero-extended inputs).
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
LGTM
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 2/4] target/s390x: Improve ADD LOGICAL WITH CARRY
2020-10-17 2:28 [PATCH 0/4] target/s390x: Improve carry computation Richard Henderson
2020-10-17 2:28 ` [PATCH 1/4] target/s390x: Improve cc computation for ADD LOGICAL Richard Henderson
@ 2020-10-17 2:28 ` Richard Henderson
2020-10-20 14:12 ` David Hildenbrand
2020-10-17 2:29 ` [PATCH 3/4] target/s390x: Improve cc computation for SUBTRACT LOGICAL Richard Henderson
2020-10-17 2:29 ` [PATCH 4/4] target/s390x: Improve SUB LOGICAL WITH BORROW Richard Henderson
3 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2020-10-17 2:28 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Now that ADD LOGICAL outputs carry, we can use that as input directly.
It also means we can re-use CC_OP_ADDU and produce an output carry
directly from ADD LOGICAL WITH CARRY.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/internal.h | 2 --
target/s390x/cc_helper.c | 26 ---------------
target/s390x/helper.c | 2 --
target/s390x/translate.c | 66 ++++++++++++++++++--------------------
target/s390x/insn-data.def | 8 ++---
5 files changed, 35 insertions(+), 69 deletions(-)
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index 55c5442102..f5f3ae063e 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -170,7 +170,6 @@ enum cc_op {
CC_OP_LTGT0_64, /* signed less/greater than 0 (64bit) */
CC_OP_ADD_64, /* overflow on add (64bit) */
- CC_OP_ADDC_64, /* overflow on unsigned add-carry (64bit) */
CC_OP_SUB_64, /* overflow on subtraction (64bit) */
CC_OP_SUBU_64, /* overflow on unsigned subtraction (64bit) */
CC_OP_SUBB_64, /* overflow on unsigned sub-borrow (64bit) */
@@ -179,7 +178,6 @@ enum cc_op {
CC_OP_MULS_64, /* overflow on signed multiply (64bit) */
CC_OP_ADD_32, /* overflow on add (32bit) */
- CC_OP_ADDC_32, /* overflow on unsigned add-carry (32bit) */
CC_OP_SUB_32, /* overflow on subtraction (32bit) */
CC_OP_SUBU_32, /* overflow on unsigned subtraction (32bit) */
CC_OP_SUBB_32, /* overflow on unsigned sub-borrow (32bit) */
diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
index 59da4d1cc2..cd2c5c4b39 100644
--- a/target/s390x/cc_helper.c
+++ b/target/s390x/cc_helper.c
@@ -144,16 +144,6 @@ static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
}
}
-static uint32_t cc_calc_addc_64(uint64_t a1, uint64_t a2, uint64_t ar)
-{
- /* Recover a2 + carry_in. */
- uint64_t a2c = ar - a1;
- /* Check for a2+carry_in overflow, then a1+a2c overflow. */
- int carry_out = (a2c < a2) || (ar < a1);
-
- return (ar != 0) + 2 * carry_out;
-}
-
static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar)
{
if ((a1 > 0 && a2 < 0 && ar < 0) || (a1 < 0 && a2 > 0 && ar > 0)) {
@@ -240,16 +230,6 @@ static uint32_t cc_calc_add_32(int32_t a1, int32_t a2, int32_t ar)
}
}
-static uint32_t cc_calc_addc_32(uint32_t a1, uint32_t a2, uint32_t ar)
-{
- /* Recover a2 + carry_in. */
- uint32_t a2c = ar - a1;
- /* Check for a2+carry_in overflow, then a1+a2c overflow. */
- int carry_out = (a2c < a2) || (ar < a1);
-
- return (ar != 0) + 2 * carry_out;
-}
-
static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar)
{
if ((a1 > 0 && a2 < 0 && ar < 0) || (a1 < 0 && a2 > 0 && ar > 0)) {
@@ -485,9 +465,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_ADD_64:
r = cc_calc_add_64(src, dst, vr);
break;
- case CC_OP_ADDC_64:
- r = cc_calc_addc_64(src, dst, vr);
- break;
case CC_OP_SUB_64:
r = cc_calc_sub_64(src, dst, vr);
break;
@@ -513,9 +490,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_ADD_32:
r = cc_calc_add_32(src, dst, vr);
break;
- case CC_OP_ADDC_32:
- r = cc_calc_addc_32(src, dst, vr);
- break;
case CC_OP_SUB_32:
r = cc_calc_sub_32(src, dst, vr);
break;
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index db87a62a57..4f4561bc64 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -403,14 +403,12 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_LTGT0_32] = "CC_OP_LTGT0_32",
[CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
[CC_OP_ADD_64] = "CC_OP_ADD_64",
- [CC_OP_ADDC_64] = "CC_OP_ADDC_64",
[CC_OP_SUB_64] = "CC_OP_SUB_64",
[CC_OP_SUBU_64] = "CC_OP_SUBU_64",
[CC_OP_SUBB_64] = "CC_OP_SUBB_64",
[CC_OP_ABS_64] = "CC_OP_ABS_64",
[CC_OP_NABS_64] = "CC_OP_NABS_64",
[CC_OP_ADD_32] = "CC_OP_ADD_32",
- [CC_OP_ADDC_32] = "CC_OP_ADDC_32",
[CC_OP_SUB_32] = "CC_OP_SUB_32",
[CC_OP_SUBU_32] = "CC_OP_SUBU_32",
[CC_OP_SUBB_32] = "CC_OP_SUBB_32",
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 9bf4c14f66..570b3c88c8 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -600,12 +600,10 @@ static void gen_op_calc_cc(DisasContext *s)
dummy = tcg_const_i64(0);
/* FALLTHRU */
case CC_OP_ADD_64:
- case CC_OP_ADDC_64:
case CC_OP_SUB_64:
case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
- case CC_OP_ADDC_32:
case CC_OP_SUB_32:
case CC_OP_SUBU_32:
case CC_OP_SUBB_32:
@@ -665,12 +663,10 @@ static void gen_op_calc_cc(DisasContext *s)
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, dummy);
break;
case CC_OP_ADD_64:
- case CC_OP_ADDC_64:
case CC_OP_SUB_64:
case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
- case CC_OP_ADDC_32:
case CC_OP_SUB_32:
case CC_OP_SUBU_32:
case CC_OP_SUBB_32:
@@ -1442,30 +1438,40 @@ static DisasJumpType op_addu64(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
-static DisasJumpType op_addc(DisasContext *s, DisasOps *o)
+/* Compute carry into cc_src. */
+static void compute_carry(DisasContext *s)
{
- DisasCompare cmp;
- TCGv_i64 carry;
-
- tcg_gen_add_i64(o->out, o->in1, o->in2);
-
- /* The carry flag is the msb of CC, therefore the branch mask that would
- create that comparison is 3. Feeding the generated comparison to
- setcond produces the carry flag that we desire. */
- disas_jcc(s, &cmp, 3);
- carry = tcg_temp_new_i64();
- if (cmp.is_64) {
- tcg_gen_setcond_i64(cmp.cond, carry, cmp.u.s64.a, cmp.u.s64.b);
- } else {
- TCGv_i32 t = tcg_temp_new_i32();
- tcg_gen_setcond_i32(cmp.cond, t, cmp.u.s32.a, cmp.u.s32.b);
- tcg_gen_extu_i32_i64(carry, t);
- tcg_temp_free_i32(t);
+ switch (s->cc_op) {
+ case CC_OP_ADDU:
+ break;
+ default:
+ gen_op_calc_cc(s);
+ /* fall through */
+ case CC_OP_STATIC:
+ /* The carry flag is the msb of CC; compute into cc_src. */
+ tcg_gen_extu_i32_i64(cc_src, cc_op);
+ tcg_gen_shri_i64(cc_src, cc_src, 1);
+ break;
}
- free_compare(&cmp);
+}
+
+static DisasJumpType op_addc32(DisasContext *s, DisasOps *o)
+{
+ compute_carry(s);
+ tcg_gen_add_i64(o->out, o->in1, o->in2);
+ tcg_gen_add_i64(o->out, o->out, cc_src);
+ return DISAS_NEXT;
+}
+
+static DisasJumpType op_addc64(DisasContext *s, DisasOps *o)
+{
+ compute_carry(s);
+
+ TCGv_i64 zero = tcg_const_i64(0);
+ tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, zero);
+ tcg_gen_add2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero);
+ tcg_temp_free_i64(zero);
- tcg_gen_add_i64(o->out, o->out, carry);
- tcg_temp_free_i64(carry);
return DISAS_NEXT;
}
@@ -5216,16 +5222,6 @@ static void cout_addu64(DisasContext *s, DisasOps *o)
gen_op_update2_cc_i64(s, CC_OP_ADDU, cc_src, o->out);
}
-static void cout_addc32(DisasContext *s, DisasOps *o)
-{
- gen_op_update3_cc_i64(s, CC_OP_ADDC_32, o->in1, o->in2, o->out);
-}
-
-static void cout_addc64(DisasContext *s, DisasOps *o)
-{
- gen_op_update3_cc_i64(s, CC_OP_ADDC_64, o->in1, o->in2, o->out);
-}
-
static void cout_cmps32(DisasContext *s, DisasOps *o)
{
gen_op_update2_cc_i64(s, CC_OP_LTGT_32, o->in1, o->in2);
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index b9ca9aeff5..d9e65a0380 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -82,10 +82,10 @@
C(0xcc0a, ALSIH, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, addu32)
C(0xcc0b, ALSIHN, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, 0)
/* ADD LOGICAL WITH CARRY */
- C(0xb998, ALCR, RRE, Z, r1, r2, new, r1_32, addc, addc32)
- C(0xb988, ALCGR, RRE, Z, r1, r2, r1, 0, addc, addc64)
- C(0xe398, ALC, RXY_a, Z, r1, m2_32u, new, r1_32, addc, addc32)
- C(0xe388, ALCG, RXY_a, Z, r1, m2_64, r1, 0, addc, addc64)
+ C(0xb998, ALCR, RRE, Z, r1_32u, r2_32u, new, r1_32, addc32, addu32)
+ C(0xb988, ALCGR, RRE, Z, r1, r2, r1, 0, addc64, addu64)
+ C(0xe398, ALC, RXY_a, Z, r1_32u, m2_32u, new, r1_32, addc32, addu32)
+ C(0xe388, ALCG, RXY_a, Z, r1, m2_64, r1, 0, addc64, addu64)
/* AND */
C(0x1400, NR, RR_a, Z, r1, r2, new, r1_32, and, nz32)
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 2/4] target/s390x: Improve ADD LOGICAL WITH CARRY
2020-10-17 2:28 ` [PATCH 2/4] target/s390x: Improve ADD LOGICAL WITH CARRY Richard Henderson
@ 2020-10-20 14:12 ` David Hildenbrand
0 siblings, 0 replies; 11+ messages in thread
From: David Hildenbrand @ 2020-10-20 14:12 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 17.10.20 04:28, Richard Henderson wrote:
> Now that ADD LOGICAL outputs carry, we can use that as input directly.
> It also means we can re-use CC_OP_ADDU and produce an output carry
> directly from ADD LOGICAL WITH CARRY.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/s390x/internal.h | 2 --
> target/s390x/cc_helper.c | 26 ---------------
> target/s390x/helper.c | 2 --
> target/s390x/translate.c | 66 ++++++++++++++++++--------------------
> target/s390x/insn-data.def | 8 ++---
> 5 files changed, 35 insertions(+), 69 deletions(-)
>
> diff --git a/target/s390x/internal.h b/target/s390x/internal.h
> index 55c5442102..f5f3ae063e 100644
> --- a/target/s390x/internal.h
> +++ b/target/s390x/internal.h
> @@ -170,7 +170,6 @@ enum cc_op {
> CC_OP_LTGT0_64, /* signed less/greater than 0 (64bit) */
>
> CC_OP_ADD_64, /* overflow on add (64bit) */
> - CC_OP_ADDC_64, /* overflow on unsigned add-carry (64bit) */
> CC_OP_SUB_64, /* overflow on subtraction (64bit) */
> CC_OP_SUBU_64, /* overflow on unsigned subtraction (64bit) */
> CC_OP_SUBB_64, /* overflow on unsigned sub-borrow (64bit) */
> @@ -179,7 +178,6 @@ enum cc_op {
> CC_OP_MULS_64, /* overflow on signed multiply (64bit) */
>
> CC_OP_ADD_32, /* overflow on add (32bit) */
> - CC_OP_ADDC_32, /* overflow on unsigned add-carry (32bit) */
> CC_OP_SUB_32, /* overflow on subtraction (32bit) */
> CC_OP_SUBU_32, /* overflow on unsigned subtraction (32bit) */
> CC_OP_SUBB_32, /* overflow on unsigned sub-borrow (32bit) */
> diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
> index 59da4d1cc2..cd2c5c4b39 100644
> --- a/target/s390x/cc_helper.c
> +++ b/target/s390x/cc_helper.c
> @@ -144,16 +144,6 @@ static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
> }
> }
>
> -static uint32_t cc_calc_addc_64(uint64_t a1, uint64_t a2, uint64_t ar)
> -{
> - /* Recover a2 + carry_in. */
> - uint64_t a2c = ar - a1;
> - /* Check for a2+carry_in overflow, then a1+a2c overflow. */
> - int carry_out = (a2c < a2) || (ar < a1);
> -
> - return (ar != 0) + 2 * carry_out;
> -}
> -
> static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar)
> {
> if ((a1 > 0 && a2 < 0 && ar < 0) || (a1 < 0 && a2 > 0 && ar > 0)) {
> @@ -240,16 +230,6 @@ static uint32_t cc_calc_add_32(int32_t a1, int32_t a2, int32_t ar)
> }
> }
>
> -static uint32_t cc_calc_addc_32(uint32_t a1, uint32_t a2, uint32_t ar)
> -{
> - /* Recover a2 + carry_in. */
> - uint32_t a2c = ar - a1;
> - /* Check for a2+carry_in overflow, then a1+a2c overflow. */
> - int carry_out = (a2c < a2) || (ar < a1);
> -
> - return (ar != 0) + 2 * carry_out;
> -}
> -
> static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar)
> {
> if ((a1 > 0 && a2 < 0 && ar < 0) || (a1 < 0 && a2 > 0 && ar > 0)) {
> @@ -485,9 +465,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
> case CC_OP_ADD_64:
> r = cc_calc_add_64(src, dst, vr);
> break;
> - case CC_OP_ADDC_64:
> - r = cc_calc_addc_64(src, dst, vr);
> - break;
> case CC_OP_SUB_64:
> r = cc_calc_sub_64(src, dst, vr);
> break;
> @@ -513,9 +490,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
> case CC_OP_ADD_32:
> r = cc_calc_add_32(src, dst, vr);
> break;
> - case CC_OP_ADDC_32:
> - r = cc_calc_addc_32(src, dst, vr);
> - break;
> case CC_OP_SUB_32:
> r = cc_calc_sub_32(src, dst, vr);
> break;
> diff --git a/target/s390x/helper.c b/target/s390x/helper.c
> index db87a62a57..4f4561bc64 100644
> --- a/target/s390x/helper.c
> +++ b/target/s390x/helper.c
> @@ -403,14 +403,12 @@ const char *cc_name(enum cc_op cc_op)
> [CC_OP_LTGT0_32] = "CC_OP_LTGT0_32",
> [CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
> [CC_OP_ADD_64] = "CC_OP_ADD_64",
> - [CC_OP_ADDC_64] = "CC_OP_ADDC_64",
> [CC_OP_SUB_64] = "CC_OP_SUB_64",
> [CC_OP_SUBU_64] = "CC_OP_SUBU_64",
> [CC_OP_SUBB_64] = "CC_OP_SUBB_64",
> [CC_OP_ABS_64] = "CC_OP_ABS_64",
> [CC_OP_NABS_64] = "CC_OP_NABS_64",
> [CC_OP_ADD_32] = "CC_OP_ADD_32",
> - [CC_OP_ADDC_32] = "CC_OP_ADDC_32",
> [CC_OP_SUB_32] = "CC_OP_SUB_32",
> [CC_OP_SUBU_32] = "CC_OP_SUBU_32",
> [CC_OP_SUBB_32] = "CC_OP_SUBB_32",
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index 9bf4c14f66..570b3c88c8 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -600,12 +600,10 @@ static void gen_op_calc_cc(DisasContext *s)
> dummy = tcg_const_i64(0);
> /* FALLTHRU */
> case CC_OP_ADD_64:
> - case CC_OP_ADDC_64:
> case CC_OP_SUB_64:
> case CC_OP_SUBU_64:
> case CC_OP_SUBB_64:
> case CC_OP_ADD_32:
> - case CC_OP_ADDC_32:
> case CC_OP_SUB_32:
> case CC_OP_SUBU_32:
> case CC_OP_SUBB_32:
> @@ -665,12 +663,10 @@ static void gen_op_calc_cc(DisasContext *s)
> gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, dummy);
> break;
> case CC_OP_ADD_64:
> - case CC_OP_ADDC_64:
> case CC_OP_SUB_64:
> case CC_OP_SUBU_64:
> case CC_OP_SUBB_64:
> case CC_OP_ADD_32:
> - case CC_OP_ADDC_32:
> case CC_OP_SUB_32:
> case CC_OP_SUBU_32:
> case CC_OP_SUBB_32:
> @@ -1442,30 +1438,40 @@ static DisasJumpType op_addu64(DisasContext *s, DisasOps *o)
> return DISAS_NEXT;
> }
>
> -static DisasJumpType op_addc(DisasContext *s, DisasOps *o)
> +/* Compute carry into cc_src. */
> +static void compute_carry(DisasContext *s)
> {
> - DisasCompare cmp;
> - TCGv_i64 carry;
> -
> - tcg_gen_add_i64(o->out, o->in1, o->in2);
> -
> - /* The carry flag is the msb of CC, therefore the branch mask that would
> - create that comparison is 3. Feeding the generated comparison to
> - setcond produces the carry flag that we desire. */
> - disas_jcc(s, &cmp, 3);
> - carry = tcg_temp_new_i64();
> - if (cmp.is_64) {
> - tcg_gen_setcond_i64(cmp.cond, carry, cmp.u.s64.a, cmp.u.s64.b);
> - } else {
> - TCGv_i32 t = tcg_temp_new_i32();
> - tcg_gen_setcond_i32(cmp.cond, t, cmp.u.s32.a, cmp.u.s32.b);
> - tcg_gen_extu_i32_i64(carry, t);
> - tcg_temp_free_i32(t);
> + switch (s->cc_op) {
> + case CC_OP_ADDU:
Can you add a comment that we have the carry right in our hands already?
Took me a while to figure that out.
Apart from that
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 3/4] target/s390x: Improve cc computation for SUBTRACT LOGICAL
2020-10-17 2:28 [PATCH 0/4] target/s390x: Improve carry computation Richard Henderson
2020-10-17 2:28 ` [PATCH 1/4] target/s390x: Improve cc computation for ADD LOGICAL Richard Henderson
2020-10-17 2:28 ` [PATCH 2/4] target/s390x: Improve ADD LOGICAL WITH CARRY Richard Henderson
@ 2020-10-17 2:29 ` Richard Henderson
2020-10-20 14:14 ` David Hildenbrand
2020-10-17 2:29 ` [PATCH 4/4] target/s390x: Improve SUB LOGICAL WITH BORROW Richard Henderson
3 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2020-10-17 2:29 UTC (permalink / raw)
To: qemu-devel; +Cc: david
The resulting cc is only dependent on the result and the
borrow-out. So save those things rather than the inputs.
Borrow-out for 64-bit inputs is had via tcg_gen_sub2_i64 directly
into cc_src. Borrow-out for 32-bit inputs is had via extraction
from a normal 64-bit sub (with zero-extended inputs).
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/internal.h | 3 +--
target/s390x/cc_helper.c | 40 ++++++---------------------
target/s390x/helper.c | 3 +--
target/s390x/translate.c | 55 +++++++++++++++-----------------------
target/s390x/insn-data.def | 24 ++++++++---------
5 files changed, 43 insertions(+), 82 deletions(-)
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index f5f3ae063e..4077047494 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -161,6 +161,7 @@ enum cc_op {
CC_OP_NZ, /* env->cc_dst != 0 */
CC_OP_ADDU, /* dst != 0, src = carry out (0,1) */
+ CC_OP_SUBU, /* dst != 0, src = borrow out (0,-1) */
CC_OP_LTGT_32, /* signed less/greater than (32bit) */
CC_OP_LTGT_64, /* signed less/greater than (64bit) */
@@ -171,7 +172,6 @@ enum cc_op {
CC_OP_ADD_64, /* overflow on add (64bit) */
CC_OP_SUB_64, /* overflow on subtraction (64bit) */
- CC_OP_SUBU_64, /* overflow on unsigned subtraction (64bit) */
CC_OP_SUBB_64, /* overflow on unsigned sub-borrow (64bit) */
CC_OP_ABS_64, /* sign eval on abs (64bit) */
CC_OP_NABS_64, /* sign eval on nabs (64bit) */
@@ -179,7 +179,6 @@ enum cc_op {
CC_OP_ADD_32, /* overflow on add (32bit) */
CC_OP_SUB_32, /* overflow on subtraction (32bit) */
- CC_OP_SUBU_32, /* overflow on unsigned subtraction (32bit) */
CC_OP_SUBB_32, /* overflow on unsigned sub-borrow (32bit) */
CC_OP_ABS_32, /* sign eval on abs (64bit) */
CC_OP_NABS_32, /* sign eval on nabs (64bit) */
diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
index cd2c5c4b39..c7728d1225 100644
--- a/target/s390x/cc_helper.c
+++ b/target/s390x/cc_helper.c
@@ -129,6 +129,11 @@ static uint32_t cc_calc_addu(uint64_t carry_out, uint64_t result)
return (result != 0) + 2 * carry_out;
}
+static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result)
+{
+ return cc_calc_addu(borrow_out + 1, result);
+}
+
static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
{
if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
@@ -159,19 +164,6 @@ static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar)
}
}
-static uint32_t cc_calc_subu_64(uint64_t a1, uint64_t a2, uint64_t ar)
-{
- if (ar == 0) {
- return 2;
- } else {
- if (a2 > a1) {
- return 1;
- } else {
- return 3;
- }
- }
-}
-
static uint32_t cc_calc_subb_64(uint64_t a1, uint64_t a2, uint64_t ar)
{
int borrow_out;
@@ -245,19 +237,6 @@ static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar)
}
}
-static uint32_t cc_calc_subu_32(uint32_t a1, uint32_t a2, uint32_t ar)
-{
- if (ar == 0) {
- return 2;
- } else {
- if (a2 > a1) {
- return 1;
- } else {
- return 3;
- }
- }
-}
-
static uint32_t cc_calc_subb_32(uint32_t a1, uint32_t a2, uint32_t ar)
{
int borrow_out;
@@ -462,15 +441,15 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_ADDU:
r = cc_calc_addu(src, dst);
break;
+ case CC_OP_SUBU:
+ r = cc_calc_subu(src, dst);
+ break;
case CC_OP_ADD_64:
r = cc_calc_add_64(src, dst, vr);
break;
case CC_OP_SUB_64:
r = cc_calc_sub_64(src, dst, vr);
break;
- case CC_OP_SUBU_64:
- r = cc_calc_subu_64(src, dst, vr);
- break;
case CC_OP_SUBB_64:
r = cc_calc_subb_64(src, dst, vr);
break;
@@ -493,9 +472,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_SUB_32:
r = cc_calc_sub_32(src, dst, vr);
break;
- case CC_OP_SUBU_32:
- r = cc_calc_subu_32(src, dst, vr);
- break;
case CC_OP_SUBB_32:
r = cc_calc_subb_32(src, dst, vr);
break;
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index 4f4561bc64..fa3aa500e5 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -396,6 +396,7 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_STATIC] = "CC_OP_STATIC",
[CC_OP_NZ] = "CC_OP_NZ",
[CC_OP_ADDU] = "CC_OP_ADDU",
+ [CC_OP_SUBU] = "CC_OP_SUBU",
[CC_OP_LTGT_32] = "CC_OP_LTGT_32",
[CC_OP_LTGT_64] = "CC_OP_LTGT_64",
[CC_OP_LTUGTU_32] = "CC_OP_LTUGTU_32",
@@ -404,13 +405,11 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
[CC_OP_ADD_64] = "CC_OP_ADD_64",
[CC_OP_SUB_64] = "CC_OP_SUB_64",
- [CC_OP_SUBU_64] = "CC_OP_SUBU_64",
[CC_OP_SUBB_64] = "CC_OP_SUBB_64",
[CC_OP_ABS_64] = "CC_OP_ABS_64",
[CC_OP_NABS_64] = "CC_OP_NABS_64",
[CC_OP_ADD_32] = "CC_OP_ADD_32",
[CC_OP_SUB_32] = "CC_OP_SUB_32",
- [CC_OP_SUBU_32] = "CC_OP_SUBU_32",
[CC_OP_SUBB_32] = "CC_OP_SUBB_32",
[CC_OP_ABS_32] = "CC_OP_ABS_32",
[CC_OP_NABS_32] = "CC_OP_NABS_32",
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 570b3c88c8..48494a86cc 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -601,11 +601,9 @@ static void gen_op_calc_cc(DisasContext *s)
/* FALLTHRU */
case CC_OP_ADD_64:
case CC_OP_SUB_64:
- case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
case CC_OP_SUB_32:
- case CC_OP_SUBU_32:
case CC_OP_SUBB_32:
local_cc_op = tcg_const_i32(s->cc_op);
break;
@@ -656,6 +654,7 @@ static void gen_op_calc_cc(DisasContext *s)
case CC_OP_TM_64:
case CC_OP_SLA_32:
case CC_OP_SLA_64:
+ case CC_OP_SUBU:
case CC_OP_NZ_F128:
case CC_OP_VC:
case CC_OP_MULS_64:
@@ -664,11 +663,9 @@ static void gen_op_calc_cc(DisasContext *s)
break;
case CC_OP_ADD_64:
case CC_OP_SUB_64:
- case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
case CC_OP_SUB_32:
- case CC_OP_SUBU_32:
case CC_OP_SUBB_32:
/* 3 arguments */
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, cc_vr);
@@ -843,6 +840,7 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
break;
case CC_OP_ADDU:
+ case CC_OP_SUBU:
switch (mask) {
case 8 | 2: /* result == 0 */
cond = TCG_COND_EQ;
@@ -850,33 +848,11 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
case 4 | 1: /* result != 0 */
cond = TCG_COND_NE;
break;
- case 8 | 4: /* no carry */
- cond = TCG_COND_EQ;
+ case 8 | 4: /* !carry (borrow) */
+ cond = old_cc_op == CC_OP_ADDU ? TCG_COND_EQ : TCG_COND_NE;
break;
- case 2 | 1: /* carry */
- cond = TCG_COND_NE;
- break;
- default:
- goto do_dynamic;
- }
- account_inline_branch(s, old_cc_op);
- break;
-
- case CC_OP_SUBU_32:
- case CC_OP_SUBU_64:
- /* Note that CC=0 is impossible; treat it as dont-care. */
- switch (mask & 7) {
- case 2: /* zero -> op1 == op2 */
- cond = TCG_COND_EQ;
- break;
- case 4 | 1: /* !zero -> op1 != op2 */
- cond = TCG_COND_NE;
- break;
- case 4: /* borrow (!carry) -> op1 < op2 */
- cond = TCG_COND_LTU;
- break;
- case 2 | 1: /* !borrow (carry) -> op1 >= op2 */
- cond = TCG_COND_GEU;
+ case 2 | 1: /* carry (!borrow) */
+ cond = old_cc_op == CC_OP_ADDU ? TCG_COND_NE : TCG_COND_EQ;
break;
default:
goto do_dynamic;
@@ -911,7 +887,6 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
break;
case CC_OP_LTGT_32:
case CC_OP_LTUGTU_32:
- case CC_OP_SUBU_32:
c->is_64 = false;
c->u.s32.a = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(c->u.s32.a, cc_src);
@@ -928,7 +903,6 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
break;
case CC_OP_LTGT_64:
case CC_OP_LTUGTU_64:
- case CC_OP_SUBU_64:
c->u.s64.a = cc_src;
c->u.s64.b = cc_dst;
c->g1 = c->g2 = true;
@@ -943,6 +917,7 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
break;
case CC_OP_ADDU:
+ case CC_OP_SUBU:
c->is_64 = true;
c->u.s64.b = tcg_const_i64(0);
c->g1 = true;
@@ -1444,6 +1419,9 @@ static void compute_carry(DisasContext *s)
switch (s->cc_op) {
case CC_OP_ADDU:
break;
+ case CC_OP_SUBU:
+ tcg_gen_addi_i64(cc_src, cc_src, 1);
+ break;
default:
gen_op_calc_cc(s);
/* fall through */
@@ -4759,6 +4737,13 @@ static DisasJumpType op_sub(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
+static DisasJumpType op_subu64(DisasContext *s, DisasOps *o)
+{
+ tcg_gen_movi_i64(cc_src, 0);
+ tcg_gen_sub2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src);
+ return DISAS_NEXT;
+}
+
static DisasJumpType op_subb(DisasContext *s, DisasOps *o)
{
DisasCompare cmp;
@@ -5310,12 +5295,14 @@ static void cout_subs64(DisasContext *s, DisasOps *o)
static void cout_subu32(DisasContext *s, DisasOps *o)
{
- gen_op_update3_cc_i64(s, CC_OP_SUBU_32, o->in1, o->in2, o->out);
+ tcg_gen_sari_i64(cc_src, o->out, 32);
+ tcg_gen_ext32u_i64(cc_dst, o->out);
+ gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, cc_dst);
}
static void cout_subu64(DisasContext *s, DisasOps *o)
{
- gen_op_update3_cc_i64(s, CC_OP_SUBU_64, o->in1, o->in2, o->out);
+ gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, o->out);
}
static void cout_subb32(DisasContext *s, DisasOps *o)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index d9e65a0380..65ee998484 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -900,21 +900,21 @@
C(0xb9c9, SHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, sub, subs32)
C(0xb9d9, SHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, sub, subs32)
/* SUBTRACT LOGICAL */
- C(0x1f00, SLR, RR_a, Z, r1, r2, new, r1_32, sub, subu32)
- C(0xb9fb, SLRK, RRF_a, DO, r2, r3, new, r1_32, sub, subu32)
- C(0x5f00, SL, RX_a, Z, r1, m2_32u, new, r1_32, sub, subu32)
- C(0xe35f, SLY, RXY_a, LD, r1, m2_32u, new, r1_32, sub, subu32)
- C(0xb90b, SLGR, RRE, Z, r1, r2, r1, 0, sub, subu64)
- C(0xb91b, SLGFR, RRE, Z, r1, r2_32u, r1, 0, sub, subu64)
- C(0xb9eb, SLGRK, RRF_a, DO, r2, r3, r1, 0, sub, subu64)
- C(0xe30b, SLG, RXY_a, Z, r1, m2_64, r1, 0, sub, subu64)
- C(0xe31b, SLGF, RXY_a, Z, r1, m2_32u, r1, 0, sub, subu64)
+ C(0x1f00, SLR, RR_a, Z, r1_32u, r2_32u, new, r1_32, sub, subu32)
+ C(0xb9fb, SLRK, RRF_a, DO, r2_32u, r3_32u, new, r1_32, sub, subu32)
+ C(0x5f00, SL, RX_a, Z, r1_32u, m2_32u, new, r1_32, sub, subu32)
+ C(0xe35f, SLY, RXY_a, LD, r1_32u, m2_32u, new, r1_32, sub, subu32)
+ C(0xb90b, SLGR, RRE, Z, r1, r2, r1, 0, subu64, subu64)
+ C(0xb91b, SLGFR, RRE, Z, r1, r2_32u, r1, 0, subu64, subu64)
+ C(0xb9eb, SLGRK, RRF_a, DO, r2, r3, r1, 0, subu64, subu64)
+ C(0xe30b, SLG, RXY_a, Z, r1, m2_64, r1, 0, subu64, subu64)
+ C(0xe31b, SLGF, RXY_a, Z, r1, m2_32u, r1, 0, subu64, subu64)
/* SUBTRACT LOCICAL HIGH */
C(0xb9cb, SLHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, sub, subu32)
- C(0xb9db, SLHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, sub, subu32)
+ C(0xb9db, SLHHLR, RRF_a, HW, r2_sr32, r3_32u, new, r1_32h, sub, subu32)
/* SUBTRACT LOGICAL IMMEDIATE */
- C(0xc205, SLFI, RIL_a, EI, r1, i2_32u, new, r1_32, sub, subu32)
- C(0xc204, SLGFI, RIL_a, EI, r1, i2_32u, r1, 0, sub, subu64)
+ C(0xc205, SLFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, sub, subu32)
+ C(0xc204, SLGFI, RIL_a, EI, r1, i2_32u, r1, 0, subu64, subu64)
/* SUBTRACT LOGICAL WITH BORROW */
C(0xb999, SLBR, RRE, Z, r1, r2, new, r1_32, subb, subb32)
C(0xb989, SLBGR, RRE, Z, r1, r2, r1, 0, subb, subb64)
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 3/4] target/s390x: Improve cc computation for SUBTRACT LOGICAL
2020-10-17 2:29 ` [PATCH 3/4] target/s390x: Improve cc computation for SUBTRACT LOGICAL Richard Henderson
@ 2020-10-20 14:14 ` David Hildenbrand
0 siblings, 0 replies; 11+ messages in thread
From: David Hildenbrand @ 2020-10-20 14:14 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 17.10.20 04:29, Richard Henderson wrote:
> The resulting cc is only dependent on the result and the
> borrow-out. So save those things rather than the inputs.
>
> Borrow-out for 64-bit inputs is had via tcg_gen_sub2_i64 directly
> into cc_src. Borrow-out for 32-bit inputs is had via extraction
> from a normal 64-bit sub (with zero-extended inputs).
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/s390x/internal.h | 3 +--
> target/s390x/cc_helper.c | 40 ++++++---------------------
> target/s390x/helper.c | 3 +--
> target/s390x/translate.c | 55 +++++++++++++++-----------------------
> target/s390x/insn-data.def | 24 ++++++++---------
> 5 files changed, 43 insertions(+), 82 deletions(-)
>
> diff --git a/target/s390x/internal.h b/target/s390x/internal.h
> index f5f3ae063e..4077047494 100644
> --- a/target/s390x/internal.h
> +++ b/target/s390x/internal.h
> @@ -161,6 +161,7 @@ enum cc_op {
>
> CC_OP_NZ, /* env->cc_dst != 0 */
> CC_OP_ADDU, /* dst != 0, src = carry out (0,1) */
> + CC_OP_SUBU, /* dst != 0, src = borrow out (0,-1) */
>
> CC_OP_LTGT_32, /* signed less/greater than (32bit) */
> CC_OP_LTGT_64, /* signed less/greater than (64bit) */
> @@ -171,7 +172,6 @@ enum cc_op {
>
> CC_OP_ADD_64, /* overflow on add (64bit) */
> CC_OP_SUB_64, /* overflow on subtraction (64bit) */
> - CC_OP_SUBU_64, /* overflow on unsigned subtraction (64bit) */
> CC_OP_SUBB_64, /* overflow on unsigned sub-borrow (64bit) */
> CC_OP_ABS_64, /* sign eval on abs (64bit) */
> CC_OP_NABS_64, /* sign eval on nabs (64bit) */
> @@ -179,7 +179,6 @@ enum cc_op {
>
> CC_OP_ADD_32, /* overflow on add (32bit) */
> CC_OP_SUB_32, /* overflow on subtraction (32bit) */
> - CC_OP_SUBU_32, /* overflow on unsigned subtraction (32bit) */
> CC_OP_SUBB_32, /* overflow on unsigned sub-borrow (32bit) */
> CC_OP_ABS_32, /* sign eval on abs (64bit) */
> CC_OP_NABS_32, /* sign eval on nabs (64bit) */
> diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
> index cd2c5c4b39..c7728d1225 100644
> --- a/target/s390x/cc_helper.c
> +++ b/target/s390x/cc_helper.c
> @@ -129,6 +129,11 @@ static uint32_t cc_calc_addu(uint64_t carry_out, uint64_t result)
> return (result != 0) + 2 * carry_out;
> }
>
> +static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result)
> +{
> + return cc_calc_addu(borrow_out + 1, result);
> +}
> +
> static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
> {
> if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
> @@ -159,19 +164,6 @@ static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar)
> }
> }
>
> -static uint32_t cc_calc_subu_64(uint64_t a1, uint64_t a2, uint64_t ar)
> -{
> - if (ar == 0) {
> - return 2;
> - } else {
> - if (a2 > a1) {
> - return 1;
> - } else {
> - return 3;
> - }
> - }
> -}
> -
> static uint32_t cc_calc_subb_64(uint64_t a1, uint64_t a2, uint64_t ar)
> {
> int borrow_out;
> @@ -245,19 +237,6 @@ static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar)
> }
> }
>
> -static uint32_t cc_calc_subu_32(uint32_t a1, uint32_t a2, uint32_t ar)
> -{
> - if (ar == 0) {
> - return 2;
> - } else {
> - if (a2 > a1) {
> - return 1;
> - } else {
> - return 3;
> - }
> - }
> -}
> -
> static uint32_t cc_calc_subb_32(uint32_t a1, uint32_t a2, uint32_t ar)
> {
> int borrow_out;
> @@ -462,15 +441,15 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
> case CC_OP_ADDU:
> r = cc_calc_addu(src, dst);
> break;
> + case CC_OP_SUBU:
> + r = cc_calc_subu(src, dst);
> + break;
> case CC_OP_ADD_64:
> r = cc_calc_add_64(src, dst, vr);
> break;
> case CC_OP_SUB_64:
> r = cc_calc_sub_64(src, dst, vr);
> break;
> - case CC_OP_SUBU_64:
> - r = cc_calc_subu_64(src, dst, vr);
> - break;
> case CC_OP_SUBB_64:
> r = cc_calc_subb_64(src, dst, vr);
> break;
> @@ -493,9 +472,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
> case CC_OP_SUB_32:
> r = cc_calc_sub_32(src, dst, vr);
> break;
> - case CC_OP_SUBU_32:
> - r = cc_calc_subu_32(src, dst, vr);
> - break;
> case CC_OP_SUBB_32:
> r = cc_calc_subb_32(src, dst, vr);
> break;
> diff --git a/target/s390x/helper.c b/target/s390x/helper.c
> index 4f4561bc64..fa3aa500e5 100644
> --- a/target/s390x/helper.c
> +++ b/target/s390x/helper.c
> @@ -396,6 +396,7 @@ const char *cc_name(enum cc_op cc_op)
> [CC_OP_STATIC] = "CC_OP_STATIC",
> [CC_OP_NZ] = "CC_OP_NZ",
> [CC_OP_ADDU] = "CC_OP_ADDU",
> + [CC_OP_SUBU] = "CC_OP_SUBU",
> [CC_OP_LTGT_32] = "CC_OP_LTGT_32",
> [CC_OP_LTGT_64] = "CC_OP_LTGT_64",
> [CC_OP_LTUGTU_32] = "CC_OP_LTUGTU_32",
> @@ -404,13 +405,11 @@ const char *cc_name(enum cc_op cc_op)
> [CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
> [CC_OP_ADD_64] = "CC_OP_ADD_64",
> [CC_OP_SUB_64] = "CC_OP_SUB_64",
> - [CC_OP_SUBU_64] = "CC_OP_SUBU_64",
> [CC_OP_SUBB_64] = "CC_OP_SUBB_64",
> [CC_OP_ABS_64] = "CC_OP_ABS_64",
> [CC_OP_NABS_64] = "CC_OP_NABS_64",
> [CC_OP_ADD_32] = "CC_OP_ADD_32",
> [CC_OP_SUB_32] = "CC_OP_SUB_32",
> - [CC_OP_SUBU_32] = "CC_OP_SUBU_32",
> [CC_OP_SUBB_32] = "CC_OP_SUBB_32",
> [CC_OP_ABS_32] = "CC_OP_ABS_32",
> [CC_OP_NABS_32] = "CC_OP_NABS_32",
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index 570b3c88c8..48494a86cc 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -601,11 +601,9 @@ static void gen_op_calc_cc(DisasContext *s)
> /* FALLTHRU */
> case CC_OP_ADD_64:
> case CC_OP_SUB_64:
> - case CC_OP_SUBU_64:
> case CC_OP_SUBB_64:
> case CC_OP_ADD_32:
> case CC_OP_SUB_32:
> - case CC_OP_SUBU_32:
> case CC_OP_SUBB_32:
> local_cc_op = tcg_const_i32(s->cc_op);
> break;
> @@ -656,6 +654,7 @@ static void gen_op_calc_cc(DisasContext *s)
> case CC_OP_TM_64:
> case CC_OP_SLA_32:
> case CC_OP_SLA_64:
> + case CC_OP_SUBU:
> case CC_OP_NZ_F128:
> case CC_OP_VC:
> case CC_OP_MULS_64:
> @@ -664,11 +663,9 @@ static void gen_op_calc_cc(DisasContext *s)
> break;
> case CC_OP_ADD_64:
> case CC_OP_SUB_64:
> - case CC_OP_SUBU_64:
> case CC_OP_SUBB_64:
> case CC_OP_ADD_32:
> case CC_OP_SUB_32:
> - case CC_OP_SUBU_32:
> case CC_OP_SUBB_32:
> /* 3 arguments */
> gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, cc_vr);
> @@ -843,6 +840,7 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
> break;
>
> case CC_OP_ADDU:
> + case CC_OP_SUBU:
> switch (mask) {
> case 8 | 2: /* result == 0 */
> cond = TCG_COND_EQ;
> @@ -850,33 +848,11 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
> case 4 | 1: /* result != 0 */
> cond = TCG_COND_NE;
> break;
> - case 8 | 4: /* no carry */
> - cond = TCG_COND_EQ;
> + case 8 | 4: /* !carry (borrow) */
> + cond = old_cc_op == CC_OP_ADDU ? TCG_COND_EQ : TCG_COND_NE;
> break;
> - case 2 | 1: /* carry */
> - cond = TCG_COND_NE;
> - break;
> - default:
> - goto do_dynamic;
> - }
> - account_inline_branch(s, old_cc_op);
> - break;
> -
> - case CC_OP_SUBU_32:
> - case CC_OP_SUBU_64:
> - /* Note that CC=0 is impossible; treat it as dont-care. */
> - switch (mask & 7) {
> - case 2: /* zero -> op1 == op2 */
> - cond = TCG_COND_EQ;
> - break;
> - case 4 | 1: /* !zero -> op1 != op2 */
> - cond = TCG_COND_NE;
> - break;
> - case 4: /* borrow (!carry) -> op1 < op2 */
> - cond = TCG_COND_LTU;
> - break;
> - case 2 | 1: /* !borrow (carry) -> op1 >= op2 */
> - cond = TCG_COND_GEU;
> + case 2 | 1: /* carry (!borrow) */
> + cond = old_cc_op == CC_OP_ADDU ? TCG_COND_NE : TCG_COND_EQ;
> break;
> default:
> goto do_dynamic;
> @@ -911,7 +887,6 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
> break;
> case CC_OP_LTGT_32:
> case CC_OP_LTUGTU_32:
> - case CC_OP_SUBU_32:
> c->is_64 = false;
> c->u.s32.a = tcg_temp_new_i32();
> tcg_gen_extrl_i64_i32(c->u.s32.a, cc_src);
> @@ -928,7 +903,6 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
> break;
> case CC_OP_LTGT_64:
> case CC_OP_LTUGTU_64:
> - case CC_OP_SUBU_64:
> c->u.s64.a = cc_src;
> c->u.s64.b = cc_dst;
> c->g1 = c->g2 = true;
> @@ -943,6 +917,7 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
> break;
>
> case CC_OP_ADDU:
> + case CC_OP_SUBU:
> c->is_64 = true;
> c->u.s64.b = tcg_const_i64(0);
> c->g1 = true;
> @@ -1444,6 +1419,9 @@ static void compute_carry(DisasContext *s)
> switch (s->cc_op) {
> case CC_OP_ADDU:
> break;
> + case CC_OP_SUBU:
> + tcg_gen_addi_i64(cc_src, cc_src, 1);
> + break;
> default:
> gen_op_calc_cc(s);
> /* fall through */
> @@ -4759,6 +4737,13 @@ static DisasJumpType op_sub(DisasContext *s, DisasOps *o)
> return DISAS_NEXT;
> }
>
> +static DisasJumpType op_subu64(DisasContext *s, DisasOps *o)
> +{
> + tcg_gen_movi_i64(cc_src, 0);
> + tcg_gen_sub2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src);
> + return DISAS_NEXT;
> +}
> +
> static DisasJumpType op_subb(DisasContext *s, DisasOps *o)
> {
> DisasCompare cmp;
> @@ -5310,12 +5295,14 @@ static void cout_subs64(DisasContext *s, DisasOps *o)
>
> static void cout_subu32(DisasContext *s, DisasOps *o)
> {
> - gen_op_update3_cc_i64(s, CC_OP_SUBU_32, o->in1, o->in2, o->out);
> + tcg_gen_sari_i64(cc_src, o->out, 32);
> + tcg_gen_ext32u_i64(cc_dst, o->out);
> + gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, cc_dst);
> }
>
> static void cout_subu64(DisasContext *s, DisasOps *o)
> {
> - gen_op_update3_cc_i64(s, CC_OP_SUBU_64, o->in1, o->in2, o->out);
> + gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, o->out);
> }
>
> static void cout_subb32(DisasContext *s, DisasOps *o)
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index d9e65a0380..65ee998484 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -900,21 +900,21 @@
> C(0xb9c9, SHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, sub, subs32)
> C(0xb9d9, SHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, sub, subs32)
> /* SUBTRACT LOGICAL */
> - C(0x1f00, SLR, RR_a, Z, r1, r2, new, r1_32, sub, subu32)
> - C(0xb9fb, SLRK, RRF_a, DO, r2, r3, new, r1_32, sub, subu32)
> - C(0x5f00, SL, RX_a, Z, r1, m2_32u, new, r1_32, sub, subu32)
> - C(0xe35f, SLY, RXY_a, LD, r1, m2_32u, new, r1_32, sub, subu32)
> - C(0xb90b, SLGR, RRE, Z, r1, r2, r1, 0, sub, subu64)
> - C(0xb91b, SLGFR, RRE, Z, r1, r2_32u, r1, 0, sub, subu64)
> - C(0xb9eb, SLGRK, RRF_a, DO, r2, r3, r1, 0, sub, subu64)
> - C(0xe30b, SLG, RXY_a, Z, r1, m2_64, r1, 0, sub, subu64)
> - C(0xe31b, SLGF, RXY_a, Z, r1, m2_32u, r1, 0, sub, subu64)
> + C(0x1f00, SLR, RR_a, Z, r1_32u, r2_32u, new, r1_32, sub, subu32)
> + C(0xb9fb, SLRK, RRF_a, DO, r2_32u, r3_32u, new, r1_32, sub, subu32)
> + C(0x5f00, SL, RX_a, Z, r1_32u, m2_32u, new, r1_32, sub, subu32)
> + C(0xe35f, SLY, RXY_a, LD, r1_32u, m2_32u, new, r1_32, sub, subu32)
> + C(0xb90b, SLGR, RRE, Z, r1, r2, r1, 0, subu64, subu64)
> + C(0xb91b, SLGFR, RRE, Z, r1, r2_32u, r1, 0, subu64, subu64)
> + C(0xb9eb, SLGRK, RRF_a, DO, r2, r3, r1, 0, subu64, subu64)
> + C(0xe30b, SLG, RXY_a, Z, r1, m2_64, r1, 0, subu64, subu64)
> + C(0xe31b, SLGF, RXY_a, Z, r1, m2_32u, r1, 0, subu64, subu64)
> /* SUBTRACT LOCICAL HIGH */
> C(0xb9cb, SLHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, sub, subu32)
> - C(0xb9db, SLHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, sub, subu32)
> + C(0xb9db, SLHHLR, RRF_a, HW, r2_sr32, r3_32u, new, r1_32h, sub, subu32)
> /* SUBTRACT LOGICAL IMMEDIATE */
> - C(0xc205, SLFI, RIL_a, EI, r1, i2_32u, new, r1_32, sub, subu32)
> - C(0xc204, SLGFI, RIL_a, EI, r1, i2_32u, r1, 0, sub, subu64)
> + C(0xc205, SLFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, sub, subu32)
> + C(0xc204, SLGFI, RIL_a, EI, r1, i2_32u, r1, 0, subu64, subu64)
> /* SUBTRACT LOGICAL WITH BORROW */
> C(0xb999, SLBR, RRE, Z, r1, r2, new, r1_32, subb, subb32)
> C(0xb989, SLBGR, RRE, Z, r1, r2, r1, 0, subb, subb64)
>
Same as patch #1
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 4/4] target/s390x: Improve SUB LOGICAL WITH BORROW
2020-10-17 2:28 [PATCH 0/4] target/s390x: Improve carry computation Richard Henderson
` (2 preceding siblings ...)
2020-10-17 2:29 ` [PATCH 3/4] target/s390x: Improve cc computation for SUBTRACT LOGICAL Richard Henderson
@ 2020-10-17 2:29 ` Richard Henderson
2020-10-20 14:17 ` David Hildenbrand
3 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2020-10-17 2:29 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Now that SUB LOGICAL outputs carry, we can use that as input directly.
It also means we can re-use CC_OP_SUBU and produce an output carry
directly from SUB LOGICAL WITH BORROW.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/internal.h | 2 --
target/s390x/cc_helper.c | 32 -----------------
target/s390x/helper.c | 2 --
target/s390x/translate.c | 74 ++++++++++++++++++++------------------
target/s390x/insn-data.def | 8 ++---
5 files changed, 44 insertions(+), 74 deletions(-)
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index 4077047494..11515bb617 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -172,14 +172,12 @@ enum cc_op {
CC_OP_ADD_64, /* overflow on add (64bit) */
CC_OP_SUB_64, /* overflow on subtraction (64bit) */
- CC_OP_SUBB_64, /* overflow on unsigned sub-borrow (64bit) */
CC_OP_ABS_64, /* sign eval on abs (64bit) */
CC_OP_NABS_64, /* sign eval on nabs (64bit) */
CC_OP_MULS_64, /* overflow on signed multiply (64bit) */
CC_OP_ADD_32, /* overflow on add (32bit) */
CC_OP_SUB_32, /* overflow on subtraction (32bit) */
- CC_OP_SUBB_32, /* overflow on unsigned sub-borrow (32bit) */
CC_OP_ABS_32, /* sign eval on abs (64bit) */
CC_OP_NABS_32, /* sign eval on nabs (64bit) */
CC_OP_MULS_32, /* overflow on signed multiply (32bit) */
diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
index c7728d1225..e7039d0d18 100644
--- a/target/s390x/cc_helper.c
+++ b/target/s390x/cc_helper.c
@@ -164,19 +164,6 @@ static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar)
}
}
-static uint32_t cc_calc_subb_64(uint64_t a1, uint64_t a2, uint64_t ar)
-{
- int borrow_out;
-
- if (ar != a1 - a2) { /* difference means borrow-in */
- borrow_out = (a2 >= a1);
- } else {
- borrow_out = (a2 > a1);
- }
-
- return (ar != 0) + 2 * !borrow_out;
-}
-
static uint32_t cc_calc_abs_64(int64_t dst)
{
if ((uint64_t)dst == 0x8000000000000000ULL) {
@@ -237,19 +224,6 @@ static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar)
}
}
-static uint32_t cc_calc_subb_32(uint32_t a1, uint32_t a2, uint32_t ar)
-{
- int borrow_out;
-
- if (ar != a1 - a2) { /* difference means borrow-in */
- borrow_out = (a2 >= a1);
- } else {
- borrow_out = (a2 > a1);
- }
-
- return (ar != 0) + 2 * !borrow_out;
-}
-
static uint32_t cc_calc_abs_32(int32_t dst)
{
if ((uint32_t)dst == 0x80000000UL) {
@@ -450,9 +424,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_SUB_64:
r = cc_calc_sub_64(src, dst, vr);
break;
- case CC_OP_SUBB_64:
- r = cc_calc_subb_64(src, dst, vr);
- break;
case CC_OP_ABS_64:
r = cc_calc_abs_64(dst);
break;
@@ -472,9 +443,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_SUB_32:
r = cc_calc_sub_32(src, dst, vr);
break;
- case CC_OP_SUBB_32:
- r = cc_calc_subb_32(src, dst, vr);
- break;
case CC_OP_ABS_32:
r = cc_calc_abs_32(dst);
break;
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index fa3aa500e5..7678994feb 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -405,12 +405,10 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
[CC_OP_ADD_64] = "CC_OP_ADD_64",
[CC_OP_SUB_64] = "CC_OP_SUB_64",
- [CC_OP_SUBB_64] = "CC_OP_SUBB_64",
[CC_OP_ABS_64] = "CC_OP_ABS_64",
[CC_OP_NABS_64] = "CC_OP_NABS_64",
[CC_OP_ADD_32] = "CC_OP_ADD_32",
[CC_OP_SUB_32] = "CC_OP_SUB_32",
- [CC_OP_SUBB_32] = "CC_OP_SUBB_32",
[CC_OP_ABS_32] = "CC_OP_ABS_32",
[CC_OP_NABS_32] = "CC_OP_NABS_32",
[CC_OP_COMP_32] = "CC_OP_COMP_32",
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 48494a86cc..0d8235a5fb 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -601,10 +601,8 @@ static void gen_op_calc_cc(DisasContext *s)
/* FALLTHRU */
case CC_OP_ADD_64:
case CC_OP_SUB_64:
- case CC_OP_SUBB_64:
case CC_OP_ADD_32:
case CC_OP_SUB_32:
- case CC_OP_SUBB_32:
local_cc_op = tcg_const_i32(s->cc_op);
break;
case CC_OP_CONST0:
@@ -663,10 +661,8 @@ static void gen_op_calc_cc(DisasContext *s)
break;
case CC_OP_ADD_64:
case CC_OP_SUB_64:
- case CC_OP_SUBB_64:
case CC_OP_ADD_32:
case CC_OP_SUB_32:
- case CC_OP_SUBB_32:
/* 3 arguments */
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, cc_vr);
break;
@@ -4744,29 +4740,49 @@ static DisasJumpType op_subu64(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
-static DisasJumpType op_subb(DisasContext *s, DisasOps *o)
+/* Compute borrow (0, -1) into cc_src. */
+static void compute_borrow(DisasContext *s)
{
- DisasCompare cmp;
- TCGv_i64 borrow;
-
- tcg_gen_sub_i64(o->out, o->in1, o->in2);
-
- /* The !borrow flag is the msb of CC. Since we want the inverse of
- that, we ask for a comparison of CC=0 | CC=1 -> mask of 8 | 4. */
- disas_jcc(s, &cmp, 8 | 4);
- borrow = tcg_temp_new_i64();
- if (cmp.is_64) {
- tcg_gen_setcond_i64(cmp.cond, borrow, cmp.u.s64.a, cmp.u.s64.b);
- } else {
- TCGv_i32 t = tcg_temp_new_i32();
- tcg_gen_setcond_i32(cmp.cond, t, cmp.u.s32.a, cmp.u.s32.b);
- tcg_gen_extu_i32_i64(borrow, t);
- tcg_temp_free_i32(t);
+ switch (s->cc_op) {
+ case CC_OP_SUBU:
+ break;
+ default:
+ gen_op_calc_cc(s);
+ /* fall through */
+ case CC_OP_STATIC:
+ /* The carry flag is the msb of CC; compute into cc_src. */
+ tcg_gen_extu_i32_i64(cc_src, cc_op);
+ tcg_gen_shri_i64(cc_src, cc_src, 1);
+ /* fall through */
+ case CC_OP_ADDU:
+ tcg_gen_subi_i64(cc_src, cc_src, 1);
+ break;
}
- free_compare(&cmp);
+}
+
+static DisasJumpType op_subb32(DisasContext *s, DisasOps *o)
+{
+ compute_borrow(s);
+
+ /* Borrow is {0, -1}, so add to subtract. */
+ tcg_gen_add_i64(o->out, o->in1, cc_src);
+ tcg_gen_sub_i64(o->out, o->out, o->in2);
+ return DISAS_NEXT;
+}
+
+static DisasJumpType op_subb64(DisasContext *s, DisasOps *o)
+{
+ compute_borrow(s);
+
+ /*
+ * Borrow is {0, -1}, so add to subtract; replicate the
+ * borrow input to produce 128-bit -1 for the addition.
+ */
+ TCGv_i64 zero = tcg_const_i64(0);
+ tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, cc_src);
+ tcg_gen_sub2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero);
+ tcg_temp_free_i64(zero);
- tcg_gen_sub_i64(o->out, o->out, borrow);
- tcg_temp_free_i64(borrow);
return DISAS_NEXT;
}
@@ -5305,16 +5321,6 @@ static void cout_subu64(DisasContext *s, DisasOps *o)
gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, o->out);
}
-static void cout_subb32(DisasContext *s, DisasOps *o)
-{
- gen_op_update3_cc_i64(s, CC_OP_SUBB_32, o->in1, o->in2, o->out);
-}
-
-static void cout_subb64(DisasContext *s, DisasOps *o)
-{
- gen_op_update3_cc_i64(s, CC_OP_SUBB_64, o->in1, o->in2, o->out);
-}
-
static void cout_tm32(DisasContext *s, DisasOps *o)
{
gen_op_update2_cc_i64(s, CC_OP_TM_32, o->in1, o->in2);
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 65ee998484..d91051843c 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -916,10 +916,10 @@
C(0xc205, SLFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, sub, subu32)
C(0xc204, SLGFI, RIL_a, EI, r1, i2_32u, r1, 0, subu64, subu64)
/* SUBTRACT LOGICAL WITH BORROW */
- C(0xb999, SLBR, RRE, Z, r1, r2, new, r1_32, subb, subb32)
- C(0xb989, SLBGR, RRE, Z, r1, r2, r1, 0, subb, subb64)
- C(0xe399, SLB, RXY_a, Z, r1, m2_32u, new, r1_32, subb, subb32)
- C(0xe389, SLBG, RXY_a, Z, r1, m2_64, r1, 0, subb, subb64)
+ C(0xb999, SLBR, RRE, Z, r1_32u, r2_32u, new, r1_32, subb32, subu32)
+ C(0xb989, SLBGR, RRE, Z, r1, r2, r1, 0, subb64, subu64)
+ C(0xe399, SLB, RXY_a, Z, r1_32u, m2_32u, new, r1_32, subb32, subu32)
+ C(0xe389, SLBG, RXY_a, Z, r1, m2_64, r1, 0, subb64, subu64)
/* SUPERVISOR CALL */
C(0x0a00, SVC, I, Z, 0, 0, 0, 0, svc, 0)
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 4/4] target/s390x: Improve SUB LOGICAL WITH BORROW
2020-10-17 2:29 ` [PATCH 4/4] target/s390x: Improve SUB LOGICAL WITH BORROW Richard Henderson
@ 2020-10-20 14:17 ` David Hildenbrand
2020-10-20 15:11 ` Richard Henderson
0 siblings, 1 reply; 11+ messages in thread
From: David Hildenbrand @ 2020-10-20 14:17 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 17.10.20 04:29, Richard Henderson wrote:
> Now that SUB LOGICAL outputs carry, we can use that as input directly.
> It also means we can re-use CC_OP_ZC and produce an output carry
> directly from SUB LOGICAL WITH BORROW.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/s390x/internal.h | 2 --
> target/s390x/cc_helper.c | 32 -----------------
> target/s390x/helper.c | 2 --
> target/s390x/translate.c | 74 ++++++++++++++++++++------------------
> target/s390x/insn-data.def | 8 ++---
> 5 files changed, 44 insertions(+), 74 deletions(-)
>
> diff --git a/target/s390x/internal.h b/target/s390x/internal.h
> index 4077047494..11515bb617 100644
> --- a/target/s390x/internal.h
> +++ b/target/s390x/internal.h
> @@ -172,14 +172,12 @@ enum cc_op {
>
> CC_OP_ADD_64, /* overflow on add (64bit) */
> CC_OP_SUB_64, /* overflow on subtraction (64bit) */
> - CC_OP_SUBB_64, /* overflow on unsigned sub-borrow (64bit) */
> CC_OP_ABS_64, /* sign eval on abs (64bit) */
> CC_OP_NABS_64, /* sign eval on nabs (64bit) */
> CC_OP_MULS_64, /* overflow on signed multiply (64bit) */
>
> CC_OP_ADD_32, /* overflow on add (32bit) */
> CC_OP_SUB_32, /* overflow on subtraction (32bit) */
> - CC_OP_SUBB_32, /* overflow on unsigned sub-borrow (32bit) */
> CC_OP_ABS_32, /* sign eval on abs (64bit) */
> CC_OP_NABS_32, /* sign eval on nabs (64bit) */
> CC_OP_MULS_32, /* overflow on signed multiply (32bit) */
> diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
> index c7728d1225..e7039d0d18 100644
> --- a/target/s390x/cc_helper.c
> +++ b/target/s390x/cc_helper.c
> @@ -164,19 +164,6 @@ static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar)
> }
> }
>
> -static uint32_t cc_calc_subb_64(uint64_t a1, uint64_t a2, uint64_t ar)
> -{
> - int borrow_out;
> -
> - if (ar != a1 - a2) { /* difference means borrow-in */
> - borrow_out = (a2 >= a1);
> - } else {
> - borrow_out = (a2 > a1);
> - }
> -
> - return (ar != 0) + 2 * !borrow_out;
> -}
> -
> static uint32_t cc_calc_abs_64(int64_t dst)
> {
> if ((uint64_t)dst == 0x8000000000000000ULL) {
> @@ -237,19 +224,6 @@ static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar)
> }
> }
>
> -static uint32_t cc_calc_subb_32(uint32_t a1, uint32_t a2, uint32_t ar)
> -{
> - int borrow_out;
> -
> - if (ar != a1 - a2) { /* difference means borrow-in */
> - borrow_out = (a2 >= a1);
> - } else {
> - borrow_out = (a2 > a1);
> - }
> -
> - return (ar != 0) + 2 * !borrow_out;
> -}
> -
> static uint32_t cc_calc_abs_32(int32_t dst)
> {
> if ((uint32_t)dst == 0x80000000UL) {
> @@ -450,9 +424,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
> case CC_OP_SUB_64:
> r = cc_calc_sub_64(src, dst, vr);
> break;
> - case CC_OP_SUBB_64:
> - r = cc_calc_subb_64(src, dst, vr);
> - break;
> case CC_OP_ABS_64:
> r = cc_calc_abs_64(dst);
> break;
> @@ -472,9 +443,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
> case CC_OP_SUB_32:
> r = cc_calc_sub_32(src, dst, vr);
> break;
> - case CC_OP_SUBB_32:
> - r = cc_calc_subb_32(src, dst, vr);
> - break;
> case CC_OP_ABS_32:
> r = cc_calc_abs_32(dst);
> break;
> diff --git a/target/s390x/helper.c b/target/s390x/helper.c
> index fa3aa500e5..7678994feb 100644
> --- a/target/s390x/helper.c
> +++ b/target/s390x/helper.c
> @@ -405,12 +405,10 @@ const char *cc_name(enum cc_op cc_op)
> [CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
> [CC_OP_ADD_64] = "CC_OP_ADD_64",
> [CC_OP_SUB_64] = "CC_OP_SUB_64",
> - [CC_OP_SUBB_64] = "CC_OP_SUBB_64",
> [CC_OP_ABS_64] = "CC_OP_ABS_64",
> [CC_OP_NABS_64] = "CC_OP_NABS_64",
> [CC_OP_ADD_32] = "CC_OP_ADD_32",
> [CC_OP_SUB_32] = "CC_OP_SUB_32",
> - [CC_OP_SUBB_32] = "CC_OP_SUBB_32",
> [CC_OP_ABS_32] = "CC_OP_ABS_32",
> [CC_OP_NABS_32] = "CC_OP_NABS_32",
> [CC_OP_COMP_32] = "CC_OP_COMP_32",
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index 48494a86cc..0d8235a5fb 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -601,10 +601,8 @@ static void gen_op_calc_cc(DisasContext *s)
> /* FALLTHRU */
> case CC_OP_ADD_64:
> case CC_OP_SUB_64:
> - case CC_OP_SUBB_64:
> case CC_OP_ADD_32:
> case CC_OP_SUB_32:
> - case CC_OP_SUBB_32:
> local_cc_op = tcg_const_i32(s->cc_op);
> break;
> case CC_OP_CONST0:
> @@ -663,10 +661,8 @@ static void gen_op_calc_cc(DisasContext *s)
> break;
> case CC_OP_ADD_64:
> case CC_OP_SUB_64:
> - case CC_OP_SUBB_64:
> case CC_OP_ADD_32:
> case CC_OP_SUB_32:
> - case CC_OP_SUBB_32:
> /* 3 arguments */
> gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, cc_vr);
> break;
> @@ -4744,29 +4740,49 @@ static DisasJumpType op_subu64(DisasContext *s, DisasOps *o)
> return DISAS_NEXT;
> }
>
> -static DisasJumpType op_subb(DisasContext *s, DisasOps *o)
> +/* Compute borrow (0, -1) into cc_src. */
> +static void compute_borrow(DisasContext *s)
> {
> - DisasCompare cmp;
> - TCGv_i64 borrow;
> -
> - tcg_gen_sub_i64(o->out, o->in1, o->in2);
> -
> - /* The !borrow flag is the msb of CC. Since we want the inverse of
> - that, we ask for a comparison of CC=0 | CC=1 -> mask of 8 | 4. */
> - disas_jcc(s, &cmp, 8 | 4);
> - borrow = tcg_temp_new_i64();
> - if (cmp.is_64) {
> - tcg_gen_setcond_i64(cmp.cond, borrow, cmp.u.s64.a, cmp.u.s64.b);
> - } else {
> - TCGv_i32 t = tcg_temp_new_i32();
> - tcg_gen_setcond_i32(cmp.cond, t, cmp.u.s32.a, cmp.u.s32.b);
> - tcg_gen_extu_i32_i64(borrow, t);
> - tcg_temp_free_i32(t);
> + switch (s->cc_op) {
> + case CC_OP_SUBU:
> + break;
> + default:
> + gen_op_calc_cc(s);
> + /* fall through */
> + case CC_OP_STATIC:
> + /* The carry flag is the msb of CC; compute into cc_src. */
> + tcg_gen_extu_i32_i64(cc_src, cc_op);
> + tcg_gen_shri_i64(cc_src, cc_src, 1);
> + /* fall through */
> + case CC_OP_ADDU:
Can you give me a hint how we're converting the carry into a borrow?
Can we apply something similar to compute_carry()?
> + tcg_gen_subi_i64(cc_src, cc_src, 1);
> + break;
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 4/4] target/s390x: Improve SUB LOGICAL WITH BORROW
2020-10-20 14:17 ` David Hildenbrand
@ 2020-10-20 15:11 ` Richard Henderson
2020-10-20 15:12 ` David Hildenbrand
0 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2020-10-20 15:11 UTC (permalink / raw)
To: David Hildenbrand, qemu-devel
On 10/20/20 7:17 AM, David Hildenbrand wrote:
>> + case CC_OP_ADDU:
>
> Can you give me a hint how we're converting the carry into a borrow?
>
> Can we apply something similar to compute_carry()?
>
>> + tcg_gen_subi_i64(cc_src, cc_src, 1);
Right here: subtract one.
carry = {1,0} -> borrow = {0,-1}
I'll add some more comments for v2.
r~
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 4/4] target/s390x: Improve SUB LOGICAL WITH BORROW
2020-10-20 15:11 ` Richard Henderson
@ 2020-10-20 15:12 ` David Hildenbrand
0 siblings, 0 replies; 11+ messages in thread
From: David Hildenbrand @ 2020-10-20 15:12 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 20.10.20 17:11, Richard Henderson wrote:
> On 10/20/20 7:17 AM, David Hildenbrand wrote:
>>> + case CC_OP_ADDU:
>>
>> Can you give me a hint how we're converting the carry into a borrow?
>>
>> Can we apply something similar to compute_carry()?
>>
>>> + tcg_gen_subi_i64(cc_src, cc_src, 1);
>
> Right here: subtract one.
>
> carry = {1,0} -> borrow = {0,-1}
Ok, so it's really that simple :)
Thanks!
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 11+ messages in thread