* [PATCH 01/10] tcg/s390x: Distinguish RRF-a and RRF-c formats
2022-02-24 15:43 [PATCH 00/10] tcg/s390x: updates for mie2 and mie3 Richard Henderson
@ 2022-02-24 15:43 ` Richard Henderson
2022-02-24 15:43 ` [PATCH 02/10] tcg/s390x: Distinguish RIE formats Richard Henderson
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2022-02-24 15:43 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-s390x, david, dmiller423
One has 3 register arguments; the other has 2 plus an m3 field.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.c.inc | 57 +++++++++++++++++++++-----------------
1 file changed, 32 insertions(+), 25 deletions(-)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index d56c1e51e4..b9859251a4 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -183,18 +183,19 @@ typedef enum S390Opcode {
RRE_SLBGR = 0xb989,
RRE_XGR = 0xb982,
- RRF_LOCR = 0xb9f2,
- RRF_LOCGR = 0xb9e2,
- RRF_NRK = 0xb9f4,
- RRF_NGRK = 0xb9e4,
- RRF_ORK = 0xb9f6,
- RRF_OGRK = 0xb9e6,
- RRF_SRK = 0xb9f9,
- RRF_SGRK = 0xb9e9,
- RRF_SLRK = 0xb9fb,
- RRF_SLGRK = 0xb9eb,
- RRF_XRK = 0xb9f7,
- RRF_XGRK = 0xb9e7,
+ RRFa_NRK = 0xb9f4,
+ RRFa_NGRK = 0xb9e4,
+ RRFa_ORK = 0xb9f6,
+ RRFa_OGRK = 0xb9e6,
+ RRFa_SRK = 0xb9f9,
+ RRFa_SGRK = 0xb9e9,
+ RRFa_SLRK = 0xb9fb,
+ RRFa_SLGRK = 0xb9eb,
+ RRFa_XRK = 0xb9f7,
+ RRFa_XGRK = 0xb9e7,
+
+ RRFc_LOCR = 0xb9f2,
+ RRFc_LOCGR = 0xb9e2,
RR_AR = 0x1a,
RR_ALR = 0x1e,
@@ -547,8 +548,14 @@ static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
tcg_out32(s, (op << 16) | (r1 << 4) | r2);
}
-static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
- TCGReg r1, TCGReg r2, int m3)
+static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op,
+ TCGReg r1, TCGReg r2, TCGReg r3)
+{
+ tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2);
+}
+
+static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op,
+ TCGReg r1, TCGReg r2, int m3)
{
tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
}
@@ -1520,7 +1527,7 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
/* Emit: d = 0, t = 1, d = (cc ? t : d). */
tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
- tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
+ tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc);
} else {
/* Emit: d = 1; if (cc) goto over; d = 0; over: */
tcg_out_movi(s, type, dest, 1);
@@ -1539,7 +1546,7 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
if (v3const) {
tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
} else {
- tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
+ tcg_out_insn(s, RRFc, LOCGR, dest, v3, cc);
}
} else {
c = tcg_invert_cond(c);
@@ -1569,7 +1576,7 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
}
if (HAVE_FACILITY(LOAD_ON_COND)) {
/* Emit: if (one bit found) dest = r0. */
- tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
+ tcg_out_insn(s, RRFc, LOCGR, dest, TCG_REG_R0, 2);
} else {
/* Emit: if (no one bit found) goto over; dest = r0; over: */
tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
@@ -2201,7 +2208,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else if (a0 == a1) {
tcg_out_insn(s, RR, SR, a0, a2);
} else {
- tcg_out_insn(s, RRF, SRK, a0, a1, a2);
+ tcg_out_insn(s, RRFa, SRK, a0, a1, a2);
}
break;
@@ -2213,7 +2220,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else if (a0 == a1) {
tcg_out_insn(s, RR, NR, a0, a2);
} else {
- tcg_out_insn(s, RRF, NRK, a0, a1, a2);
+ tcg_out_insn(s, RRFa, NRK, a0, a1, a2);
}
break;
case INDEX_op_or_i32:
@@ -2224,7 +2231,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else if (a0 == a1) {
tcg_out_insn(s, RR, OR, a0, a2);
} else {
- tcg_out_insn(s, RRF, ORK, a0, a1, a2);
+ tcg_out_insn(s, RRFa, ORK, a0, a1, a2);
}
break;
case INDEX_op_xor_i32:
@@ -2235,7 +2242,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else if (a0 == a1) {
tcg_out_insn(s, RR, XR, args[0], args[2]);
} else {
- tcg_out_insn(s, RRF, XRK, a0, a1, a2);
+ tcg_out_insn(s, RRFa, XRK, a0, a1, a2);
}
break;
@@ -2455,7 +2462,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else if (a0 == a1) {
tcg_out_insn(s, RRE, SGR, a0, a2);
} else {
- tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
+ tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
}
break;
@@ -2467,7 +2474,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else if (a0 == a1) {
tcg_out_insn(s, RRE, NGR, args[0], args[2]);
} else {
- tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
+ tcg_out_insn(s, RRFa, NGRK, a0, a1, a2);
}
break;
case INDEX_op_or_i64:
@@ -2478,7 +2485,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else if (a0 == a1) {
tcg_out_insn(s, RRE, OGR, a0, a2);
} else {
- tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
+ tcg_out_insn(s, RRFa, OGRK, a0, a1, a2);
}
break;
case INDEX_op_xor_i64:
@@ -2489,7 +2496,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
} else if (a0 == a1) {
tcg_out_insn(s, RRE, XGR, a0, a2);
} else {
- tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
+ tcg_out_insn(s, RRFa, XGRK, a0, a1, a2);
}
break;
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 02/10] tcg/s390x: Distinguish RIE formats
2022-02-24 15:43 [PATCH 00/10] tcg/s390x: updates for mie2 and mie3 Richard Henderson
2022-02-24 15:43 ` [PATCH 01/10] tcg/s390x: Distinguish RRF-a and RRF-c formats Richard Henderson
@ 2022-02-24 15:43 ` Richard Henderson
2022-02-24 15:43 ` [PATCH 03/10] tcg/s390x: Support MIE2 multiply single instructions Richard Henderson
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2022-02-24 15:43 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-s390x, david, dmiller423
There are multiple variations, with different fields.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.c.inc | 47 +++++++++++++++++++++-----------------
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index b9859251a4..c1cea8b1fe 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -139,16 +139,19 @@ typedef enum S390Opcode {
RI_OILL = 0xa50b,
RI_TMLL = 0xa701,
- RIE_CGIJ = 0xec7c,
- RIE_CGRJ = 0xec64,
- RIE_CIJ = 0xec7e,
- RIE_CLGRJ = 0xec65,
- RIE_CLIJ = 0xec7f,
- RIE_CLGIJ = 0xec7d,
- RIE_CLRJ = 0xec77,
- RIE_CRJ = 0xec76,
- RIE_LOCGHI = 0xec46,
- RIE_RISBG = 0xec55,
+ RIEb_CGRJ = 0xec64,
+ RIEb_CLGRJ = 0xec65,
+ RIEb_CLRJ = 0xec77,
+ RIEb_CRJ = 0xec76,
+
+ RIEc_CGIJ = 0xec7c,
+ RIEc_CIJ = 0xec7e,
+ RIEc_CLGIJ = 0xec7d,
+ RIEc_CLIJ = 0xec7f,
+
+ RIEf_RISBG = 0xec55,
+
+ RIEg_LOCGHI = 0xec46,
RRE_AGR = 0xb908,
RRE_ALGR = 0xb90a,
@@ -565,7 +568,7 @@ static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
}
-static void tcg_out_insn_RIE(TCGContext *s, S390Opcode op, TCGReg r1,
+static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1,
int i2, int m3)
{
tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
@@ -1058,9 +1061,9 @@ static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
int msb, int lsb, int ofs, int z)
{
/* Format RIE-f */
- tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
+ tcg_out16(s, (RIEf_RISBG & 0xff00) | (dest << 4) | src);
tcg_out16(s, (msb << 8) | (z << 7) | lsb);
- tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
+ tcg_out16(s, (ofs << 8) | (RIEf_RISBG & 0xff));
}
static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
@@ -1451,7 +1454,7 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
/* Emit: d = 0, d = (cc ? 1 : d). */
cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
- tcg_out_insn(s, RIE, LOCGHI, dest, 1, cc);
+ tcg_out_insn(s, RIEg, LOCGHI, dest, 1, cc);
return;
}
@@ -1544,7 +1547,7 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
if (HAVE_FACILITY(LOAD_ON_COND)) {
cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
if (v3const) {
- tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
+ tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
} else {
tcg_out_insn(s, RRFc, LOCGR, dest, v3, cc);
}
@@ -1631,6 +1634,7 @@ static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
TCGReg r1, TCGReg r2, TCGLabel *l)
{
tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
+ /* Format RIE-b */
tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
tcg_out16(s, 0);
tcg_out16(s, cc << 12 | (opc & 0xff));
@@ -1640,6 +1644,7 @@ static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
TCGReg r1, int i2, TCGLabel *l)
{
tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
+ /* Format RIE-c */
tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
tcg_out16(s, 0);
tcg_out16(s, (i2 << 8) | (opc & 0xff));
@@ -1659,8 +1664,8 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
if (!c2const) {
opc = (type == TCG_TYPE_I32
- ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
- : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
+ ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
+ : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
tgen_compare_branch(s, opc, cc, r1, c2, l);
return;
}
@@ -1671,18 +1676,18 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
larger comparison range afforded by COMPARE IMMEDIATE. */
if (type == TCG_TYPE_I32) {
if (is_unsigned) {
- opc = RIE_CLIJ;
+ opc = RIEc_CLIJ;
in_range = (uint32_t)c2 == (uint8_t)c2;
} else {
- opc = RIE_CIJ;
+ opc = RIEc_CIJ;
in_range = (int32_t)c2 == (int8_t)c2;
}
} else {
if (is_unsigned) {
- opc = RIE_CLGIJ;
+ opc = RIEc_CLGIJ;
in_range = (uint64_t)c2 == (uint8_t)c2;
} else {
- opc = RIE_CGIJ;
+ opc = RIEc_CGIJ;
in_range = (int64_t)c2 == (int8_t)c2;
}
}
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 03/10] tcg/s390x: Support MIE2 multiply single instructions
2022-02-24 15:43 [PATCH 00/10] tcg/s390x: updates for mie2 and mie3 Richard Henderson
2022-02-24 15:43 ` [PATCH 01/10] tcg/s390x: Distinguish RRF-a and RRF-c formats Richard Henderson
2022-02-24 15:43 ` [PATCH 02/10] tcg/s390x: Distinguish RIE formats Richard Henderson
@ 2022-02-24 15:43 ` Richard Henderson
2022-02-24 15:43 ` [PATCH 04/10] tcg/s390x: Support MIE2 MGRK instruction Richard Henderson
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2022-02-24 15:43 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-s390x, david, dmiller423
The MIE2 facility adds 3-operand versions of multiply.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 1 +
tcg/s390x/tcg-target.h | 1 +
tcg/s390x/tcg-target.c.inc | 34 ++++++++++++++++++++++++----------
3 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index 426dd92e51..685739329e 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -23,6 +23,7 @@ C_O1_I2(r, 0, ri)
C_O1_I2(r, 0, rI)
C_O1_I2(r, 0, rJ)
C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rJ)
C_O1_I2(r, rZ, r)
C_O1_I2(v, v, r)
C_O1_I2(v, v, v)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 69217d995b..a625ef63ac 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -63,6 +63,7 @@ typedef enum TCGReg {
#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
#define FACILITY_LOAD_ON_COND2 53
+#define FACILITY_MISC_INSN_EXT2 58
#define FACILITY_VECTOR 129
#define FACILITY_VECTOR_ENH1 135
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index c1cea8b1fe..ab92a2a82c 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -186,6 +186,8 @@ typedef enum S390Opcode {
RRE_SLBGR = 0xb989,
RRE_XGR = 0xb982,
+ RRFa_MSRKC = 0xb9fd,
+ RRFa_MSGRKC = 0xb9ed,
RRFa_NRK = 0xb9f4,
RRFa_NGRK = 0xb9e4,
RRFa_ORK = 0xb9f6,
@@ -2256,14 +2258,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_mul_i32:
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
if (const_args[2]) {
- if ((int32_t)args[2] == (int16_t)args[2]) {
- tcg_out_insn(s, RI, MHI, args[0], args[2]);
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ if (a2 == (int16_t)a2) {
+ tcg_out_insn(s, RI, MHI, a0, a2);
} else {
- tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
+ tcg_out_insn(s, RIL, MSFI, a0, a2);
}
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, MSR, a0, a2);
} else {
- tcg_out_insn(s, RRE, MSR, args[0], args[2]);
+ tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2);
}
break;
@@ -2513,14 +2519,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_mul_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- if (args[2] == (int16_t)args[2]) {
- tcg_out_insn(s, RI, MGHI, args[0], args[2]);
+ tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
+ if (a2 == (int16_t)a2) {
+ tcg_out_insn(s, RI, MGHI, a0, a2);
} else {
- tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
+ tcg_out_insn(s, RIL, MSGFI, a0, a2);
}
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, MSGR, a0, a2);
} else {
- tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
+ tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2);
}
break;
@@ -3154,12 +3164,16 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */
return (HAVE_FACILITY(GEN_INST_EXT)
- ? C_O1_I2(r, 0, ri)
+ ? (HAVE_FACILITY(MISC_INSN_EXT2)
+ ? C_O1_I2(r, r, ri)
+ : C_O1_I2(r, 0, ri))
: C_O1_I2(r, 0, rI));
case INDEX_op_mul_i64:
return (HAVE_FACILITY(GEN_INST_EXT)
- ? C_O1_I2(r, 0, rJ)
+ ? (HAVE_FACILITY(MISC_INSN_EXT2)
+ ? C_O1_I2(r, r, rJ)
+ : C_O1_I2(r, 0, rJ))
: C_O1_I2(r, 0, rI));
case INDEX_op_shl_i32:
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 04/10] tcg/s390x: Support MIE2 MGRK instruction
2022-02-24 15:43 [PATCH 00/10] tcg/s390x: updates for mie2 and mie3 Richard Henderson
` (2 preceding siblings ...)
2022-02-24 15:43 ` [PATCH 03/10] tcg/s390x: Support MIE2 multiply single instructions Richard Henderson
@ 2022-02-24 15:43 ` Richard Henderson
2022-02-24 15:43 ` [PATCH 05/10] tcg/s390x: Support MIE3 logical operations Richard Henderson
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2022-02-24 15:43 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-s390x, david, dmiller423
The MIE2 facility adds a 3-operand signed 64x64->128 multiply.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 1 +
tcg/s390x/tcg-target.h | 2 +-
tcg/s390x/tcg-target.c.inc | 6 ++++++
3 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index 685739329e..fea73b6ed0 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -31,6 +31,7 @@ C_O1_I3(v, v, v, v)
C_O1_I4(r, r, ri, r, 0)
C_O1_I4(r, r, ri, rI, 0)
C_O2_I2(b, a, 0, r)
+C_O2_I2(b, a, r, r)
C_O2_I3(b, a, 0, 1, r)
C_O2_I4(r, r, 0, 1, rA, r)
C_O2_I4(r, r, 0, 1, ri, r)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index a625ef63ac..280e752d94 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -136,7 +136,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 1
-#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 HAVE_FACILITY(MISC_INSN_EXT2)
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index ab92a2a82c..77d7bb6cf5 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -186,6 +186,7 @@ typedef enum S390Opcode {
RRE_SLBGR = 0xb989,
RRE_XGR = 0xb982,
+ RRFa_MGRK = 0xb9ec,
RRFa_MSRKC = 0xb9fd,
RRFa_MSGRKC = 0xb9ed,
RRFa_NRK = 0xb9f4,
@@ -2547,6 +2548,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_mulu2_i64:
tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
break;
+ case INDEX_op_muls2_i64:
+ tcg_out_insn(s, RRFa, MGRK, TCG_REG_R2, args[2], args[3]);
+ break;
case INDEX_op_shl_i64:
op = RSY_SLLG;
@@ -3235,6 +3239,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_mulu2_i64:
return C_O2_I2(b, a, 0, r);
+ case INDEX_op_muls2_i64:
+ return C_O2_I2(b, a, r, r);
case INDEX_op_add2_i32:
case INDEX_op_sub2_i32:
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 05/10] tcg/s390x: Support MIE3 logical operations
2022-02-24 15:43 [PATCH 00/10] tcg/s390x: updates for mie2 and mie3 Richard Henderson
` (3 preceding siblings ...)
2022-02-24 15:43 ` [PATCH 04/10] tcg/s390x: Support MIE2 MGRK instruction Richard Henderson
@ 2022-02-24 15:43 ` Richard Henderson
2022-02-24 15:43 ` [PATCH 06/10] tcg/s390x: Create tgen_cmp2 to simplify movcond Richard Henderson
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2022-02-24 15:43 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-s390x, david, dmiller423
This is andc, orc, nand, nor, eqv.
We can use nor for implementing not.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 1 +
tcg/s390x/tcg-target.h | 25 +++++----
tcg/s390x/tcg-target.c.inc | 100 +++++++++++++++++++++++++++++++++
3 files changed, 114 insertions(+), 12 deletions(-)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index fea73b6ed0..37801983f1 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -22,6 +22,7 @@ C_O1_I1(v, vr)
C_O1_I2(r, 0, ri)
C_O1_I2(r, 0, rI)
C_O1_I2(r, 0, rJ)
+C_O1_I2(r, r, r)
C_O1_I2(r, r, ri)
C_O1_I2(r, r, rJ)
C_O1_I2(r, rZ, r)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 280e752d94..53c4da7730 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -64,6 +64,7 @@ typedef enum TCGReg {
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
#define FACILITY_LOAD_ON_COND2 53
#define FACILITY_MISC_INSN_EXT2 58
+#define FACILITY_MISC_INSN_EXT3 61
#define FACILITY_VECTOR 129
#define FACILITY_VECTOR_ENH1 135
@@ -81,13 +82,13 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_ext16u_i32 1
#define TCG_TARGET_HAS_bswap16_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_not_i32 0
+#define TCG_TARGET_HAS_not_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_neg_i32 1
-#define TCG_TARGET_HAS_andc_i32 0
-#define TCG_TARGET_HAS_orc_i32 0
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_andc_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_orc_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_eqv_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i32 0
#define TCG_TARGET_HAS_ctz_i32 0
#define TCG_TARGET_HAS_ctpop_i32 0
@@ -118,13 +119,13 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_bswap16_i64 1
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_not_i64 0
+#define TCG_TARGET_HAS_not_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_neg_i64 1
-#define TCG_TARGET_HAS_andc_i64 0
-#define TCG_TARGET_HAS_orc_i64 0
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_andc_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_orc_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_eqv_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3)
+#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i64 HAVE_FACILITY(EXT_IMM)
#define TCG_TARGET_HAS_ctz_i64 0
#define TCG_TARGET_HAS_ctpop_i64 0
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 77d7bb6cf5..58ebb925d9 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -189,8 +189,18 @@ typedef enum S390Opcode {
RRFa_MGRK = 0xb9ec,
RRFa_MSRKC = 0xb9fd,
RRFa_MSGRKC = 0xb9ed,
+ RRFa_NCRK = 0xb9f5,
+ RRFa_NCGRK = 0xb9e5,
+ RRFa_NNRK = 0xb974,
+ RRFa_NNGRK = 0xb964,
+ RRFa_NORK = 0xb976,
+ RRFa_NOGRK = 0xb966,
RRFa_NRK = 0xb9f4,
RRFa_NGRK = 0xb9e4,
+ RRFa_NXRK = 0xb977,
+ RRFa_NXGRK = 0xb967,
+ RRFa_OCRK = 0xb975,
+ RRFa_OCGRK = 0xb965,
RRFa_ORK = 0xb9f6,
RRFa_OGRK = 0xb9e6,
RRFa_SRK = 0xb9f9,
@@ -2254,9 +2264,46 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
+ case INDEX_op_andc_i32:
+ a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tgen_andi(s, TCG_TYPE_I32, a0, ~a2);
+ } else {
+ tcg_out_insn(s, RRFa, NCRK, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_orc_i32:
+ a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tgen_ori(s, TCG_TYPE_I32, a0, ~a2);
+ } else {
+ tcg_out_insn(s, RRFa, OCRK, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_eqv_i32:
+ a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tgen_xori(s, TCG_TYPE_I32, a0, ~a2);
+ } else {
+ tcg_out_insn(s, RRFa, NXRK, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_nand_i32:
+ tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_nor_i32:
+ tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]);
+ break;
+
case INDEX_op_neg_i32:
tcg_out_insn(s, RR, LCR, args[0], args[1]);
break;
+ case INDEX_op_not_i32:
+ tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]);
+ break;
case INDEX_op_mul_i32:
a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
@@ -2512,9 +2559,46 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
break;
+ case INDEX_op_andc_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
+ tgen_andi(s, TCG_TYPE_I64, a0, ~a2);
+ } else {
+ tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_orc_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
+ tgen_ori(s, TCG_TYPE_I64, a0, ~a2);
+ } else {
+ tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_eqv_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
+ tgen_xori(s, TCG_TYPE_I64, a0, ~a2);
+ } else {
+ tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_nand_i64:
+ tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_nor_i64:
+ tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]);
+ break;
+
case INDEX_op_neg_i64:
tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
break;
+ case INDEX_op_not_i64:
+ tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]);
+ break;
case INDEX_op_bswap64_i64:
tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
break;
@@ -3163,6 +3247,20 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
? C_O1_I2(r, r, ri)
: C_O1_I2(r, 0, ri));
+ case INDEX_op_andc_i32:
+ case INDEX_op_andc_i64:
+ case INDEX_op_orc_i32:
+ case INDEX_op_orc_i64:
+ case INDEX_op_eqv_i32:
+ case INDEX_op_eqv_i64:
+ return C_O1_I2(r, r, ri);
+
+ case INDEX_op_nand_i32:
+ case INDEX_op_nand_i64:
+ case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
+ return C_O1_I2(r, r, r);
+
case INDEX_op_mul_i32:
/* If we have the general-instruction-extensions, then we have
MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
@@ -3198,6 +3296,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_bswap64_i64:
case INDEX_op_neg_i32:
case INDEX_op_neg_i64:
+ case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
case INDEX_op_ext8s_i32:
case INDEX_op_ext8s_i64:
case INDEX_op_ext8u_i32:
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 06/10] tcg/s390x: Create tgen_cmp2 to simplify movcond
2022-02-24 15:43 [PATCH 00/10] tcg/s390x: updates for mie2 and mie3 Richard Henderson
` (4 preceding siblings ...)
2022-02-24 15:43 ` [PATCH 05/10] tcg/s390x: Support MIE3 logical operations Richard Henderson
@ 2022-02-24 15:43 ` Richard Henderson
2022-02-24 15:43 ` [PATCH 07/10] tcg/s390x: Support SELGR instruction in MOVCOND Richard Henderson
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2022-02-24 15:43 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-s390x, david, dmiller423
Return both regular and inverted condition codes from tgen_cmp2.
This lets us choose after the fact which comparison we want.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.c.inc | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 58ebb925d9..18b8ca3132 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -1376,10 +1376,11 @@ static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
}
}
-static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
- TCGArg c2, bool c2const, bool need_carry)
+static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
+ TCGArg c2, bool c2const, bool need_carry, int *inv_cc)
{
bool is_unsigned = is_unsigned_cond(c);
+ TCGCond inv_c = tcg_invert_cond(c);
S390Opcode op;
if (c2const) {
@@ -1390,6 +1391,7 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
} else {
tcg_out_insn(s, RRE, LTGR, r1, r1);
}
+ *inv_cc = tcg_cond_to_ltr_cond[inv_c];
return tcg_cond_to_ltr_cond[c];
}
}
@@ -1453,9 +1455,17 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
}
exit:
+ *inv_cc = tcg_cond_to_s390_cond[inv_c];
return tcg_cond_to_s390_cond[c];
}
+static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
+ TCGArg c2, bool c2const, bool need_carry)
+{
+ int inv_cc;
+ return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc);
+}
+
static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
{
@@ -1556,20 +1566,19 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
TCGReg c1, TCGArg c2, int c2const,
TCGArg v3, int v3const)
{
- int cc;
+ int cc, inv_cc;
+
+ cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc);
+
if (HAVE_FACILITY(LOAD_ON_COND)) {
- cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
if (v3const) {
tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
} else {
tcg_out_insn(s, RRFc, LOCGR, dest, v3, cc);
}
} else {
- c = tcg_invert_cond(c);
- cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
-
/* Emit: if (cc) goto over; dest = r3; over: */
- tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
+ tcg_out_insn(s, RI, BRC, inv_cc, (4 + 4) >> 1);
tcg_out_insn(s, RRE, LGR, dest, v3);
}
}
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 07/10] tcg/s390x: Support SELGR instruction in MOVCOND
2022-02-24 15:43 [PATCH 00/10] tcg/s390x: updates for mie2 and mie3 Richard Henderson
` (5 preceding siblings ...)
2022-02-24 15:43 ` [PATCH 06/10] tcg/s390x: Create tgen_cmp2 to simplify movcond Richard Henderson
@ 2022-02-24 15:43 ` Richard Henderson
2022-02-24 15:43 ` [PATCH 08/10] tcg/s390x: Use tgen_movcond_int in tgen_clz Richard Henderson
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2022-02-24 15:43 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-s390x, david, dmiller423
The new select instruction provides two separate register inputs,
whereas the old load-on-condition instruction overlaps one of the
register inputs with the destination.
Generalize movcond to support pre-computed conditions, and the same
set of arguments at all times. This allows, but does not require,
MIE3 and LOC2 facilities at the same time. It will also be assumed
by a following patch, which needs to reuse tgen_movcond_int.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 3 +-
tcg/s390x/tcg-target.c.inc | 99 +++++++++++++++++++++++++++-------
2 files changed, 82 insertions(+), 20 deletions(-)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index 37801983f1..38ebce3594 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -29,8 +29,7 @@ C_O1_I2(r, rZ, r)
C_O1_I2(v, v, r)
C_O1_I2(v, v, v)
C_O1_I3(v, v, v, v)
-C_O1_I4(r, r, ri, r, 0)
-C_O1_I4(r, r, ri, rI, 0)
+C_O1_I4(r, r, ri, rI, r)
C_O2_I2(b, a, 0, r)
C_O2_I2(b, a, r, r)
C_O2_I3(b, a, 0, 1, r)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 18b8ca3132..8edad2c390 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -210,6 +210,8 @@ typedef enum S390Opcode {
RRFa_XRK = 0xb9f7,
RRFa_XGRK = 0xb9e7,
+ RRFa4_SELGR = 0xb9e3, /* RRF-a with the m4 field */
+
RRFc_LOCR = 0xb9f2,
RRFc_LOCGR = 0xb9e2,
@@ -564,12 +566,20 @@ static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
tcg_out32(s, (op << 16) | (r1 << 4) | r2);
}
+/* RRF-a without the m4 field */
static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op,
TCGReg r1, TCGReg r2, TCGReg r3)
{
tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2);
}
+/* RRF-a with the m4 field */
+static void tcg_out_insn_RRFa4(TCGContext *s, S390Opcode op,
+ TCGReg r1, TCGReg r2, TCGReg r3, int m4)
+{
+ tcg_out32(s, (op << 16) | (r3 << 12) | (m4 << 8) | (r1 << 4) | r2);
+}
+
static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op,
TCGReg r1, TCGReg r2, int m3)
{
@@ -1562,25 +1572,80 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
}
}
+static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest,
+ TCGArg v3, int v3const, TCGReg v4,
+ int cc, int inv_cc)
+{
+ TCGReg src;
+
+ if (v3const && HAVE_FACILITY(LOAD_ON_COND2)) {
+ tcg_out_mov(s, type, dest, v4);
+ /* Emit: if (cc) dest = v3. */
+ tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
+ return;
+ }
+
+ /* Note that while MIE3 implies LOC, it does not imply LOC2. */
+ if (HAVE_FACILITY(MISC_INSN_EXT3)) {
+ if (v3const) {
+ tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3);
+ v3 = TCG_TMP0;
+ }
+ /* Emit: dest = cc ? v3 : v4. */
+ tcg_out_insn(s, RRFa4, SELGR, dest, v3, v4, cc);
+ return;
+ }
+
+ if (HAVE_FACILITY(LOAD_ON_COND)) {
+ if (v3const) {
+ if (dest == v4) {
+ tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3);
+ src = TCG_TMP0;
+ } else {
+ tcg_out_insn(s, RI, LGHI, dest, v3);
+ cc = inv_cc;
+ src = v4;
+ }
+ } else if (dest == v3) {
+ cc = inv_cc;
+ src = v4;
+ } else {
+ tcg_out_mov(s, type, dest, v4);
+ src = v3;
+ }
+ /* Emit: if (cc) dest = src. */
+ tcg_out_insn(s, RRFc, LOCGR, dest, src, cc);
+ return;
+ }
+
+ if (v3const) {
+ tcg_out_mov(s, type, dest, v4);
+ /* Emit: if (!cc) goto over; dest = r3; over: */
+ tcg_out_insn(s, RI, BRC, inv_cc, (4 + 4) >> 1);
+ tcg_out_insn(s, RI, LGHI, dest, v3);
+ return;
+ }
+
+ if (dest == v3) {
+ src = v4;
+ inv_cc = cc;
+ } else {
+ tcg_out_mov(s, type, dest, v4);
+ src = v3;
+ }
+ /* Emit: if (!cc) goto over; dest = r3; over: */
+ tcg_out_insn(s, RI, BRC, inv_cc, (4 + 4) >> 1);
+ tcg_out_insn(s, RRE, LGR, dest, src);
+}
+
static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
TCGReg c1, TCGArg c2, int c2const,
- TCGArg v3, int v3const)
+ TCGArg v3, int v3const, TCGReg v4)
{
int cc, inv_cc;
cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc);
-
- if (HAVE_FACILITY(LOAD_ON_COND)) {
- if (v3const) {
- tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
- } else {
- tcg_out_insn(s, RRFc, LOCGR, dest, v3, cc);
- }
- } else {
- /* Emit: if (cc) goto over; dest = r3; over: */
- tcg_out_insn(s, RI, BRC, inv_cc, (4 + 4) >> 1);
- tcg_out_insn(s, RRE, LGR, dest, v3);
- }
+ tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc);
}
static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
@@ -2460,7 +2525,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_movcond_i32:
tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
- args[2], const_args[2], args[3], const_args[3]);
+ args[2], const_args[2], args[3], const_args[3], args[4]);
break;
case INDEX_op_qemu_ld_i32:
@@ -2737,7 +2802,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_movcond_i64:
tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
- args[2], const_args[2], args[3], const_args[3]);
+ args[2], const_args[2], args[3], const_args[3], args[4]);
break;
OP_32_64(deposit):
@@ -3336,9 +3401,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_movcond_i32:
case INDEX_op_movcond_i64:
- return (HAVE_FACILITY(LOAD_ON_COND2)
- ? C_O1_I4(r, r, ri, rI, 0)
- : C_O1_I4(r, r, ri, r, 0));
+ return C_O1_I4(r, r, ri, rI, r);
case INDEX_op_div2_i32:
case INDEX_op_div2_i64:
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 08/10] tcg/s390x: Use tgen_movcond_int in tgen_clz
2022-02-24 15:43 [PATCH 00/10] tcg/s390x: updates for mie2 and mie3 Richard Henderson
` (6 preceding siblings ...)
2022-02-24 15:43 ` [PATCH 07/10] tcg/s390x: Support SELGR instruction in MOVCOND Richard Henderson
@ 2022-02-24 15:43 ` Richard Henderson
2022-02-24 15:43 ` [PATCH 09/10] tcg/s390x: Use vector ctz for integer ctz Richard Henderson
2022-02-24 15:43 ` [PATCH 10/10] tcg/s390x: Implement ctpop operation Richard Henderson
9 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2022-02-24 15:43 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-s390x, david, dmiller423
Reuse code from movcond to conditionally copy a2 to dest,
based on the condition codes produced by FLOGR.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target-con-set.h | 1 +
tcg/s390x/tcg-target.c.inc | 26 +++++++++++---------------
2 files changed, 12 insertions(+), 15 deletions(-)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index 38ebce3594..a49e91bfcc 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -24,6 +24,7 @@ C_O1_I2(r, 0, rI)
C_O1_I2(r, 0, rJ)
C_O1_I2(r, r, r)
C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rI)
C_O1_I2(r, r, rJ)
C_O1_I2(r, rZ, r)
C_O1_I2(v, v, r)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 8edad2c390..e32eddf584 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -1658,21 +1658,15 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
if (a2const && a2 == 64) {
tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
- } else {
- if (a2const) {
- tcg_out_movi(s, TCG_TYPE_I64, dest, a2);
- } else {
- tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
- }
- if (HAVE_FACILITY(LOAD_ON_COND)) {
- /* Emit: if (one bit found) dest = r0. */
- tcg_out_insn(s, RRFc, LOCGR, dest, TCG_REG_R0, 2);
- } else {
- /* Emit: if (no one bit found) goto over; dest = r0; over: */
- tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
- tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0);
- }
+ return;
}
+
+ /*
+ * Conditions from FLOGR are:
+ * 2 -> one bit found
+ * 8 -> no one bit found
+ */
+ tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
}
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
@@ -3304,11 +3298,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_rotl_i64:
case INDEX_op_rotr_i32:
case INDEX_op_rotr_i64:
- case INDEX_op_clz_i64:
case INDEX_op_setcond_i32:
case INDEX_op_setcond_i64:
return C_O1_I2(r, r, ri);
+ case INDEX_op_clz_i64:
+ return C_O1_I2(r, r, rI);
+
case INDEX_op_sub_i32:
case INDEX_op_sub_i64:
case INDEX_op_and_i32:
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 09/10] tcg/s390x: Use vector ctz for integer ctz
2022-02-24 15:43 [PATCH 00/10] tcg/s390x: updates for mie2 and mie3 Richard Henderson
` (7 preceding siblings ...)
2022-02-24 15:43 ` [PATCH 08/10] tcg/s390x: Use tgen_movcond_int in tgen_clz Richard Henderson
@ 2022-02-24 15:43 ` Richard Henderson
2022-02-24 15:43 ` [PATCH 10/10] tcg/s390x: Implement ctpop operation Richard Henderson
9 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2022-02-24 15:43 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-s390x, david, dmiller423
There is no integer version of ctz, but there is a vector one.
Push the values to and fro, then fix up as required for the
semantics of the tcg operation.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.h | 4 ++--
tcg/s390x/tcg-target.c.inc | 35 +++++++++++++++++++++++++++++++++++
2 files changed, 37 insertions(+), 2 deletions(-)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 53c4da7730..4aff59b7c0 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -90,7 +90,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nand_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i32 0
-#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_deposit_i32 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_extract_i32 HAVE_FACILITY(GEN_INST_EXT)
@@ -127,7 +127,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nand_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i64 HAVE_FACILITY(EXT_IMM)
-#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctz_i64 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_deposit_i64 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_extract_i64 HAVE_FACILITY(GEN_INST_EXT)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index e32eddf584..9c3f8f365e 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -64,6 +64,7 @@
/* A scratch register that may be be used throughout the backend. */
#define TCG_TMP0 TCG_REG_R1
+#define TCG_TMPV TCG_REG_V31
/* A scratch register that holds a pointer to the beginning of the TB.
We don't need this when we have pc-relative loads with the general
@@ -291,6 +292,7 @@ typedef enum S390Opcode {
VRIb_VGM = 0xe746,
VRIc_VREP = 0xe74d,
+ VRRa_VCTZ = 0xe752,
VRRa_VLC = 0xe7de,
VRRa_VLP = 0xe7df,
VRRa_VLR = 0xe756,
@@ -1669,6 +1671,29 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
}
+static void tgen_ctz(TCGContext *s, TCGType type, TCGReg dest,
+ TCGReg a1, TCGArg a2, int a2const)
+{
+ MemOp vece = type == TCG_TYPE_I32 ? MO_32 : MO_64;
+ int width = 8 << vece; /* bits; assumes MemOp is log2(bytes) */
+ int cc, inv_cc;
+ TCGReg src;
+ /* dest = (a1 == 0) ? a2 : ctz(a1), counted with the vector VCTZ insn. */
+ tcg_out_mov(s, type, TCG_TMPV, a1);
+ tcg_out_insn(s, VRRa, VCTZ, TCG_TMPV, TCG_TMPV, vece);
+ /* Presumably VCTZ(0) == width, making the fixup redundant; confirm. */
+ if (a2const && a2 == width) {
+ tcg_out_mov(s, type, dest, TCG_TMPV);
+ return;
+ }
+ /* Otherwise pick a2 when a1 == 0, else the count now in TCG_TMPV. */
+ cc = tgen_cmp2(s, type, TCG_COND_EQ, a1, 0, true, false, &inv_cc);
+ /* Stage the count in dest, or in TCG_TMP0 if dest is register a2. */
+ src = (a2const || dest != a2 ? dest : TCG_TMP0);
+ tcg_out_mov(s, type, src, TCG_TMPV);
+ tgen_movcond_int(s, type, dest, a2, a2const, src, cc, inv_cc);
+}
+
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
int ofs, int len, int z)
{
@@ -2826,6 +2851,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tgen_clz(s, args[0], args[1], args[2], const_args[2]);
break;
+ case INDEX_op_ctz_i32:
+ tgen_ctz(s, TCG_TYPE_I32, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_ctz_i64:
+ tgen_ctz(s, TCG_TYPE_I64, args[0], args[1], args[2], const_args[2]);
+ break;
+
case INDEX_op_mb:
/* The host memory model is quite strong, we simply need to
serialize the instruction stream. */
@@ -3303,6 +3335,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
return C_O1_I2(r, r, ri);
case INDEX_op_clz_i64:
+ case INDEX_op_ctz_i32:
+ case INDEX_op_ctz_i64:
return C_O1_I2(r, r, rI);
case INDEX_op_sub_i32:
@@ -3557,6 +3591,7 @@ static void tcg_target_init(TCGContext *s)
s->reserved_regs = 0;
tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMPV);
/* XXX many insns can't be used with R0, so we better avoid it for now */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 10/10] tcg/s390x: Implement ctpop operation
2022-02-24 15:43 [PATCH 00/10] tcg/s390x: updates for mie2 and mie3 Richard Henderson
` (8 preceding siblings ...)
2022-02-24 15:43 ` [PATCH 09/10] tcg/s390x: Use vector ctz for integer ctz Richard Henderson
@ 2022-02-24 15:43 ` Richard Henderson
9 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2022-02-24 15:43 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-s390x, david, dmiller423
There is an older form that produces per-byte results,
and a newer form that produces per-register results,
and a vector form that produces per-element results.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/s390x/tcg-target.h | 5 ++--
tcg/s390x/tcg-target.c.inc | 54 ++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 4aff59b7c0..42cb900c6d 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -62,6 +62,7 @@ typedef enum TCGReg {
#define FACILITY_LOAD_ON_COND 45
#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
+#define FACILITY_POPCOUNT FACILITY_LOAD_ON_COND
#define FACILITY_LOAD_ON_COND2 53
#define FACILITY_MISC_INSN_EXT2 58
#define FACILITY_MISC_INSN_EXT3 61
@@ -91,7 +92,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i32 0
#define TCG_TARGET_HAS_ctz_i32 HAVE_FACILITY(VECTOR)
-#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 HAVE_FACILITY(POPCOUNT)
#define TCG_TARGET_HAS_deposit_i32 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_extract_i32 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_sextract_i32 0
@@ -128,7 +129,7 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
#define TCG_TARGET_HAS_clz_i64 HAVE_FACILITY(EXT_IMM)
#define TCG_TARGET_HAS_ctz_i64 HAVE_FACILITY(VECTOR)
-#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 HAVE_FACILITY(POPCOUNT)
#define TCG_TARGET_HAS_deposit_i64 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_extract_i64 HAVE_FACILITY(GEN_INST_EXT)
#define TCG_TARGET_HAS_sextract_i64 0
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 9c3f8f365e..4b877c70fe 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -187,6 +187,7 @@ typedef enum S390Opcode {
RRE_SLBGR = 0xb989,
RRE_XGR = 0xb982,
+ RRFa_ALHHLR = 0xb9da,
RRFa_MGRK = 0xb9ec,
RRFa_MSRKC = 0xb9fd,
RRFa_MSGRKC = 0xb9ed,
@@ -215,6 +216,7 @@ typedef enum S390Opcode {
RRFc_LOCR = 0xb9f2,
RRFc_LOCGR = 0xb9e2,
+ RRFc_POPCNT = 0xb9e1,
RR_AR = 0x1a,
RR_ALR = 0x1e,
@@ -315,6 +317,7 @@ typedef enum S390Opcode {
VRRc_VO = 0xe76a,
VRRc_VOC = 0xe76f,
VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
+ VRRa_VPOPCT = 0xe750,
VRRc_VS = 0xe7f7,
VRRa_VUPH = 0xe7d7,
VRRa_VUPL = 0xe7d6,
@@ -1694,6 +1697,48 @@ static void tgen_ctz(TCGContext *s, TCGType type, TCGReg dest,
tgen_movcond_int(s, type, dest, a2, a2const, src, cc, inv_cc);
}
+static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg a1)
+{
+ /* With MIE3, POPCNT (m3 = 8) can produce the complete result. */
+ if (HAVE_FACILITY(MISC_INSN_EXT3)) {
+ if (type == TCG_TYPE_I32) {
+ tgen_ext32u(s, dest, a1); /* presumably zeroes bits 32-63 */
+ a1 = dest;
+ }
+ tcg_out_insn(s, RRFc, POPCNT, dest, a1, 8);
+ return;
+ }
+
+ /* Failing that, VPOPCT on the vector scratch gives the complete result. */
+ if (HAVE_FACILITY(VECTOR)) {
+ tcg_out_mov(s, type, TCG_TMPV, a1);
+ tcg_out_insn(s, VRRa, VPOPCT, TCG_TMPV, TCG_TMPV,
+ type == TCG_TYPE_I32 ? MO_32 : MO_64);
+ tcg_out_mov(s, type, dest, TCG_TMPV);
+ return;
+ }
+
+ /*
+ * Failing that, POPCNT (m3 = 0) produces one count per byte.
+ * Fold the byte counts by shift-and-add to produce the final value.
+ */
+ tcg_out_insn(s, RRFc, POPCNT, dest, a1, 0);
+ if (type == TCG_TYPE_I32) {
+ tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, dest, TCG_REG_NONE, 16);
+ tcg_out_insn(s, RR, ALR, dest, TCG_TMP0);
+ tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, dest, TCG_REG_NONE, 8);
+ tcg_out_insn(s, RR, ALR, dest, TCG_TMP0);
+ tgen_ext8u(s, TCG_TYPE_I32, dest, dest); /* sum is in low byte */
+ } else {
+ tcg_out_insn(s, RRFa, ALHHLR, dest, dest, dest);
+ tcg_out_sh64(s, RSY_SLLG, TCG_TMP0, dest, TCG_REG_NONE, 16);
+ tcg_out_insn(s, RRE, ALGR, dest, TCG_TMP0);
+ tcg_out_sh64(s, RSY_SLLG, TCG_TMP0, dest, TCG_REG_NONE, 8);
+ tcg_out_insn(s, RRE, ALGR, dest, TCG_TMP0);
+ tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
+ }
+}
+
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
int ofs, int len, int z)
{
@@ -2858,6 +2903,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tgen_ctz(s, TCG_TYPE_I64, args[0], args[1], args[2], const_args[2]);
break;
+ case INDEX_op_ctpop_i32:
+ tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ case INDEX_op_ctpop_i64:
+ tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+
case INDEX_op_mb:
/* The host memory model is quite strong, we simply need to
serialize the instruction stream. */
@@ -3416,6 +3468,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_extu_i32_i64:
case INDEX_op_extract_i32:
case INDEX_op_extract_i64:
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_ctpop_i64:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_i32:
--
2.25.1
^ permalink raw reply related [flat|nested] 11+ messages in thread