From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: agraf@suse.de, cohuck@redhat.com, borntraeger@de.ibm.com
Subject: [Qemu-devel] [PATCH 6/8] tcg/s390: Use distinct-operands facility
Date: Thu, 3 Aug 2017 22:28:31 -0700 [thread overview]
Message-ID: <20170804052833.10187-10-rth@twiddle.net> (raw)
In-Reply-To: <20170804052833.10187-1-rth@twiddle.net>
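This allows using the 3-operand instruction forms for some arithmetic,
logical, and shift operations. When the facility is available, the
destination register no longer has to match the first input operand,
which avoids an extra register move.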
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/s390/tcg-target.h | 1 +
tcg/s390/tcg-target.inc.c | 118 +++++++++++++++++++++++++++++++++++-----------
2 files changed, 91 insertions(+), 28 deletions(-)
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 957f0c0afe..1b5eb22c26 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -58,6 +58,7 @@ typedef enum TCGReg {
#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34))
#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45))
#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
+#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
extern uint64_t s390_facilities;
diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c
index 5414c9d879..a80b07db65 100644
--- a/tcg/s390/tcg-target.inc.c
+++ b/tcg/s390/tcg-target.inc.c
@@ -159,6 +159,16 @@ typedef enum S390Opcode {
RRF_LOCR = 0xb9f2,
RRF_LOCGR = 0xb9e2,
+ RRF_NRK = 0xb9f4,
+ RRF_NGRK = 0xb9e4,
+ RRF_ORK = 0xb9f6,
+ RRF_OGRK = 0xb9e6,
+ RRF_SRK = 0xb9f9,
+ RRF_SGRK = 0xb9e9,
+ RRF_SLRK = 0xb9fb,
+ RRF_SLGRK = 0xb9eb,
+ RRF_XRK = 0xb9f7,
+ RRF_XGRK = 0xb9e7,
RR_AR = 0x1a,
RR_ALR = 0x1e,
@@ -179,8 +189,11 @@ typedef enum S390Opcode {
RSY_RLL = 0xeb1d,
RSY_RLLG = 0xeb1c,
RSY_SLLG = 0xeb0d,
+ RSY_SLLK = 0xebdf,
RSY_SRAG = 0xeb0a,
+ RSY_SRAK = 0xebdc,
RSY_SRLG = 0xeb0c,
+ RSY_SRLK = 0xebde,
RS_SLL = 0x89,
RS_SRA = 0x8a,
@@ -1065,23 +1078,29 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
case TCG_COND_GEU:
do_geu:
/* We need "real" carry semantics, so use SUBTRACT LOGICAL
- instead of COMPARE LOGICAL. This needs an extra move. */
- tcg_out_mov(s, type, TCG_TMP0, c1);
+ instead of COMPARE LOGICAL. This may need an extra move. */
if (c2const) {
- tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
+ tcg_out_mov(s, type, TCG_TMP0, c1);
if (type == TCG_TYPE_I32) {
tcg_out_insn(s, RIL, SLFI, TCG_TMP0, c2);
} else {
tcg_out_insn(s, RIL, SLGFI, TCG_TMP0, c2);
}
+ } else if (s390_facilities & FACILITY_DISTINCT_OPS) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RRF, SLRK, TCG_TMP0, c1, c2);
+ } else {
+ tcg_out_insn(s, RRF, SLGRK, TCG_TMP0, c1, c2);
+ }
} else {
+ tcg_out_mov(s, type, TCG_TMP0, c1);
if (type == TCG_TYPE_I32) {
tcg_out_insn(s, RR, SLR, TCG_TMP0, c2);
} else {
tcg_out_insn(s, RRE, SLGR, TCG_TMP0, c2);
}
- tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
}
+ tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
tcg_out_insn(s, RRE, ALCGR, dest, dest);
return;
@@ -1648,7 +1667,7 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
- S390Opcode op;
+ S390Opcode op, op2;
TCGArg a0, a1, a2;
switch (opc) {
@@ -1753,29 +1772,44 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (const_args[2]) {
a2 = -a2;
goto do_addi_32;
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, SR, a0, a2);
+ } else {
+ tcg_out_insn(s, RRF, SRK, a0, a1, a2);
}
- tcg_out_insn(s, RR, SR, args[0], args[2]);
break;
case INDEX_op_and_i32:
+ a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
if (const_args[2]) {
- tgen_andi(s, TCG_TYPE_I32, args[0], args[2]);
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tgen_andi(s, TCG_TYPE_I32, a0, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, NR, a0, a2);
} else {
- tcg_out_insn(s, RR, NR, args[0], args[2]);
+ tcg_out_insn(s, RRF, NRK, a0, a1, a2);
}
break;
case INDEX_op_or_i32:
+ a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
if (const_args[2]) {
- tgen64_ori(s, args[0], args[2] & 0xffffffff);
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tgen64_ori(s, a0, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, OR, a0, a2);
} else {
- tcg_out_insn(s, RR, OR, args[0], args[2]);
+ tcg_out_insn(s, RRF, ORK, a0, a1, a2);
}
break;
case INDEX_op_xor_i32:
+ a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
if (const_args[2]) {
- tgen64_xori(s, args[0], args[2] & 0xffffffff);
- } else {
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tgen64_xori(s, a0, a2);
+ } else if (a0 == a1) {
tcg_out_insn(s, RR, XR, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RRF, XRK, a0, a1, a2);
}
break;
@@ -1804,18 +1838,31 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_shl_i32:
op = RS_SLL;
+ op2 = RSY_SLLK;
do_shift32:
- if (const_args[2]) {
- tcg_out_sh32(s, op, args[0], TCG_REG_NONE, args[2]);
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
+ if (a0 == a1) {
+ if (const_args[2]) {
+ tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
+ } else {
+ tcg_out_sh32(s, op, a0, a2, 0);
+ }
} else {
- tcg_out_sh32(s, op, args[0], args[2], 0);
+ /* Using tcg_out_sh64 here for the format; it is a 32-bit shift. */
+ if (const_args[2]) {
+ tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
+ } else {
+ tcg_out_sh64(s, op2, a0, a1, a2, 0);
+ }
}
break;
case INDEX_op_shr_i32:
op = RS_SRL;
+ op2 = RSY_SRLK;
goto do_shift32;
case INDEX_op_sar_i32:
op = RS_SRA;
+ op2 = RSY_SRAK;
goto do_shift32;
case INDEX_op_rotl_i32:
@@ -1957,30 +2004,44 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
if (const_args[2]) {
a2 = -a2;
goto do_addi_64;
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, SGR, a0, a2);
} else {
- tcg_out_insn(s, RRE, SGR, args[0], args[2]);
+ tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
}
break;
case INDEX_op_and_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
- } else {
+ } else if (a0 == a1) {
tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
}
break;
case INDEX_op_or_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- tgen64_ori(s, args[0], args[2]);
+ tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
+ tgen64_ori(s, a0, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, OGR, a0, a2);
} else {
- tcg_out_insn(s, RRE, OGR, args[0], args[2]);
+ tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
}
break;
case INDEX_op_xor_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- tgen64_xori(s, args[0], args[2]);
+ tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
+ tgen64_xori(s, a0, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, XGR, a0, a2);
} else {
- tcg_out_insn(s, RRE, XGR, args[0], args[2]);
+ tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
}
break;
@@ -2168,6 +2229,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
static const TCGTargetOpDef r_rC = { .args_ct_str = { "r", "rC" } };
static const TCGTargetOpDef r_rZ = { .args_ct_str = { "r", "rZ" } };
static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
+ static const TCGTargetOpDef r_r_rM = { .args_ct_str = { "r", "r", "rM" } };
static const TCGTargetOpDef r_0_r = { .args_ct_str = { "r", "0", "r" } };
static const TCGTargetOpDef r_0_ri = { .args_ct_str = { "r", "0", "ri" } };
static const TCGTargetOpDef r_0_rI = { .args_ct_str = { "r", "0", "rI" } };
@@ -2211,7 +2273,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
return &r_r_ri;
case INDEX_op_sub_i32:
case INDEX_op_sub_i64:
- return &r_0_ri;
+ return (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_ri : &r_0_ri);
case INDEX_op_mul_i32:
/* If we have the general-instruction-extensions, then we have
@@ -2227,32 +2289,32 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
OI[LH][LH] instructions. By rejecting certain negative ranges,
the immediate load plus the reg-reg OR is smaller. */
return (s390_facilities & FACILITY_EXT_IMM
- ? &r_0_ri
+ ? (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_ri : &r_0_ri)
: &r_0_rN);
case INDEX_op_or_i64:
return (s390_facilities & FACILITY_EXT_IMM
- ? &r_0_rM
+ ? (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_rM : &r_0_rM)
: &r_0_rN);
case INDEX_op_xor_i32:
/* Without EXT_IMM, no immediates are supported. Otherwise,
rejecting certain negative ranges leads to smaller code. */
return (s390_facilities & FACILITY_EXT_IMM
- ? &r_0_ri
+ ? (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_ri : &r_0_ri)
: &r_0_r);
case INDEX_op_xor_i64:
return (s390_facilities & FACILITY_EXT_IMM
- ? &r_0_rM
+ ? (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_rM : &r_0_rM)
: &r_0_r);
case INDEX_op_and_i32:
case INDEX_op_and_i64:
- return &r_0_ri;
+ return (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_ri : &r_0_ri);
case INDEX_op_shl_i32:
case INDEX_op_shr_i32:
case INDEX_op_sar_i32:
- return &r_0_ri;
+ return (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_ri : &r_0_ri);
case INDEX_op_shl_i64:
case INDEX_op_shr_i64:
--
2.13.3
next prev parent reply other threads:[~2017-08-04 5:28 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-04 5:28 [Qemu-devel] [PATCH for-2.11 0/8] tcg/s390 improvements Richard Henderson
2017-08-04 5:28 ` [Qemu-devel] [PULL for-2.10 1/3] tcg/arm: Fix runtime overalignment test Richard Henderson
2017-08-04 5:28 ` [Qemu-devel] [PATCH 1/8] tcg/s390: Fully convert tcg_target_op_def Richard Henderson
2017-08-04 5:28 ` [Qemu-devel] [PULL for-2.10 2/3] target/s390x: Fix CSST for 16-byte store Richard Henderson
2017-08-04 5:28 ` [Qemu-devel] [PATCH 2/8] tcg/s390: Merge cmpi facilities check to tcg_target_op_def Richard Henderson
2017-08-04 5:28 ` [Qemu-devel] [PULL for-2.10 3/3] tcg: Increase minimum alignment from tcg_malloc to 8 Richard Henderson
2017-08-04 5:28 ` [Qemu-devel] [PATCH 3/8] tcg/s390: Merge muli facilities check to tcg_target_op_def Richard Henderson
2017-08-04 5:28 ` [Qemu-devel] [PATCH 4/8] tcg/s390: Merge add2i " Richard Henderson
2017-08-04 5:28 ` [Qemu-devel] [PATCH 5/8] tcg/s390: Merge ori+xori " Richard Henderson
2017-08-04 5:28 ` Richard Henderson [this message]
2017-08-04 5:28 ` [Qemu-devel] [PATCH 7/8] tcg/s390: Use load-on-condition-2 facility Richard Henderson
2017-08-04 5:28 ` [Qemu-devel] [PATCH 8/8] tcg/s390: Use slbgr for setcond le and leu Richard Henderson
2017-08-04 5:50 ` [Qemu-devel] [PATCH for-2.11 0/8] tcg/s390 improvements no-reply
2017-08-04 7:20 ` Fam Zheng
2017-08-04 5:53 ` no-reply
-- strict thread matches above, loose matches on Subject: below --
2017-08-29 20:47 [Qemu-devel] [PATCH 0/8] tcg/s390 improvements Richard Henderson
2017-08-29 20:47 ` [Qemu-devel] [PATCH 6/8] tcg/s390: Use distinct-operands facility Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170804052833.10187-10-rth@twiddle.net \
--to=rth@twiddle.net \
--cc=agraf@suse.de \
--cc=borntraeger@de.ibm.com \
--cc=cohuck@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.