From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate arithmetic operations.
Date: Fri, 21 May 2010 11:38:49 +0200 [thread overview]
Message-ID: <20100521093849.GD1950@volta.aurel32.net> (raw)
In-Reply-To: <f9a54fb8dd383110104b9287fc246873c33c74f5.1272479073.git.rth@twiddle.net>
On Wed, Apr 14, 2010 at 10:16:33AM -0700, Richard Henderson wrote:
> Add more OPC values, and tgen_arithr. Use the latter throughout.
>
> Note that normal reg/reg arithmetic now uses the Gv,Ev opcode form
> instead of the Ev,Gv opcode form used previously. Both forms
> disassemble properly, and so there's no visible change when diffing
> log files before and after the change. This change makes the operand
> ordering within the output routines more natural, and avoids the need
> to define an OPC_ARITH_EvGv since a read-modify-write with memory is
> not needed within TCG.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
Acked-by: Aurelien Jarno <aurelien@aurel32.net>
> ---
> tcg/i386/tcg-target.c | 78 ++++++++++++++++++++++++++++++-------------------
> 1 files changed, 48 insertions(+), 30 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index df1bdfc..b4e8e74 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -165,7 +165,12 @@ static inline int tcg_target_const_match(tcg_target_long val,
>
> #define OPC_ARITH_EvIz (0x81)
> #define OPC_ARITH_EvIb (0x83)
> +#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
> +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
> +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
> +#define OPC_DEC_r32 (0x48)
> #define OPC_BSWAP (0xc8 | P_EXT)
> +#define OPC_INC_r32 (0x40)
> #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
> #define OPC_JCC_short (0x70) /* ... plus condition code */
> #define OPC_JMP_long (0xe9)
> @@ -180,6 +185,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
> #define OPC_SHIFT_1 (0xd1)
> #define OPC_SHIFT_Ib (0xc1)
> #define OPC_SHIFT_cl (0xd3)
> +#define OPC_TESTL (0x85)
>
> /* Group 1 opcode extensions for 0x80-0x83. */
> #define ARITH_ADD 0
> @@ -280,6 +286,12 @@ static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r, int rm,
> }
> }
>
> +/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
> +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
> +{
> + tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src);
> +}
> +
> static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
> {
> if (arg != ret) {
> @@ -291,8 +303,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
> int ret, int32_t arg)
> {
> if (arg == 0) {
> - /* xor r0,r0 */
> - tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret);
> + tgen_arithr(s, ARITH_XOR, ret, ret);
> } else {
> tcg_out8(s, 0xb8 + ret);
> tcg_out32(s, arg);
> @@ -374,14 +385,15 @@ static inline void tcg_out_rolw_8(TCGContext *s, int reg)
> tcg_out_shifti(s, SHIFT_ROL, reg, 8);
> }
>
> -static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
> +static inline void tgen_arithi(TCGContext *s, int c, int r0,
> + int32_t val, int cf)
> {
> - if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
> - /* inc */
> - tcg_out_opc(s, 0x40 + r0);
> - } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && val == 1))) {
> - /* dec */
> - tcg_out_opc(s, 0x48 + r0);
> + /* ??? While INC is 2 bytes shorter than ADDL $1, it also induces
> + partial flags update stalls on Pentium4 and is not recommended
> + by current Intel optimization manuals. */
> + if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
> + int opc = ((c == ARITH_ADD) ^ (val < 0) ? OPC_INC_r32 : OPC_DEC_r32);
> + tcg_out_opc(s, opc + r0);
> } else if (val == (int8_t)val) {
> tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
> tcg_out8(s, val);
> @@ -454,12 +466,12 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
> if (const_arg2) {
> if (arg2 == 0) {
> /* test r, r */
> - tcg_out_modrm(s, 0x85, arg1, arg1);
> + tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
> } else {
> tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
> }
> } else {
> - tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
> + tgen_arithr(s, ARITH_CMP, arg1, arg2);
> }
> }
>
> @@ -674,7 +686,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
>
> /* cmp 0(r1), r0 */
> - tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
> + tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
>
> tcg_out_mov(s, r0, addr_reg);
>
> @@ -690,7 +702,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> s->code_ptr++;
>
> /* cmp 4(r1), addr_reg2 */
> - tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
> + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
>
> /* je label1 */
> tcg_out8(s, OPC_JCC_short + JCC_JE);
> @@ -749,7 +761,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> *label1_ptr = s->code_ptr - label1_ptr - 1;
>
> /* add x(r1), r0 */
> - tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) -
> + tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
> + offsetof(CPUTLBEntry, addend) -
> offsetof(CPUTLBEntry, addr_read));
> #else
> r0 = addr_reg;
> @@ -864,7 +877,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
>
> /* cmp 0(r1), r0 */
> - tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
> + tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
>
> tcg_out_mov(s, r0, addr_reg);
>
> @@ -880,7 +893,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> s->code_ptr++;
>
> /* cmp 4(r1), addr_reg2 */
> - tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
> + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
>
> /* je label1 */
> tcg_out8(s, OPC_JCC_short + JCC_JE);
> @@ -961,7 +974,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> *label1_ptr = s->code_ptr - label1_ptr - 1;
>
> /* add x(r1), r0 */
> - tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) -
> + tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
> + offsetof(CPUTLBEntry, addend) -
> offsetof(CPUTLBEntry, addr_write));
> #else
> r0 = addr_reg;
> @@ -1113,7 +1127,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
> if (const_args[2]) {
> tgen_arithi(s, c, args[0], args[2], 0);
> } else {
> - tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]);
> + tgen_arithr(s, c, args[0], args[2]);
> }
> break;
> case INDEX_op_mul_i32:
> @@ -1163,24 +1177,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
> goto gen_shift32;
>
> case INDEX_op_add2_i32:
> - if (const_args[4])
> + if (const_args[4]) {
> tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
> - else
> - tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]);
> - if (const_args[5])
> + } else {
> + tgen_arithr(s, ARITH_ADD, args[0], args[4]);
> + }
> + if (const_args[5]) {
> tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
> - else
> - tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]);
> + } else {
> + tgen_arithr(s, ARITH_ADC, args[1], args[5]);
> + }
> break;
> case INDEX_op_sub2_i32:
> - if (const_args[4])
> + if (const_args[4]) {
> tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
> - else
> - tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]);
> - if (const_args[5])
> + } else {
> + tgen_arithr(s, ARITH_SUB, args[0], args[4]);
> + }
> + if (const_args[5]) {
> tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
> - else
> - tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
> + } else {
> + tgen_arithr(s, ARITH_SBB, args[1], args[5]);
> + }
> break;
> case INDEX_op_brcond_i32:
> tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
> --
> 1.6.6.1
>
>
>
>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
next prev parent reply other threads:[~2010-05-21 9:39 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-04-28 18:24 [Qemu-devel] [PATCH 00/22] tcg-i386 cleanup and improvement, v2 Richard Henderson
2010-04-13 22:23 ` [Qemu-devel] [PATCH 01/22] tcg-i386: Allocate call-saved registers first Richard Henderson
2010-05-19 6:46 ` Aurelien Jarno
2010-04-13 22:26 ` [Qemu-devel] [PATCH 02/22] tcg-i386: Tidy initialization of tcg_target_call_clobber_regs Richard Henderson
2010-05-19 6:46 ` Aurelien Jarno
2010-04-13 22:59 ` [Qemu-devel] [PATCH 03/22] tcg-i386: Tidy ext8u and ext16u operations Richard Henderson
2010-05-19 6:47 ` Aurelien Jarno
2010-05-19 18:31 ` Richard Henderson
2010-05-20 13:39 ` Aurelien Jarno
2010-05-20 14:04 ` Aurelien Jarno
2010-05-20 14:40 ` Richard Henderson
2010-05-20 18:50 ` Aurelien Jarno
2010-04-13 23:13 ` [Qemu-devel] [PATCH 04/22] tcg-i386: Tidy ext8s and ext16s operations Richard Henderson
2010-05-20 18:52 ` Aurelien Jarno
2010-04-14 14:58 ` [Qemu-devel] [PATCH 07/22] tcg-i386: Tidy move operations Richard Henderson
2010-04-14 15:06 ` [Qemu-devel] [PATCH 08/22] tcg-i386: Eliminate extra move from qemu_ld64 Richard Henderson
2010-04-14 15:26 ` [Qemu-devel] [PATCH 09/22] tcg-i386: Tidy jumps Richard Henderson
2010-04-14 15:38 ` [Qemu-devel] [PATCH 10/22] tcg-i386: Tidy immediate arithmetic operations Richard Henderson
2010-05-21 9:38 ` Aurelien Jarno
2010-04-14 17:16 ` [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate " Richard Henderson
2010-05-21 9:38 ` Aurelien Jarno [this message]
2010-04-14 17:20 ` [Qemu-devel] [PATCH 12/22] tcg-i386: Tidy movi Richard Henderson
2010-05-21 9:38 ` Aurelien Jarno
2010-04-14 17:59 ` [Qemu-devel] [PATCH 13/22] tcg-i386: Tidy push/pop Richard Henderson
2010-05-21 9:38 ` Aurelien Jarno
2010-04-14 18:02 ` [Qemu-devel] [PATCH 14/22] tcg-i386: Tidy calls Richard Henderson
2010-05-21 9:40 ` Aurelien Jarno
2010-04-14 18:04 ` [Qemu-devel] [PATCH 15/22] tcg-i386: Tidy ret Richard Henderson
2010-05-21 9:40 ` Aurelien Jarno
2010-04-14 18:07 ` [Qemu-devel] [PATCH 16/22] tcg-i386: Tidy setcc Richard Henderson
2010-05-21 9:40 ` Aurelien Jarno
2010-04-14 18:22 ` [Qemu-devel] [PATCH 17/22] tcg-i386: Tidy unary arithmetic Richard Henderson
2010-05-21 9:41 ` Aurelien Jarno
2010-04-14 18:29 ` [Qemu-devel] [PATCH 18/22] tcg-i386: Tidy multiply Richard Henderson
2010-05-21 9:41 ` Aurelien Jarno
2010-04-14 18:32 ` [Qemu-devel] [PATCH 19/22] tcg-i386: Tidy xchg Richard Henderson
2010-05-21 9:42 ` Aurelien Jarno
2010-04-14 19:08 ` [Qemu-devel] [PATCH 20/22] tcg-i386: Tidy lea Richard Henderson
2010-05-21 9:43 ` Aurelien Jarno
2010-04-14 20:29 ` [Qemu-devel] [PATCH 21/22] tcg-i386: Use lea for three-operand add Richard Henderson
2010-05-21 9:44 ` Aurelien Jarno
2010-04-28 17:31 ` [Qemu-devel] [PATCH 05/22] tcg-i386: Tidy bswap operations Richard Henderson
2010-04-28 17:38 ` [Qemu-devel] [PATCH 06/22] tcg-i386: Tidy shift operations Richard Henderson
2010-04-28 18:23 ` [Qemu-devel] [PATCH 22/22] tcg-i386: Tidy data16 prefixes Richard Henderson
2010-05-21 9:45 ` Aurelien Jarno
2010-05-17 18:26 ` [Qemu-devel] [PATCH 00/22] tcg-i386 cleanup and improvement, v2 Richard Henderson
2010-05-17 19:54 ` Aurelien Jarno
2010-05-21 9:46 ` Aurelien Jarno
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100521093849.GD1950@volta.aurel32.net \
--to=aurelien@aurel32.net \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).