From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 05/21] tcg-i386: Tidy bswap operations.
Date: Mon, 19 Apr 2010 00:13:02 +0200 [thread overview]
Message-ID: <20100418221302.GA26784@volta.aurel32.net> (raw)
In-Reply-To: <e36e7f72dbd7724145773f759814a3c0a184c667.1271277329.git.rth@twiddle.net>
On Tue, Apr 13, 2010 at 04:33:59PM -0700, Richard Henderson wrote:
> Define OPC_BSWAP. Factor opcode emission to separate functions.
> Use bswap+shift to implement 16-bit swap instead of a rolw; this
> gets the proper zero-extension required by INDEX_op_bswap16_i32.
This is not required by INDEX_op_bswap16_i32. What is needed is that the
value in the input register has the 16 upper bits set to 0. Considering
that, the rolw instruction is faster than bswap + shift.
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/i386/tcg-target.c | 53 +++++++++++++++++++++++++------------------------
> 1 files changed, 27 insertions(+), 26 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 75b9915..0bafd00 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -163,6 +163,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
>
> #define P_EXT 0x100 /* 0x0f opcode prefix */
>
> +#define OPC_BSWAP (0xc8 | P_EXT)
> #define OPC_MOVZBL (0xb6 | P_EXT)
> #define OPC_MOVZWL (0xb7 | P_EXT)
> #define OPC_MOVSBL (0xbe | P_EXT)
> @@ -339,6 +340,22 @@ static inline void tcg_out_ext16s(TCGContext *s, int dest, int src)
> tcg_out_modrm(s, OPC_MOVSWL, dest, src);
> }
>
> +static inline void tcg_out_bswap32(TCGContext *s, int reg)
> +{
> + tcg_out_opc(s, OPC_BSWAP + reg);
> +}
> +
> +static inline void tcg_out_bswap16(TCGContext *s, int reg, int sign)
> +{
> + /* This swap+shift combination guarantees that the high part contains
> + the sign or zero extension required. It also doesn't suffer the
> + problem of partial register stalls that using rolw does. */
> + tcg_out_bswap32(s, reg);
> + /* shr $16, dest */
> + tcg_out_modrm(s, 0xc1, (sign ? SHIFT_SAR : SHIFT_SHR), reg);
> + tcg_out8(s, 16);
> +}
> +
> static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
> {
> if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
> @@ -745,31 +762,21 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> /* movzwl */
> tcg_out_modrm_offset(s, OPC_MOVZWL, data_reg, r0, GUEST_BASE);
> if (bswap) {
> - /* rolw $8, data_reg */
> - tcg_out8(s, 0x66);
> - tcg_out_modrm(s, 0xc1, 0, data_reg);
> - tcg_out8(s, 8);
> + tcg_out_bswap16(s, data_reg, 0);
> }
> break;
> case 1 | 4:
> /* movswl */
> tcg_out_modrm_offset(s, OPC_MOVSWL, data_reg, r0, GUEST_BASE);
> if (bswap) {
> - /* rolw $8, data_reg */
> - tcg_out8(s, 0x66);
> - tcg_out_modrm(s, 0xc1, 0, data_reg);
> - tcg_out8(s, 8);
> -
> - /* movswl data_reg, data_reg */
> - tcg_out_modrm(s, OPC_MOVSWL, data_reg, data_reg);
> + tcg_out_bswap16(s, data_reg, 1);
> }
> break;
> case 2:
> /* movl (r0), data_reg */
> tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE);
> if (bswap) {
> - /* bswap */
> - tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
> + tcg_out_bswap32(s, data_reg);
> }
> break;
> case 3:
> @@ -786,11 +793,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
> tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE + 4);
> } else {
> tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE + 4);
> - tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
> + tcg_out_bswap32(s, data_reg);
>
> tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE);
> - /* bswap */
> - tcg_out_opc(s, (0xc8 + data_reg2) | P_EXT);
> + tcg_out_bswap32(s, data_reg2);
> }
> break;
> default:
> @@ -982,8 +988,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> case 2:
> if (bswap) {
> tcg_out_mov(s, r1, data_reg);
> - /* bswap data_reg */
> - tcg_out_opc(s, (0xc8 + r1) | P_EXT);
> + tcg_out_bswap32(s, r1);
> data_reg = r1;
> }
> /* movl */
> @@ -992,12 +997,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
> case 3:
> if (bswap) {
> tcg_out_mov(s, r1, data_reg2);
> - /* bswap data_reg */
> - tcg_out_opc(s, (0xc8 + r1) | P_EXT);
> + tcg_out_bswap32(s, r1);
> tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE);
> tcg_out_mov(s, r1, data_reg);
> - /* bswap data_reg */
> - tcg_out_opc(s, (0xc8 + r1) | P_EXT);
> + tcg_out_bswap32(s, r1);
> tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE + 4);
> } else {
> tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE);
> @@ -1195,12 +1198,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
> break;
>
> case INDEX_op_bswap16_i32:
> - tcg_out8(s, 0x66);
> - tcg_out_modrm(s, 0xc1, SHIFT_ROL, args[0]);
> - tcg_out8(s, 8);
> + tcg_out_bswap16(s, args[0], 0);
> break;
> case INDEX_op_bswap32_i32:
> - tcg_out_opc(s, (0xc8 + args[0]) | P_EXT);
> + tcg_out_bswap32(s, args[0]);
> break;
>
> case INDEX_op_neg_i32:
> --
> 1.6.2.5
>
>
>
>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
next prev parent reply other threads:[~2010-04-18 22:13 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-04-14 20:35 [Qemu-devel] [PATCH 00/21] tcg-i386 cleanup and improvement Richard Henderson
2010-04-13 22:23 ` [Qemu-devel] [PATCH 01/21] tcg-i386: Allocate call-saved registers first Richard Henderson
2010-04-13 22:26 ` [Qemu-devel] [PATCH 02/21] tcg-i386: Tidy initialization of tcg_target_call_clobber_regs Richard Henderson
2010-04-13 22:59 ` [Qemu-devel] [PATCH 03/21] tcg-i386: Tidy ext8u and ext16u operations Richard Henderson
2010-04-13 23:13 ` [Qemu-devel] [PATCH 04/21] tcg-i386: Tidy ext8s and ext16s operations Richard Henderson
2010-04-13 23:33 ` [Qemu-devel] [PATCH 05/21] tcg-i386: Tidy bswap operations Richard Henderson
2010-04-18 22:13 ` Aurelien Jarno [this message]
2010-04-19 13:56 ` Richard Henderson
2010-04-19 16:05 ` malc
2010-04-19 19:19 ` Richard Henderson
2010-04-13 23:44 ` [Qemu-devel] [PATCH 06/21] tcg-i386: Tidy shift operations Richard Henderson
2010-04-14 14:58 ` [Qemu-devel] [PATCH 07/21] tcg-i386: Tidy move operations Richard Henderson
2010-04-14 15:06 ` [Qemu-devel] [PATCH 08/21] tcg-i386: Eliminate extra move from qemu_ld64 Richard Henderson
2010-04-14 15:26 ` [Qemu-devel] [PATCH 09/21] tcg-i386: Tidy jumps Richard Henderson
2010-04-14 15:38 ` [Qemu-devel] [PATCH 10/21] tcg-i386: Tidy immediate arithmetic operations Richard Henderson
2010-04-14 17:16 ` [Qemu-devel] [PATCH 11/21] tcg-i386: Tidy non-immediate " Richard Henderson
2010-04-14 17:20 ` [Qemu-devel] [PATCH 12/21] tcg-i386: Tidy movi Richard Henderson
2010-04-14 17:59 ` [Qemu-devel] [PATCH 13/21] tcg-i386: Tidy push/pop Richard Henderson
2010-04-14 18:02 ` [Qemu-devel] [PATCH 14/21] tcg-i386: Tidy calls Richard Henderson
2010-04-14 18:04 ` [Qemu-devel] [PATCH 15/21] tcg-i386: Tidy ret Richard Henderson
2010-04-14 18:07 ` [Qemu-devel] [PATCH 16/21] tcg-i386: Tidy setcc Richard Henderson
2010-04-14 18:22 ` [Qemu-devel] [PATCH 17/21] tcg-i386: Tidy unary arithmetic Richard Henderson
2010-04-14 18:29 ` [Qemu-devel] [PATCH 18/21] tcg-i386: Tidy multiply Richard Henderson
2010-04-14 18:32 ` [Qemu-devel] [PATCH 19/21] tcg-i386: Tidy xchg Richard Henderson
2010-04-14 19:08 ` [Qemu-devel] [PATCH 20/21] tcg-i386: Tidy lea Richard Henderson
2010-04-14 20:29 ` [Qemu-devel] [PATCH 21/21] tcg-i386: Use lea for three-operand add Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100418221302.GA26784@volta.aurel32.net \
--to=aurelien@aurel32.net \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).