From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43)
	id 1NLbBE-0004o5-JE
	for qemu-devel@nongnu.org; Fri, 18 Dec 2009 06:39:32 -0500
Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43)
	id 1NLbBA-0004ka-Ng
	for qemu-devel@nongnu.org; Fri, 18 Dec 2009 06:39:32 -0500
Received: from [199.232.76.173] (port=46966 helo=monty-python.gnu.org)
	by lists.gnu.org with esmtp (Exim 4.43) id 1NLbBA-0004kQ-De
	for qemu-devel@nongnu.org; Fri, 18 Dec 2009 06:39:28 -0500
Received: from mail-px0-f189.google.com ([209.85.216.189]:62682)
	by monty-python.gnu.org with esmtp (Exim 4.60)
	(envelope-from <laurent.desnogues@gmail.com>) id 1NLbB9-0007QO-Jf
	for qemu-devel@nongnu.org; Fri, 18 Dec 2009 06:39:28 -0500
Received: by pxi27 with SMTP id 27so363575pxi.4
	for <qemu-devel@nongnu.org>; Fri, 18 Dec 2009 03:39:25 -0800 (PST)
MIME-Version: 1.0
In-Reply-To: <b3a1f6980056160646b35d50a673a851383a3bc5.1261078375.git.rth@twiddle.net>
References: <761ea48b0912170620l534dcb02m8ea6b59524d76dbe@mail.gmail.com>
	<cover.1261078375.git.rth@twiddle.net>
	<b3a1f6980056160646b35d50a673a851383a3bc5.1261078375.git.rth@twiddle.net>
Date: Fri, 18 Dec 2009 12:39:25 +0100
Message-ID: <761ea48b0912180339k18573822wea90289345c58a84@mail.gmail.com>
From: Laurent Desnogues <laurent.desnogues@gmail.com>
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Subject: [Qemu-devel] Re: [PATCH 3/6] tcg-x86_64: Implement setcond and
	movcond.
List-Id: qemu-devel.nongnu.org
List-Unsubscribe: <http://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.gnu.org/pipermail/qemu-devel>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <http://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org

On Thu, Dec 17, 2009 at 6:32 PM, Richard Henderson <rth@twiddle.net> wrote:
> Implement conditional moves in the x86_64 backend.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> =A0tcg/x86_64/tcg-target.c | =A0 65 +++++++++++++++++++++++++++++++++++++=
+++++++--
> =A01 files changed, 62 insertions(+), 3 deletions(-)
>
> diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
> index 2339091..e411755 100644
> --- a/tcg/x86_64/tcg-target.c
> +++ b/tcg/x86_64/tcg-target.c
> @@ -491,9 +491,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int l=
abel_index)
> =A0 =A0 }
> =A0}
>
> -static void tcg_out_brcond(TCGContext *s, int cond,
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 TCGArg arg1, TCGArg=
 arg2, int const_arg2,
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 int label_index, in=
t rexw)
> +static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 TCGArg arg2, int const_=
arg2, int rexw)
> =A0{
> =A0 =A0 if (const_arg2) {
> =A0 =A0 =A0 =A0 if (arg2 =3D=3D 0) {
> @@ -508,9 +507,45 @@ static void tcg_out_brcond(TCGContext *s, int cond,
> =A0 =A0 } else {
> =A0 =A0 =A0 =A0 tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, ar=
g1);
> =A0 =A0 }
> +}
> +
> +static void tcg_out_brcond(TCGContext *s, int cond,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 TCGArg arg1, TCGArg=
 arg2, int const_arg2,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 int label_index, in=
t rexw)
> +{
> + =A0 =A0tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
> =A0 =A0 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
> =A0}
>
> +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0TCGArg arg1, TCG=
Arg arg2, int const_arg2, int rexw)

Perhaps renaming arg0 to dest would make things slightly
more readable.

> +{
> + =A0 =A0int use_xor =3D (arg0 !=3D arg1 && (const_arg2 || arg0 !=3D arg2=
));
> +
> + =A0 =A0if (use_xor)
> + =A0 =A0 =A0 =A0tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
> + =A0 =A0tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
> + =A0 =A0tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, =
0, arg0);

A comment saying this is a setcc would be nice.

Also note that tcg_out_modrm will generate an unneeded prefix
for some registers; cf. the patch I sent to the list months ago.

> + =A0 =A0if (!use_xor)
> + =A0 =A0 =A0 =A0tgen_arithi32(s, ARITH_AND, arg0, 0xff);

Wouldn't movzbl be better?

Regarding the xor optimization, I tested it on my i7 and it was
(very) slightly slower when running a 64-bit build of SPEC2k gcc.

> +}
> +
> +static void tcg_out_movcond(TCGContext *s, int cond, TCGArg arg0,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0TCGArg arg1, TCG=
Arg arg2, int const_arg2,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0TCGArg arg3, TCG=
Arg arg4, int rexw)

Perhaps renaming arg0 to dest would make things slightly
more readable.

You should also add a note stating that arg3 !=3D arg4.

> +{
> + =A0 =A0if (arg0 =3D=3D arg3) {
> + =A0 =A0 =A0 =A0cond =3D tcg_invert_cond(cond);
> + =A0 =A0 =A0 =A0arg3 =3D arg4;
> + =A0 =A0 =A0 =A0arg4 =3D arg0;
> + =A0 =A0}
> +
> + =A0 =A0tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
> + =A0 =A0if (arg0 !=3D arg4)
> + =A0 =A0 =A0 =A0tcg_out_mov(s, arg0, arg4);
> + =A0 =A0tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw, ar=
g0, arg3);

A comment saying this is cmovcc would be nice.

Note: I didn't check the correctness, though it looks OK. I'll
run real tests on the next iteration :-)

> +}
> +
> =A0#if defined(CONFIG_SOFTMMU)
>
> =A0#include "../../softmmu_defs.h"
> @@ -1197,6 +1232,24 @@ static inline void tcg_out_op(TCGContext *s, int o=
pc, const TCGArg *args,
> =A0 =A0 =A0 =A0 tcg_out_modrm(s, 0x8b, args[0], args[1]);
> =A0 =A0 =A0 =A0 break;
>
> + =A0 =A0case INDEX_op_setcond_i32:
> + =A0 =A0 =A0 =A0tcg_out_setcond(s, args[3], args[0], args[1], args[2],
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0const_args[2], 0);
> + =A0 =A0 =A0 =A0break;
> + =A0 =A0case INDEX_op_setcond_i64:
> + =A0 =A0 =A0 =A0tcg_out_setcond(s, args[3], args[0], args[1], args[2],
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0const_args[2], P_REXW);
> + =A0 =A0 =A0 =A0break;
> +
> + =A0 =A0case INDEX_op_movcond_i32:
> + =A0 =A0 =A0 =A0tcg_out_movcond(s, args[5], args[0], args[1], args[2],
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0const_args[2], args[3], =
args[4], 0);
> + =A0 =A0 =A0 =A0break;
> + =A0 =A0case INDEX_op_movcond_i64:
> + =A0 =A0 =A0 =A0tcg_out_movcond(s, args[5], args[0], args[1], args[2],
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0const_args[2], args[3], =
args[4], P_REXW);
> + =A0 =A0 =A0 =A0break;
> +
> =A0 =A0 case INDEX_op_qemu_ld8u:
> =A0 =A0 =A0 =A0 tcg_out_qemu_ld(s, args, 0);
> =A0 =A0 =A0 =A0 break;
> @@ -1376,6 +1429,12 @@ static const TCGTargetOpDef x86_64_op_defs[] =3D {
> =A0 =A0 { INDEX_op_ext16u_i64, { "r", "r"} },
> =A0 =A0 { INDEX_op_ext32u_i64, { "r", "r"} },
>
> + =A0 =A0{ INDEX_op_setcond_i32, { "r", "r", "re" } },
> + =A0 =A0{ INDEX_op_setcond_i64, { "r", "r", "re" } },
> +
> + =A0 =A0{ INDEX_op_movcond_i32, { "r", "r", "re", "r", "r" } },
> + =A0 =A0{ INDEX_op_movcond_i64, { "r", "r", "re", "r", "r" } },

For the i32 variants, "ri" instead of "re" is enough.


Laurent

> +
> =A0 =A0 { INDEX_op_qemu_ld8u, { "r", "L" } },
> =A0 =A0 { INDEX_op_qemu_ld8s, { "r", "L" } },
> =A0 =A0 { INDEX_op_qemu_ld16u, { "r", "L" } },
> --
> 1.6.5.2
>
>