qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} TCG ops
@ 2009-09-30 21:09 Aurelien Jarno
  2009-09-30 21:44 ` [Qemu-devel] [PATCH 2/3] tcg/x86_64: add support for ext{8, 16, 32}u_i{32, 64} " Aurelien Jarno
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Aurelien Jarno @ 2009-09-30 21:09 UTC (permalink / raw)


Currently zero extension ops are implemented by an and op with a
constant. This is then caught in some backends, and replaced by
a zero extension instruction. While this works well on RISC
machines, this adds a useless register move on non-RISC machines.

Example on x86:
  ext16u_i32 r1, r2
is translated into
  mov    %eax,%ebx
  movzwl %bx, %ebx
while the optimized version should be:
  movzwl %ax, %ebx

This patch adds ext{8,16,32}u_i{32,64} TCG ops that can be
implemented in the backends to avoid emitting useless register
moves.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/tcg-op.h  |   24 +++++++++++++++++++++---
 tcg/tcg-opc.h |   15 +++++++++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 7cb6934..faf2e8b 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -1189,16 +1189,22 @@ static inline void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
 #endif
 }
 
-/* These are currently just for convenience.
-   We assume a target will recognise these automatically .  */
 static inline void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
+#ifdef TCG_TARGET_HAS_ext8u_i32
+    tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
+#else
     tcg_gen_andi_i32(ret, arg, 0xffu);
+#endif
 }
 
 static inline void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
 {
+#ifdef TCG_TARGET_HAS_ext16u_i32
+    tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
+#else
     tcg_gen_andi_i32(ret, arg, 0xffffu);
+#endif
 }
 
 /* Note: we assume the two high bytes are set to zero */
@@ -1358,17 +1364,29 @@ static inline void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
 
 static inline void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
+#ifdef TCG_TARGET_HAS_ext8u_i64
+    tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
+#else
     tcg_gen_andi_i64(ret, arg, 0xffu);
+#endif
 }
 
 static inline void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
+#ifdef TCG_TARGET_HAS_ext16u_i64
+    tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
+#else
     tcg_gen_andi_i64(ret, arg, 0xffffu);
+#endif
 }
 
 static inline void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
 {
+#ifdef TCG_TARGET_HAS_ext32u_i64
+    tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
+#else
     tcg_gen_andi_i64(ret, arg, 0xffffffffu);
+#endif
 }
 
 /* Note: we assume the target supports move between 32 and 64 bit
@@ -1382,7 +1400,7 @@ static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
    registers */
 static inline void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
 {
-    tcg_gen_andi_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)), 0xffffffffu);
+    tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
 }
 
 /* Note: we assume the target supports move between 32 and 64 bit
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 3a095fc..b7f3fd7 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -89,6 +89,12 @@ DEF2(ext8s_i32, 1, 1, 0, 0)
 #ifdef TCG_TARGET_HAS_ext16s_i32
 DEF2(ext16s_i32, 1, 1, 0, 0)
 #endif
+#ifdef TCG_TARGET_HAS_ext8u_i32
+DEF2(ext8u_i32, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext16u_i32
+DEF2(ext16u_i32, 1, 1, 0, 0)
+#endif
 #ifdef TCG_TARGET_HAS_bswap16_i32
 DEF2(bswap16_i32, 1, 1, 0, 0)
 #endif
@@ -152,6 +158,15 @@ DEF2(ext16s_i64, 1, 1, 0, 0)
 #ifdef TCG_TARGET_HAS_ext32s_i64
 DEF2(ext32s_i64, 1, 1, 0, 0)
 #endif
+#ifdef TCG_TARGET_HAS_ext8u_i64
+DEF2(ext8u_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext16u_i64
+DEF2(ext16u_i64, 1, 1, 0, 0)
+#endif
+#ifdef TCG_TARGET_HAS_ext32u_i64
+DEF2(ext32u_i64, 1, 1, 0, 0)
+#endif
 #ifdef TCG_TARGET_HAS_bswap16_i64
 DEF2(bswap16_i64, 1, 1, 0, 0)
 #endif
-- 
1.6.1.3

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Qemu-devel] [PATCH 2/3] tcg/x86_64: add support for ext{8, 16, 32}u_i{32, 64} TCG ops
  2009-09-30 21:09 [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} TCG ops Aurelien Jarno
@ 2009-09-30 21:44 ` Aurelien Jarno
  2009-10-02 18:55 ` [Qemu-devel] [PATCH 3/3] tcg/i386: " Aurelien Jarno
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Aurelien Jarno @ 2009-09-30 21:44 UTC (permalink / raw)


Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/x86_64/tcg-target.c |   20 ++++++++++++++++++++
 tcg/x86_64/tcg-target.h |    6 ++++++
 2 files changed, 26 insertions(+), 0 deletions(-)

diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
index b4ba65f..5c829e7 100644
--- a/tcg/x86_64/tcg-target.c
+++ b/tcg/x86_64/tcg-target.c
@@ -1181,6 +1181,21 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
     case INDEX_op_ext32s_i64:
         tcg_out_modrm(s, 0x63 | P_REXW, args[0], args[1]);
         break;
+    case INDEX_op_ext8u_i32:
+        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB, args[0], args[1]);
+        break;
+    case INDEX_op_ext16u_i32:
+        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
+        break;
+    case INDEX_op_ext8u_i64:
+        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXW, args[0], args[1]);
+        break;
+    case INDEX_op_ext16u_i64:
+        tcg_out_modrm(s, 0xb7 | P_EXT | P_REXW, args[0], args[1]);
+        break;
+    case INDEX_op_ext32u_i64:
+        tcg_out_modrm(s, 0x8b, args[0], args[1]);
+        break;
 
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
@@ -1355,6 +1370,11 @@ static const TCGTargetOpDef x86_64_op_defs[] = {
     { INDEX_op_ext8s_i64, { "r", "r"} },
     { INDEX_op_ext16s_i64, { "r", "r"} },
     { INDEX_op_ext32s_i64, { "r", "r"} },
+    { INDEX_op_ext8u_i32, { "r", "r"} },
+    { INDEX_op_ext16u_i32, { "r", "r"} },
+    { INDEX_op_ext8u_i64, { "r", "r"} },
+    { INDEX_op_ext16u_i64, { "r", "r"} },
+    { INDEX_op_ext32u_i64, { "r", "r"} },
 
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
diff --git a/tcg/x86_64/tcg-target.h b/tcg/x86_64/tcg-target.h
index 8d47e78..3ca392f 100644
--- a/tcg/x86_64/tcg-target.h
+++ b/tcg/x86_64/tcg-target.h
@@ -70,6 +70,12 @@ enum {
 #define TCG_TARGET_HAS_ext8s_i64
 #define TCG_TARGET_HAS_ext16s_i64
 #define TCG_TARGET_HAS_ext32s_i64
+#define TCG_TARGET_HAS_ext8u_i32
+#define TCG_TARGET_HAS_ext16u_i32
+#define TCG_TARGET_HAS_ext8u_i64
+#define TCG_TARGET_HAS_ext16u_i64
+#define TCG_TARGET_HAS_ext32u_i64
+
 #define TCG_TARGET_HAS_rot_i32
 #define TCG_TARGET_HAS_rot_i64
 
-- 
1.6.1.3

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [Qemu-devel] [PATCH 3/3] tcg/i386: add support for ext{8, 16, 32}u_i{32, 64} TCG ops
  2009-09-30 21:09 [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} TCG ops Aurelien Jarno
  2009-09-30 21:44 ` [Qemu-devel] [PATCH 2/3] tcg/x86_64: add support for ext{8, 16, 32}u_i{32, 64} " Aurelien Jarno
@ 2009-10-02 18:55 ` Aurelien Jarno
  2009-10-02 20:07 ` [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} " Nathan Froyd
  2009-11-10 14:51 ` Paul Brook
  3 siblings, 0 replies; 7+ messages in thread
From: Aurelien Jarno @ 2009-10-02 18:55 UTC (permalink / raw)


Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/i386/tcg-target.c |    8 ++++++++
 tcg/i386/tcg-target.h |    2 ++
 2 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index b4e3b6f..dd4d4e0 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1073,6 +1073,12 @@ static inline void tcg_out_op(TCGContext *s, int opc,
     case INDEX_op_ext16s_i32:
         tcg_out_modrm(s, 0xbf | P_EXT, args[0], args[1]);
         break;
+    case INDEX_op_ext8u_i32:
+        tcg_out_modrm(s, 0xb6 | P_EXT, args[0], args[1]);
+        break;
+    case INDEX_op_ext16u_i32:
+        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
+        break;
 
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
@@ -1160,6 +1166,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
 
     { INDEX_op_ext8s_i32, { "r", "q" } },
     { INDEX_op_ext16s_i32, { "r", "r" } },
+    { INDEX_op_ext8u_i32, { "r", "q"} },
+    { INDEX_op_ext16u_i32, { "r", "r"} },
 
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "L" } },
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 461ef31..69227c3 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -52,6 +52,8 @@ enum {
 #define TCG_TARGET_HAS_ext8s_i32
 #define TCG_TARGET_HAS_ext16s_i32
 #define TCG_TARGET_HAS_rot_i32
+#define TCG_TARGET_HAS_ext8u_i32
+#define TCG_TARGET_HAS_ext16u_i32
 
 #define TCG_TARGET_HAS_GUEST_BASE
 
-- 
1.6.1.3

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} TCG ops
  2009-09-30 21:09 [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} TCG ops Aurelien Jarno
  2009-09-30 21:44 ` [Qemu-devel] [PATCH 2/3] tcg/x86_64: add support for ext{8, 16, 32}u_i{32, 64} " Aurelien Jarno
  2009-10-02 18:55 ` [Qemu-devel] [PATCH 3/3] tcg/i386: " Aurelien Jarno
@ 2009-10-02 20:07 ` Nathan Froyd
  2009-10-02 20:54   ` Aurelien Jarno
  2009-11-10 14:51 ` Paul Brook
  3 siblings, 1 reply; 7+ messages in thread
From: Nathan Froyd @ 2009-10-02 20:07 UTC (permalink / raw)
  To: Aurelien Jarno; +Cc: qemu-devel

On Wed, Sep 30, 2009 at 11:09:35PM +0200, Aurelien Jarno wrote:
> Currently zero extensions ops are implemented by a and op with a
> constant. This is then catched in some backend, and replaced by
> a zero extension instruction. While this works well on RISC
> machines, this adds a useless register move on non-RISC machines.
> 
> This patch adds ext{8,16,32}u_i{32,64} TCG ops that can be
> implemented in the backends to avoid emitting useless register
> moves.

I have to ask--does this make things go faster?

-Nathan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} TCG ops
  2009-10-02 20:07 ` [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} " Nathan Froyd
@ 2009-10-02 20:54   ` Aurelien Jarno
  0 siblings, 0 replies; 7+ messages in thread
From: Aurelien Jarno @ 2009-10-02 20:54 UTC (permalink / raw)
  To: qemu-devel; +Cc: Nathan Froyd

On Fri, Oct 02, 2009 at 01:07:12PM -0700, Nathan Froyd wrote:
> On Wed, Sep 30, 2009 at 11:09:35PM +0200, Aurelien Jarno wrote:
> > Currently zero extensions ops are implemented by a and op with a
> > constant. This is then catched in some backend, and replaced by
> > a zero extension instruction. While this works well on RISC
> > machines, this adds a useless register move on non-RISC machines.
> > 
> > This patch adds ext{8,16,32}u_i{32,64} TCG ops that can be
> > implemented in the backends to avoid emitting useless register
> > moves.
> 
> I have to ask--does this make things go faster?
> 

It depends on the target; it needs to use zero extension (MIPS, for
example, almost only does sign extension). It gives a 1.5% gain in
my test with qemu-x86_64.

It should also give a gain on 64 bit system targets running a 32 
bit OS (i386 on x86_64, ppc on ppc64), as they do a zero 
extension on a lot of instructions.

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} TCG ops
  2009-09-30 21:09 [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} TCG ops Aurelien Jarno
                   ` (2 preceding siblings ...)
  2009-10-02 20:07 ` [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} " Nathan Froyd
@ 2009-11-10 14:51 ` Paul Brook
  2009-11-10 15:38   ` Aurelien Jarno
  3 siblings, 1 reply; 7+ messages in thread
From: Paul Brook @ 2009-11-10 14:51 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

On Wednesday 30 September 2009, Aurelien Jarno wrote:
> Currently zero extensions ops are implemented by a and op with a
> constant. This is then catched in some backend, and replaced by
> a zero extension instruction. While this works well on RISC
> machines, this adds a useless register move on non-RISC machines.
> 
> Example on x86:
>   ext16u_i32 r1, r2
> is translated into
>   mov    %eax,%ebx
>   movzwl %bx, %ebx
> while the optimized version should be:
>   movzwl %ax, %ebx

I don't like your solution. Having two operations that do the same thing is 
bad, especially when we have no way of converting one into the other, and no 
clear indication which is best.

I think we need to understand why the original code introduces an extra 
copy. At first glance there's no good reason for it to be there.

Paul

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} TCG ops
  2009-11-10 14:51 ` Paul Brook
@ 2009-11-10 15:38   ` Aurelien Jarno
  0 siblings, 0 replies; 7+ messages in thread
From: Aurelien Jarno @ 2009-11-10 15:38 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel

On Tue, Nov 10, 2009 at 02:51:21PM +0000, Paul Brook wrote:
> On Wednesday 30 September 2009, Aurelien Jarno wrote:
> > Currently zero extensions ops are implemented by a and op with a
> > constant. This is then catched in some backend, and replaced by
> > a zero extension instruction. While this works well on RISC
> > machines, this adds a useless register move on non-RISC machines.
> > 
> > Example on x86:
> >   ext16u_i32 r1, r2
> > is translated into
> >   mov    %eax,%ebx
> >   movzwl %bx, %ebx
> > while the optimized version should be:
> >   movzwl %ax, %ebx
> 
> I don't like your solution. Having two operations that do the same thing is 
> bad, especially when we have no way of converting one into the other, and no 
> clear indication which is best.

We don't have two operations; we have a few optional operations (ext*u) 
that are implemented with andi on TCG targets that don't support them. This
is the same as ext*s, which is not always implemented natively.

> I think we need to understand why does the original code introduces an extra 
> copy. At first glance there's no good reason for it to be there.

That's easy to understand. ext16u_i32 r1, r2 is translated into 
andi r1, r2, 0xffff, that is:

  movi_i32 tmp0, 0xffff
  and r1, r2, tmp0

On x86*, and is defined as:

  { INDEX_op_and_i32, { "r", "0", "ri" } },

That is, the first argument is an alias of the output. This is
therefore converted into (even if this is never represented this way
inside TCG):

  movi_i32 tmp0, 0xffff
  mov r1, r2
  and r1, r1, tmp0

The x86 TCG target has some special cases for the and implementation
and converts it to the movzwl instruction instead of and + immediate.

-- 
Aurelien Jarno	                        GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2009-11-10 15:39 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-09-30 21:09 [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} TCG ops Aurelien Jarno
2009-09-30 21:44 ` [Qemu-devel] [PATCH 2/3] tcg/x86_64: add support for ext{8, 16, 32}u_i{32, 64} " Aurelien Jarno
2009-10-02 18:55 ` [Qemu-devel] [PATCH 3/3] tcg/i386: " Aurelien Jarno
2009-10-02 20:07 ` [Qemu-devel] [PATCH 1/3] tcg: add ext{8,16,32}u_i{32,64} " Nathan Froyd
2009-10-02 20:54   ` Aurelien Jarno
2009-11-10 14:51 ` Paul Brook
2009-11-10 15:38   ` Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).