* [Qemu-devel] [PATCH 01/14] i386: use OT_* consistently
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-07 18:50 ` Blue Swirl
2012-10-09 18:58 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 02/14] i386: introduce gen_ext_tl Paolo Bonzini
` (12 subsequent siblings)
13 siblings, 2 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 74 ++++++++++++++++++++++++-------------------------
1 file modificato, 37 inserzioni(+), 37 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 0a7e4e3..e2ef410 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -323,17 +323,17 @@ static inline void gen_op_mov_reg_T1(int ot, int reg)
static inline void gen_op_mov_reg_A0(int size, int reg)
{
switch(size) {
- case 0:
+ case OT_BYTE:
tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_A0, 0, 16);
break;
default: /* XXX this shouldn't be reached; abort? */
- case 1:
+ case OT_WORD:
/* For x86_64, this sets the higher half of register to zero.
For i386, this is equivalent to a mov. */
tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
break;
#ifdef TARGET_X86_64
- case 2:
+ case OT_LONG:
tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
break;
#endif
@@ -398,11 +398,11 @@ static inline void gen_op_jmp_T0(void)
static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
{
switch(size) {
- case 0:
+ case OT_BYTE:
tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
break;
- case 1:
+ case OT_WORD:
tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
/* For x86_64, this sets the higher half of register to zero.
For i386, this is equivalent to a nop. */
@@ -410,7 +410,7 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
break;
#ifdef TARGET_X86_64
- case 2:
+ case OT_LONG:
tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
break;
#endif
@@ -420,11 +420,11 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
static inline void gen_op_add_reg_T0(int size, int reg)
{
switch(size) {
- case 0:
+ case OT_BYTE:
tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
break;
- case 1:
+ case OT_WORD:
tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
/* For x86_64, this sets the higher half of register to zero.
For i386, this is equivalent to a nop. */
@@ -432,7 +432,7 @@ static inline void gen_op_add_reg_T0(int size, int reg)
tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
break;
#ifdef TARGET_X86_64
- case 2:
+ case OT_LONG:
tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
break;
#endif
@@ -506,14 +506,14 @@ static inline void gen_op_lds_T0_A0(int idx)
{
int mem_index = (idx >> 2) - 1;
switch(idx & 3) {
- case 0:
+ case OT_BYTE:
tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
break;
- case 1:
+ case OT_WORD:
tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
break;
default:
- case 2:
+ case OT_LONG:
tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
break;
}
@@ -523,17 +523,17 @@ static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
{
int mem_index = (idx >> 2) - 1;
switch(idx & 3) {
- case 0:
+ case OT_BYTE:
tcg_gen_qemu_ld8u(t0, a0, mem_index);
break;
- case 1:
+ case OT_WORD:
tcg_gen_qemu_ld16u(t0, a0, mem_index);
break;
- case 2:
+ case OT_LONG:
tcg_gen_qemu_ld32u(t0, a0, mem_index);
break;
default:
- case 3:
+ case OT_QUAD:
/* Should never happen on 32-bit targets. */
#ifdef TARGET_X86_64
tcg_gen_qemu_ld64(t0, a0, mem_index);
@@ -562,17 +562,17 @@ static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
{
int mem_index = (idx >> 2) - 1;
switch(idx & 3) {
- case 0:
+ case OT_BYTE:
tcg_gen_qemu_st8(t0, a0, mem_index);
break;
- case 1:
+ case OT_WORD:
tcg_gen_qemu_st16(t0, a0, mem_index);
break;
- case 2:
+ case OT_LONG:
tcg_gen_qemu_st32(t0, a0, mem_index);
break;
default:
- case 3:
+ case OT_QUAD:
/* Should never happen on 32-bit targets. */
#ifdef TARGET_X86_64
tcg_gen_qemu_st64(t0, a0, mem_index);
@@ -710,9 +710,9 @@ static inline void gen_op_jz_ecx(int size, int label1)
static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
{
switch (ot) {
- case 0: gen_helper_inb(v, n); break;
- case 1: gen_helper_inw(v, n); break;
- case 2: gen_helper_inl(v, n); break;
+ case OT_BYTE: gen_helper_inb(v, n); break;
+ case OT_WORD: gen_helper_inw(v, n); break;
+ case OT_LONG: gen_helper_inl(v, n); break;
}
}
@@ -720,9 +720,9 @@ static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
{
switch (ot) {
- case 0: gen_helper_outb(v, n); break;
- case 1: gen_helper_outw(v, n); break;
- case 2: gen_helper_outl(v, n); break;
+ case OT_BYTE: gen_helper_outb(v, n); break;
+ case OT_WORD: gen_helper_outw(v, n); break;
+ case OT_LONG: gen_helper_outl(v, n); break;
}
}
@@ -741,13 +741,13 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
state_saved = 1;
tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
switch (ot) {
- case 0:
+ case OT_BYTE:
gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
break;
- case 1:
+ case OT_WORD:
gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
break;
- case 2:
+ case OT_LONG:
gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
break;
}
@@ -1781,34 +1781,34 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
if (is_right) {
switch (ot) {
- case 0:
+ case OT_BYTE:
gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
- case 1:
+ case OT_WORD:
gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
- case 2:
+ case OT_LONG:
gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
#ifdef TARGET_X86_64
- case 3:
+ case OT_QUAD:
gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
#endif
}
} else {
switch (ot) {
- case 0:
+ case OT_BYTE:
gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
- case 1:
+ case OT_WORD:
gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
- case 2:
+ case OT_LONG:
gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
#ifdef TARGET_X86_64
- case 3:
+ case OT_QUAD:
gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
break;
#endif
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 01/14] i386: use OT_* consistently
2012-10-06 12:30 ` [Qemu-devel] [PATCH 01/14] i386: use OT_* consistently Paolo Bonzini
@ 2012-10-07 18:50 ` Blue Swirl
2012-10-09 18:58 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Blue Swirl @ 2012-10-07 18:50 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> target-i386/translate.c | 74 ++++++++++++++++++++++++-------------------------
> 1 file modificato, 37 inserzioni(+), 37 rimozioni(-)
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 0a7e4e3..e2ef410 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -323,17 +323,17 @@ static inline void gen_op_mov_reg_T1(int ot, int reg)
> static inline void gen_op_mov_reg_A0(int size, int reg)
> {
> switch(size) {
> - case 0:
> + case OT_BYTE:
> tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_A0, 0, 16);
> break;
> default: /* XXX this shouldn't be reached; abort? */
> - case 1:
> + case OT_WORD:
> /* For x86_64, this sets the higher half of register to zero.
> For i386, this is equivalent to a mov. */
> tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
> break;
> #ifdef TARGET_X86_64
> - case 2:
> + case OT_LONG:
> tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
> break;
> #endif
> @@ -398,11 +398,11 @@ static inline void gen_op_jmp_T0(void)
> static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
> {
> switch(size) {
> - case 0:
> + case OT_BYTE:
> tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
> tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
> break;
> - case 1:
> + case OT_WORD:
> tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
> /* For x86_64, this sets the higher half of register to zero.
> For i386, this is equivalent to a nop. */
> @@ -410,7 +410,7 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
> tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
> break;
> #ifdef TARGET_X86_64
> - case 2:
> + case OT_LONG:
> tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
> break;
> #endif
> @@ -420,11 +420,11 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
> static inline void gen_op_add_reg_T0(int size, int reg)
> {
> switch(size) {
> - case 0:
> + case OT_BYTE:
> tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
> tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16);
> break;
> - case 1:
> + case OT_WORD:
> tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
> /* For x86_64, this sets the higher half of register to zero.
> For i386, this is equivalent to a nop. */
> @@ -432,7 +432,7 @@ static inline void gen_op_add_reg_T0(int size, int reg)
> tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
> break;
> #ifdef TARGET_X86_64
> - case 2:
> + case OT_LONG:
> tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
> break;
> #endif
> @@ -506,14 +506,14 @@ static inline void gen_op_lds_T0_A0(int idx)
> {
> int mem_index = (idx >> 2) - 1;
> switch(idx & 3) {
> - case 0:
> + case OT_BYTE:
> tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index);
> break;
> - case 1:
> + case OT_WORD:
> tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index);
> break;
> default:
> - case 2:
> + case OT_LONG:
> tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index);
> break;
> }
> @@ -523,17 +523,17 @@ static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0)
> {
> int mem_index = (idx >> 2) - 1;
> switch(idx & 3) {
> - case 0:
> + case OT_BYTE:
> tcg_gen_qemu_ld8u(t0, a0, mem_index);
> break;
> - case 1:
> + case OT_WORD:
> tcg_gen_qemu_ld16u(t0, a0, mem_index);
> break;
> - case 2:
> + case OT_LONG:
> tcg_gen_qemu_ld32u(t0, a0, mem_index);
> break;
> default:
> - case 3:
> + case OT_QUAD:
> /* Should never happen on 32-bit targets. */
> #ifdef TARGET_X86_64
> tcg_gen_qemu_ld64(t0, a0, mem_index);
> @@ -562,17 +562,17 @@ static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0)
> {
> int mem_index = (idx >> 2) - 1;
> switch(idx & 3) {
> - case 0:
> + case OT_BYTE:
> tcg_gen_qemu_st8(t0, a0, mem_index);
> break;
> - case 1:
> + case OT_WORD:
> tcg_gen_qemu_st16(t0, a0, mem_index);
> break;
> - case 2:
> + case OT_LONG:
> tcg_gen_qemu_st32(t0, a0, mem_index);
> break;
> default:
> - case 3:
> + case OT_QUAD:
> /* Should never happen on 32-bit targets. */
> #ifdef TARGET_X86_64
> tcg_gen_qemu_st64(t0, a0, mem_index);
> @@ -710,9 +710,9 @@ static inline void gen_op_jz_ecx(int size, int label1)
> static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
> {
> switch (ot) {
> - case 0: gen_helper_inb(v, n); break;
> - case 1: gen_helper_inw(v, n); break;
> - case 2: gen_helper_inl(v, n); break;
> + case OT_BYTE: gen_helper_inb(v, n); break;
> + case OT_WORD: gen_helper_inw(v, n); break;
> + case OT_LONG: gen_helper_inl(v, n); break;
> }
>
> }
> @@ -720,9 +720,9 @@ static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n)
> static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n)
> {
> switch (ot) {
> - case 0: gen_helper_outb(v, n); break;
> - case 1: gen_helper_outw(v, n); break;
> - case 2: gen_helper_outl(v, n); break;
> + case OT_BYTE: gen_helper_outb(v, n); break;
> + case OT_WORD: gen_helper_outw(v, n); break;
> + case OT_LONG: gen_helper_outl(v, n); break;
> }
>
> }
> @@ -741,13 +741,13 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
> state_saved = 1;
> tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> switch (ot) {
> - case 0:
> + case OT_BYTE:
> gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
> break;
> - case 1:
> + case OT_WORD:
> gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
> break;
> - case 2:
> + case OT_LONG:
> gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
> break;
> }
> @@ -1781,34 +1781,34 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
>
> if (is_right) {
> switch (ot) {
> - case 0:
> + case OT_BYTE:
> gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
> break;
> - case 1:
> + case OT_WORD:
> gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
> break;
> - case 2:
> + case OT_LONG:
> gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
> break;
> #ifdef TARGET_X86_64
> - case 3:
> + case OT_QUAD:
> gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
> break;
> #endif
> }
> } else {
> switch (ot) {
> - case 0:
> + case OT_BYTE:
> gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
> break;
> - case 1:
> + case OT_WORD:
> gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
> break;
> - case 2:
> + case OT_LONG:
> gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
> break;
> #ifdef TARGET_X86_64
> - case 3:
> + case OT_QUAD:
> gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
> break;
> #endif
> --
> 1.7.12.1
>
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 01/14] i386: use OT_* consistently
2012-10-06 12:30 ` [Qemu-devel] [PATCH 01/14] i386: use OT_* consistently Paolo Bonzini
2012-10-07 18:50 ` Blue Swirl
@ 2012-10-09 18:58 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 18:58 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target-i386/translate.c | 74 ++++++++++++++++++++++++-------------------------
> 1 file modificato, 37 inserzioni(+), 37 rimozioni(-)
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 02/14] i386: introduce gen_ext_tl
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
2012-10-06 12:30 ` [Qemu-devel] [PATCH 01/14] i386: use OT_* consistently Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-07 18:53 ` Blue Swirl
2012-10-09 18:58 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 03/14] i386: factor setting of s->cc_op handling for string functions Paolo Bonzini
` (11 subsequent siblings)
13 siblings, 2 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
Introduce a function that abstracts extracting an 8, 16, 32 or 64-bit value
with or without sign, generalizing gen_extu and gen_exts.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 146 ++++++++++++------------------------------------
1 file modificato, 37 inserzioni(+), 109 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index e2ef410..671303d 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -659,38 +659,45 @@ static inline void gen_op_movl_T0_Dshift(int ot)
tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
};
-static void gen_extu(int ot, TCGv reg)
+static inline TCGv gen_ext_tl(TCGv dst, TCGv src, int size, bool sign)
{
- switch(ot) {
+ switch(size) {
case OT_BYTE:
- tcg_gen_ext8u_tl(reg, reg);
- break;
+ if (sign) {
+ tcg_gen_ext8s_tl(dst, src);
+ } else {
+ tcg_gen_ext8u_tl(dst, src);
+ }
+ return dst;
case OT_WORD:
- tcg_gen_ext16u_tl(reg, reg);
- break;
+ if (sign) {
+ tcg_gen_ext16s_tl(dst, src);
+ } else {
+ tcg_gen_ext16u_tl(dst, src);
+ }
+ return dst;
+#ifdef TARGET_X86_64
case OT_LONG:
- tcg_gen_ext32u_tl(reg, reg);
- break;
+ if (sign) {
+ tcg_gen_ext32s_tl(dst, src);
+ } else {
+ tcg_gen_ext32u_tl(dst, src);
+ }
+ return dst;
+#endif
default:
- break;
+ return src;
}
}
+static void gen_extu(int ot, TCGv reg)
+{
+ gen_ext_tl(reg, reg, ot, false);
+}
+
static void gen_exts(int ot, TCGv reg)
{
- switch(ot) {
- case OT_BYTE:
- tcg_gen_ext8s_tl(reg, reg);
- break;
- case OT_WORD:
- tcg_gen_ext16s_tl(reg, reg);
- break;
- case OT_LONG:
- tcg_gen_ext32s_tl(reg, reg);
- break;
- default:
- break;
- }
+ gen_ext_tl(reg, reg, ot, true);
}
static inline void gen_op_jnz_ecx(int size, int label1)
@@ -956,54 +963,15 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
switch(jcc_op) {
case JCC_Z:
fast_jcc_z:
- switch(size) {
- case 0:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
- t0 = cpu_tmp0;
- break;
- case 1:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
- t0 = cpu_tmp0;
- break;
-#ifdef TARGET_X86_64
- case 2:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
- t0 = cpu_tmp0;
- break;
-#endif
- default:
- t0 = cpu_cc_dst;
- break;
- }
+ t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, false);
tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
break;
case JCC_S:
fast_jcc_s:
- switch(size) {
- case 0:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
- 0, l1);
- break;
- case 1:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
- 0, l1);
- break;
-#ifdef TARGET_X86_64
- case 2:
- tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
- tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
- 0, l1);
- break;
-#endif
- default:
- tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
- 0, l1);
- break;
- }
+ t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, true);
+ tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, t0, 0, l1);
break;
-
+
case JCC_B:
cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
goto fast_jcc_b;
@@ -1011,28 +979,8 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
fast_jcc_b:
tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
- switch(size) {
- case 0:
- t0 = cpu_tmp0;
- tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
- tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
- break;
- case 1:
- t0 = cpu_tmp0;
- tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
- tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
- break;
-#ifdef TARGET_X86_64
- case 2:
- t0 = cpu_tmp0;
- tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
- tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
- break;
-#endif
- default:
- t0 = cpu_cc_src;
- break;
- }
+ gen_extu(size, cpu_tmp4);
+ t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
break;
@@ -1043,28 +991,8 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
cond = inv ? TCG_COND_GT : TCG_COND_LE;
fast_jcc_l:
tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
- switch(size) {
- case 0:
- t0 = cpu_tmp0;
- tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
- tcg_gen_ext8s_tl(t0, cpu_cc_src);
- break;
- case 1:
- t0 = cpu_tmp0;
- tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
- tcg_gen_ext16s_tl(t0, cpu_cc_src);
- break;
-#ifdef TARGET_X86_64
- case 2:
- t0 = cpu_tmp0;
- tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
- tcg_gen_ext32s_tl(t0, cpu_cc_src);
- break;
-#endif
- default:
- t0 = cpu_cc_src;
- break;
- }
+ gen_exts(size, cpu_tmp4);
+ t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
break;
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 02/14] i386: introduce gen_ext_tl
2012-10-06 12:30 ` [Qemu-devel] [PATCH 02/14] i386: introduce gen_ext_tl Paolo Bonzini
@ 2012-10-07 18:53 ` Blue Swirl
2012-10-09 18:58 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Blue Swirl @ 2012-10-07 18:53 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Introduce a function that abstracts extracting an 8, 16, 32 or 64-bit value
> with or without sign, generalizing gen_extu and gen_exts.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> target-i386/translate.c | 146 ++++++++++++------------------------------------
> 1 file modificato, 37 inserzioni(+), 109 rimozioni(-)
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index e2ef410..671303d 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -659,38 +659,45 @@ static inline void gen_op_movl_T0_Dshift(int ot)
> tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
> };
>
> -static void gen_extu(int ot, TCGv reg)
> +static inline TCGv gen_ext_tl(TCGv dst, TCGv src, int size, bool sign)
> {
> - switch(ot) {
> + switch(size) {
> case OT_BYTE:
> - tcg_gen_ext8u_tl(reg, reg);
> - break;
> + if (sign) {
> + tcg_gen_ext8s_tl(dst, src);
> + } else {
> + tcg_gen_ext8u_tl(dst, src);
> + }
> + return dst;
> case OT_WORD:
> - tcg_gen_ext16u_tl(reg, reg);
> - break;
> + if (sign) {
> + tcg_gen_ext16s_tl(dst, src);
> + } else {
> + tcg_gen_ext16u_tl(dst, src);
> + }
> + return dst;
> +#ifdef TARGET_X86_64
> case OT_LONG:
> - tcg_gen_ext32u_tl(reg, reg);
> - break;
> + if (sign) {
> + tcg_gen_ext32s_tl(dst, src);
> + } else {
> + tcg_gen_ext32u_tl(dst, src);
> + }
> + return dst;
> +#endif
> default:
> - break;
> + return src;
> }
> }
>
> +static void gen_extu(int ot, TCGv reg)
> +{
> + gen_ext_tl(reg, reg, ot, false);
> +}
> +
> static void gen_exts(int ot, TCGv reg)
> {
> - switch(ot) {
> - case OT_BYTE:
> - tcg_gen_ext8s_tl(reg, reg);
> - break;
> - case OT_WORD:
> - tcg_gen_ext16s_tl(reg, reg);
> - break;
> - case OT_LONG:
> - tcg_gen_ext32s_tl(reg, reg);
> - break;
> - default:
> - break;
> - }
> + gen_ext_tl(reg, reg, ot, true);
> }
>
> static inline void gen_op_jnz_ecx(int size, int label1)
> @@ -956,54 +963,15 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
> switch(jcc_op) {
> case JCC_Z:
> fast_jcc_z:
> - switch(size) {
> - case 0:
> - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff);
> - t0 = cpu_tmp0;
> - break;
> - case 1:
> - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff);
> - t0 = cpu_tmp0;
> - break;
> -#ifdef TARGET_X86_64
> - case 2:
> - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff);
> - t0 = cpu_tmp0;
> - break;
> -#endif
> - default:
> - t0 = cpu_cc_dst;
> - break;
> - }
> + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, false);
> tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1);
> break;
> case JCC_S:
> fast_jcc_s:
> - switch(size) {
> - case 0:
> - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80);
> - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
> - 0, l1);
> - break;
> - case 1:
> - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000);
> - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
> - 0, l1);
> - break;
> -#ifdef TARGET_X86_64
> - case 2:
> - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000);
> - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0,
> - 0, l1);
> - break;
> -#endif
> - default:
> - tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst,
> - 0, l1);
> - break;
> - }
> + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, true);
> + tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, t0, 0, l1);
> break;
> -
> +
> case JCC_B:
> cond = inv ? TCG_COND_GEU : TCG_COND_LTU;
> goto fast_jcc_b;
> @@ -1011,28 +979,8 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
> cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
> fast_jcc_b:
> tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
> - switch(size) {
> - case 0:
> - t0 = cpu_tmp0;
> - tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff);
> - tcg_gen_andi_tl(t0, cpu_cc_src, 0xff);
> - break;
> - case 1:
> - t0 = cpu_tmp0;
> - tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff);
> - tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff);
> - break;
> -#ifdef TARGET_X86_64
> - case 2:
> - t0 = cpu_tmp0;
> - tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff);
> - tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff);
> - break;
> -#endif
> - default:
> - t0 = cpu_cc_src;
> - break;
> - }
> + gen_extu(size, cpu_tmp4);
> + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
> tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
> break;
>
> @@ -1043,28 +991,8 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
> cond = inv ? TCG_COND_GT : TCG_COND_LE;
> fast_jcc_l:
> tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
> - switch(size) {
> - case 0:
> - t0 = cpu_tmp0;
> - tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4);
> - tcg_gen_ext8s_tl(t0, cpu_cc_src);
> - break;
> - case 1:
> - t0 = cpu_tmp0;
> - tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4);
> - tcg_gen_ext16s_tl(t0, cpu_cc_src);
> - break;
> -#ifdef TARGET_X86_64
> - case 2:
> - t0 = cpu_tmp0;
> - tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4);
> - tcg_gen_ext32s_tl(t0, cpu_cc_src);
> - break;
> -#endif
> - default:
> - t0 = cpu_cc_src;
> - break;
> - }
> + gen_exts(size, cpu_tmp4);
> + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
> tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1);
> break;
>
> --
> 1.7.12.1
>
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 02/14] i386: introduce gen_ext_tl
2012-10-06 12:30 ` [Qemu-devel] [PATCH 02/14] i386: introduce gen_ext_tl Paolo Bonzini
2012-10-07 18:53 ` Blue Swirl
@ 2012-10-09 18:58 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 18:58 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> Introduce a function that abstracts extracting an 8, 16, 32 or 64-bit value
> with or without sign, generalizing gen_extu and gen_exts.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 03/14] i386: factor setting of s->cc_op handling for string functions
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
2012-10-06 12:30 ` [Qemu-devel] [PATCH 01/14] i386: use OT_* consistently Paolo Bonzini
2012-10-06 12:30 ` [Qemu-devel] [PATCH 02/14] i386: introduce gen_ext_tl Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-09 18:59 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 04/14] i386: drop cc_op argument of gen_jcc1 Paolo Bonzini
` (10 subsequent siblings)
13 siblings, 1 reply; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
Set it to the appropriate CC_OP_SUBx constant in gen_scas/gen_cmps.
In the repz case it can be overridden to CC_OP_DYNAMIC after generating
the code.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 9 +++++----
1 file modificato, 5 inserzioni(+), 4 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 671303d..0297b9a 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1102,6 +1102,7 @@ static inline void gen_scas(DisasContext *s, int ot)
gen_op_cmpl_T0_T1_cc();
gen_op_movl_T0_Dshift(ot);
gen_op_add_reg_T0(s->aflag, R_EDI);
+ s->cc_op = CC_OP_SUBB + ot;
}
static inline void gen_cmps(DisasContext *s, int ot)
@@ -1114,6 +1115,7 @@ static inline void gen_cmps(DisasContext *s, int ot)
gen_op_movl_T0_Dshift(ot);
gen_op_add_reg_T0(s->aflag, R_ESI);
gen_op_add_reg_T0(s->aflag, R_EDI);
+ s->cc_op = CC_OP_SUBB + ot;
}
static inline void gen_ins(DisasContext *s, int ot)
@@ -1184,11 +1186,12 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot, \
l2 = gen_jz_ecx_string(s, next_eip); \
gen_ ## op(s, ot); \
gen_op_add_reg_im(s->aflag, R_ECX, -1); \
- gen_op_set_cc_op(CC_OP_SUBB + ot); \
- gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2); \
+ gen_op_set_cc_op(s->cc_op); \
+ gen_jcc1(s, s->cc_op, (JCC_Z << 1) | (nz ^ 1), l2); \
if (!s->jmp_opt) \
gen_op_jz_ecx(s->aflag, l2); \
gen_jmp(s, cur_eip); \
+ s->cc_op = CC_OP_DYNAMIC; \
}
GEN_REPZ(movs)
@@ -6074,7 +6077,6 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
} else {
gen_scas(s, ot);
- s->cc_op = CC_OP_SUBB + ot;
}
break;
@@ -6090,7 +6092,6 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
} else {
gen_cmps(s, ot);
- s->cc_op = CC_OP_SUBB + ot;
}
break;
case 0x6c: /* insS */
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 04/14] i386: drop cc_op argument of gen_jcc1
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (2 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 03/14] i386: factor setting of s->cc_op handling for string functions Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-09 18:59 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 05/14] i386: move eflags computation closer to gen_op_set_cc_op Paolo Bonzini
` (9 subsequent siblings)
13 siblings, 1 reply; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
As in the gen_repz_scas/gen_repz_cmps case, delay setting
CC_OP_DYNAMIC in gen_jcc until after code generation. All of
gen_jcc1/is_fast_jcc/gen_setcc_slow_T0 now work on s->cc_op, which makes
things a bit easier to follow and to patch.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 33 ++++++++++++++++++---------------
1 file modificato, 18 inserzioni(+), 15 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 0297b9a..38f62eb 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -944,7 +944,7 @@ static int is_fast_jcc_case(DisasContext *s, int b)
/* generate a conditional jump to label 'l1' according to jump opcode
value 'b'. In the fast case, T0 is guaranted not to be used. */
-static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
+static inline void gen_jcc1(DisasContext *s, int b, int l1)
{
int inv, jcc_op, size, cond;
TCGv t0;
@@ -952,14 +952,14 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
inv = b & 1;
jcc_op = (b >> 1) & 7;
- switch(cc_op) {
+ switch(s->cc_op) {
/* we optimize the cmp/jcc case */
case CC_OP_SUBB:
case CC_OP_SUBW:
case CC_OP_SUBL:
case CC_OP_SUBQ:
- size = cc_op - CC_OP_SUBB;
+ size = s->cc_op - CC_OP_SUBB;
switch(jcc_op) {
case JCC_Z:
fast_jcc_z:
@@ -1043,10 +1043,10 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1)
case CC_OP_SARQ:
switch(jcc_op) {
case JCC_Z:
- size = (cc_op - CC_OP_ADDB) & 3;
+ size = (s->cc_op - CC_OP_ADDB) & 3;
goto fast_jcc_z;
case JCC_S:
- size = (cc_op - CC_OP_ADDB) & 3;
+ size = (s->cc_op - CC_OP_ADDB) & 3;
goto fast_jcc_s;
default:
goto slow_jcc;
@@ -1187,7 +1187,7 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot, \
gen_ ## op(s, ot); \
gen_op_add_reg_im(s->aflag, R_ECX, -1); \
gen_op_set_cc_op(s->cc_op); \
- gen_jcc1(s, s->cc_op, (JCC_Z << 1) | (nz ^ 1), l2); \
+ gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \
if (!s->jmp_opt) \
gen_op_jz_ecx(s->aflag, l2); \
gen_jmp(s, cur_eip); \
@@ -2291,13 +2291,15 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
static inline void gen_jcc(DisasContext *s, int b,
target_ulong val, target_ulong next_eip)
{
- int l1, l2, cc_op;
+ int l1, l2;
- cc_op = s->cc_op;
- gen_update_cc_op(s);
+ if (s->cc_op != CC_OP_DYNAMIC) {
+ gen_op_set_cc_op(s->cc_op);
+ }
if (s->jmp_opt) {
l1 = gen_new_label();
- gen_jcc1(s, cc_op, b, l1);
+ gen_jcc1(s, b, l1);
+ s->cc_op = CC_OP_DYNAMIC;
gen_goto_tb(s, 0, next_eip);
@@ -2308,7 +2310,8 @@ static inline void gen_jcc(DisasContext *s, int b,
l1 = gen_new_label();
l2 = gen_new_label();
- gen_jcc1(s, cc_op, b, l1);
+ gen_jcc1(s, b, l1);
+ s->cc_op = CC_OP_DYNAMIC;
gen_jmp_im(next_eip);
tcg_gen_br(l2);
@@ -2331,7 +2334,7 @@ static void gen_setcc(DisasContext *s, int b)
t0 = tcg_temp_local_new();
tcg_gen_movi_tl(t0, 0);
l1 = gen_new_label();
- gen_jcc1(s, s->cc_op, b ^ 1, l1);
+ gen_jcc1(s, b ^ 1, l1);
tcg_gen_movi_tl(t0, 1);
gen_set_label(l1);
tcg_gen_mov_tl(cpu_T[0], t0);
@@ -6013,7 +6016,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
};
op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
l1 = gen_new_label();
- gen_jcc1(s, s->cc_op, op1, l1);
+ gen_jcc1(s, op1, l1);
gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
gen_set_label(l1);
}
@@ -6404,7 +6407,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
if (ot == OT_LONG) {
/* XXX: specific Intel behaviour ? */
l1 = gen_new_label();
- gen_jcc1(s, s->cc_op, b ^ 1, l1);
+ gen_jcc1(s, b ^ 1, l1);
tcg_gen_mov_tl(cpu_regs[reg], t0);
gen_set_label(l1);
tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
@@ -6412,7 +6415,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
#endif
{
l1 = gen_new_label();
- gen_jcc1(s, s->cc_op, b ^ 1, l1);
+ gen_jcc1(s, b ^ 1, l1);
gen_op_mov_reg_v(ot, reg, t0);
gen_set_label(l1);
}
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 05/14] i386: move eflags computation closer to gen_op_set_cc_op
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (3 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 04/14] i386: drop cc_op argument of gen_jcc1 Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-09 19:02 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 06/14] i386: factor gen_op_set_cc_op/tcg_gen_discard_tl around computing flags Paolo Bonzini
` (8 subsequent siblings)
13 siblings, 1 reply; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
In some cases this is just simple code movement, ensuring the invariant
that cpu_cc_op matches s->cc_op when calling the helpers. The next patches
need this because gen_compute_eflags and gen_compute_eflags_c will take
care of setting cpu_cc_op.
Also, for shifts, always compute EFLAGS first since it is needed whenever
the shift is non-zero, i.e. most of the time. This makes it possible
to remove some writes of CC_OP_EFLAGS to cpu_cc_op and more importantly
removes cases where s->cc_op becomes CC_OP_DYNAMIC. These are slow and
we want to avoid them: CC_OP_EFLAGS is quite efficient once we paid the
initial cost of computing the flags.
Finally, always follow gen_compute_eflags(cpu_cc_src) by setting s->cc_op
and discarding cpu_cc_dst.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 32 +++++++++++++++-----------------
1 file modificato, 15 inserzioni(+), 17 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 38f62eb..0821468 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1363,6 +1363,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
gen_op_ld_T0_A0(ot + s1->mem_index);
if (s1->cc_op != CC_OP_DYNAMIC)
gen_op_set_cc_op(s1->cc_op);
+ gen_compute_eflags_c(cpu_cc_src);
if (c > 0) {
tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
s1->cc_op = CC_OP_INCB + ot;
@@ -1374,7 +1375,6 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
gen_op_mov_reg_T0(ot, d);
else
gen_op_st_T0_A0(ot + s1->mem_index);
- gen_compute_eflags_c(cpu_cc_src);
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
@@ -1587,14 +1587,16 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
gen_op_mov_reg_v(ot, op1, t0);
}
- /* update eflags */
+ /* update eflags. It is needed anyway most of the time, do it always. */
if (s->cc_op != CC_OP_DYNAMIC)
gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_discard_tl(cpu_cc_dst);
+ s->cc_op = CC_OP_EFLAGS;
label2 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
- gen_compute_eflags(cpu_cc_src);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
tcg_gen_xor_tl(cpu_tmp0, t2, t0);
tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
@@ -1605,12 +1607,8 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
}
tcg_gen_andi_tl(t0, t0, CC_C);
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-
- tcg_gen_discard_tl(cpu_cc_dst);
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-
+
gen_set_label(label2);
- s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
tcg_temp_free(t0);
tcg_temp_free(t1);
@@ -1674,6 +1672,9 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
gen_op_set_cc_op(s->cc_op);
gen_compute_eflags(cpu_cc_src);
+ tcg_gen_discard_tl(cpu_cc_dst);
+ s->cc_op = CC_OP_EFLAGS;
+
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
tcg_gen_xor_tl(cpu_tmp0, t1, t0);
tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
@@ -1684,10 +1685,6 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
}
tcg_gen_andi_tl(t0, t0, CC_C);
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-
- tcg_gen_discard_tl(cpu_cc_dst);
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
- s->cc_op = CC_OP_EFLAGS;
}
tcg_temp_free(t0);
@@ -1703,6 +1700,9 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
if (s->cc_op != CC_OP_DYNAMIC)
gen_op_set_cc_op(s->cc_op);
+ gen_compute_eflags(cpu_cc_src);
+ tcg_gen_discard_tl(cpu_cc_dst);
+ s->cc_op = CC_OP_EFLAGS;
/* load */
if (op1 == OR_TMP0)
@@ -1756,11 +1756,7 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);
tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
- tcg_gen_discard_tl(cpu_cc_dst);
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-
gen_set_label(label1);
- s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
}
/* XXX: add faster immediate case */
@@ -6501,10 +6497,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
if (s->cc_op != CC_OP_DYNAMIC)
gen_op_set_cc_op(s->cc_op);
gen_compute_eflags(cpu_cc_src);
+ tcg_gen_discard_tl(cpu_cc_dst);
+ s->cc_op = CC_OP_EFLAGS;
+
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
- s->cc_op = CC_OP_EFLAGS;
break;
case 0x9f: /* lahf */
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 05/14] i386: move eflags computation closer to gen_op_set_cc_op
2012-10-06 12:30 ` [Qemu-devel] [PATCH 05/14] i386: move eflags computation closer to gen_op_set_cc_op Paolo Bonzini
@ 2012-10-09 19:02 ` Richard Henderson
0 siblings, 0 replies; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 19:02 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> In some cases this is just simple code movement, ensuring the invariant
> that cpu_cc_op matches s->cc_op when calling the helpers. The next patches
> need this because gen_compute_eflags and gen_compute_eflags_c will take
> care of setting cpu_cc_op.
>
> Also, for shifts, always compute EFLAGS first since it is needed whenever
> the shift is non-zero, i.e. most of the time. This makes it possible
> to remove some writes of CC_OP_EFLAGS to cpu_cc_op and more importantly
> removes cases where s->cc_op becomes CC_OP_DYNAMIC. These are slow and
> we want to avoid them: CC_OP_EFLAGS is quite efficient once we paid the
> initial cost of computing the flags.
>
> Finally, always follow gen_compute_eflags(cpu_cc_src) by setting s->cc_op
> and discarding cpu_cc_dst.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
I was about to quibble with some of this, but I see you've
cleaned up all my quibbles with subsequent patches.
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 06/14] i386: factor gen_op_set_cc_op/tcg_gen_discard_tl around computing flags
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (4 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 05/14] i386: move eflags computation closer to gen_op_set_cc_op Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-09 19:03 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 07/14] i386: add helper functions to get other flags Paolo Bonzini
` (7 subsequent siblings)
13 siblings, 1 reply; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
Before computing flags we need to store the cc_op to memory. Move this
to gen_compute_eflags_c and gen_compute_eflags rather than doing it all
over the place.
Alo, after computing the flags in cpu_cc_src we are in EFLAGS mode.
Set s->cc_op and discard cpu_cc_dst in gen_compute_eflags, rather than
doing it all over the place.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 103 +++++++++++++++++-------------------------------
1 file modificato, 37 inserzioni(+), 66 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 0821468..df81b78 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -824,55 +824,63 @@ static void gen_op_update_neg_cc(void)
}
/* compute eflags.C to reg */
-static void gen_compute_eflags_c(TCGv reg)
+static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
+ if (s->cc_op != CC_OP_DYNAMIC) {
+ gen_op_set_cc_op(s->cc_op);
+ }
gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}
/* compute all eflags to cc_src */
-static void gen_compute_eflags(TCGv reg)
+static void gen_compute_eflags(DisasContext *s, TCGv reg)
{
+ if (s->cc_op != CC_OP_DYNAMIC) {
+ gen_op_set_cc_op(s->cc_op);
+ }
gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
+ if (reg == cpu_cc_src) {
+ tcg_gen_discard_tl(cpu_cc_dst);
+ s->cc_op = CC_OP_EFLAGS;
+ }
tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}
static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
{
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
switch(jcc_op) {
case JCC_O:
- gen_compute_eflags(cpu_T[0]);
+ gen_compute_eflags(s, cpu_T[0]);
tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
break;
case JCC_B:
- gen_compute_eflags_c(cpu_T[0]);
+ gen_compute_eflags_c(s, cpu_T[0]);
break;
case JCC_Z:
- gen_compute_eflags(cpu_T[0]);
+ gen_compute_eflags(s, cpu_T[0]);
tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
break;
case JCC_BE:
- gen_compute_eflags(cpu_tmp0);
+ gen_compute_eflags(s, cpu_tmp0);
tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
break;
case JCC_S:
- gen_compute_eflags(cpu_T[0]);
+ gen_compute_eflags(s, cpu_T[0]);
tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
break;
case JCC_P:
- gen_compute_eflags(cpu_T[0]);
+ gen_compute_eflags(s, cpu_T[0]);
tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
break;
case JCC_L:
- gen_compute_eflags(cpu_tmp0);
+ gen_compute_eflags(s, cpu_tmp0);
tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
@@ -880,7 +888,7 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
break;
default:
case JCC_LE:
- gen_compute_eflags(cpu_tmp0);
+ gen_compute_eflags(s, cpu_tmp0);
tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
@@ -1268,9 +1276,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
}
switch(op) {
case OP_ADCL:
- if (s1->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s1->cc_op);
- gen_compute_eflags_c(cpu_tmp4);
+ gen_compute_eflags_c(s1, cpu_tmp4);
tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
if (d != OR_TMP0)
@@ -1285,9 +1291,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
s1->cc_op = CC_OP_DYNAMIC;
break;
case OP_SBBL:
- if (s1->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s1->cc_op);
- gen_compute_eflags_c(cpu_tmp4);
+ gen_compute_eflags_c(s1, cpu_tmp4);
tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
if (d != OR_TMP0)
@@ -1361,9 +1365,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
gen_op_mov_TN_reg(ot, 0, d);
else
gen_op_ld_T0_A0(ot + s1->mem_index);
- if (s1->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s1->cc_op);
- gen_compute_eflags_c(cpu_cc_src);
+ gen_compute_eflags_c(s1, cpu_cc_src);
if (c > 0) {
tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
s1->cc_op = CC_OP_INCB + ot;
@@ -1588,11 +1590,8 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
}
/* update eflags. It is needed anyway most of the time, do it always. */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
- tcg_gen_discard_tl(cpu_cc_dst);
- s->cc_op = CC_OP_EFLAGS;
+ gen_compute_eflags(s, cpu_cc_src);
+ assert(s->cc_op == CC_OP_EFLAGS);
label2 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
@@ -1668,12 +1667,8 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
if (op2 != 0) {
/* update eflags */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
-
- gen_compute_eflags(cpu_cc_src);
- tcg_gen_discard_tl(cpu_cc_dst);
- s->cc_op = CC_OP_EFLAGS;
+ gen_compute_eflags(s, cpu_cc_src);
+ assert(s->cc_op == CC_OP_EFLAGS);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
tcg_gen_xor_tl(cpu_tmp0, t1, t0);
@@ -1700,9 +1695,7 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
if (s->cc_op != CC_OP_DYNAMIC)
gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
- tcg_gen_discard_tl(cpu_cc_dst);
- s->cc_op = CC_OP_EFLAGS;
+ gen_compute_eflags(s, cpu_cc_src);
/* load */
if (op1 == OR_TMP0)
@@ -1752,6 +1745,7 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
gen_op_mov_reg_T0(ot, op1);
/* update eflags */
+ assert(s->cc_op == CC_OP_EFLAGS);
label1 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);
@@ -6494,12 +6488,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
goto illegal_op;
gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
- tcg_gen_discard_tl(cpu_cc_dst);
- s->cc_op = CC_OP_EFLAGS;
-
+ gen_compute_eflags(s, cpu_cc_src);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
@@ -6507,33 +6496,22 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
case 0x9f: /* lahf */
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_T[0]);
+ gen_compute_eflags(s, cpu_T[0]);
/* Note: gen_compute_eflags() only gives the condition codes */
tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
gen_op_mov_reg_T0(OT_BYTE, R_AH);
break;
case 0xf5: /* cmc */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
+ gen_compute_eflags(s, cpu_cc_src);
tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
- s->cc_op = CC_OP_EFLAGS;
break;
case 0xf8: /* clc */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
+ gen_compute_eflags(s, cpu_cc_src);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
- s->cc_op = CC_OP_EFLAGS;
break;
case 0xf9: /* stc */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
+ gen_compute_eflags(s, cpu_cc_src);
tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
- s->cc_op = CC_OP_EFLAGS;
break;
case 0xfc: /* cld */
tcg_gen_movi_i32(cpu_tmp2_i32, 1);
@@ -6861,9 +6839,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
case 0xd6: /* salc */
if (CODE64(s))
goto illegal_op;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags_c(cpu_T[0]);
+ gen_compute_eflags_c(s, cpu_T[0]);
tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
gen_op_mov_reg_T0(OT_BYTE, R_EAX);
break;
@@ -6887,11 +6863,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
switch(b) {
case 0: /* loopnz */
case 1: /* loopz */
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
gen_op_add_reg_im(s->aflag, R_ECX, -1);
gen_op_jz_ecx(s->aflag, l3);
- gen_compute_eflags(cpu_tmp0);
+ gen_compute_eflags(s, cpu_tmp0);
tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
if (b == 0) {
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
@@ -7433,12 +7407,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
} else {
gen_op_mov_reg_v(ot, rm, t0);
}
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(cpu_cc_src);
+ gen_compute_eflags(s, cpu_cc_src);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
- s->cc_op = CC_OP_EFLAGS;
tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 07/14] i386: add helper functions to get other flags
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (5 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 06/14] i386: factor gen_op_set_cc_op/tcg_gen_discard_tl around computing flags Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-07 19:04 ` Blue Swirl
2012-10-09 19:04 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 08/14] i386: do not compute eflags multiple times consecutively Paolo Bonzini
` (6 subsequent siblings)
13 siblings, 2 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
Introduce new functions to extract PF, SF, OF, ZF in addition to CF.
These provide single entry points for optimizing accesses to a single
flag.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 48 ++++++++++++++++++++++++++++++++++++------------
1 file modificato, 36 inserzioni(+), 12 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index df81b78..8f22119 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -847,21 +847,49 @@ static void gen_compute_eflags(DisasContext *s, TCGv reg)
tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
}
+/* compute eflags.P to reg */
+static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
+{
+ gen_compute_eflags(s, reg);
+ tcg_gen_shri_tl(reg, reg, 2);
+ tcg_gen_andi_tl(reg, reg, 1);
+}
+
+/* compute eflags.S to reg */
+static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
+{
+ gen_compute_eflags(s, reg);
+ tcg_gen_shri_tl(reg, reg, 7);
+ tcg_gen_andi_tl(reg, reg, 1);
+}
+
+/* compute eflags.O to reg */
+static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
+{
+ gen_compute_eflags(s, reg);
+ tcg_gen_shri_tl(reg, reg, 11);
+ tcg_gen_andi_tl(reg, reg, 1);
+}
+
+/* compute eflags.Z to reg */
+static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
+{
+ gen_compute_eflags(s, reg);
+ tcg_gen_shri_tl(reg, reg, 6);
+ tcg_gen_andi_tl(reg, reg, 1);
+}
+
static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
{
switch(jcc_op) {
case JCC_O:
- gen_compute_eflags(s, cpu_T[0]);
- tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ gen_compute_eflags_o(s, cpu_T[0]);
break;
case JCC_B:
gen_compute_eflags_c(s, cpu_T[0]);
break;
case JCC_Z:
- gen_compute_eflags(s, cpu_T[0]);
- tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ gen_compute_eflags_z(s, cpu_T[0]);
break;
case JCC_BE:
gen_compute_eflags(s, cpu_tmp0);
@@ -870,14 +898,10 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
break;
case JCC_S:
- gen_compute_eflags(s, cpu_T[0]);
- tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ gen_compute_eflags_s(s, cpu_T[0]);
break;
case JCC_P:
- gen_compute_eflags(s, cpu_T[0]);
- tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ gen_compute_eflags_p(s, cpu_T[0]);
break;
case JCC_L:
gen_compute_eflags(s, cpu_tmp0);
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 07/14] i386: add helper functions to get other flags
2012-10-06 12:30 ` [Qemu-devel] [PATCH 07/14] i386: add helper functions to get other flags Paolo Bonzini
@ 2012-10-07 19:04 ` Blue Swirl
2012-10-09 19:04 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Blue Swirl @ 2012-10-07 19:04 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Introduce new functions to extract PF, SF, OF, ZF in addition to CF.
> These provide single entry points for optimizing accesses to a single
> flag.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> target-i386/translate.c | 48 ++++++++++++++++++++++++++++++++++++------------
> 1 file modificato, 36 inserzioni(+), 12 rimozioni(-)
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index df81b78..8f22119 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -847,21 +847,49 @@ static void gen_compute_eflags(DisasContext *s, TCGv reg)
> tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
> }
>
> +/* compute eflags.P to reg */
> +static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
> +{
> + gen_compute_eflags(s, reg);
> + tcg_gen_shri_tl(reg, reg, 2);
> + tcg_gen_andi_tl(reg, reg, 1);
> +}
> +
> +/* compute eflags.S to reg */
> +static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
> +{
> + gen_compute_eflags(s, reg);
> + tcg_gen_shri_tl(reg, reg, 7);
> + tcg_gen_andi_tl(reg, reg, 1);
> +}
> +
> +/* compute eflags.O to reg */
> +static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
> +{
> + gen_compute_eflags(s, reg);
> + tcg_gen_shri_tl(reg, reg, 11);
> + tcg_gen_andi_tl(reg, reg, 1);
> +}
> +
> +/* compute eflags.Z to reg */
> +static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
> +{
> + gen_compute_eflags(s, reg);
> + tcg_gen_shri_tl(reg, reg, 6);
> + tcg_gen_andi_tl(reg, reg, 1);
> +}
> +
> static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
> {
> switch(jcc_op) {
> case JCC_O:
> - gen_compute_eflags(s, cpu_T[0]);
> - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11);
> - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> + gen_compute_eflags_o(s, cpu_T[0]);
> break;
> case JCC_B:
> gen_compute_eflags_c(s, cpu_T[0]);
> break;
> case JCC_Z:
> - gen_compute_eflags(s, cpu_T[0]);
> - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6);
> - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> + gen_compute_eflags_z(s, cpu_T[0]);
> break;
> case JCC_BE:
> gen_compute_eflags(s, cpu_tmp0);
> @@ -870,14 +898,10 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
> tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> break;
> case JCC_S:
> - gen_compute_eflags(s, cpu_T[0]);
> - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7);
> - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> + gen_compute_eflags_s(s, cpu_T[0]);
> break;
> case JCC_P:
> - gen_compute_eflags(s, cpu_T[0]);
> - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2);
> - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> + gen_compute_eflags_p(s, cpu_T[0]);
> break;
> case JCC_L:
> gen_compute_eflags(s, cpu_tmp0);
> --
> 1.7.12.1
>
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 07/14] i386: add helper functions to get other flags
2012-10-06 12:30 ` [Qemu-devel] [PATCH 07/14] i386: add helper functions to get other flags Paolo Bonzini
2012-10-07 19:04 ` Blue Swirl
@ 2012-10-09 19:04 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 19:04 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> Introduce new functions to extract PF, SF, OF, ZF in addition to CF.
> These provide single entry points for optimizing accesses to a single
> flag.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 08/14] i386: do not compute eflags multiple times consecutively
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (6 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 07/14] i386: add helper functions to get other flags Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-07 19:09 ` Blue Swirl
2012-10-09 19:14 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 09/14] i386: do not call helper to compute ZF/SF Paolo Bonzini
` (5 subsequent siblings)
13 siblings, 2 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
After calling gen_compute_eflags, leave the computed value in cc_reg_src
and set cc_op to CC_OP_EFLAGS. The next few patches will remove anyway
most calls to gen_compute_eflags.
As a result of this change it is more natural to remove the register
argument from gen_compute_eflags and change all the callers.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 73 ++++++++++++++++++++++++-------------------------
1 file modificato, 36 inserzioni(+), 37 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 8f22119..09512c3 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -834,48 +834,49 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
}
/* compute all eflags to cc_src */
-static void gen_compute_eflags(DisasContext *s, TCGv reg)
+static void gen_compute_eflags(DisasContext *s)
{
if (s->cc_op != CC_OP_DYNAMIC) {
gen_op_set_cc_op(s->cc_op);
}
- gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
- if (reg == cpu_cc_src) {
- tcg_gen_discard_tl(cpu_cc_dst);
- s->cc_op = CC_OP_EFLAGS;
+ if (s->cc_op == CC_OP_EFLAGS) {
+ return;
}
- tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+ gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
+ tcg_gen_discard_tl(cpu_cc_dst);
+ s->cc_op = CC_OP_EFLAGS;
+ tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
}
/* compute eflags.P to reg */
static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
{
- gen_compute_eflags(s, reg);
- tcg_gen_shri_tl(reg, reg, 2);
+ gen_compute_eflags(s);
+ tcg_gen_shri_tl(reg, cpu_cc_src, 2);
tcg_gen_andi_tl(reg, reg, 1);
}
/* compute eflags.S to reg */
static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
{
- gen_compute_eflags(s, reg);
- tcg_gen_shri_tl(reg, reg, 7);
+ gen_compute_eflags(s);
+ tcg_gen_shri_tl(reg, cpu_cc_src, 7);
tcg_gen_andi_tl(reg, reg, 1);
}
/* compute eflags.O to reg */
static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
{
- gen_compute_eflags(s, reg);
- tcg_gen_shri_tl(reg, reg, 11);
+ gen_compute_eflags(s);
+ tcg_gen_shri_tl(reg, cpu_cc_src, 11);
tcg_gen_andi_tl(reg, reg, 1);
}
/* compute eflags.Z to reg */
static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
{
- gen_compute_eflags(s, reg);
- tcg_gen_shri_tl(reg, reg, 6);
+ gen_compute_eflags(s);
+ tcg_gen_shri_tl(reg, cpu_cc_src, 6);
tcg_gen_andi_tl(reg, reg, 1);
}
@@ -892,9 +893,9 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
gen_compute_eflags_z(s, cpu_T[0]);
break;
case JCC_BE:
- gen_compute_eflags(s, cpu_tmp0);
- tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
- tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ gen_compute_eflags(s);
+ tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
break;
case JCC_S:
@@ -904,18 +905,18 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
gen_compute_eflags_p(s, cpu_T[0]);
break;
case JCC_L:
- gen_compute_eflags(s, cpu_tmp0);
- tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
- tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
+ gen_compute_eflags(s);
+ tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
+ tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */
tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
break;
default:
case JCC_LE:
- gen_compute_eflags(s, cpu_tmp0);
- tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
- tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
- tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
+ gen_compute_eflags(s);
+ tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
+ tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */
+ tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */
tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
@@ -1614,7 +1615,7 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
}
/* update eflags. It is needed anyway most of the time, do it always. */
- gen_compute_eflags(s, cpu_cc_src);
+ gen_compute_eflags(s);
assert(s->cc_op == CC_OP_EFLAGS);
label2 = gen_new_label();
@@ -1691,7 +1692,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
if (op2 != 0) {
/* update eflags */
- gen_compute_eflags(s, cpu_cc_src);
+ gen_compute_eflags(s);
assert(s->cc_op == CC_OP_EFLAGS);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
@@ -1717,9 +1718,7 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
{
int label1;
- if (s->cc_op != CC_OP_DYNAMIC)
- gen_op_set_cc_op(s->cc_op);
- gen_compute_eflags(s, cpu_cc_src);
+ gen_compute_eflags(s);
/* load */
if (op1 == OR_TMP0)
@@ -6512,7 +6511,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
goto illegal_op;
gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
- gen_compute_eflags(s, cpu_cc_src);
+ gen_compute_eflags(s);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
@@ -6520,21 +6519,21 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
case 0x9f: /* lahf */
if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
goto illegal_op;
- gen_compute_eflags(s, cpu_T[0]);
+ gen_compute_eflags(s);
/* Note: gen_compute_eflags() only gives the condition codes */
- tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
+ tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
gen_op_mov_reg_T0(OT_BYTE, R_AH);
break;
case 0xf5: /* cmc */
- gen_compute_eflags(s, cpu_cc_src);
+ gen_compute_eflags(s);
tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
break;
case 0xf8: /* clc */
- gen_compute_eflags(s, cpu_cc_src);
+ gen_compute_eflags(s);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
break;
case 0xf9: /* stc */
- gen_compute_eflags(s, cpu_cc_src);
+ gen_compute_eflags(s);
tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
break;
case 0xfc: /* cld */
@@ -6889,7 +6888,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
case 1: /* loopz */
gen_op_add_reg_im(s->aflag, R_ECX, -1);
gen_op_jz_ecx(s->aflag, l3);
- gen_compute_eflags(s, cpu_tmp0);
+ gen_compute_eflags(s);
tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
if (b == 0) {
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
@@ -7431,7 +7430,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
} else {
gen_op_mov_reg_v(ot, rm, t0);
}
- gen_compute_eflags(s, cpu_cc_src);
+ gen_compute_eflags(s);
tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
tcg_temp_free(t0);
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 08/14] i386: do not compute eflags multiple times consecutively
2012-10-06 12:30 ` [Qemu-devel] [PATCH 08/14] i386: do not compute eflags multiple times consecutively Paolo Bonzini
@ 2012-10-07 19:09 ` Blue Swirl
2012-10-09 19:14 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Blue Swirl @ 2012-10-07 19:09 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> After calling gen_compute_eflags, leave the computed value in cc_reg_src
> and set cc_op to CC_OP_EFLAGS. The next few patches will remove anyway
> most calls to gen_compute_eflags.
>
> As a result of this change it is more natural to remove the register
> argument from gen_compute_eflags and change all the callers.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> target-i386/translate.c | 73 ++++++++++++++++++++++++-------------------------
> 1 file modificato, 36 inserzioni(+), 37 rimozioni(-)
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 8f22119..09512c3 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -834,48 +834,49 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
> }
>
> /* compute all eflags to cc_src */
> -static void gen_compute_eflags(DisasContext *s, TCGv reg)
> +static void gen_compute_eflags(DisasContext *s)
> {
> if (s->cc_op != CC_OP_DYNAMIC) {
> gen_op_set_cc_op(s->cc_op);
> }
> - gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
> - if (reg == cpu_cc_src) {
> - tcg_gen_discard_tl(cpu_cc_dst);
> - s->cc_op = CC_OP_EFLAGS;
> + if (s->cc_op == CC_OP_EFLAGS) {
> + return;
> }
> - tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
> + gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
> + tcg_gen_discard_tl(cpu_cc_dst);
> + s->cc_op = CC_OP_EFLAGS;
> + tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
> }
>
> /* compute eflags.P to reg */
> static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
> {
> - gen_compute_eflags(s, reg);
> - tcg_gen_shri_tl(reg, reg, 2);
> + gen_compute_eflags(s);
> + tcg_gen_shri_tl(reg, cpu_cc_src, 2);
> tcg_gen_andi_tl(reg, reg, 1);
> }
>
> /* compute eflags.S to reg */
> static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
> {
> - gen_compute_eflags(s, reg);
> - tcg_gen_shri_tl(reg, reg, 7);
> + gen_compute_eflags(s);
> + tcg_gen_shri_tl(reg, cpu_cc_src, 7);
> tcg_gen_andi_tl(reg, reg, 1);
> }
>
> /* compute eflags.O to reg */
> static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
> {
> - gen_compute_eflags(s, reg);
> - tcg_gen_shri_tl(reg, reg, 11);
> + gen_compute_eflags(s);
> + tcg_gen_shri_tl(reg, cpu_cc_src, 11);
> tcg_gen_andi_tl(reg, reg, 1);
> }
>
> /* compute eflags.Z to reg */
> static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
> {
> - gen_compute_eflags(s, reg);
> - tcg_gen_shri_tl(reg, reg, 6);
> + gen_compute_eflags(s);
> + tcg_gen_shri_tl(reg, cpu_cc_src, 6);
> tcg_gen_andi_tl(reg, reg, 1);
> }
>
> @@ -892,9 +893,9 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
> gen_compute_eflags_z(s, cpu_T[0]);
> break;
> case JCC_BE:
> - gen_compute_eflags(s, cpu_tmp0);
> - tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
> - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
> + gen_compute_eflags(s);
> + tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6);
> + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
> tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> break;
> case JCC_S:
> @@ -904,18 +905,18 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
> gen_compute_eflags_p(s, cpu_T[0]);
> break;
> case JCC_L:
> - gen_compute_eflags(s, cpu_tmp0);
> - tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
> - tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
> + gen_compute_eflags(s);
> + tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
> + tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */
> tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
> tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> break;
> default:
> case JCC_LE:
> - gen_compute_eflags(s, cpu_tmp0);
> - tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
> - tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
> - tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
> + gen_compute_eflags(s);
> + tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
> + tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */
> + tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */
> tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
> tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
> tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> @@ -1614,7 +1615,7 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
> }
>
> /* update eflags. It is needed anyway most of the time, do it always. */
> - gen_compute_eflags(s, cpu_cc_src);
> + gen_compute_eflags(s);
> assert(s->cc_op == CC_OP_EFLAGS);
>
> label2 = gen_new_label();
> @@ -1691,7 +1692,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
>
> if (op2 != 0) {
> /* update eflags */
> - gen_compute_eflags(s, cpu_cc_src);
> + gen_compute_eflags(s);
> assert(s->cc_op == CC_OP_EFLAGS);
>
> tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
> @@ -1717,9 +1718,7 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
> {
> int label1;
>
> - if (s->cc_op != CC_OP_DYNAMIC)
> - gen_op_set_cc_op(s->cc_op);
> - gen_compute_eflags(s, cpu_cc_src);
> + gen_compute_eflags(s);
>
> /* load */
> if (op1 == OR_TMP0)
> @@ -6512,7 +6511,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
> if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
> goto illegal_op;
> gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
> - gen_compute_eflags(s, cpu_cc_src);
> + gen_compute_eflags(s);
> tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
> tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
> tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
> @@ -6520,21 +6519,21 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
> case 0x9f: /* lahf */
> if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
> goto illegal_op;
> - gen_compute_eflags(s, cpu_T[0]);
> + gen_compute_eflags(s);
> /* Note: gen_compute_eflags() only gives the condition codes */
> - tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
> + tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
> gen_op_mov_reg_T0(OT_BYTE, R_AH);
> break;
> case 0xf5: /* cmc */
> - gen_compute_eflags(s, cpu_cc_src);
> + gen_compute_eflags(s);
> tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
> break;
> case 0xf8: /* clc */
> - gen_compute_eflags(s, cpu_cc_src);
> + gen_compute_eflags(s);
> tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
> break;
> case 0xf9: /* stc */
> - gen_compute_eflags(s, cpu_cc_src);
> + gen_compute_eflags(s);
> tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
> break;
> case 0xfc: /* cld */
> @@ -6889,7 +6888,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
> case 1: /* loopz */
> gen_op_add_reg_im(s->aflag, R_ECX, -1);
> gen_op_jz_ecx(s->aflag, l3);
> - gen_compute_eflags(s, cpu_tmp0);
> + gen_compute_eflags(s);
> tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
> if (b == 0) {
> tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
> @@ -7431,7 +7430,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
> } else {
> gen_op_mov_reg_v(ot, rm, t0);
> }
> - gen_compute_eflags(s, cpu_cc_src);
> + gen_compute_eflags(s);
> tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
> tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
> tcg_temp_free(t0);
> --
> 1.7.12.1
>
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 08/14] i386: do not compute eflags multiple times consecutively
2012-10-06 12:30 ` [Qemu-devel] [PATCH 08/14] i386: do not compute eflags multiple times consecutively Paolo Bonzini
2012-10-07 19:09 ` Blue Swirl
@ 2012-10-09 19:14 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 19:14 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> +static void gen_compute_eflags(DisasContext *s)
> {
> if (s->cc_op != CC_OP_DYNAMIC) {
> gen_op_set_cc_op(s->cc_op);
> }
> + if (s->cc_op == CC_OP_EFLAGS) {
> + return;
> }
> + gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
> + tcg_gen_discard_tl(cpu_cc_dst);
> + s->cc_op = CC_OP_EFLAGS;
> + tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
> }
Can we at this point in the series assert that if s->cc_op == CC_OP_EFLAGS,
then cpu_cc_op has also been assigned CC_OP_EFLAGS? If so, then we can do
if (s->cc_op == CC_OP_EFLAGS) {
return;
}
if (s->cc_op != CC_OP_DYNAMIC) {
gen_op_set_cc_op(s->cc_op);
}
...
As-is it would appear that we get redundant assignments to cpu_cc_op when
calling this routine twice in a row. And with that helper call in between
we won't be able to eliminate the second assignment.
I'll also note that we'd probably get better code if gen_helper_cc_compute_all
took all of cpu_cc_{op,src,dst} as arguments so that it could be CONST+PURE.
With just that changed I think the redundant assignment to cpu_cc_op would
be eliminated.
All that said, I don't see anything wrong with the patch as-is, and probably
these other things I mention would want to be follow-on patches anyway.
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 09/14] i386: do not call helper to compute ZF/SF
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (7 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 08/14] i386: do not compute eflags multiple times consecutively Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-07 19:16 ` Blue Swirl
` (2 more replies)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 10/14] i386: use inverted setcond when computing NS or NZ Paolo Bonzini
` (4 subsequent siblings)
13 siblings, 3 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
ZF, SF and PF can always be computed from CC_DST except in the
CC_OP_EFLAGS case (and CC_OP_DYNAMIC, which just resolves to CC_OP_EFLAGS
in gen_compute_eflags). Use setcond to compute ZF and SF.
We could also use a table lookup to compute PF.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 28 ++++++++++++++++++++++------
1 file modificato, 22 inserzioni(+), 6 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 09512c3..daa36c1 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -859,9 +859,17 @@ static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
/* compute eflags.S to reg */
static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
{
- gen_compute_eflags(s);
- tcg_gen_shri_tl(reg, cpu_cc_src, 7);
- tcg_gen_andi_tl(reg, reg, 1);
+ if (s->cc_op == CC_OP_DYNAMIC) {
+ gen_compute_eflags(s);
+ }
+ if (s->cc_op == CC_OP_EFLAGS) {
+ tcg_gen_shri_tl(reg, cpu_cc_src, 7);
+ tcg_gen_andi_tl(reg, reg, 1);
+ } else {
+ int size = (s->cc_op - CC_OP_ADDB) & 3;
+ gen_ext_tl(reg, cpu_cc_dst, size, true);
+ tcg_gen_setcondi_tl(TCG_COND_LT, reg, reg, 0);
+ }
}
/* compute eflags.O to reg */
@@ -875,9 +883,17 @@ static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
/* compute eflags.Z to reg */
static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
{
- gen_compute_eflags(s);
- tcg_gen_shri_tl(reg, cpu_cc_src, 6);
- tcg_gen_andi_tl(reg, reg, 1);
+ if (s->cc_op == CC_OP_DYNAMIC) {
+ gen_compute_eflags(s);
+ }
+ if (s->cc_op == CC_OP_EFLAGS) {
+ tcg_gen_shri_tl(reg, cpu_cc_src, 6);
+ tcg_gen_andi_tl(reg, reg, 1);
+ } else {
+ int size = (s->cc_op - CC_OP_ADDB) & 3;
+ gen_ext_tl(reg, cpu_cc_dst, size, false);
+ tcg_gen_setcondi_tl(TCG_COND_EQ, reg, cpu_cc_dst, 0);
+ }
}
static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 09/14] i386: do not call helper to compute ZF/SF
2012-10-06 12:30 ` [Qemu-devel] [PATCH 09/14] i386: do not call helper to compute ZF/SF Paolo Bonzini
@ 2012-10-07 19:16 ` Blue Swirl
2012-10-09 19:15 ` Richard Henderson
2012-10-09 19:16 ` Richard Henderson
2 siblings, 0 replies; 43+ messages in thread
From: Blue Swirl @ 2012-10-07 19:16 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> ZF, SF and PF can always be computed from CC_DST except in the
> CC_OP_EFLAGS case (and CC_OP_DYNAMIC, which just resolves to CC_OP_EFLAGS
> in gen_compute_eflags). Use setcond to compute ZF and SF.
>
> We could also use a table lookup to compute PF.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> target-i386/translate.c | 28 ++++++++++++++++++++++------
> 1 file modificato, 22 inserzioni(+), 6 rimozioni(-)
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 09512c3..daa36c1 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -859,9 +859,17 @@ static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
> /* compute eflags.S to reg */
> static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
> {
> - gen_compute_eflags(s);
> - tcg_gen_shri_tl(reg, cpu_cc_src, 7);
> - tcg_gen_andi_tl(reg, reg, 1);
> + if (s->cc_op == CC_OP_DYNAMIC) {
> + gen_compute_eflags(s);
> + }
> + if (s->cc_op == CC_OP_EFLAGS) {
> + tcg_gen_shri_tl(reg, cpu_cc_src, 7);
> + tcg_gen_andi_tl(reg, reg, 1);
> + } else {
A comment would be nice here, like something extracted from commit message.
> + int size = (s->cc_op - CC_OP_ADDB) & 3;
> + gen_ext_tl(reg, cpu_cc_dst, size, true);
> + tcg_gen_setcondi_tl(TCG_COND_LT, reg, reg, 0);
> + }
> }
>
> /* compute eflags.O to reg */
> @@ -875,9 +883,17 @@ static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
> /* compute eflags.Z to reg */
> static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
> {
> - gen_compute_eflags(s);
> - tcg_gen_shri_tl(reg, cpu_cc_src, 6);
> - tcg_gen_andi_tl(reg, reg, 1);
> + if (s->cc_op == CC_OP_DYNAMIC) {
> + gen_compute_eflags(s);
> + }
> + if (s->cc_op == CC_OP_EFLAGS) {
> + tcg_gen_shri_tl(reg, cpu_cc_src, 6);
> + tcg_gen_andi_tl(reg, reg, 1);
> + } else {
Ditto.
> + int size = (s->cc_op - CC_OP_ADDB) & 3;
> + gen_ext_tl(reg, cpu_cc_dst, size, false);
> + tcg_gen_setcondi_tl(TCG_COND_EQ, reg, cpu_cc_dst, 0);
> + }
> }
>
> static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
> --
> 1.7.12.1
>
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 09/14] i386: do not call helper to compute ZF/SF
2012-10-06 12:30 ` [Qemu-devel] [PATCH 09/14] i386: do not call helper to compute ZF/SF Paolo Bonzini
2012-10-07 19:16 ` Blue Swirl
@ 2012-10-09 19:15 ` Richard Henderson
2012-10-09 19:16 ` Richard Henderson
2 siblings, 0 replies; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 19:15 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> ZF, SF and PF can always be computed from CC_DST except in the
> CC_OP_EFLAGS case (and CC_OP_DYNAMIC, which just resolves to CC_OP_EFLAGS
> in gen_compute_eflags). Use setcond to compute ZF and SF.
>
> We could also use a table lookup to compute PF.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 09/14] i386: do not call helper to compute ZF/SF
2012-10-06 12:30 ` [Qemu-devel] [PATCH 09/14] i386: do not call helper to compute ZF/SF Paolo Bonzini
2012-10-07 19:16 ` Blue Swirl
2012-10-09 19:15 ` Richard Henderson
@ 2012-10-09 19:16 ` Richard Henderson
2012-10-10 6:42 ` Paolo Bonzini
2 siblings, 1 reply; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 19:16 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> + int size = (s->cc_op - CC_OP_ADDB) & 3;
> + gen_ext_tl(reg, cpu_cc_dst, size, false);
> + tcg_gen_setcondi_tl(TCG_COND_EQ, reg, cpu_cc_dst, 0);
I take that back. Should be (EQ, reg, reg, 0) here;
you've dropped the extension on the floor.
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 09/14] i386: do not call helper to compute ZF/SF
2012-10-09 19:16 ` Richard Henderson
@ 2012-10-10 6:42 ` Paolo Bonzini
0 siblings, 0 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-10 6:42 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
Il 09/10/2012 21:16, Richard Henderson ha scritto:
>> > + int size = (s->cc_op - CC_OP_ADDB) & 3;
>> > + gen_ext_tl(reg, cpu_cc_dst, size, false);
>> > + tcg_gen_setcondi_tl(TCG_COND_EQ, reg, cpu_cc_dst, 0);
> I take that back. Should be (EQ, reg, reg, 0) here;
> you've dropped the extension on the floor.
More precisely it should be:
TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
tcg_gen_setcondi_tl(TCG_COND_EQ, reg, t0, 0);
and similarly for SF.
Paolo
^ permalink raw reply [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 10/14] i386: use inverted setcond when computing NS or NZ
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (8 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 09/14] i386: do not call helper to compute ZF/SF Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-07 19:19 ` Blue Swirl
2012-10-09 19:17 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 11/14] i386: convert gen_compute_eflags_c to TCG Paolo Bonzini
` (3 subsequent siblings)
13 siblings, 2 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
Make gen_compute_eflags_z and gen_compute_eflags_s able to compute the
inverted condition, and use this in gen_setcc_slow_T0. We cannot do it
yet in gen_compute_eflags_c, but prepare the code for it anyway. It is
not worthwhile for PF, as usual.
shr+and+xor could be replaced by and+setcond. I'm not doing it yet.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 51 +++++++++++++++++++++++++++++--------------------
1 file modificato, 30 inserzioni(+), 21 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index daa36c1..abcd944 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -824,13 +824,16 @@ static void gen_op_update_neg_cc(void)
}
/* compute eflags.C to reg */
-static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
+static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
{
if (s->cc_op != CC_OP_DYNAMIC) {
gen_op_set_cc_op(s->cc_op);
}
gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+ if (inv) {
+ tcg_gen_xori_tl(reg, reg, 1);
+ }
}
/* compute all eflags to cc_src */
@@ -857,7 +860,7 @@ static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
}
/* compute eflags.S to reg */
-static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
+static void gen_compute_eflags_s(DisasContext *s, TCGv reg, bool inv)
{
if (s->cc_op == CC_OP_DYNAMIC) {
gen_compute_eflags(s);
@@ -865,10 +868,13 @@ static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
if (s->cc_op == CC_OP_EFLAGS) {
tcg_gen_shri_tl(reg, cpu_cc_src, 7);
tcg_gen_andi_tl(reg, reg, 1);
+ if (inv) {
+ tcg_gen_xori_tl(reg, reg, 1);
+ }
} else {
int size = (s->cc_op - CC_OP_ADDB) & 3;
gen_ext_tl(reg, cpu_cc_dst, size, true);
- tcg_gen_setcondi_tl(TCG_COND_LT, reg, reg, 0);
+ tcg_gen_setcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, reg, reg, 0);
}
}
@@ -881,7 +887,7 @@ static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
}
/* compute eflags.Z to reg */
-static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
+static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv)
{
if (s->cc_op == CC_OP_DYNAMIC) {
gen_compute_eflags(s);
@@ -889,25 +895,28 @@ static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
if (s->cc_op == CC_OP_EFLAGS) {
tcg_gen_shri_tl(reg, cpu_cc_src, 6);
tcg_gen_andi_tl(reg, reg, 1);
+ if (inv) {
+ tcg_gen_xori_tl(reg, reg, 1);
+ }
} else {
int size = (s->cc_op - CC_OP_ADDB) & 3;
gen_ext_tl(reg, cpu_cc_dst, size, false);
- tcg_gen_setcondi_tl(TCG_COND_EQ, reg, cpu_cc_dst, 0);
+ tcg_gen_setcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, reg, cpu_cc_dst, 0);
}
}
-static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
+static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op, bool inv)
{
switch(jcc_op) {
case JCC_O:
gen_compute_eflags_o(s, cpu_T[0]);
break;
case JCC_B:
- gen_compute_eflags_c(s, cpu_T[0]);
- break;
+ gen_compute_eflags_c(s, cpu_T[0], inv);
+ return;
case JCC_Z:
- gen_compute_eflags_z(s, cpu_T[0]);
- break;
+ gen_compute_eflags_z(s, cpu_T[0], inv);
+ return;
case JCC_BE:
gen_compute_eflags(s);
tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6);
@@ -915,8 +924,8 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
break;
case JCC_S:
- gen_compute_eflags_s(s, cpu_T[0]);
- break;
+ gen_compute_eflags_s(s, cpu_T[0], inv);
+ return;
case JCC_P:
gen_compute_eflags_p(s, cpu_T[0]);
break;
@@ -938,6 +947,9 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
break;
}
+ if (inv) {
+ tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
+ }
}
/* return true if setcc_slow is not needed (WARNING: must be kept in
@@ -1103,7 +1115,7 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1)
break;
default:
slow_jcc:
- gen_setcc_slow_T0(s, jcc_op);
+ gen_setcc_slow_T0(s, jcc_op, false);
tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
cpu_T[0], 0, l1);
break;
@@ -1317,7 +1329,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
}
switch(op) {
case OP_ADCL:
- gen_compute_eflags_c(s1, cpu_tmp4);
+ gen_compute_eflags_c(s1, cpu_tmp4, false);
tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
if (d != OR_TMP0)
@@ -1332,7 +1344,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
s1->cc_op = CC_OP_DYNAMIC;
break;
case OP_SBBL:
- gen_compute_eflags_c(s1, cpu_tmp4);
+ gen_compute_eflags_c(s1, cpu_tmp4, false);
tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
if (d != OR_TMP0)
@@ -1406,7 +1418,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
gen_op_mov_TN_reg(ot, 0, d);
else
gen_op_ld_T0_A0(ot + s1->mem_index);
- gen_compute_eflags_c(s1, cpu_cc_src);
+ gen_compute_eflags_c(s1, cpu_cc_src, false);
if (c > 0) {
tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
s1->cc_op = CC_OP_INCB + ot;
@@ -2374,10 +2386,7 @@ static void gen_setcc(DisasContext *s, int b)
worth to */
inv = b & 1;
jcc_op = (b >> 1) & 7;
- gen_setcc_slow_T0(s, jcc_op);
- if (inv) {
- tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
- }
+ gen_setcc_slow_T0(s, jcc_op, inv);
}
}
@@ -6878,7 +6887,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
case 0xd6: /* salc */
if (CODE64(s))
goto illegal_op;
- gen_compute_eflags_c(s, cpu_T[0]);
+ gen_compute_eflags_c(s, cpu_T[0], false);
tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
gen_op_mov_reg_T0(OT_BYTE, R_EAX);
break;
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 10/14] i386: use inverted setcond when computing NS or NZ
2012-10-06 12:30 ` [Qemu-devel] [PATCH 10/14] i386: use inverted setcond when computing NS or NZ Paolo Bonzini
@ 2012-10-07 19:19 ` Blue Swirl
2012-10-09 19:17 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Blue Swirl @ 2012-10-07 19:19 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Make gen_compute_eflags_z and gen_compute_eflags_s able to compute the
> inverted condition, and use this in gen_setcc_slow_T0. We cannot do it
> yet in gen_compute_eflags_c, but prepare the code for it anyway. It is
> not worthwhile for PF, as usual.
>
> shr+and+xor could be replaced by and+setcond. I'm not doing it yet.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> target-i386/translate.c | 51 +++++++++++++++++++++++++++++--------------------
> 1 file modificato, 30 inserzioni(+), 21 rimozioni(-)
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index daa36c1..abcd944 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -824,13 +824,16 @@ static void gen_op_update_neg_cc(void)
> }
>
> /* compute eflags.C to reg */
> -static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
> +static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
> {
> if (s->cc_op != CC_OP_DYNAMIC) {
> gen_op_set_cc_op(s->cc_op);
> }
> gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
> tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
> + if (inv) {
> + tcg_gen_xori_tl(reg, reg, 1);
> + }
> }
>
> /* compute all eflags to cc_src */
> @@ -857,7 +860,7 @@ static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
> }
>
> /* compute eflags.S to reg */
> -static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
> +static void gen_compute_eflags_s(DisasContext *s, TCGv reg, bool inv)
> {
> if (s->cc_op == CC_OP_DYNAMIC) {
> gen_compute_eflags(s);
> @@ -865,10 +868,13 @@ static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
> if (s->cc_op == CC_OP_EFLAGS) {
> tcg_gen_shri_tl(reg, cpu_cc_src, 7);
> tcg_gen_andi_tl(reg, reg, 1);
> + if (inv) {
> + tcg_gen_xori_tl(reg, reg, 1);
> + }
> } else {
> int size = (s->cc_op - CC_OP_ADDB) & 3;
> gen_ext_tl(reg, cpu_cc_dst, size, true);
> - tcg_gen_setcondi_tl(TCG_COND_LT, reg, reg, 0);
> + tcg_gen_setcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, reg, reg, 0);
> }
> }
>
> @@ -881,7 +887,7 @@ static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
> }
>
> /* compute eflags.Z to reg */
> -static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
> +static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv)
> {
> if (s->cc_op == CC_OP_DYNAMIC) {
> gen_compute_eflags(s);
> @@ -889,25 +895,28 @@ static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
> if (s->cc_op == CC_OP_EFLAGS) {
> tcg_gen_shri_tl(reg, cpu_cc_src, 6);
> tcg_gen_andi_tl(reg, reg, 1);
> + if (inv) {
> + tcg_gen_xori_tl(reg, reg, 1);
> + }
> } else {
> int size = (s->cc_op - CC_OP_ADDB) & 3;
> gen_ext_tl(reg, cpu_cc_dst, size, false);
> - tcg_gen_setcondi_tl(TCG_COND_EQ, reg, cpu_cc_dst, 0);
> + tcg_gen_setcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, reg, cpu_cc_dst, 0);
> }
> }
>
> -static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
> +static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op, bool inv)
> {
> switch(jcc_op) {
> case JCC_O:
> gen_compute_eflags_o(s, cpu_T[0]);
> break;
> case JCC_B:
> - gen_compute_eflags_c(s, cpu_T[0]);
> - break;
> + gen_compute_eflags_c(s, cpu_T[0], inv);
> + return;
> case JCC_Z:
> - gen_compute_eflags_z(s, cpu_T[0]);
> - break;
> + gen_compute_eflags_z(s, cpu_T[0], inv);
> + return;
> case JCC_BE:
> gen_compute_eflags(s);
> tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6);
> @@ -915,8 +924,8 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
> tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> break;
> case JCC_S:
> - gen_compute_eflags_s(s, cpu_T[0]);
> - break;
> + gen_compute_eflags_s(s, cpu_T[0], inv);
> + return;
> case JCC_P:
> gen_compute_eflags_p(s, cpu_T[0]);
> break;
> @@ -938,6 +947,9 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
> tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> break;
> }
> + if (inv) {
> + tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
> + }
> }
>
> /* return true if setcc_slow is not needed (WARNING: must be kept in
> @@ -1103,7 +1115,7 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1)
> break;
> default:
> slow_jcc:
> - gen_setcc_slow_T0(s, jcc_op);
> + gen_setcc_slow_T0(s, jcc_op, false);
> tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
> cpu_T[0], 0, l1);
> break;
> @@ -1317,7 +1329,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
> }
> switch(op) {
> case OP_ADCL:
> - gen_compute_eflags_c(s1, cpu_tmp4);
> + gen_compute_eflags_c(s1, cpu_tmp4, false);
> tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
> if (d != OR_TMP0)
> @@ -1332,7 +1344,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d)
> s1->cc_op = CC_OP_DYNAMIC;
> break;
> case OP_SBBL:
> - gen_compute_eflags_c(s1, cpu_tmp4);
> + gen_compute_eflags_c(s1, cpu_tmp4, false);
> tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
> if (d != OR_TMP0)
> @@ -1406,7 +1418,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c)
> gen_op_mov_TN_reg(ot, 0, d);
> else
> gen_op_ld_T0_A0(ot + s1->mem_index);
> - gen_compute_eflags_c(s1, cpu_cc_src);
> + gen_compute_eflags_c(s1, cpu_cc_src, false);
> if (c > 0) {
> tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
> s1->cc_op = CC_OP_INCB + ot;
> @@ -2374,10 +2386,7 @@ static void gen_setcc(DisasContext *s, int b)
> worth to */
> inv = b & 1;
> jcc_op = (b >> 1) & 7;
> - gen_setcc_slow_T0(s, jcc_op);
> - if (inv) {
> - tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
> - }
> + gen_setcc_slow_T0(s, jcc_op, inv);
> }
> }
>
> @@ -6878,7 +6887,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
> case 0xd6: /* salc */
> if (CODE64(s))
> goto illegal_op;
> - gen_compute_eflags_c(s, cpu_T[0]);
> + gen_compute_eflags_c(s, cpu_T[0], false);
> tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
> gen_op_mov_reg_T0(OT_BYTE, R_EAX);
> break;
> --
> 1.7.12.1
>
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 10/14] i386: use inverted setcond when computing NS or NZ
2012-10-06 12:30 ` [Qemu-devel] [PATCH 10/14] i386: use inverted setcond when computing NS or NZ Paolo Bonzini
2012-10-07 19:19 ` Blue Swirl
@ 2012-10-09 19:17 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 19:17 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> Make gen_compute_eflags_z and gen_compute_eflags_s able to compute the
> inverted condition, and use this in gen_setcc_slow_T0. We cannot do it
> yet in gen_compute_eflags_c, but prepare the code for it anyway. It is
> not worthwhile for PF, as usual.
>
> shr+and+xor could be replaced by and+setcond. I'm not doing it yet.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 11/14] i386: convert gen_compute_eflags_c to TCG
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (9 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 10/14] i386: use inverted setcond when computing NS or NZ Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-07 19:35 ` Blue Swirl
2012-10-09 20:07 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 12/14] i386: change gen_setcc_slow_T0 to gen_setcc_slow Paolo Bonzini
` (2 subsequent siblings)
13 siblings, 2 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
Do the switch at translation time, converting the helper templates to
TCG opcodes. In some cases CF can be computed with a single setcond,
though others it may require a little more work.
In the CC_OP_DYNAMIC case, compute the whole EFLAGS, same as for ZF/SF/PF.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/cc_helper.c | 118 ---------------------------------
target-i386/cc_helper_template.h | 76 ----------------------
target-i386/helper.h | 1 -
target-i386/translate.c | 137 +++++++++++++++++++++++++++++++++++----
4 file modificati, 124 inserzioni(+), 208 rimozioni(-)
diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 9422003..214d715 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -80,11 +80,6 @@ static int compute_all_eflags(CPUX86State *env)
return CC_SRC;
}
-static int compute_c_eflags(CPUX86State *env)
-{
- return CC_SRC & CC_C;
-}
-
uint32_t helper_cc_compute_all(CPUX86State *env, int op)
{
switch (op) {
@@ -203,119 +198,6 @@ uint32_t cpu_cc_compute_all(CPUX86State *env, int op)
return helper_cc_compute_all(env, op);
}
-uint32_t helper_cc_compute_c(CPUX86State *env, int op)
-{
- switch (op) {
- default: /* should never happen */
- return 0;
-
- case CC_OP_EFLAGS:
- return compute_c_eflags(env);
-
- case CC_OP_MULB:
- return compute_c_mull(env);
- case CC_OP_MULW:
- return compute_c_mull(env);
- case CC_OP_MULL:
- return compute_c_mull(env);
-
- case CC_OP_ADDB:
- return compute_c_addb(env);
- case CC_OP_ADDW:
- return compute_c_addw(env);
- case CC_OP_ADDL:
- return compute_c_addl(env);
-
- case CC_OP_ADCB:
- return compute_c_adcb(env);
- case CC_OP_ADCW:
- return compute_c_adcw(env);
- case CC_OP_ADCL:
- return compute_c_adcl(env);
-
- case CC_OP_SUBB:
- return compute_c_subb(env);
- case CC_OP_SUBW:
- return compute_c_subw(env);
- case CC_OP_SUBL:
- return compute_c_subl(env);
-
- case CC_OP_SBBB:
- return compute_c_sbbb(env);
- case CC_OP_SBBW:
- return compute_c_sbbw(env);
- case CC_OP_SBBL:
- return compute_c_sbbl(env);
-
- case CC_OP_LOGICB:
- return compute_c_logicb();
- case CC_OP_LOGICW:
- return compute_c_logicw();
- case CC_OP_LOGICL:
- return compute_c_logicl();
-
- case CC_OP_INCB:
- return compute_c_incl(env);
- case CC_OP_INCW:
- return compute_c_incl(env);
- case CC_OP_INCL:
- return compute_c_incl(env);
-
- case CC_OP_DECB:
- return compute_c_incl(env);
- case CC_OP_DECW:
- return compute_c_incl(env);
- case CC_OP_DECL:
- return compute_c_incl(env);
-
- case CC_OP_SHLB:
- return compute_c_shlb(env);
- case CC_OP_SHLW:
- return compute_c_shlw(env);
- case CC_OP_SHLL:
- return compute_c_shll(env);
-
- case CC_OP_SARB:
- return compute_c_sarl(env);
- case CC_OP_SARW:
- return compute_c_sarl(env);
- case CC_OP_SARL:
- return compute_c_sarl(env);
-
-#ifdef TARGET_X86_64
- case CC_OP_MULQ:
- return compute_c_mull(env);
-
- case CC_OP_ADDQ:
- return compute_c_addq(env);
-
- case CC_OP_ADCQ:
- return compute_c_adcq(env);
-
- case CC_OP_SUBQ:
- return compute_c_subq(env);
-
- case CC_OP_SBBQ:
- return compute_c_sbbq(env);
-
- case CC_OP_LOGICQ:
- return compute_c_logicq();
-
- case CC_OP_INCQ:
- return compute_c_incl(env);
-
- case CC_OP_DECQ:
- return compute_c_incl(env);
-
- case CC_OP_SHLQ:
- return compute_c_shlq(env);
-
- case CC_OP_SARQ:
- return compute_c_sarl(env);
-#endif
- }
-}
-
void helper_write_eflags(CPUX86State *env, target_ulong t0,
uint32_t update_mask)
{
diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h
index 1f94e11..951ceaf 100644
--- a/target-i386/cc_helper_template.h
+++ b/target-i386/cc_helper_template.h
@@ -58,16 +58,6 @@ static int glue(compute_all_add, SUFFIX)(CPUX86State *env)
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_add, SUFFIX)(CPUX86State *env)
-{
- int cf;
- target_long src1;
-
- src1 = CC_SRC;
- cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
- return cf;
-}
-
static int glue(compute_all_adc, SUFFIX)(CPUX86State *env)
{
int cf, pf, af, zf, sf, of;
@@ -84,16 +74,6 @@ static int glue(compute_all_adc, SUFFIX)(CPUX86State *env)
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_adc, SUFFIX)(CPUX86State *env)
-{
- int cf;
- target_long src1;
-
- src1 = CC_SRC;
- cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
- return cf;
-}
-
static int glue(compute_all_sub, SUFFIX)(CPUX86State *env)
{
int cf, pf, af, zf, sf, of;
@@ -110,17 +90,6 @@ static int glue(compute_all_sub, SUFFIX)(CPUX86State *env)
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_sub, SUFFIX)(CPUX86State *env)
-{
- int cf;
- target_long src1, src2;
-
- src1 = CC_DST + CC_SRC;
- src2 = CC_SRC;
- cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
- return cf;
-}
-
static int glue(compute_all_sbb, SUFFIX)(CPUX86State *env)
{
int cf, pf, af, zf, sf, of;
@@ -137,17 +106,6 @@ static int glue(compute_all_sbb, SUFFIX)(CPUX86State *env)
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_sbb, SUFFIX)(CPUX86State *env)
-{
- int cf;
- target_long src1, src2;
-
- src1 = CC_DST + CC_SRC + 1;
- src2 = CC_SRC;
- cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
- return cf;
-}
-
static int glue(compute_all_logic, SUFFIX)(CPUX86State *env)
{
int cf, pf, af, zf, sf, of;
@@ -161,11 +119,6 @@ static int glue(compute_all_logic, SUFFIX)(CPUX86State *env)
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_logic, SUFFIX)(void)
-{
- return 0;
-}
-
static int glue(compute_all_inc, SUFFIX)(CPUX86State *env)
{
int cf, pf, af, zf, sf, of;
@@ -182,13 +135,6 @@ static int glue(compute_all_inc, SUFFIX)(CPUX86State *env)
return cf | pf | af | zf | sf | of;
}
-#if DATA_BITS == 32
-static int glue(compute_c_inc, SUFFIX)(CPUX86State *env)
-{
- return CC_SRC;
-}
-#endif
-
static int glue(compute_all_dec, SUFFIX)(CPUX86State *env)
{
int cf, pf, af, zf, sf, of;
@@ -219,18 +165,6 @@ static int glue(compute_all_shl, SUFFIX)(CPUX86State *env)
return cf | pf | af | zf | sf | of;
}
-static int glue(compute_c_shl, SUFFIX)(CPUX86State *env)
-{
- return (CC_SRC >> (DATA_BITS - 1)) & CC_C;
-}
-
-#if DATA_BITS == 32
-static int glue(compute_c_sar, SUFFIX)(CPUX86State *env)
-{
- return CC_SRC & 1;
-}
-#endif
-
static int glue(compute_all_sar, SUFFIX)(CPUX86State *env)
{
int cf, pf, af, zf, sf, of;
@@ -245,16 +179,6 @@ static int glue(compute_all_sar, SUFFIX)(CPUX86State *env)
return cf | pf | af | zf | sf | of;
}
-#if DATA_BITS == 32
-static int glue(compute_c_mul, SUFFIX)(CPUX86State *env)
-{
- int cf;
-
- cf = (CC_SRC != 0);
- return cf;
-}
-#endif
-
/* NOTE: we compute the flags like the P4. On olders CPUs, only OF and
CF are modified and it is slower to do that. */
static int glue(compute_all_mul, SUFFIX)(CPUX86State *env)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 93850ce..2f54753 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -1,7 +1,6 @@
#include "def-helper.h"
DEF_HELPER_FLAGS_2(cc_compute_all, TCG_CALL_PURE, i32, env, int)
-DEF_HELPER_FLAGS_2(cc_compute_c, TCG_CALL_PURE, i32, env, int)
DEF_HELPER_0(lock, void)
DEF_HELPER_0(unlock, void)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index abcd944..4561c9d 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -823,19 +823,6 @@ static void gen_op_update_neg_cc(void)
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
-/* compute eflags.C to reg */
-static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
-{
- if (s->cc_op != CC_OP_DYNAMIC) {
- gen_op_set_cc_op(s->cc_op);
- }
- gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
- tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
- if (inv) {
- tcg_gen_xori_tl(reg, reg, 1);
- }
-}
-
/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
@@ -851,6 +838,130 @@ static void gen_compute_eflags(DisasContext *s)
tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
}
+/* compute eflags.C to reg */
+static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
+{
+ int t0, t1, size;
+
+ if (s->cc_op == CC_OP_DYNAMIC) {
+ gen_compute_eflags(s);
+ }
+ switch(s->cc_op) {
+ case CC_OP_SUBB:
+ case CC_OP_SUBW:
+ case CC_OP_SUBL:
+ case CC_OP_SUBQ:
+ /* (DATA_TYPE)(CC_DST + CC_SRC) < (DATA_TYPE)CC_SRC */
+ size = (s->cc_op - CC_OP_ADDB) & 3;
+ t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+ if (t1 == reg && reg == cpu_cc_src) {
+ tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src);
+ t1 = cpu_tmp0;
+ }
+
+ tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src);
+ gen_extu(size, reg);
+ t0 = reg;
+ goto add_sub;
+
+ case CC_OP_ADDB:
+ case CC_OP_ADDW:
+ case CC_OP_ADDL:
+ case CC_OP_ADDQ:
+ /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
+ size = (s->cc_op - CC_OP_ADDB) & 3;
+ t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+ t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+ add_sub:
+ tcg_gen_setcond_tl(inv ? TCG_COND_GEU : TCG_COND_LTU, reg, t0, t1);
+ return;
+
+ case CC_OP_SBBB:
+ case CC_OP_SBBW:
+ case CC_OP_SBBL:
+ case CC_OP_SBBQ:
+ /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */
+ size = (s->cc_op - CC_OP_ADDB) & 3;
+ t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+ if (t1 == reg && reg == cpu_cc_src) {
+ tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src);
+ t1 = cpu_tmp0;
+ }
+
+ tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src);
+ tcg_gen_addi_tl(reg, reg, 1);
+ gen_extu(size, reg);
+ t0 = reg;
+ goto adc_sbb;
+
+ case CC_OP_ADCB:
+ case CC_OP_ADCW:
+ case CC_OP_ADCL:
+ case CC_OP_ADCQ:
+ /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */
+ size = (s->cc_op - CC_OP_ADDB) & 3;
+ t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+ t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
+ adc_sbb:
+ tcg_gen_setcond_tl(inv ? TCG_COND_GTU : TCG_COND_LEU, reg, t0, t1);
+ return;
+
+ case CC_OP_LOGICB:
+ case CC_OP_LOGICW:
+ case CC_OP_LOGICL:
+ case CC_OP_LOGICQ:
+ tcg_gen_movi_tl(reg, 0);
+ break;
+
+ case CC_OP_INCB:
+ case CC_OP_INCW:
+ case CC_OP_INCL:
+ case CC_OP_INCQ:
+ case CC_OP_DECB:
+ case CC_OP_DECW:
+ case CC_OP_DECL:
+ case CC_OP_DECQ:
+ if (inv) {
+ tcg_gen_xori_tl(reg, cpu_cc_src, 1);
+ } else {
+ tcg_gen_mov_tl(reg, cpu_cc_src);
+ }
+ return;
+
+ case CC_OP_SHLB:
+ case CC_OP_SHLW:
+ case CC_OP_SHLL:
+ case CC_OP_SHLQ:
+ /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
+ size = (s->cc_op - CC_OP_ADDB) & 3;
+ tcg_gen_shri_tl(reg, cpu_cc_src, (8 << size) - 1);
+ tcg_gen_andi_tl(reg, reg, 1);
+ break;
+
+ case CC_OP_MULB:
+ case CC_OP_MULW:
+ case CC_OP_MULL:
+ case CC_OP_MULQ:
+ tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, cpu_cc_src, 0);
+ return;
+
+ case CC_OP_SARB:
+ case CC_OP_SARW:
+ case CC_OP_SARL:
+ case CC_OP_SARQ:
+ case CC_OP_EFLAGS:
+ /* CC_SRC & 1 */
+ tcg_gen_andi_tl(reg, cpu_cc_src, 1);
+ break;
+
+ default:
+ abort();
+ }
+ if (inv) {
+ tcg_gen_xori_tl(reg, reg, 1);
+ }
+}
+
/* compute eflags.P to reg */
static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
{
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 11/14] i386: convert gen_compute_eflags_c to TCG
2012-10-06 12:30 ` [Qemu-devel] [PATCH 11/14] i386: convert gen_compute_eflags_c to TCG Paolo Bonzini
@ 2012-10-07 19:35 ` Blue Swirl
2012-10-09 20:07 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Blue Swirl @ 2012-10-07 19:35 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Do the switch at translation time, converting the helper templates to
> TCG opcodes. In some cases CF can be computed with a single setcond,
> though others it may require a little more work.
>
> In the CC_OP_DYNAMIC case, compute the whole EFLAGS, same as for ZF/SF/PF.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target-i386/cc_helper.c | 118 ---------------------------------
> target-i386/cc_helper_template.h | 76 ----------------------
> target-i386/helper.h | 1 -
> target-i386/translate.c | 137 +++++++++++++++++++++++++++++++++++----
> 4 file modificati, 124 inserzioni(+), 208 rimozioni(-)
>
> diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
> index 9422003..214d715 100644
> --- a/target-i386/cc_helper.c
> +++ b/target-i386/cc_helper.c
> @@ -80,11 +80,6 @@ static int compute_all_eflags(CPUX86State *env)
> return CC_SRC;
> }
>
> -static int compute_c_eflags(CPUX86State *env)
> -{
> - return CC_SRC & CC_C;
> -}
> -
> uint32_t helper_cc_compute_all(CPUX86State *env, int op)
> {
> switch (op) {
> @@ -203,119 +198,6 @@ uint32_t cpu_cc_compute_all(CPUX86State *env, int op)
> return helper_cc_compute_all(env, op);
> }
>
> -uint32_t helper_cc_compute_c(CPUX86State *env, int op)
> -{
> - switch (op) {
> - default: /* should never happen */
> - return 0;
> -
> - case CC_OP_EFLAGS:
> - return compute_c_eflags(env);
> -
> - case CC_OP_MULB:
> - return compute_c_mull(env);
> - case CC_OP_MULW:
> - return compute_c_mull(env);
> - case CC_OP_MULL:
> - return compute_c_mull(env);
> -
> - case CC_OP_ADDB:
> - return compute_c_addb(env);
> - case CC_OP_ADDW:
> - return compute_c_addw(env);
> - case CC_OP_ADDL:
> - return compute_c_addl(env);
> -
> - case CC_OP_ADCB:
> - return compute_c_adcb(env);
> - case CC_OP_ADCW:
> - return compute_c_adcw(env);
> - case CC_OP_ADCL:
> - return compute_c_adcl(env);
> -
> - case CC_OP_SUBB:
> - return compute_c_subb(env);
> - case CC_OP_SUBW:
> - return compute_c_subw(env);
> - case CC_OP_SUBL:
> - return compute_c_subl(env);
> -
> - case CC_OP_SBBB:
> - return compute_c_sbbb(env);
> - case CC_OP_SBBW:
> - return compute_c_sbbw(env);
> - case CC_OP_SBBL:
> - return compute_c_sbbl(env);
> -
> - case CC_OP_LOGICB:
> - return compute_c_logicb();
> - case CC_OP_LOGICW:
> - return compute_c_logicw();
> - case CC_OP_LOGICL:
> - return compute_c_logicl();
> -
> - case CC_OP_INCB:
> - return compute_c_incl(env);
> - case CC_OP_INCW:
> - return compute_c_incl(env);
> - case CC_OP_INCL:
> - return compute_c_incl(env);
> -
> - case CC_OP_DECB:
> - return compute_c_incl(env);
> - case CC_OP_DECW:
> - return compute_c_incl(env);
> - case CC_OP_DECL:
> - return compute_c_incl(env);
> -
> - case CC_OP_SHLB:
> - return compute_c_shlb(env);
> - case CC_OP_SHLW:
> - return compute_c_shlw(env);
> - case CC_OP_SHLL:
> - return compute_c_shll(env);
> -
> - case CC_OP_SARB:
> - return compute_c_sarl(env);
> - case CC_OP_SARW:
> - return compute_c_sarl(env);
> - case CC_OP_SARL:
> - return compute_c_sarl(env);
> -
> -#ifdef TARGET_X86_64
> - case CC_OP_MULQ:
> - return compute_c_mull(env);
> -
> - case CC_OP_ADDQ:
> - return compute_c_addq(env);
> -
> - case CC_OP_ADCQ:
> - return compute_c_adcq(env);
> -
> - case CC_OP_SUBQ:
> - return compute_c_subq(env);
> -
> - case CC_OP_SBBQ:
> - return compute_c_sbbq(env);
> -
> - case CC_OP_LOGICQ:
> - return compute_c_logicq();
> -
> - case CC_OP_INCQ:
> - return compute_c_incl(env);
> -
> - case CC_OP_DECQ:
> - return compute_c_incl(env);
> -
> - case CC_OP_SHLQ:
> - return compute_c_shlq(env);
> -
> - case CC_OP_SARQ:
> - return compute_c_sarl(env);
> -#endif
> - }
> -}
> -
> void helper_write_eflags(CPUX86State *env, target_ulong t0,
> uint32_t update_mask)
> {
> diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h
> index 1f94e11..951ceaf 100644
> --- a/target-i386/cc_helper_template.h
> +++ b/target-i386/cc_helper_template.h
> @@ -58,16 +58,6 @@ static int glue(compute_all_add, SUFFIX)(CPUX86State *env)
> return cf | pf | af | zf | sf | of;
> }
>
> -static int glue(compute_c_add, SUFFIX)(CPUX86State *env)
> -{
> - int cf;
> - target_long src1;
> -
> - src1 = CC_SRC;
> - cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
> - return cf;
> -}
> -
> static int glue(compute_all_adc, SUFFIX)(CPUX86State *env)
> {
> int cf, pf, af, zf, sf, of;
> @@ -84,16 +74,6 @@ static int glue(compute_all_adc, SUFFIX)(CPUX86State *env)
> return cf | pf | af | zf | sf | of;
> }
>
> -static int glue(compute_c_adc, SUFFIX)(CPUX86State *env)
> -{
> - int cf;
> - target_long src1;
> -
> - src1 = CC_SRC;
> - cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
> - return cf;
> -}
> -
> static int glue(compute_all_sub, SUFFIX)(CPUX86State *env)
> {
> int cf, pf, af, zf, sf, of;
> @@ -110,17 +90,6 @@ static int glue(compute_all_sub, SUFFIX)(CPUX86State *env)
> return cf | pf | af | zf | sf | of;
> }
>
> -static int glue(compute_c_sub, SUFFIX)(CPUX86State *env)
> -{
> - int cf;
> - target_long src1, src2;
> -
> - src1 = CC_DST + CC_SRC;
> - src2 = CC_SRC;
> - cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
> - return cf;
> -}
> -
> static int glue(compute_all_sbb, SUFFIX)(CPUX86State *env)
> {
> int cf, pf, af, zf, sf, of;
> @@ -137,17 +106,6 @@ static int glue(compute_all_sbb, SUFFIX)(CPUX86State *env)
> return cf | pf | af | zf | sf | of;
> }
>
> -static int glue(compute_c_sbb, SUFFIX)(CPUX86State *env)
> -{
> - int cf;
> - target_long src1, src2;
> -
> - src1 = CC_DST + CC_SRC + 1;
> - src2 = CC_SRC;
> - cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
> - return cf;
> -}
> -
> static int glue(compute_all_logic, SUFFIX)(CPUX86State *env)
> {
> int cf, pf, af, zf, sf, of;
> @@ -161,11 +119,6 @@ static int glue(compute_all_logic, SUFFIX)(CPUX86State *env)
> return cf | pf | af | zf | sf | of;
> }
>
> -static int glue(compute_c_logic, SUFFIX)(void)
> -{
> - return 0;
> -}
> -
> static int glue(compute_all_inc, SUFFIX)(CPUX86State *env)
> {
> int cf, pf, af, zf, sf, of;
> @@ -182,13 +135,6 @@ static int glue(compute_all_inc, SUFFIX)(CPUX86State *env)
> return cf | pf | af | zf | sf | of;
> }
>
> -#if DATA_BITS == 32
> -static int glue(compute_c_inc, SUFFIX)(CPUX86State *env)
> -{
> - return CC_SRC;
> -}
> -#endif
> -
> static int glue(compute_all_dec, SUFFIX)(CPUX86State *env)
> {
> int cf, pf, af, zf, sf, of;
> @@ -219,18 +165,6 @@ static int glue(compute_all_shl, SUFFIX)(CPUX86State *env)
> return cf | pf | af | zf | sf | of;
> }
>
> -static int glue(compute_c_shl, SUFFIX)(CPUX86State *env)
> -{
> - return (CC_SRC >> (DATA_BITS - 1)) & CC_C;
> -}
> -
> -#if DATA_BITS == 32
> -static int glue(compute_c_sar, SUFFIX)(CPUX86State *env)
> -{
> - return CC_SRC & 1;
> -}
> -#endif
> -
> static int glue(compute_all_sar, SUFFIX)(CPUX86State *env)
> {
> int cf, pf, af, zf, sf, of;
> @@ -245,16 +179,6 @@ static int glue(compute_all_sar, SUFFIX)(CPUX86State *env)
> return cf | pf | af | zf | sf | of;
> }
>
> -#if DATA_BITS == 32
> -static int glue(compute_c_mul, SUFFIX)(CPUX86State *env)
> -{
> - int cf;
> -
> - cf = (CC_SRC != 0);
> - return cf;
> -}
> -#endif
> -
> /* NOTE: we compute the flags like the P4. On olders CPUs, only OF and
> CF are modified and it is slower to do that. */
> static int glue(compute_all_mul, SUFFIX)(CPUX86State *env)
> diff --git a/target-i386/helper.h b/target-i386/helper.h
> index 93850ce..2f54753 100644
> --- a/target-i386/helper.h
> +++ b/target-i386/helper.h
> @@ -1,7 +1,6 @@
> #include "def-helper.h"
>
> DEF_HELPER_FLAGS_2(cc_compute_all, TCG_CALL_PURE, i32, env, int)
> -DEF_HELPER_FLAGS_2(cc_compute_c, TCG_CALL_PURE, i32, env, int)
>
> DEF_HELPER_0(lock, void)
> DEF_HELPER_0(unlock, void)
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index abcd944..4561c9d 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -823,19 +823,6 @@ static void gen_op_update_neg_cc(void)
> tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
> }
>
> -/* compute eflags.C to reg */
> -static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
> -{
> - if (s->cc_op != CC_OP_DYNAMIC) {
> - gen_op_set_cc_op(s->cc_op);
> - }
> - gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
> - tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
> - if (inv) {
> - tcg_gen_xori_tl(reg, reg, 1);
> - }
> -}
> -
> /* compute all eflags to cc_src */
> static void gen_compute_eflags(DisasContext *s)
> {
> @@ -851,6 +838,130 @@ static void gen_compute_eflags(DisasContext *s)
> tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
> }
>
> +/* compute eflags.C to reg */
> +static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv)
> +{
> + int t0, t1, size;
> +
> + if (s->cc_op == CC_OP_DYNAMIC) {
> + gen_compute_eflags(s);
> + }
> + switch(s->cc_op) {
> + case CC_OP_SUBB:
> + case CC_OP_SUBW:
> + case CC_OP_SUBL:
> + case CC_OP_SUBQ:
> + /* (DATA_TYPE)(CC_DST + CC_SRC) < (DATA_TYPE)CC_SRC */
> + size = (s->cc_op - CC_OP_ADDB) & 3;
> + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
> + if (t1 == reg && reg == cpu_cc_src) {
> + tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src);
> + t1 = cpu_tmp0;
> + }
> +
> + tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src);
> + gen_extu(size, reg);
> + t0 = reg;
> + goto add_sub;
> +
> + case CC_OP_ADDB:
> + case CC_OP_ADDW:
> + case CC_OP_ADDL:
> + case CC_OP_ADDQ:
> + /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
> + size = (s->cc_op - CC_OP_ADDB) & 3;
> + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
> + t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
> + add_sub:
> + tcg_gen_setcond_tl(inv ? TCG_COND_GEU : TCG_COND_LTU, reg, t0, t1);
> + return;
It's a tad confusing that 'return' and 'break' are used in a seemingly
random fashion. How about repeating the last few lines for 'break'
cases, or setting 'inv' to false in 'return' cases?
Otherwise the patch looks correct.
> +
> + case CC_OP_SBBB:
> + case CC_OP_SBBW:
> + case CC_OP_SBBL:
> + case CC_OP_SBBQ:
> + /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */
> + size = (s->cc_op - CC_OP_ADDB) & 3;
> + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
> + if (t1 == reg && reg == cpu_cc_src) {
> + tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src);
> + t1 = cpu_tmp0;
> + }
> +
> + tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src);
> + tcg_gen_addi_tl(reg, reg, 1);
> + gen_extu(size, reg);
> + t0 = reg;
> + goto adc_sbb;
> +
> + case CC_OP_ADCB:
> + case CC_OP_ADCW:
> + case CC_OP_ADCL:
> + case CC_OP_ADCQ:
> + /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */
> + size = (s->cc_op - CC_OP_ADDB) & 3;
> + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
> + t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
> + adc_sbb:
> + tcg_gen_setcond_tl(inv ? TCG_COND_GTU : TCG_COND_LEU, reg, t0, t1);
> + return;
> +
> + case CC_OP_LOGICB:
> + case CC_OP_LOGICW:
> + case CC_OP_LOGICL:
> + case CC_OP_LOGICQ:
> + tcg_gen_movi_tl(reg, 0);
> + break;
> +
> + case CC_OP_INCB:
> + case CC_OP_INCW:
> + case CC_OP_INCL:
> + case CC_OP_INCQ:
> + case CC_OP_DECB:
> + case CC_OP_DECW:
> + case CC_OP_DECL:
> + case CC_OP_DECQ:
> + if (inv) {
> + tcg_gen_xori_tl(reg, cpu_cc_src, 1);
> + } else {
> + tcg_gen_mov_tl(reg, cpu_cc_src);
> + }
> + return;
> +
> + case CC_OP_SHLB:
> + case CC_OP_SHLW:
> + case CC_OP_SHLL:
> + case CC_OP_SHLQ:
> + /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
> + size = (s->cc_op - CC_OP_ADDB) & 3;
> + tcg_gen_shri_tl(reg, cpu_cc_src, (8 << size) - 1);
> + tcg_gen_andi_tl(reg, reg, 1);
> + break;
> +
> + case CC_OP_MULB:
> + case CC_OP_MULW:
> + case CC_OP_MULL:
> + case CC_OP_MULQ:
> + tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, cpu_cc_src, 0);
> + return;
> +
> + case CC_OP_SARB:
> + case CC_OP_SARW:
> + case CC_OP_SARL:
> + case CC_OP_SARQ:
> + case CC_OP_EFLAGS:
> + /* CC_SRC & 1 */
> + tcg_gen_andi_tl(reg, cpu_cc_src, 1);
> + break;
> +
> + default:
> + abort();
> + }
> + if (inv) {
> + tcg_gen_xori_tl(reg, reg, 1);
> + }
> +}
> +
> /* compute eflags.P to reg */
> static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
> {
> --
> 1.7.12.1
>
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 11/14] i386: convert gen_compute_eflags_c to TCG
2012-10-06 12:30 ` [Qemu-devel] [PATCH 11/14] i386: convert gen_compute_eflags_c to TCG Paolo Bonzini
2012-10-07 19:35 ` Blue Swirl
@ 2012-10-09 20:07 ` Richard Henderson
2012-10-10 6:47 ` Paolo Bonzini
1 sibling, 1 reply; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 20:07 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> + case CC_OP_SUBB:
> + case CC_OP_SUBW:
> + case CC_OP_SUBL:
> + case CC_OP_SUBQ:
> + /* (DATA_TYPE)(CC_DST + CC_SRC) < (DATA_TYPE)CC_SRC */
> + size = (s->cc_op - CC_OP_ADDB) & 3;
I guess the & 3 makes the result "just so happen" to be right,
but I think the code would be more readable with "- SUBB" there.
And the other cases of the same pattern below.
> + case CC_OP_SBBB:
> + case CC_OP_SBBW:
> + case CC_OP_SBBL:
> + case CC_OP_SBBQ:
> + /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */
> + size = (s->cc_op - CC_OP_ADDB) & 3;
> + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
> + if (t1 == reg && reg == cpu_cc_src) {
> + tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src);
> + t1 = cpu_tmp0;
> + }
> +
> + tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src);
> + tcg_gen_addi_tl(reg, reg, 1);
> + gen_extu(size, reg);
> + t0 = reg;
> + goto adc_sbb;
> +
> + case CC_OP_ADCB:
> + case CC_OP_ADCW:
> + case CC_OP_ADCL:
> + case CC_OP_ADCQ:
> + /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */
> + size = (s->cc_op - CC_OP_ADDB) & 3;
> + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
> + t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
> + adc_sbb:
> + tcg_gen_setcond_tl(inv ? TCG_COND_GTU : TCG_COND_LEU, reg, t0, t1);
> + return;
There's no point in handling these, because you can never see them
assigned to s->cc_op. The ADC/SBB translators always set CC_OP_DYNAMIC
after dynamically selecting CC_OP_ADD or CC_OP_ADC based on the carry-in.
> + default:
> + abort();
Better to just treat unlisted codes as dynamic? I.e.
default: /* Including CC_OP_DYNAMIC */
gen_compute_eflags(s);
/* FALLTHRU */
case CC_OP_EFLAGS:
...
All that said, the patch as written is correct.
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 11/14] i386: convert gen_compute_eflags_c to TCG
2012-10-09 20:07 ` Richard Henderson
@ 2012-10-10 6:47 ` Paolo Bonzini
0 siblings, 0 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-10 6:47 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
Il 09/10/2012 22:07, Richard Henderson ha scritto:
>> > + case CC_OP_ADCB:
>> > + case CC_OP_ADCW:
>> > + case CC_OP_ADCL:
>> > + case CC_OP_ADCQ:
>> > + /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */
>> > + size = (s->cc_op - CC_OP_ADDB) & 3;
>> > + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
>> > + t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
>> > + adc_sbb:
>> > + tcg_gen_setcond_tl(inv ? TCG_COND_GTU : TCG_COND_LEU, reg, t0, t1);
>> > + return;
> There's no point in handling these, because you can never see them
> assigned to s->cc_op. The ADC/SBB translators always set CC_OP_DYNAMIC
> after dynamically selecting CC_OP_ADD or CC_OP_ADC based on the carry-in.
>
That's correct, but compared to
case CC_OP_ADCB:
case CC_OP_ADCW:
case CC_OP_ADCL:
case CC_OP_ADCQ:
case CC_OP_SBBB:
case CC_OP_SBBW:
case CC_OP_SBBL:
case CC_OP_SBBQ:
/* There's no point in handling these, because you can never
* see them assigned to s->cc_op. The ADC/SBB translators
* always set CC_OP_DYNAMIC after dynamically selecting
* CC_OP_ADD or CC_OP_ADC based on the carry-in.
*/
abort();
it's not a great saving and it's a bit less self-documenting...
Paolo
^ permalink raw reply [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 12/14] i386: change gen_setcc_slow_T0 to gen_setcc_slow
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (10 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 11/14] i386: convert gen_compute_eflags_c to TCG Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-07 19:36 ` Blue Swirl
2012-10-09 20:07 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 13/14] i386: optimize setbe Paolo Bonzini
2012-10-06 12:30 ` [Qemu-devel] [PATCH 14/14] i386: optimize setcc instructions Paolo Bonzini
13 siblings, 2 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
Do not hard code the destination register.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 39 ++++++++++++++++++++-------------------
1 file modificato, 20 inserzioni(+), 19 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 4561c9d..fb44839 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1016,50 +1016,51 @@ static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv)
}
}
-static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op, bool inv)
+static inline void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv)
{
+ assert(reg != cpu_cc_src);
switch(jcc_op) {
case JCC_O:
- gen_compute_eflags_o(s, cpu_T[0]);
+ gen_compute_eflags_o(s, reg);
break;
case JCC_B:
- gen_compute_eflags_c(s, cpu_T[0], inv);
+ gen_compute_eflags_c(s, reg, inv);
return;
case JCC_Z:
- gen_compute_eflags_z(s, cpu_T[0], inv);
+ gen_compute_eflags_z(s, reg, inv);
return;
case JCC_BE:
gen_compute_eflags(s);
- tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6);
- tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ tcg_gen_shri_tl(reg, cpu_cc_src, 6);
+ tcg_gen_or_tl(reg, reg, cpu_cc_src);
+ tcg_gen_andi_tl(reg, reg, 1);
break;
case JCC_S:
- gen_compute_eflags_s(s, cpu_T[0], inv);
+ gen_compute_eflags_s(s, reg, inv);
return;
case JCC_P:
- gen_compute_eflags_p(s, cpu_T[0]);
+ gen_compute_eflags_p(s, reg);
break;
case JCC_L:
gen_compute_eflags(s);
- tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
+ tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */
tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */
- tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ tcg_gen_xor_tl(reg, reg, cpu_tmp0);
+ tcg_gen_andi_tl(reg, reg, 1);
break;
default:
case JCC_LE:
gen_compute_eflags(s);
- tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
+ tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */
tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */
tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */
- tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
- tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
- tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
+ tcg_gen_xor_tl(reg, reg, cpu_tmp4);
+ tcg_gen_or_tl(reg, reg, cpu_tmp0);
+ tcg_gen_andi_tl(reg, reg, 1);
break;
}
if (inv) {
- tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
+ tcg_gen_xori_tl(reg, reg, 1);
}
}
@@ -1226,7 +1227,7 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1)
break;
default:
slow_jcc:
- gen_setcc_slow_T0(s, jcc_op, false);
+ gen_setcc_slow(s, jcc_op, cpu_T[0], false);
tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
cpu_T[0], 0, l1);
break;
@@ -2497,7 +2498,7 @@ static void gen_setcc(DisasContext *s, int b)
worth to */
inv = b & 1;
jcc_op = (b >> 1) & 7;
- gen_setcc_slow_T0(s, jcc_op, inv);
+ gen_setcc_slow(s, jcc_op, cpu_T[0], inv);
}
}
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 12/14] i386: change gen_setcc_slow_T0 to gen_setcc_slow
2012-10-06 12:30 ` [Qemu-devel] [PATCH 12/14] i386: change gen_setcc_slow_T0 to gen_setcc_slow Paolo Bonzini
@ 2012-10-07 19:36 ` Blue Swirl
2012-10-09 20:07 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Blue Swirl @ 2012-10-07 19:36 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Do not hard code the destination register.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> target-i386/translate.c | 39 ++++++++++++++++++++-------------------
> 1 file modificato, 20 inserzioni(+), 19 rimozioni(-)
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 4561c9d..fb44839 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -1016,50 +1016,51 @@ static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv)
> }
> }
>
> -static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op, bool inv)
> +static inline void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv)
> {
> + assert(reg != cpu_cc_src);
> switch(jcc_op) {
> case JCC_O:
> - gen_compute_eflags_o(s, cpu_T[0]);
> + gen_compute_eflags_o(s, reg);
> break;
> case JCC_B:
> - gen_compute_eflags_c(s, cpu_T[0], inv);
> + gen_compute_eflags_c(s, reg, inv);
> return;
> case JCC_Z:
> - gen_compute_eflags_z(s, cpu_T[0], inv);
> + gen_compute_eflags_z(s, reg, inv);
> return;
> case JCC_BE:
> gen_compute_eflags(s);
> - tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6);
> - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
> - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> + tcg_gen_shri_tl(reg, cpu_cc_src, 6);
> + tcg_gen_or_tl(reg, reg, cpu_cc_src);
> + tcg_gen_andi_tl(reg, reg, 1);
> break;
> case JCC_S:
> - gen_compute_eflags_s(s, cpu_T[0], inv);
> + gen_compute_eflags_s(s, reg, inv);
> return;
> case JCC_P:
> - gen_compute_eflags_p(s, cpu_T[0]);
> + gen_compute_eflags_p(s, reg);
> break;
> case JCC_L:
> gen_compute_eflags(s);
> - tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
> + tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */
> tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */
> - tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
> - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> + tcg_gen_xor_tl(reg, reg, cpu_tmp0);
> + tcg_gen_andi_tl(reg, reg, 1);
> break;
> default:
> case JCC_LE:
> gen_compute_eflags(s);
> - tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
> + tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */
> tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */
> tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */
> - tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
> - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
> - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> + tcg_gen_xor_tl(reg, reg, cpu_tmp4);
> + tcg_gen_or_tl(reg, reg, cpu_tmp0);
> + tcg_gen_andi_tl(reg, reg, 1);
> break;
> }
> if (inv) {
> - tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1);
> + tcg_gen_xori_tl(reg, reg, 1);
> }
> }
>
> @@ -1226,7 +1227,7 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1)
> break;
> default:
> slow_jcc:
> - gen_setcc_slow_T0(s, jcc_op, false);
> + gen_setcc_slow(s, jcc_op, cpu_T[0], false);
> tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE,
> cpu_T[0], 0, l1);
> break;
> @@ -2497,7 +2498,7 @@ static void gen_setcc(DisasContext *s, int b)
> worth to */
> inv = b & 1;
> jcc_op = (b >> 1) & 7;
> - gen_setcc_slow_T0(s, jcc_op, inv);
> + gen_setcc_slow(s, jcc_op, cpu_T[0], inv);
> }
> }
>
> --
> 1.7.12.1
>
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 12/14] i386: change gen_setcc_slow_T0 to gen_setcc_slow
2012-10-06 12:30 ` [Qemu-devel] [PATCH 12/14] i386: change gen_setcc_slow_T0 to gen_setcc_slow Paolo Bonzini
2012-10-07 19:36 ` Blue Swirl
@ 2012-10-09 20:07 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 20:07 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> Do not hard code the destination register.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target-i386/translate.c | 39 ++++++++++++++++++++-------------------
> 1 file modificato, 20 inserzioni(+), 19 rimozioni(-)
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 13/14] i386: optimize setbe
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (11 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 12/14] i386: change gen_setcc_slow_T0 to gen_setcc_slow Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-07 19:43 ` Blue Swirl
2012-10-09 20:13 ` Richard Henderson
2012-10-06 12:30 ` [Qemu-devel] [PATCH 14/14] i386: optimize setcc instructions Paolo Bonzini
13 siblings, 2 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
This is looking at EFLAGS, but it can do so more efficiently with
setcond.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 7 +++----
1 file modificato, 3 inserzioni(+), 4 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index fb44839..342b9ec 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1031,10 +1031,9 @@ static inline void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool in
return;
case JCC_BE:
gen_compute_eflags(s);
- tcg_gen_shri_tl(reg, cpu_cc_src, 6);
- tcg_gen_or_tl(reg, reg, cpu_cc_src);
- tcg_gen_andi_tl(reg, reg, 1);
- break;
+ tcg_gen_andi_tl(reg, cpu_cc_src, 0x41);
+ tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0);
+ return;
case JCC_S:
gen_compute_eflags_s(s, reg, inv);
return;
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 13/14] i386: optimize setbe
2012-10-06 12:30 ` [Qemu-devel] [PATCH 13/14] i386: optimize setbe Paolo Bonzini
@ 2012-10-07 19:43 ` Blue Swirl
2012-10-09 20:13 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Blue Swirl @ 2012-10-07 19:43 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> This is looking at EFLAGS, but it can do so more efficiently with
> setcond.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Blue Swirl <blauwirbel@gmail.com>
> ---
> target-i386/translate.c | 7 +++----
> 1 file modificato, 3 inserzioni(+), 4 rimozioni(-)
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index fb44839..342b9ec 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -1031,10 +1031,9 @@ static inline void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool in
> return;
> case JCC_BE:
> gen_compute_eflags(s);
> - tcg_gen_shri_tl(reg, cpu_cc_src, 6);
> - tcg_gen_or_tl(reg, reg, cpu_cc_src);
> - tcg_gen_andi_tl(reg, reg, 1);
> - break;
> + tcg_gen_andi_tl(reg, cpu_cc_src, 0x41);
Symbolic names for the flags would be nice.
> + tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0);
> + return;
> case JCC_S:
> gen_compute_eflags_s(s, reg, inv);
> return;
> --
> 1.7.12.1
>
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 13/14] i386: optimize setbe
2012-10-06 12:30 ` [Qemu-devel] [PATCH 13/14] i386: optimize setbe Paolo Bonzini
2012-10-07 19:43 ` Blue Swirl
@ 2012-10-09 20:13 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 20:13 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> gen_compute_eflags(s);
> - tcg_gen_shri_tl(reg, cpu_cc_src, 6);
> - tcg_gen_or_tl(reg, reg, cpu_cc_src);
> - tcg_gen_andi_tl(reg, reg, 1);
> - break;
> + tcg_gen_andi_tl(reg, cpu_cc_src, 0x41);
> + tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0);
> + return;
This one's questionable. Whether it's an improvement really depends
on the host cpu isa. It's probably more of a benefit if we can avoid
the setcond completely. See the comments for the next patch.
That said, the patch produces correct code.
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* [Qemu-devel] [PATCH 14/14] i386: optimize setcc instructions
2012-10-06 12:30 [Qemu-devel] [CFT PATCH 00/14] Improve handling of x86 condition codes (tcg) Paolo Bonzini
` (12 preceding siblings ...)
2012-10-06 12:30 ` [Qemu-devel] [PATCH 13/14] i386: optimize setbe Paolo Bonzini
@ 2012-10-06 12:30 ` Paolo Bonzini
2012-10-07 19:58 ` Blue Swirl
2012-10-09 20:22 ` Richard Henderson
13 siblings, 2 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-06 12:30 UTC (permalink / raw)
To: qemu-devel
Reconstruct the arguments for complex conditions involving CC_OP_SUBx (BE,
L, LE). In the others do it via setcond and gen_setcc_slow (which is
not that slow in many cases).
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/translate.c | 93 +++++++++++++++++++------------------------------
1 file modificato, 36 inserzioni(+), 57 rimozioni(-)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 342b9ec..92e8291 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1063,55 +1063,55 @@ static inline void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool in
}
}
-/* return true if setcc_slow is not needed (WARNING: must be kept in
- sync with gen_jcc1) */
-static int is_fast_jcc_case(DisasContext *s, int b)
+/* perform a conditional store into register 'reg' according to jump opcode
+ value 'b'. In the fast case, T0 is guaranted not to be used. */
+static inline void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
- int jcc_op;
+ int inv, jcc_op, size, cond;
+ TCGv t0;
+
+ inv = b & 1;
jcc_op = (b >> 1) & 7;
+
switch(s->cc_op) {
- /* we optimize the cmp/jcc case */
+ /* we optimize relational operators for the cmp/jcc case */
case CC_OP_SUBB:
case CC_OP_SUBW:
case CC_OP_SUBL:
case CC_OP_SUBQ:
- if (jcc_op == JCC_O || jcc_op == JCC_P)
- goto slow_jcc;
- break;
-
- /* some jumps are easy to compute */
- case CC_OP_ADDB:
- case CC_OP_ADDW:
- case CC_OP_ADDL:
- case CC_OP_ADDQ:
-
- case CC_OP_LOGICB:
- case CC_OP_LOGICW:
- case CC_OP_LOGICL:
- case CC_OP_LOGICQ:
-
- case CC_OP_INCB:
- case CC_OP_INCW:
- case CC_OP_INCL:
- case CC_OP_INCQ:
+ size = s->cc_op - CC_OP_SUBB;
+ switch(jcc_op) {
+ case JCC_BE:
+ cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
+ tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+ gen_extu(size, cpu_tmp4);
+ t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
+ tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0);
+ break;
- case CC_OP_DECB:
- case CC_OP_DECW:
- case CC_OP_DECL:
- case CC_OP_DECQ:
+ case JCC_L:
+ cond = inv ? TCG_COND_GE : TCG_COND_LT;
+ goto fast_jcc_l;
+ case JCC_LE:
+ cond = inv ? TCG_COND_GT : TCG_COND_LE;
+ fast_jcc_l:
+ tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
+ gen_exts(size, cpu_tmp4);
+ t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
+ tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0);
+ break;
- case CC_OP_SHLB:
- case CC_OP_SHLW:
- case CC_OP_SHLL:
- case CC_OP_SHLQ:
- if (jcc_op != JCC_Z && jcc_op != JCC_S)
+ default:
goto slow_jcc;
+ }
break;
+
default:
slow_jcc:
- return 0;
+ /* gen_setcc_slow actually generates good code for JC, JZ and JS */
+ gen_setcc_slow(s, jcc_op, reg, inv);
+ break;
}
- return 1;
}
/* generate a conditional jump to label 'l1' according to jump opcode
@@ -2477,28 +2477,7 @@ static inline void gen_jcc(DisasContext *s, int b,
static void gen_setcc(DisasContext *s, int b)
{
- int inv, jcc_op, l1;
- TCGv t0;
-
- if (is_fast_jcc_case(s, b)) {
- /* nominal case: we use a jump */
- /* XXX: make it faster by adding new instructions in TCG */
- t0 = tcg_temp_local_new();
- tcg_gen_movi_tl(t0, 0);
- l1 = gen_new_label();
- gen_jcc1(s, b ^ 1, l1);
- tcg_gen_movi_tl(t0, 1);
- gen_set_label(l1);
- tcg_gen_mov_tl(cpu_T[0], t0);
- tcg_temp_free(t0);
- } else {
- /* slow case: it is more efficient not to generate a jump,
- although it is questionnable whether this optimization is
- worth to */
- inv = b & 1;
- jcc_op = (b >> 1) & 7;
- gen_setcc_slow(s, jcc_op, cpu_T[0], inv);
- }
+ gen_setcc1(s, b, cpu_T[0]);
}
static inline void gen_op_movl_T0_seg(int seg_reg)
--
1.7.12.1
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 14/14] i386: optimize setcc instructions
2012-10-06 12:30 ` [Qemu-devel] [PATCH 14/14] i386: optimize setcc instructions Paolo Bonzini
@ 2012-10-07 19:58 ` Blue Swirl
2012-10-09 20:22 ` Richard Henderson
1 sibling, 0 replies; 43+ messages in thread
From: Blue Swirl @ 2012-10-07 19:58 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Reconstruct the arguments for complex conditions involving CC_OP_SUBx (BE,
> L, LE). In the others do it via setcond and gen_setcc_slow (which is
> not that slow in many cases).
I think it would be useful to reconstruct also for add, inc and dec
along the same lines, the others are probably not so often used.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> target-i386/translate.c | 93 +++++++++++++++++++------------------------------
> 1 file modificato, 36 inserzioni(+), 57 rimozioni(-)
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 342b9ec..92e8291 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -1063,55 +1063,55 @@ static inline void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool in
> }
> }
>
> -/* return true if setcc_slow is not needed (WARNING: must be kept in
> - sync with gen_jcc1) */
> -static int is_fast_jcc_case(DisasContext *s, int b)
> +/* perform a conditional store into register 'reg' according to jump opcode
> + value 'b'. In the fast case, T0 is guaranted not to be used. */
> +static inline void gen_setcc1(DisasContext *s, int b, TCGv reg)
> {
> - int jcc_op;
> + int inv, jcc_op, size, cond;
> + TCGv t0;
> +
> + inv = b & 1;
> jcc_op = (b >> 1) & 7;
> +
> switch(s->cc_op) {
> - /* we optimize the cmp/jcc case */
> + /* we optimize relational operators for the cmp/jcc case */
> case CC_OP_SUBB:
> case CC_OP_SUBW:
> case CC_OP_SUBL:
> case CC_OP_SUBQ:
> - if (jcc_op == JCC_O || jcc_op == JCC_P)
> - goto slow_jcc;
> - break;
> -
> - /* some jumps are easy to compute */
> - case CC_OP_ADDB:
> - case CC_OP_ADDW:
> - case CC_OP_ADDL:
> - case CC_OP_ADDQ:
> -
> - case CC_OP_LOGICB:
> - case CC_OP_LOGICW:
> - case CC_OP_LOGICL:
> - case CC_OP_LOGICQ:
> -
> - case CC_OP_INCB:
> - case CC_OP_INCW:
> - case CC_OP_INCL:
> - case CC_OP_INCQ:
> + size = s->cc_op - CC_OP_SUBB;
> + switch(jcc_op) {
> + case JCC_BE:
> + cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
> + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
> + gen_extu(size, cpu_tmp4);
> + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
> + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0);
> + break;
>
> - case CC_OP_DECB:
> - case CC_OP_DECW:
> - case CC_OP_DECL:
> - case CC_OP_DECQ:
> + case JCC_L:
> + cond = inv ? TCG_COND_GE : TCG_COND_LT;
> + goto fast_jcc_l;
> + case JCC_LE:
> + cond = inv ? TCG_COND_GT : TCG_COND_LE;
> + fast_jcc_l:
> + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
> + gen_exts(size, cpu_tmp4);
> + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
> + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0);
> + break;
>
> - case CC_OP_SHLB:
> - case CC_OP_SHLW:
> - case CC_OP_SHLL:
> - case CC_OP_SHLQ:
> - if (jcc_op != JCC_Z && jcc_op != JCC_S)
> + default:
> goto slow_jcc;
> + }
> break;
> +
> default:
> slow_jcc:
> - return 0;
> + /* gen_setcc_slow actually generates good code for JC, JZ and JS */
> + gen_setcc_slow(s, jcc_op, reg, inv);
> + break;
> }
> - return 1;
> }
>
> /* generate a conditional jump to label 'l1' according to jump opcode
> @@ -2477,28 +2477,7 @@ static inline void gen_jcc(DisasContext *s, int b,
>
> static void gen_setcc(DisasContext *s, int b)
> {
> - int inv, jcc_op, l1;
> - TCGv t0;
> -
> - if (is_fast_jcc_case(s, b)) {
> - /* nominal case: we use a jump */
> - /* XXX: make it faster by adding new instructions in TCG */
> - t0 = tcg_temp_local_new();
> - tcg_gen_movi_tl(t0, 0);
> - l1 = gen_new_label();
> - gen_jcc1(s, b ^ 1, l1);
> - tcg_gen_movi_tl(t0, 1);
> - gen_set_label(l1);
> - tcg_gen_mov_tl(cpu_T[0], t0);
> - tcg_temp_free(t0);
> - } else {
> - /* slow case: it is more efficient not to generate a jump,
> - although it is questionnable whether this optimization is
> - worth to */
> - inv = b & 1;
> - jcc_op = (b >> 1) & 7;
> - gen_setcc_slow(s, jcc_op, cpu_T[0], inv);
> - }
> + gen_setcc1(s, b, cpu_T[0]);
> }
>
> static inline void gen_op_movl_T0_seg(int seg_reg)
> --
> 1.7.12.1
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 14/14] i386: optimize setcc instructions
2012-10-06 12:30 ` [Qemu-devel] [PATCH 14/14] i386: optimize setcc instructions Paolo Bonzini
2012-10-07 19:58 ` Blue Swirl
@ 2012-10-09 20:22 ` Richard Henderson
2012-10-10 6:51 ` Paolo Bonzini
1 sibling, 1 reply; 43+ messages in thread
From: Richard Henderson @ 2012-10-09 20:22 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: qemu-devel
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> +static inline void gen_setcc1(DisasContext *s, int b, TCGv reg)
> {
> + int inv, jcc_op, size, cond;
> + TCGv t0;
> +
> + inv = b & 1;
> jcc_op = (b >> 1) & 7;
> +
> switch(s->cc_op) {
> + /* we optimize relational operators for the cmp/jcc case */
> case CC_OP_SUBB:
> case CC_OP_SUBW:
> case CC_OP_SUBL:
> case CC_OP_SUBQ:
> + size = s->cc_op - CC_OP_SUBB;
> + switch(jcc_op) {
> + case JCC_BE:
> + cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
> + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
> + gen_extu(size, cpu_tmp4);
> + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
> + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0);
> + break;
I don't think this patch is going in the right direction. In particular,
this is going to be largely redundant with gen_jcc1.
Instead, c.f. the DisasCompare structure now present in target-sparc/,
or a similar DisasCompare structure present in my jumbo target-s390x
patch set. Here we use common code to generate a comparison, which
can then be fed into brcond, setcond, or movcond as desired.
I think that this Compare structure should be fed to gen_compute_eflags_*
so that a parent gen_condition routine can make use of them for simple
conditions like z/nz.
At which point gen_jcc1 and gen_setcc1 become fairly trivial routines.
r~
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [Qemu-devel] [PATCH 14/14] i386: optimize setcc instructions
2012-10-09 20:22 ` Richard Henderson
@ 2012-10-10 6:51 ` Paolo Bonzini
0 siblings, 0 replies; 43+ messages in thread
From: Paolo Bonzini @ 2012-10-10 6:51 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
Il 09/10/2012 22:22, Richard Henderson ha scritto:
> On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
>> +static inline void gen_setcc1(DisasContext *s, int b, TCGv reg)
>> {
>> + int inv, jcc_op, size, cond;
>> + TCGv t0;
>> +
>> + inv = b & 1;
>> jcc_op = (b >> 1) & 7;
>> +
>> switch(s->cc_op) {
>> + /* we optimize relational operators for the cmp/jcc case */
>> case CC_OP_SUBB:
>> case CC_OP_SUBW:
>> case CC_OP_SUBL:
>> case CC_OP_SUBQ:
>> + size = s->cc_op - CC_OP_SUBB;
>> + switch(jcc_op) {
>> + case JCC_BE:
>> + cond = inv ? TCG_COND_GTU : TCG_COND_LEU;
>> + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src);
>> + gen_extu(size, cpu_tmp4);
>> + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
>> + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0);
>> + break;
>
> I don't think this patch is going in the right direction. In particular,
> this is going to be largely redundant with gen_jcc1.
Yes, it is. That's something I had started after posting this series,
but didn't finish in time for the weekend... :)
You can look at a few more changes in the eflags2 branch of my github
repo, including:
- delaying the actual generation of conditions, so that they can be used
in setcond/brcond/movcond
- optimization of setle/setl similar to setbe (shift OF onto SF, XOR,
mask to SF or SF+ZF, after which you can already do a brcond)
There are also TCG changes that add zero-bit tracking to optimize.c to
eliminate redundant ext (leading to both better code generation and
better copy propagation).
Paolo
> Instead, c.f. the DisasCompare structure now present in target-sparc/,
> or a similar DisasCompare structure present in my jumbo target-s390x
> patch set. Here we use common code to generate a comparison, which
> can then be fed into brcond, setcond, or movcond as desired.
>
> I think that this Compare structure should be fed to gen_compute_eflags_*
> so that a parent gen_condition routine can make use of them for simple
> conditions like z/nz.
>
> At which point gen_jcc1 and gen_setcc1 become fairly trivial routines.
>
>
> r~
>
>
^ permalink raw reply [flat|nested] 43+ messages in thread