From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1N1faq-0008JY-6K for qemu-devel@nongnu.org; Sat, 24 Oct 2009 08:19:36 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1N1fai-0008EM-AO for qemu-devel@nongnu.org; Sat, 24 Oct 2009 08:19:32 -0400 Received: from [199.232.76.173] (port=58458 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1N1fah-0008EE-JZ for qemu-devel@nongnu.org; Sat, 24 Oct 2009 08:19:27 -0400 Received: from smtp.nokia.com ([192.100.122.230]:58028 helo=mgw-mx03.nokia.com) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1N1fag-0004Ig-Rd for qemu-devel@nongnu.org; Sat, 24 Oct 2009 08:19:27 -0400 Received: from esebh105.NOE.Nokia.com (esebh105.ntc.nokia.com [172.21.138.211]) by mgw-mx03.nokia.com (Switch-3.3.3/Switch-3.3.3) with ESMTP id n9OCJH3M011602 for ; Sat, 24 Oct 2009 15:19:24 +0300 Received: from localhost.localdomain (essapo-nirac252105.europe.nokia.com [10.162.252.105]) by mgw-sa02.ext.nokia.com (Switch-3.3.3/Switch-3.3.3) with ESMTP id n9OCJ8qF022164 for ; Sat, 24 Oct 2009 15:19:20 +0300 From: juha.riihimaki@nokia.com Date: Sat, 24 Oct 2009 15:19:08 +0300 Message-Id: <1256386749-85299-10-git-send-email-juha.riihimaki@nokia.com> In-Reply-To: <1256386749-85299-1-git-send-email-juha.riihimaki@nokia.com> References: <1256386749-85299-1-git-send-email-juha.riihimaki@nokia.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Subject: [Qemu-devel] [PATCH v2 09/10] target-arm: optimize neon vld/vst ops List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org From: Juha Riihim=C3=A4ki Reduce the number of TCG ops generated from NEON vld/vst instructions by simplifying the code generation. 
Signed-off-by: Juha Riihim=C3=A4ki --- target-arm/translate.c | 67 ++++++++++++++++++++++++------------------= ----- 1 files changed, 34 insertions(+), 33 deletions(-) diff --git a/target-arm/translate.c b/target-arm/translate.c index f262758..55d6377 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -3708,6 +3708,7 @@ static int disas_neon_ls_insn(CPUState * env, Disas= Context *s, uint32_t insn) TCGv tmp; TCGv tmp2; TCGv_i64 tmp64; + TCGv stride_var; =20 if (!vfp_enabled(env)) return 1; @@ -3729,6 +3730,7 @@ static int disas_neon_ls_insn(CPUState * env, Disas= Context *s, uint32_t insn) return 1; load_reg_var(s, addr, rn); stride =3D (1 << size) * interleave; + stride_var =3D tcg_const_i32(stride); for (reg =3D 0; reg < nregs; reg++) { if (interleave > 2 || (interleave =3D=3D 2 && nregs =3D=3D 2= )) { load_reg_var(s, addr, rn); @@ -3747,7 +3749,7 @@ static int disas_neon_ls_insn(CPUState * env, Disas= Context *s, uint32_t insn) neon_load_reg64(tmp64, rd); gen_st64(tmp64, addr, IS_USER(s)); } - tcg_gen_addi_i32(addr, addr, stride); + tcg_gen_add_i32(addr, addr, stride_var); } else { for (pass =3D 0; pass < 2; pass++) { if (size =3D=3D 2) { @@ -3758,58 +3760,57 @@ static int disas_neon_ls_insn(CPUState * env, Dis= asContext *s, uint32_t insn) tmp =3D neon_load_reg(rd, pass); gen_st32(tmp, addr, IS_USER(s)); } - tcg_gen_addi_i32(addr, addr, stride); + tcg_gen_add_i32(addr, addr, stride_var); } else if (size =3D=3D 1) { if (load) { tmp =3D gen_ld16u(addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); + tcg_gen_add_i32(addr, addr, stride_var); tmp2 =3D gen_ld16u(addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); - gen_bfi(tmp, tmp, tmp2, 16, 0xffff); + tcg_gen_add_i32(addr, addr, stride_var); + tcg_gen_shli_i32(tmp2, tmp2, 16); + tcg_gen_or_i32(tmp, tmp, tmp2); dead_tmp(tmp2); neon_store_reg(rd, pass, tmp); } else { tmp =3D neon_load_reg(rd, pass); - tmp2 =3D new_tmp(); - tcg_gen_shri_i32(tmp2, tmp, 16); - gen_st16(tmp, addr, 
IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); - gen_st16(tmp2, addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); + tcg_gen_qemu_st16(tmp, addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + tcg_gen_shri_i32(tmp, tmp, 16); + tcg_gen_qemu_st16(tmp, addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + dead_tmp(tmp); } } else /* size =3D=3D 0 */ { if (load) { - TCGV_UNUSED(tmp2); - for (n =3D 0; n < 4; n++) { - tmp =3D gen_ld8u(addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); - if (n =3D=3D 0) { - tmp2 =3D tmp; - } else { - gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff= ); - dead_tmp(tmp); - } + tmp =3D gen_ld8u(addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + for (n =3D 1; n < 4; n++) { + tmp2 =3D gen_ld8u(addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + tcg_gen_shli_i32(tmp2, tmp2, n * 8); + tcg_gen_or_i32(tmp, tmp, tmp2); + dead_tmp(tmp2); } - neon_store_reg(rd, pass, tmp2); + neon_store_reg(rd, pass, tmp); } else { - tmp2 =3D neon_load_reg(rd, pass); - for (n =3D 0; n < 4; n++) { - tmp =3D new_tmp(); - if (n =3D=3D 0) { - tcg_gen_mov_i32(tmp, tmp2); - } else { - tcg_gen_shri_i32(tmp, tmp2, n * 8); - } - gen_st8(tmp, addr, IS_USER(s)); - tcg_gen_addi_i32(addr, addr, stride); + tmp2 =3D tcg_const_i32(8); + tmp =3D neon_load_reg(rd, pass); + for (n =3D 0; n < 3; n++) { + tcg_gen_qemu_st8(tmp, addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + tcg_gen_shr_i32(tmp, tmp, tmp2); } - dead_tmp(tmp2); + tcg_gen_qemu_st8(tmp, addr, IS_USER(s)); + tcg_gen_add_i32(addr, addr, stride_var); + dead_tmp(tmp); + tcg_temp_free_i32(tmp2); } } } } rd +=3D spacing; } + tcg_temp_free_i32(stride_var); stride =3D nregs * 8; } else { size =3D (insn >> 10) & 3; --=20 1.6.5