From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:55996) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1cKdWa-0007AE-Lf for qemu-devel@nongnu.org; Fri, 23 Dec 2016 23:01:37 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1cKdWZ-00068i-Pi for qemu-devel@nongnu.org; Fri, 23 Dec 2016 23:01:36 -0500 Received: from mail-pg0-x244.google.com ([2607:f8b0:400e:c05::244]:34866) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16) (Exim 4.71) (envelope-from ) id 1cKdWZ-00067g-IP for qemu-devel@nongnu.org; Fri, 23 Dec 2016 23:01:35 -0500 Received: by mail-pg0-x244.google.com with SMTP id i5so2481159pgh.2 for ; Fri, 23 Dec 2016 20:01:35 -0800 (PST) Received: from bigtime.domain ([2602:47:d954:1500:5e51:4fff:fe40:9c64]) by smtp.gmail.com with ESMTPSA id n25sm65339316pfi.33.2016.12.23.20.01.33 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Fri, 23 Dec 2016 20:01:34 -0800 (PST) Sender: Richard Henderson From: Richard Henderson Date: Fri, 23 Dec 2016 20:00:40 -0800 Message-Id: <20161224040042.12654-64-rth@twiddle.net> In-Reply-To: <20161224040042.12654-1-rth@twiddle.net> References: <20161224040042.12654-1-rth@twiddle.net> Subject: [Qemu-devel] [PATCH 63/65] tcg: Use ctpop to generate ctz if needed List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Particularly when andc is also available, this is two insns shorter than using clz to compute ctz. Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 100 +++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 60 insertions(+), 40 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 6f4b1b6..95a39b7 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -497,33 +497,27 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) tcg_gen_extrl_i64_i32(ret, t1); tcg_temp_free_i64(t1); tcg_temp_free_i64(t2); - } else if (TCG_TARGET_HAS_clz_i32) { - TCGv_i32 t1 = tcg_temp_new_i32(); - TCGv_i32 t2 = tcg_temp_new_i32(); - tcg_gen_neg_i32(t1, arg1); - tcg_gen_xori_i32(t2, arg2, 31); - tcg_gen_and_i32(t1, t1, arg1); - tcg_gen_clz_i32(ret, t1, t2); - tcg_temp_free_i32(t1); - tcg_temp_free_i32(t2); - tcg_gen_xori_i32(ret, ret, 31); - } else if (TCG_TARGET_HAS_clz_i64) { - TCGv_i32 t1 = tcg_temp_new_i32(); - TCGv_i32 t2 = tcg_temp_new_i32(); - TCGv_i64 x1 = tcg_temp_new_i64(); - TCGv_i64 x2 = tcg_temp_new_i64(); - tcg_gen_neg_i32(t1, arg1); - tcg_gen_xori_i32(t2, arg2, 63); - tcg_gen_and_i32(t1, t1, arg1); - tcg_gen_extu_i32_i64(x1, t1); - tcg_gen_extu_i32_i64(x2, t2); - tcg_temp_free_i32(t1); - tcg_temp_free_i32(t2); - tcg_gen_clz_i64(x1, x1, x2); - tcg_gen_extrl_i64_i32(ret, x1); - tcg_temp_free_i64(x1); - tcg_temp_free_i64(x2); - tcg_gen_xori_i32(ret, ret, 63); + } else if (TCG_TARGET_HAS_ctpop_i32 + || TCG_TARGET_HAS_ctpop_i64 + || TCG_TARGET_HAS_clz_i32 + || TCG_TARGET_HAS_clz_i64) { + TCGv_i32 z, t = tcg_temp_new_i32(); + + if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) { + tcg_gen_subi_i32(t, arg1, 1); + tcg_gen_andc_i32(t, t, arg1); + tcg_gen_ctpop_i32(t, t); + } else { + /* Since all non-x86 hosts have clz(0) == 32, don't fight it. */ + tcg_gen_neg_i32(t, arg1); + tcg_gen_and_i32(t, t, arg1); + tcg_gen_clzi_i32(t, t, 32); + tcg_gen_xori_i32(t, t, 31); + } + z = tcg_const_i32(0); + tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t); + tcg_temp_free_i32(t); + tcg_temp_free_i32(z); } else { gen_helper_ctz_i32(ret, arg1, arg2); } @@ -531,9 +525,18 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) { - TCGv_i32 t = tcg_const_i32(arg2); - tcg_gen_ctz_i32(ret, arg1, t); - tcg_temp_free_i32(t); + if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) { + /* This equivalence has the advantage of not requiring a fixup. */ + TCGv_i32 t = tcg_temp_new_i32(); + tcg_gen_subi_i32(t, arg1, 1); + tcg_gen_andc_i32(t, t, arg1); + tcg_gen_ctpop_i32(ret, t); + tcg_temp_free_i32(t); + } else { + TCGv_i32 t = tcg_const_i32(arg2); + tcg_gen_ctz_i32(ret, arg1, t); + tcg_temp_free_i32(t); + } } void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) @@ -1842,16 +1845,24 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_ctz_i64) { tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2); - } else if (TCG_TARGET_HAS_clz_i64) { - TCGv_i64 t1 = tcg_temp_new_i64(); - TCGv_i64 t2 = tcg_temp_new_i64(); - tcg_gen_neg_i64(t1, arg1); - tcg_gen_xori_i64(t2, arg2, 63); - tcg_gen_and_i64(t1, t1, arg1); - tcg_gen_clz_i64(ret, t1, t2); - tcg_temp_free_i64(t1); - tcg_temp_free_i64(t2); - tcg_gen_xori_i64(ret, ret, 63); + } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) { + TCGv_i64 z, t = tcg_temp_new_i64(); + + if (TCG_TARGET_HAS_ctpop_i64) { + tcg_gen_subi_i64(t, arg1, 1); + tcg_gen_andc_i64(t, t, arg1); + tcg_gen_ctpop_i64(t, t); + } else { + /* Since all non-x86 hosts have clz(0) == 64, don't fight it. */ + tcg_gen_neg_i64(t, arg1); + tcg_gen_and_i64(t, t, arg1); + tcg_gen_clzi_i64(t, t, 64); + tcg_gen_xori_i64(t, t, 63); + } + z = tcg_const_i64(0); + tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t); + tcg_temp_free_i64(t); + tcg_temp_free_i64(z); } else { gen_helper_ctz_i64(ret, arg1, arg2); } @@ -1868,6 +1879,15 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); tcg_temp_free_i32(t32); + } else if (!TCG_TARGET_HAS_ctz_i64 + && TCG_TARGET_HAS_ctpop_i64 + && arg2 == 64) { + /* This equivalence has the advantage of not requiring a fixup. */ + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_subi_i64(t, arg1, 1); + tcg_gen_andc_i64(t, t, arg1); + tcg_gen_ctpop_i64(ret, t); + tcg_temp_free_i64(t); } else { TCGv_i64 t64 = tcg_const_i64(arg2); tcg_gen_ctz_i64(ret, arg1, t64); -- 2.9.3