From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1NLwbq-0003D9-W6 for qemu-devel@nongnu.org; Sat, 19 Dec 2009 05:32:27 -0500 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1NLwbl-0003BK-Lb for qemu-devel@nongnu.org; Sat, 19 Dec 2009 05:32:26 -0500 Received: from [199.232.76.173] (port=49052 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1NLwbl-0003BE-DP for qemu-devel@nongnu.org; Sat, 19 Dec 2009 05:32:21 -0500 Received: from mail-yw0-f171.google.com ([209.85.211.171]:53667) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1NLwbl-0004uw-4m for qemu-devel@nongnu.org; Sat, 19 Dec 2009 05:32:21 -0500 Received: by ywh1 with SMTP id 1so3644659ywh.18 for ; Sat, 19 Dec 2009 02:32:19 -0800 (PST) MIME-Version: 1.0 In-Reply-To: <4259c837ce1a62fcb495e57f18b588eb7365d286.1261012798.git.rth@twiddle.net> References: <4259c837ce1a62fcb495e57f18b588eb7365d286.1261012798.git.rth@twiddle.net> From: Blue Swirl Date: Sat, 19 Dec 2009 10:31:59 +0000 Message-ID: Subject: Re: [Qemu-devel] [PATCH 5/7] tcg-sparc: Implement setcond, movcond, setcond2, brcond2. Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Henderson Cc: qemu-devel@nongnu.org On Wed, Dec 16, 2009 at 11:26 PM, Richard Henderson wrote: > An initial cut at conditional moves for the sparc backend. > > Untested, as I don't have sparc hardware and the build system > resists attempts at cross-compilation. I can try if you have a test case. > Note fixes to tcg_out_movi_imm32 (wrong check_fit_tl width), > use of TCG_TARGET_REG_BITS =3D=3D 64 tests instead of explicitly > checking for __sparc_v9__ everywhere. Good fixes. I think these should be in a different patch which could be applied.
> - =C2=A0 =C2=A0tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); > + =C2=A0 =C2=A0if (ret !=3D arg) > + =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR); > =C2=A0} This optimization is already handled at tcg-op.h:tcg_gen_mov_i32(). > =C2=A0static inline void tcg_out_movi_imm32(TCGContext *s, int ret, uint32_t arg) > =C2=A0{ > - =C2=A0 =C2=A0if (check_fit_tl(arg, 12)) > + =C2=A0 =C2=A0if (check_fit_tl(arg, 13)) > =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_out_movi_imm13(s, ret, arg); IIRC sign extension prevents this. > =C2=A0static inline void tcg_out_movi(TCGContext *s, TCGType type, > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 int ret, tcg_target_long arg) > =C2=A0{ > -#if defined(__sparc_v9__) && !defined(__sparc_v8plus__) > - =C2=A0 =C2=A0if (!check_fit_tl(arg, 32) && (arg & ~0xffffffffULL) !=3D 0) { > - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_movi_imm32(s, TCG_REG_I4, arg >> 32); > - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX); > + =C2=A0 =C2=A0if (type =3D=3D TCG_TYPE_I32 || (arg & ~(tcg_target_long)0xffffffff)) > =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_out_movi_imm32(s, ret, arg); > - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR); > - =C2=A0 =C2=A0} else if (check_fit_tl(arg, 12)) > - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_movi_imm13(s, ret, arg); > - =C2=A0 =C2=A0else { > - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_sethi(s, ret, arg); > - =C2=A0 =C2=A0 =C2=A0 =C2=A0if (arg & 0x3ff) > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR); > + =C2=A0 =C2=A0else if (TCG_TARGET_REG_BITS =3D=3D 64) { > + =C2=A0 =C2=A0 =C2=A0 =C2=A0if (check_fit_tl(arg, 32)) { > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0/* Sign extended 32-bit constants are formed with SETHI+XOR.
=C2=A0*/ > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_sethi(s, ret, ~arg); > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR); > + =C2=A0 =C2=A0 =C2=A0 =C2=A0} else { > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_movi_imm32(s, TCG_REG_I4, arg >> 32); > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX); > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_movi_imm32(s, ret, arg); > + =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR); > + =C2=A0 =C2=A0 =C2=A0 =C2=A0} > =C2=A0 =C2=A0 } > -#else > - =C2=A0 =C2=A0tcg_out_movi_imm32(s, ret, arg); > -#endif > =C2=A0} Please split this also to another patch, it looks good. > + =C2=A0 =C2=A0 =C2=A0 =C2=A0int32_t val =3D l->u.value - (tcg_target_long)s->code_ptr; > + =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x2) > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 | INSN_OFF22(l->u.value - (unsigned long)s->code_ptr))); > =C2=A0 =C2=A0 } else { > =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0); > - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | 0)); > + =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x2) | 0)); What instruction is this? A define would be in order.
> - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) | > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0(0x5 << 19) | > + =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x1) | (0x5 << 19) | > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 INSN_OFF19(l->u.value - (unsigned long)s->code_ptr))); > =C2=A0 =C2=A0 } else { > =C2=A0 =C2=A0 =C2=A0 =C2=A0 tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0); > - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) | > - =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0(0x5 << 19) | 0)); > + =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out32(s, (INSN_OP(0) | opc | INSN_OP2(0x1) | (0x5 << 19) | 0)); Same here. > =C2=A0static void tcg_out_brcond_i32(TCGContext *s, int cond, > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0TCGArg arg1, TCGArg arg2, int const_arg2, > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0int label_index) > =C2=A0{ > - =C2=A0 =C2=A0if (const_arg2 && arg2 =3D=3D 0) > - =C2=A0 =C2=A0 =C2=A0 =C2=A0/* orcc %g0, r, %g0 */ > - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_arith(s, TCG_REG_G0, TCG_REG_G0, arg1, ARITH_ORCC); > - =C2=A0 =C2=A0else > - =C2=A0 =C2=A0 =C2=A0 =C2=A0/* subcc r1, r2, %g0 */ > - =C2=A0 =C2=A0 =C2=A0 =C2=A0tcg_out_arith(s, TCG_REG_G0, arg1, arg2, ARITH_SUBCC); > - =C2=A0 =C2=A0tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index); > + =C2=A0 =C2=A0tcg_out_cmp(s, arg1, arg2, const_arg2); What's wrong with 'orcc' (produces the synthetic instruction 'tst')?