From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1NM8mz-0004Pb-ER for qemu-devel@nongnu.org; Sat, 19 Dec 2009 18:32:45 -0500 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1NM8mu-0004NW-Px for qemu-devel@nongnu.org; Sat, 19 Dec 2009 18:32:45 -0500 Received: from [199.232.76.173] (port=48687 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1NM8mu-0004NL-CO for qemu-devel@nongnu.org; Sat, 19 Dec 2009 18:32:40 -0500 Received: from mail-pz0-f188.google.com ([209.85.222.188]:47036) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1NM8mt-00034p-To for qemu-devel@nongnu.org; Sat, 19 Dec 2009 18:32:40 -0500 Received: by pzk26 with SMTP id 26so2414252pzk.4 for ; Sat, 19 Dec 2009 15:32:37 -0800 (PST) MIME-Version: 1.0 In-Reply-To: <9f38711e7d93ded68cad15e5cdbeecbf2c9ae521.1261248772.git.rth@twiddle.net> References: <9f38711e7d93ded68cad15e5cdbeecbf2c9ae521.1261248772.git.rth@twiddle.net> Date: Sun, 20 Dec 2009 00:32:37 +0100 Message-ID: <761ea48b0912191532n4aaf019drd2fc5d06f74b6fe7@mail.gmail.com> Subject: Re: [Qemu-devel] [PATCH 3/5] tcg-i386: Implement small forward branches. From: Laurent Desnogues Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: quoted-printable List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Henderson Cc: qemu-devel@nongnu.org, aurelien@aurel32.net On Sat, Dec 19, 2009 at 7:44 PM, Richard Henderson wrote: > There are places, like brcond2, where we know that the destination > of a forward branch will be within 127 bytes. > > Add the R_386_PC8 relocation type to support this.  Add a flag to > tcg_out_jxx and tcg_out_brcond* to enable it.  Set the flag in the > brcond2 label_next branches; pass along the input flag otherwise. 
> > Signed-off-by: Richard Henderson > > --- >  elf.h                 |    2 + >  tcg/i386/tcg-target.c |  116 +++++++++++++++++++++++++++++++++---------------- >  2 files changed, 80 insertions(+), 38 deletions(-) > > diff --git a/elf.h b/elf.h > index 11674d7..c84c8ab 100644 > --- a/elf.h > +++ b/elf.h > @@ -243,6 +243,8 @@ typedef struct { >  #define R_386_GOTOFF   9 >  #define R_386_GOTPC    10 >  #define R_386_NUM      11 > +/* Not a dynamic reloc, so not included in R_386_NUM.  Used in TCG.  */ > +#define R_386_PC8      23 > >  #define R_MIPS_NONE            0 >  #define R_MIPS_16              1 > diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c > index 972b102..4c42caf 100644 > --- a/tcg/i386/tcg-target.c > +++ b/tcg/i386/tcg-target.c > @@ -61,6 +61,12 @@ static void patch_reloc(uint8_t *code_ptr, int type, >     case R_386_PC32: >         *(uint32_t *)code_ptr = value - (long)code_ptr; >         break; > +    case R_386_PC8: > +        value -= (long)code_ptr; > +        if (value != (int8_t)value) > +            tcg_abort(); > +        *(uint8_t *)code_ptr = value; > +        break; >     default: >         tcg_abort(); >     } > @@ -305,7 +311,8 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) >         tgen_arithi(s, ARITH_ADD, reg, val, 0); >  } > > -static void tcg_out_jxx(TCGContext *s, int opc, int label_index) > +/* Use SMALL != 0 to force a short forward branch. 
 */ > +static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small) >  { >     int32_t val, val1; >     TCGLabel *l = &s->labels[label_index]; > @@ -314,12 +321,16 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index) >         val = l->u.value - (tcg_target_long)s->code_ptr; >         val1 = val - 2; >         if ((int8_t)val1 == val1) { > -            if (opc == -1) > +            if (opc == -1) { >                 tcg_out8(s, 0xeb); > -            else > +            } else { >                 tcg_out8(s, 0x70 + opc); > +            } >             tcg_out8(s, val1); >         } else { > +            if (small) { > +                tcg_abort(); > +            } >             if (opc == -1) { >                 tcg_out8(s, 0xe9); >                 tcg_out32(s, val - 5); > @@ -329,6 +340,14 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index) >                 tcg_out32(s, val - 6); >             } >         } > +    } else if (small) { > +        if (opc == -1) { > +            tcg_out8(s, 0xeb); > +        } else { > +            tcg_out8(s, 0x70 + opc); > +        } > +        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1); > +        s->code_ptr += 1; >     } else { >         if (opc == -1) { >             tcg_out8(s, 0xe9); > @@ -343,7 +362,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index) > >  static void tcg_out_brcond(TCGContext *s, int cond, >                             TCGArg arg1, TCGArg arg2, int const_arg2, > -                       
    int label_index) > +                           int label_index, int small) >  { >     if (const_arg2) { >         if (arg2 == 0) { > @@ -355,64 +374,84 @@ static void tcg_out_brcond(TCGContext *s, int cond, >     } else { >         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1); >     } > -    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index); > +    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small); >  } > >  /* XXX: we implement it at the target level to avoid having to >    handle cross basic blocks temporaries */ > -static void tcg_out_brcond2(TCGContext *s, > -                            const TCGArg *args, const int *const_args) > +static void tcg_out_brcond2(TCGContext *s, const TCGArg *args, > +                            const int *const_args, int small) >  { >     int label_next; >     label_next = gen_new_label(); >     switch(args[4]) { >     case TCG_COND_EQ: > -        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next); > -        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]); > +        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], > +                       label_next, small); Shouldn't it be 1 instead of small? The rest is OK. 
Laurent > +        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], > +                       args[5], small); >         break; >     case TCG_COND_NE: > -        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]); > -        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]); > +        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], > +                       args[5], small); > +        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], > +                       args[5], small); >         break; >     case TCG_COND_LT: > -        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]); > -        tcg_out_jxx(s, JCC_JNE, label_next); > -        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]); > +        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], > +                       args[5], small); > +        tcg_out_jxx(s, JCC_JNE, label_next, 1); > +        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], > +                       args[5], small); >         break; >     case TCG_COND_LE: > -        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]); > -        tcg_out_jxx(s, JCC_JNE, label_next); > -        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]); > +        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], > +                       args[5], small); > +        tcg_out_jxx(s, JCC_JNE, label_next, 1); > +        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], > +                     
  args[5], small); >         break; >     case TCG_COND_GT: > -        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]); > -        tcg_out_jxx(s, JCC_JNE, label_next); > -        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]); > +        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], > +                       args[5], small); > +        tcg_out_jxx(s, JCC_JNE, label_next, 1); > +        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], > +                       args[5], small); >         break; >     case TCG_COND_GE: > -        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]); > -        tcg_out_jxx(s, JCC_JNE, label_next); > -        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]); > +        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], > +                       args[5], small); > +        tcg_out_jxx(s, JCC_JNE, label_next, 1); > +        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], > +                       args[5], small); >         break; >     case TCG_COND_LTU: > -        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]); > -        tcg_out_jxx(s, JCC_JNE, label_next); > -        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]); > +        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], > +                       args[5], small); > +        tcg_out_jxx(s, JCC_JNE, label_next, 1); > +        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], > +                       args[5], 
small); >         break; >     case TCG_COND_LEU: > -        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]); > -        tcg_out_jxx(s, JCC_JNE, label_next); > -        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]); > +        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], > +                       args[5], small); > +        tcg_out_jxx(s, JCC_JNE, label_next, 1); > +        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], > +                       args[5], small); >         break; >     case TCG_COND_GTU: > -        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]); > -        tcg_out_jxx(s, JCC_JNE, label_next); > -        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]); > +        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], > +                       args[5], small); > +        tcg_out_jxx(s, JCC_JNE, label_next, 1); > +        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], > +                       args[5], small); >         break; >     case TCG_COND_GEU: > -        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]); > -        tcg_out_jxx(s, JCC_JNE, label_next); > -        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]); > +        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], > +                       args[5], small); > +        tcg_out_jxx(s, JCC_JNE, label_next, 1); > +        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], > +                       args[5], small); > 
        break; >     default: >         tcg_abort(); > @@ -913,7 +952,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, >         } >         break; >     case INDEX_op_br: > -        tcg_out_jxx(s, JCC_JMP, args[0]); > +        tcg_out_jxx(s, JCC_JMP, args[0], 0); >         break; >     case INDEX_op_movi_i32: >         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]); > @@ -1044,10 +1083,11 @@ static inline void tcg_out_op(TCGContext *s, int opc, >             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]); >         break; >     case INDEX_op_brcond_i32: > -        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]); > +        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], > +                       args[3], 0); >         break; >     case INDEX_op_brcond2_i32: > -        tcg_out_brcond2(s, args, const_args); > +        tcg_out_brcond2(s, args, const_args, 0); >         break; > >     case INDEX_op_bswap16_i32: > -- > 1.6.5.2 > > > >