From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([208.118.235.92]:47214) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TE4je-00016F-8J for qemu-devel@nongnu.org; Tue, 18 Sep 2012 16:49:35 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1TE4jc-0004bT-N5 for qemu-devel@nongnu.org; Tue, 18 Sep 2012 16:49:34 -0400 Received: from cantor2.suse.de ([195.135.220.15]:33969 helo=mx2.suse.de) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TE4jc-0004aU-9q for qemu-devel@nongnu.org; Tue, 18 Sep 2012 16:49:32 -0400 Message-ID: <5058DE58.9080006@suse.de> Date: Tue, 18 Sep 2012 22:49:28 +0200 From: Alexander Graf MIME-Version: 1.0 References: <1347224784-19472-1-git-send-email-rth@twiddle.net> <1347224784-19472-48-git-send-email-rth@twiddle.net> In-Reply-To: <1347224784-19472-48-git-send-email-rth@twiddle.net> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [PATCH 047/126] target-s390: Convert LOAD, STORE MULTIPLE List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Henderson Cc: qemu-devel@nongnu.org On 09/09/2012 11:05 PM, Richard Henderson wrote: > Signed-off-by: Richard Henderson > --- > target-s390x/insn-data.def | 14 ++++ > target-s390x/translate.c | 191 ++++++++++++++++++++++++--------------------- > 2 files changed, 116 insertions(+), 89 deletions(-) > > diff --git a/target-s390x/insn-data.def b/target-s390x/insn-data.def > index 103c1d2..b1f0448 100644 > --- a/target-s390x/insn-data.def > +++ b/target-s390x/insn-data.def > @@ -269,6 +269,13 @@ > C(0xb900, LPGR, RRE, Z, 0, r2, r1, 0, abs, abs64) > C(0xb910, LPGFR, RRE, Z, 0, r2_32s, r1, 0, abs, abs64) > > +/* LOAD MULTIPLE */ > + C(0x9800, LM, RS_a, Z, 0, a2, 0, 0, lm32, 0) > + C(0xeb98, LMY, RSY_a, LD, 0, a2, 0, 0, lm32, 0) > + C(0xeb04, LMG, RSY_a, Z, 0, a2, 0, 0, lm64, 0) > +/* LOAD MULTIPLE HIGH */ > + C(0xeb96, 
LMH, RSY_a, Z, 0, a2, 0, 0, lmh, 0) > + > /* MOVE LONG */ > C(0x0e00, MVCL, RR_a, Z, 0, 0, 0, 0, mvcl, 0) > > @@ -364,6 +371,13 @@ > /* STORE HALFWORD RELATIVE LONG */ > C(0xc407, STHRL, RIL_b, GIE, r1_o, ri2, 0, 0, st16, 0) > > +/* STORE MULTIPLE */ > + D(0x9000, STM, RS_a, Z, 0, a2, 0, 0, stm, 0, 4) > + D(0xeb90, STMY, RSY_a, LD, 0, a2, 0, 0, stm, 0, 4) > + D(0xeb24, STMG, RSY_a, Z, 0, a2, 0, 0, stm, 0, 8) > +/* STORE MULTIPLE HIGH */ > + C(0xeb26, STMH, RSY_a, Z, 0, a2, 0, 0, stmh, 0) > + > /* SUBTRACT */ > C(0x1b00, SR, RR_a, Z, r1, r2, new, r1_32, sub, subs32) > C(0xb9f9, SRK, RRF_a, DO, r2, r3, new, r1_32, sub, subs32) > diff --git a/target-s390x/translate.c b/target-s390x/translate.c > index e78e4bb..4c5540d 100644 > --- a/target-s390x/translate.c > +++ b/target-s390x/translate.c > @@ -271,6 +271,11 @@ static inline void store_reg32_i64(int reg, TCGv_i64 v) > tcg_gen_deposit_i64(regs[reg], regs[reg], v, 0, 32); > } > > +static inline void store_reg32h_i64(int reg, TCGv_i64 v) > +{ > + tcg_gen_deposit_i64(regs[reg], regs[reg], v, 32, 32); > +} > + > static inline void store_reg16(int reg, TCGv_i32 v) > { > /* 16 bit register writes keep the upper bytes */ > @@ -1320,65 +1325,12 @@ static void disas_e5(DisasContext* s, uint64_t insn) > > static void disas_eb(DisasContext *s, int op, int r1, int r3, int b2, int d2) > { > - TCGv_i64 tmp, tmp2, tmp3, tmp4; > + TCGv_i64 tmp, tmp2; > TCGv_i32 tmp32_1, tmp32_2; > - int i, stm_len; > > LOG_DISAS("disas_eb: op 0x%x r1 %d r3 %d b2 %d d2 0x%x\n", > op, r1, r3, b2, d2); > switch (op) { > - case 0x4: /* LMG R1,R3,D2(B2) [RSE] */ > - case 0x24: /* STMG R1,R3,D2(B2) [RSE] */ > - stm_len = 8; > - goto do_mh; > - case 0x26: /* STMH R1,R3,D2(B2) [RSE] */ > - case 0x96: /* LMH R1,R3,D2(B2) [RSE] */ > - stm_len = 4; > -do_mh: > - /* Apparently, unrolling lmg/stmg of any size gains performance - > - even for very long ones... 
*/ > - tmp = get_address(s, 0, b2, d2); > - tmp3 = tcg_const_i64(stm_len); > - tmp4 = tcg_const_i64(op == 0x26 ? 32 : 4); > - for (i = r1;; i = (i + 1) % 16) { > - switch (op) { > - case 0x4: > - tcg_gen_qemu_ld64(regs[i], tmp, get_mem_index(s)); > - break; > - case 0x96: > - tmp2 = tcg_temp_new_i64(); > -#if HOST_LONG_BITS == 32 > - tcg_gen_qemu_ld32u(tmp2, tmp, get_mem_index(s)); > - tcg_gen_trunc_i64_i32(TCGV_HIGH(regs[i]), tmp2); > -#else > - tcg_gen_qemu_ld32u(tmp2, tmp, get_mem_index(s)); > - tcg_gen_shl_i64(tmp2, tmp2, tmp4); > - tcg_gen_ext32u_i64(regs[i], regs[i]); > - tcg_gen_or_i64(regs[i], regs[i], tmp2); > -#endif > - tcg_temp_free_i64(tmp2); > - break; > - case 0x24: > - tcg_gen_qemu_st64(regs[i], tmp, get_mem_index(s)); > - break; > - case 0x26: > - tmp2 = tcg_temp_new_i64(); > - tcg_gen_shr_i64(tmp2, regs[i], tmp4); > - tcg_gen_qemu_st32(tmp2, tmp, get_mem_index(s)); > - tcg_temp_free_i64(tmp2); > - break; > - default: > - tcg_abort(); > - } > - if (i == r3) { > - break; > - } > - tcg_gen_add_i64(tmp, tmp, tmp3); > - } > - tcg_temp_free_i64(tmp); > - tcg_temp_free_i64(tmp3); > - tcg_temp_free_i64(tmp4); > - break; > case 0x2c: /* STCMH R1,M3,D2(B2) [RSY] */ > tmp = get_address(s, 0, b2, d2); > tmp32_1 = tcg_const_i32(r1); > @@ -2270,44 +2222,17 @@ static void disas_b9(DisasContext *s, int op, int r1, int r2) > > static void disas_s390_insn(DisasContext *s) > { > - TCGv_i64 tmp, tmp2, tmp3, tmp4; > + TCGv_i64 tmp, tmp2; > TCGv_i32 tmp32_1, tmp32_2; > unsigned char opc; > uint64_t insn; > - int op, r1, r2, r3, d1, d2, x2, b1, b2, i, i2, r1b; > + int op, r1, r2, r3, d1, d2, x2, b1, b2, i2, r1b; > TCGv_i32 vl; > > opc = cpu_ldub_code(cpu_single_env, s->pc); > LOG_DISAS("opc 0x%x\n", opc); > > switch (opc) { > - case 0x98: /* LM R1,R3,D2(B2) [RS] */ > - case 0x90: /* STM R1,R3,D2(B2) [RS] */ > - insn = ld_code4(s->pc); > - decode_rs(s, insn, &r1, &r3, &b2, &d2); > - > - tmp = get_address(s, 0, b2, d2); > - tmp2 = tcg_temp_new_i64(); > - tmp3 = 
tcg_const_i64(4); > - tmp4 = tcg_const_i64(0xffffffff00000000ULL); > - for (i = r1;; i = (i + 1) % 16) { > - if (opc == 0x98) { > - tcg_gen_qemu_ld32u(tmp2, tmp, get_mem_index(s)); > - tcg_gen_and_i64(regs[i], regs[i], tmp4); > - tcg_gen_or_i64(regs[i], regs[i], tmp2); > - } else { > - tcg_gen_qemu_st32(regs[i], tmp, get_mem_index(s)); > - } > - if (i == r3) { > - break; > - } > - tcg_gen_add_i64(tmp, tmp, tmp3); > - } > - tcg_temp_free_i64(tmp); > - tcg_temp_free_i64(tmp2); > - tcg_temp_free_i64(tmp3); > - tcg_temp_free_i64(tmp4); > - break; > case 0x92: /* MVI D1(B1),I2 [SI] */ > insn = ld_code4(s->pc); > tmp = decode_si(s, insn, &i2, &b1, &d1); > @@ -2604,19 +2529,17 @@ static void disas_s390_insn(DisasContext *s) > d1 = (insn >> 16) & 0xfff; > b2 = (insn >> 12) & 0xf; > d2 = insn & 0xfff; > - tmp = load_reg(r1); > /* XXX key in r3 */ > - tmp2 = get_address(s, 0, b1, d1); > - tmp3 = get_address(s, 0, b2, d2); > + tmp = get_address(s, 0, b1, d1); > + tmp2 = get_address(s, 0, b2, d2); > if (opc == 0xda) { > - gen_helper_mvcp(cc_op, cpu_env, tmp, tmp2, tmp3); > + gen_helper_mvcp(cc_op, cpu_env, regs[r1], tmp, tmp2); > } else { > - gen_helper_mvcs(cc_op, cpu_env, tmp, tmp2, tmp3); > + gen_helper_mvcs(cc_op, cpu_env, regs[r1], tmp, tmp2); > } > set_cc_static(s); > tcg_temp_free_i64(tmp); > tcg_temp_free_i64(tmp2); > - tcg_temp_free_i64(tmp3); > break; > #endif > case 0xe3: > @@ -3346,6 +3269,57 @@ static ExitStatus op_lpsw(DisasContext *s, DisasOps *o) > } > #endif > > +static ExitStatus op_lm32(DisasContext *s, DisasOps *o) > +{ > + int r1 = get_field(s->fields, r1); > + int r3 = get_field(s->fields, r3); > + TCGv_i64 t = tcg_temp_new_i64(); > + > + while (1) { > + tcg_gen_qemu_ld32u(t, o->in2, get_mem_index(s)); > + store_reg32_i64(r1, t); > + if (r1 == r3) { > + break; > + } > + tcg_gen_addi_i64(o->in2, o->in2, 4); > + r1 = (r1 + 1) & 15; > + } > + return NO_EXIT; > +} > + > +static ExitStatus op_lmh(DisasContext *s, DisasOps *o) > +{ > + int r1 = 
get_field(s->fields, r1); > + int r3 = get_field(s->fields, r3); > + TCGv_i64 t = tcg_temp_new_i64(); > + > + while (1) { > + tcg_gen_qemu_ld32u(t, o->in2, get_mem_index(s)); > + store_reg32h_i64(r1, t); > + if (r1 == r3) { > + break; > + } > + tcg_gen_addi_i64(o->in2, o->in2, 4); I don't have a good answer here, but how do we guarantee that we're not overrunning the TCG instruction buffer? Alex