All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: Richard Henderson <rth@twiddle.net>
Cc: "Vassili Karpov (malc)" <av1474@comtv.ru>,
	qemu-devel@nongnu.org, aurelien@aurel32.net
Subject: Re: [Qemu-devel] [PATCH 3/4] tcg-ppc: Convert to helper_ret_ld/st_mmu
Date: Sat, 07 Sep 2013 09:46:56 -0000	[thread overview]
Message-ID: <5212753A.7050509@redhat.com> (raw)
In-Reply-To: <1378051658-3393-4-git-send-email-rth@twiddle.net>

On 09/01/2013 06:07 PM, Richard Henderson wrote:
> Drop the ld/st_trampolines, loading the return address into a
> parameter register directly.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>   include/exec/exec-all.h |   4 +-
>   tcg/ppc/tcg-target.c    | 220 +++++++++++++++++++-----------------------------
>   2 files changed, 86 insertions(+), 138 deletions(-)
>
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index beb4149..a81e805 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -324,9 +324,7 @@ extern uintptr_t tci_tb_ptr;
>      In some implementations, we pass the "logical" return address manually;
>      in others, we must infer the logical return from the true return.  */
>   #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
> -# if defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
> -#  define GETRA_LDST(RA)   (*(int32_t *)((RA) - 4))
> -# elif defined(__arm__)
> +# if defined(__arm__)
>   /* We define two insns between the return address and the branch back to
>      straight-line.  Find and decode that branch insn.  */
>   #  define GETRA_LDST(RA)   tcg_getra_ldst(RA)
> diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
> index b0fbc54..a890319 100644
> --- a/tcg/ppc/tcg-target.c
> +++ b/tcg/ppc/tcg-target.c
> @@ -551,27 +551,26 @@ static void add_qemu_ldst_label (TCGContext *s,
>       label->label_ptr[0] = label_ptr;
>   }
>
> -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
> -   int mmu_idx) */
> +/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
> + *                                     int mmu_idx, uintptr_t ra)
> + */
>   static const void * const qemu_ld_helpers[4] = {
> -    helper_ldb_mmu,
> -    helper_ldw_mmu,
> -    helper_ldl_mmu,
> -    helper_ldq_mmu,
> +    helper_ret_ldub_mmu,
> +    helper_ret_lduw_mmu,
> +    helper_ret_ldul_mmu,
> +    helper_ret_ldq_mmu,
>   };
>
> -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
> -   uintxx_t val, int mmu_idx) */
> +/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
> + *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
> + */
>   static const void * const qemu_st_helpers[4] = {
> -    helper_stb_mmu,
> -    helper_stw_mmu,
> -    helper_stl_mmu,
> -    helper_stq_mmu,
> +    helper_ret_stb_mmu,
> +    helper_ret_stw_mmu,
> +    helper_ret_stl_mmu,
> +    helper_ret_stq_mmu,
>   };
>
> -static void *ld_trampolines[4];
> -static void *st_trampolines[4];
> -
>   static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2,
>                                  int addr_reg, int addr_reg2, int s_bits,
>                                  int offset1, int offset2, uint8_t **label_ptr)
> @@ -608,9 +607,14 @@ static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2,
>       tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1));
>       tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ));
>   #endif
> +
> +    /* Use a conditional branch-and-link so that we load a pointer to
> +       somewhere within the current opcode, for passing on to the helper.
> +       This address cannot be used for a tail call, but it's shorter
> +       than forming an address from scratch.  */
>       *label_ptr = s->code_ptr;
>       retranst = ((uint16_t *) s->code_ptr)[1] & ~3;
> -    tcg_out32 (s, BC | BI (7, CR_EQ) | retranst | BO_COND_FALSE);
> +    tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK);
>
>       /* r0 now contains &env->tlb_table[mem_index][index].addr_x */
>       tcg_out32 (s, (LWZ
> @@ -833,132 +837,99 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
>   }
>
>   #if defined(CONFIG_SOFTMMU)
> -static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
> +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
>   {
> -    int s_bits;
> -    int ir;
> -    int opc = label->opc;
> -    int mem_index = label->mem_index;
> -    int data_reg = label->datalo_reg;
> -    int data_reg2 = label->datahi_reg;
> -    int addr_reg = label->addrlo_reg;
> -    uint8_t *raddr = label->raddr;
> -    uint8_t **label_ptr = &label->label_ptr[0];
> +    TCGReg ir, datalo, datahi;
> +    int opc = lb->opc;
>
> -    s_bits = opc & 3;
> +    reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr);
>
> -    /* resolve label address */
> -    reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);
> +    ir = TCG_REG_R3;
> +    tcg_out_mov(s, TCG_TYPE_PTR, ir++, TCG_AREG0);
>
> -    /* slow path */
> -    ir = 4;
> -#if TARGET_LONG_BITS == 32
> -    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
> -#else
> +    if (TARGET_LONG_BITS == 32) {
> +        tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg);
> +    } else {
>   #ifdef TCG_TARGET_CALL_ALIGN_ARGS
> -    ir |= 1;
> -#endif
> -    tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
> -    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
> +        ir |= 1;
>   #endif
> -    tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
> -    tcg_out_call (s, (tcg_target_long) ld_trampolines[s_bits], 1);
> -    tcg_out32 (s, (tcg_target_long) raddr);
> +        tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrhi_reg);
> +        tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg);
> +    }
> +
> +    tcg_out_movi(s, TCG_TYPE_I32, ir++, lb->mem_index);
> +    tcg_out32(s, MFSPR | RT(ir++) | LR);
> +
> +    tcg_out_call(s, (uintptr_t)qemu_ld_helpers[opc & 3], 1);
> +
> +    datalo = lb->datalo_reg;
>       switch (opc) {
>       case 0|4:
> -        tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
> +        tcg_out32(s, EXTSB | RA(datalo) | RS(TCG_REG_R3));
>           break;
>       case 1|4:
> -        tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
> +        tcg_out32(s, EXTSH | RA(datalo) | RS(TCG_REG_R3));
>           break;
> -    case 0:
> -    case 1:
> -    case 2:
> -        if (data_reg != 3)
> -            tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3);
> +
> +    default:
> +        tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R3);
>           break;
> +
>       case 3:
> -        if (data_reg == 3) {
> -            if (data_reg2 == 4) {
> -                tcg_out_mov (s, TCG_TYPE_I32, 0, 4);
> -                tcg_out_mov (s, TCG_TYPE_I32, 4, 3);
> -                tcg_out_mov (s, TCG_TYPE_I32, 3, 0);
> -            }
> -            else {
> -                tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
> -                tcg_out_mov (s, TCG_TYPE_I32, 3, 4);
> -            }
> -        }
> -        else {
> -            if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4);
> -            if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
> +        datahi = lb->datahi_reg;
> +        if (datalo != TCG_REG_R3) {
> +            tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4);
> +            tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3);
> +        } else if (datahi != TCG_REG_R4) {
> +            tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3);
> +            tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R4);
> +        } else {
> +            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, TCG_REG_R4);
> +            tcg_out_mov(s, TCG_TYPE_I32, datahi, TCG_REG_R3);
> +            tcg_out_mov(s, TCG_TYPE_I32, datalo, TCG_REG_R0);
>           }
>           break;
>       }
> +
>       /* Jump to the code corresponding to next IR of qemu_st */
> -    tcg_out_b (s, 0, (tcg_target_long) raddr);
> +    tcg_out_b(s, 0, (uintptr_t)lb->raddr);
>   }
>
> -static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
> +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
>   {
> -    int ir;
> -    int opc = label->opc;
> -    int mem_index = label->mem_index;
> -    int data_reg = label->datalo_reg;
> -    int data_reg2 = label->datahi_reg;
> -    int addr_reg = label->addrlo_reg;
> -    uint8_t *raddr = label->raddr;
> -    uint8_t **label_ptr = &label->label_ptr[0];
> -
> -    /* resolve label address */
> -    reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);
> -
> -    /* slow path */
> -    ir = 4;
> -#if TARGET_LONG_BITS == 32
> -    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
> -#else
> +    TCGReg ir;
> +
> +    reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr);
> +
> +    ir = TCG_REG_R3;
> +    tcg_out_mov(s, TCG_TYPE_PTR, ir++, TCG_AREG0);
> +
> +    if (TARGET_LONG_BITS == 32) {
> +        tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg);
> +    } else {
>   #ifdef TCG_TARGET_CALL_ALIGN_ARGS
> -    ir |= 1;
> -#endif
> -    tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
> -    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
> +        ir |= 1;
>   #endif
> +        tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrhi_reg);
> +        tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->addrlo_reg);
> +    }
>
> -    switch (opc) {
> -    case 0:
> -        tcg_out32 (s, (RLWINM
> -                       | RA (ir)
> -                       | RS (data_reg)
> -                       | SH (0)
> -                       | MB (24)
> -                       | ME (31)));
> -        break;
> -    case 1:
> -        tcg_out32 (s, (RLWINM
> -                       | RA (ir)
> -                       | RS (data_reg)
> -                       | SH (0)
> -                       | MB (16)
> -                       | ME (31)));
> -        break;
> -    case 2:
> -        tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
> -        break;
> -    case 3:
> +    if (lb->opc != 3) {
> +        tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datalo_reg);
> +    } else {
>   #ifdef TCG_TARGET_CALL_ALIGN_ARGS
>           ir |= 1;
>   #endif
> -        tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2);
> -        tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
> -        break;
> +        tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datahi_reg);
> +        tcg_out_mov(s, TCG_TYPE_I32, ir++, lb->datalo_reg);
>       }
> -    ir++;
>
> -    tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
> -    tcg_out_call (s, (tcg_target_long) st_trampolines[opc], 1);
> -    tcg_out32 (s, (tcg_target_long) raddr);
> -    tcg_out_b (s, 0, (tcg_target_long) raddr);
> +    tcg_out_movi(s, TCG_TYPE_I32, ir++, lb->mem_index);
> +    tcg_out32(s, MFSPR | RT(ir++) | LR);
> +
> +    tcg_out_call(s, (uintptr_t)qemu_st_helpers[lb->opc], 1);
> +
> +    tcg_out_b(s, 0, (uintptr_t)lb->raddr);
>   }
>
>   void tcg_out_tb_finalize(TCGContext *s)
> @@ -979,17 +950,6 @@ void tcg_out_tb_finalize(TCGContext *s)
>   }
>   #endif
>
> -#ifdef CONFIG_SOFTMMU
> -static void emit_ldst_trampoline (TCGContext *s, const void *ptr)
> -{
> -    tcg_out32 (s, MFSPR | RT (3) | LR);
> -    tcg_out32 (s, ADDI | RT (3) | RA (3) | 4);
> -    tcg_out32 (s, MTSPR | RS (3) | LR);
> -    tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0);
> -    tcg_out_b (s, 0, (tcg_target_long) ptr);
> -}
> -#endif
> -
>   static void tcg_target_qemu_prologue (TCGContext *s)
>   {
>       int i, frame_size;
> @@ -1050,16 +1010,6 @@ static void tcg_target_qemu_prologue (TCGContext *s)
>       tcg_out32 (s, MTSPR | RS (0) | LR);
>       tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size);
>       tcg_out32 (s, BCLR | BO_ALWAYS);
> -
> -#ifdef CONFIG_SOFTMMU
> -    for (i = 0; i < 4; ++i) {
> -        ld_trampolines[i] = s->code_ptr;
> -        emit_ldst_trampoline (s, qemu_ld_helpers[i]);
> -
> -        st_trampolines[i] = s->code_ptr;
> -        emit_ldst_trampoline (s, qemu_st_helpers[i]);
> -    }
> -#endif
>   }
>
>   static void tcg_out_ld (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
>

Bad news... with this patch, either with or without patch 2, trying to 
execute sieve.flat from kvm-unit-tests (it doesn't matter if it is 
compiled as 32-bit or 64-bit, and with both i386-softmmu and 
x86_64-softmmu targets) fails as follows on my PowerBook:

qemu: fatal: Trying to execute code outside RAM or ROM at 0x70360000

EAX=00006d20 EBX=00007025 ECX=00000000 EDX=00000000
ESI=07fd7bd0 EDI=000f1930 EBP=07fd7b00 ESP=00006e0c
EIP=70270000 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =0000 00000000 ffffffff 008f9300
CS =f000 000f0000 ffffffff 008f9b00
SS =0000 00000000 ffffffff 008f9300
DS =0000 00000000 ffffffff 008f9300
FS =0000 00000000 ffffffff 008f9300
GS =0000 00000000 ffffffff 008f9300
LDT=0000 00000000 0000ffff 00008200
TR =0000 00000000 0000ffff 00008b00
GDT=     000f69e0 00000037
IDT=     00000000 000003ff
CR0=00000010 CR2=00000000 CR3=00000000 CR4=00000000
DR0=00000000 DR1=00000000 DR2=00000000 DR3=00000000
DR6=ffff0ff0 DR7=00000400
CCS=00000044 CCD=00006df8 CCO=ADDL
EFER=0000000000000000
FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80
FPR0=0000000000000000 0000 FPR1=0000000000000000 0000
FPR2=0000000000000000 0000 FPR3=0000000000000000 0000
FPR4=0000000000000000 0000 FPR5=0000000000000000 0000
FPR6=0000000000000000 0000 FPR7=0000000000000000 0000
XMM00=00000000000000000000000000000000 
XMM01=00000000000000000000000000000000
XMM02=00000000000000000000000000000000 
XMM03=00000000000000000000000000000000
XMM04=00000000000000000000000000000000 
XMM05=00000000000000000000000000000000
XMM06=00000000000000000000000000000000 
XMM07=00000000000000000000000000000000
Aborted

The failure happens as soon as the first hardware interrupt is serviced:

Servicing hardware INT=0x08
Servicing hardware INT=0x09
----------------
IN:
0x000fe98f:  push   %es
0x000fe990:  push   %ebp
0x000fe992:  push   %edi
0x000fe994:  push   %esi
0x000fe996:  push   %ebx
0x000fe998:  sub    $0x44,%esp
0x000fe99c:  mov    $0x40,%eax
0x000fe9a2:  mov    %ax,%es
0x000fe9a4:  mov    %es:0x6c,%edx
0x000fe9aa:  inc    %edx
0x000fe9ac:  cmp    $0x1800af,%edx
0x000fe9b3:  jbe    0xfe9c6

----------------
IN:
0x000f77f4:  xor    %edx,%edx
0x000f77f7:  calll  *%ecx

qemu: fatal: Trying to execute code outside RAM or ROM at 0x70360000

Command line:

i386-softmmu/qemu-system-i386 -kernel sieve32.flat -serial stdio -device 
isa-debug-exit,iobase=0xf4 -nographic

My two patches + 2/4 from this series work.  I didn't try 4/4 because it 
doesn't apply cleanly on top of my patches.	

Paolo

  reply	other threads:[~2013-09-07  9:46 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-01 16:07 [Qemu-devel] [PATCH 0/4] tcg-ppc ldst improvements Richard Henderson
2013-09-01 16:07 ` [Qemu-devel] [PATCH 1/4] configure: Allow command-line configure for ppc32 Richard Henderson
2013-09-01 16:07 ` [Qemu-devel] [PATCH 2/4] tcg-ppc: Avoid code for nop move Richard Henderson
2013-09-01 16:07 ` [Qemu-devel] [PATCH 3/4] tcg-ppc: Convert to helper_ret_ld/st_mmu Richard Henderson
2013-09-07  9:46   ` Paolo Bonzini [this message]
2013-09-09 17:42     ` Richard Henderson
2013-09-09 17:49       ` Paolo Bonzini
2013-09-09 18:20         ` Richard Henderson
2013-09-01 16:07 ` [Qemu-devel] [PATCH 4/4] tcg-ppc: Fix and cleanup tcg_out_tlb_check Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5212753A.7050509@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=aurelien@aurel32.net \
    --cc=av1474@comtv.ru \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.