From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1IgFy6-0002AB-83 for qemu-devel@nongnu.org; Fri, 12 Oct 2007 04:34:02 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1IgFy5-00028v-9J for qemu-devel@nongnu.org; Fri, 12 Oct 2007 04:34:01 -0400 Received: from [199.232.76.173] (helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1IgFy4-00028Y-U1 for qemu-devel@nongnu.org; Fri, 12 Oct 2007 04:34:01 -0400 Received: from honiara.magic.fr ([195.154.193.36]) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1IgFy4-0007ZK-AI for qemu-devel@nongnu.org; Fri, 12 Oct 2007 04:34:00 -0400 Received: from [192.168.0.2] (ppp-36.net-123.static.magiconline.fr [80.118.184.36]) by honiara.magic.fr (8.13.1/8.13.1) with ESMTP id l9C8XsTX017611 for ; Fri, 12 Oct 2007 10:33:55 +0200 From: "J. Mayer" Content-Type: multipart/mixed; boundary="=-1knY4cScbh2nLbfYL06+" Date: Fri, 12 Oct 2007 10:33:57 +0200 Message-Id: <1192178037.9976.259.camel@rapid> Mime-Version: 1.0 Subject: [Qemu-devel] RFC: Code fetch optimisation Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org --=-1knY4cScbh2nLbfYL06+ Content-Type: text/plain Content-Transfer-Encoding: 7bit Here's a small patch that allows an optimisation for code fetch, at least for RISC CPU targets, as suggested by Fabrice Bellard. The main idea is that a translated block is never to span over a page boundary. As the tb_find_slow routine already gets the physical address of the page of code to be translated, the code translator could then fetch the code using raw host memory accesses instead of doing it through the softmmu routines. This patch could also be adapted to CISC CPU targets, with care for the last instruction of a page.
For now, I have implemented it for alpha, arm, mips, PowerPC and SH4. I don't actually know if the optimisation would bring a noticeable speed gain or if it will be absolutely marginal. Please comment. -- J. Mayer Never organized --=-1knY4cScbh2nLbfYL06+ Content-Disposition: attachment; filename=code_raw_optim.diff Content-Type: text/x-patch; name=code_raw_optim.diff; charset=ISO-8859-15 Content-Transfer-Encoding: 7bit Index: cpu-exec.c =================================================================== RCS file: /sources/qemu/qemu/cpu-exec.c,v retrieving revision 1.119 diff -u -d -d -p -r1.119 cpu-exec.c --- cpu-exec.c 8 Oct 2007 13:16:13 -0000 1.119 +++ cpu-exec.c 12 Oct 2007 07:14:43 -0000 @@ -133,6 +133,7 @@ static TranslationBlock *tb_find_slow(ta tb->tc_ptr = tc_ptr; tb->cs_base = cs_base; tb->flags = flags; + tb->page_addr[0] = phys_page1; cpu_gen_code(env, tb, CODE_GEN_MAX_SIZE, &code_gen_size); code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); Index: target-alpha/translate.c =================================================================== RCS file: /sources/qemu/qemu/target-alpha/translate.c,v retrieving revision 1.5 diff -u -d -d -p -r1.5 translate.c --- target-alpha/translate.c 16 Sep 2007 21:08:01 -0000 1.5 +++ target-alpha/translate.c 12 Oct 2007 07:14:47 -0000 @@ -1966,12 +1966,15 @@ int gen_intermediate_code_internal (CPUS #endif DisasContext ctx, *ctxp = &ctx; target_ulong pc_start; + unsigned long phys_pc; uint32_t insn; uint16_t *gen_opc_end; int j, lj = -1; int ret; pc_start = tb->pc; + phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] + + (pc_start & ~TARGET_PAGE_MASK); gen_opc_ptr = gen_opc_buf; gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; gen_opparam_ptr = gen_opparam_buf; @@ -2010,7 +2013,7 @@ int gen_intermediate_code_internal (CPUS ctx.pc, ctx.mem_idx); } #endif - insn = ldl_code(ctx.pc); + insn = ldl_raw(phys_pc); #if defined ALPHA_DEBUG_DISAS insn_count++; if (logfile 
!= NULL) { @@ -2018,6 +2021,7 @@ int gen_intermediate_code_internal (CPUS } #endif ctx.pc += 4; + phys_pc += 4; ret = translate_one(ctxp, insn); if (ret != 0) break; Index: target-arm/translate.c =================================================================== RCS file: /sources/qemu/qemu/target-arm/translate.c,v retrieving revision 1.57 diff -u -d -d -p -r1.57 translate.c --- target-arm/translate.c 17 Sep 2007 08:09:51 -0000 1.57 +++ target-arm/translate.c 12 Oct 2007 07:14:47 -0000 @@ -38,6 +38,7 @@ /* internal defines */ typedef struct DisasContext { target_ulong pc; + unsigned long phys_pc; int is_jmp; /* Nonzero if this instruction has been conditionally skipped. */ int condjmp; @@ -2206,8 +2207,9 @@ static void disas_arm_insn(CPUState * en { unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh; - insn = ldl_code(s->pc); + insn = ldl_raw(s->phys_pc); s->pc += 4; + s->phys_pc += 4; cond = insn >> 28; if (cond == 0xf){ @@ -2971,8 +2973,9 @@ static void disas_thumb_insn(DisasContex int32_t offset; int i; - insn = lduw_code(s->pc); + insn = lduw_raw(s->phys_pc); s->pc += 2; + s->phys_pc += 2; switch (insn >> 12) { case 0: case 1: @@ -3494,7 +3497,7 @@ static void disas_thumb_insn(DisasContex break; } offset = ((int32_t)insn << 21) >> 10; - insn = lduw_code(s->pc); + insn = lduw_raw(s->phys_pc); offset |= insn & 0x7ff; val = (uint32_t)s->pc + 2; @@ -3544,6 +3547,8 @@ static inline int gen_intermediate_code_ dc->is_jmp = DISAS_NEXT; dc->pc = pc_start; + dc->phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] + + (pc_start & ~TARGET_PAGE_MASK); dc->singlestep_enabled = env->singlestep_enabled; dc->condjmp = 0; dc->thumb = env->thumb; Index: target-mips/translate.c =================================================================== RCS file: /sources/qemu/qemu/target-mips/translate.c,v retrieving revision 1.106 diff -u -d -d -p -r1.106 translate.c --- target-mips/translate.c 9 Oct 2007 03:39:58 -0000 1.106 +++ target-mips/translate.c 12 Oct 2007 
07:14:48 -0000 @@ -6483,6 +6483,7 @@ gen_intermediate_code_internal (CPUState { DisasContext ctx; target_ulong pc_start; + unsigned long phys_pc; uint16_t *gen_opc_end; int j, lj = -1; @@ -6490,6 +6491,8 @@ gen_intermediate_code_internal (CPUState fprintf (logfile, "search pc %d\n", search_pc); pc_start = tb->pc; + phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] + + (pc_start & ~TARGET_PAGE_MASK); gen_opc_ptr = gen_opc_buf; gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; gen_opparam_ptr = gen_opparam_buf; @@ -6544,9 +6547,10 @@ gen_intermediate_code_internal (CPUState gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK; gen_opc_instr_start[lj] = 1; } - ctx.opcode = ldl_code(ctx.pc); + ctx.opcode = ldl_raw(phys_pc); decode_opc(env, &ctx); ctx.pc += 4; + phys_pc += 4; if (env->singlestep_enabled) break; Index: target-ppc/translate.c =================================================================== RCS file: /sources/qemu/qemu/target-ppc/translate.c,v retrieving revision 1.92 diff -u -d -d -p -r1.92 translate.c --- target-ppc/translate.c 7 Oct 2007 23:10:08 -0000 1.92 +++ target-ppc/translate.c 12 Oct 2007 07:14:49 -0000 @@ -6679,12 +7569,15 @@ static always_inline int gen_intermediat DisasContext ctx, *ctxp = &ctx; opc_handler_t **table, *handler; target_ulong pc_start; + unsigned long phys_pc; uint16_t *gen_opc_end; int supervisor; int single_step, branch_step; int j, lj = -1; pc_start = tb->pc; + phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] + + (pc_start & ~TARGET_PAGE_MASK); gen_opc_ptr = gen_opc_buf; gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; gen_opparam_ptr = gen_opparam_buf; @@ -6763,7 +7649,7 @@ static always_inline int gen_intermediat ctx.nip, 1 - msr_pr, msr_ir); } #endif - ctx.opcode = ldl_code(ctx.nip); + ctx.opcode = ldl_raw(phys_pc); if (msr_le) { ctx.opcode = ((ctx.opcode & 0xFF000000) >> 24) | ((ctx.opcode & 0x00FF0000) >> 8) | @@ -6778,6 +7664,7 @@ static always_inline int gen_intermediat } #endif ctx.nip += 4; + phys_pc += 4; table = 
env->opcodes; handler = table[opc1(ctx.opcode)]; if (is_indirect_opcode(handler)) { Index: target-sh4/translate.c =================================================================== RCS file: /sources/qemu/qemu/target-sh4/translate.c,v retrieving revision 1.18 diff -u -d -d -p -r1.18 translate.c --- target-sh4/translate.c 29 Sep 2007 19:52:22 -0000 1.18 +++ target-sh4/translate.c 12 Oct 2007 07:14:50 -0000 @@ -1150,11 +1150,14 @@ gen_intermediate_code_internal(CPUState { DisasContext ctx; target_ulong pc_start; + unsigned long phys_pc; static uint16_t *gen_opc_end; uint32_t old_flags; int i, ii; pc_start = tb->pc; + phys_pc = (unsigned long)phys_ram_base + tb->page_addr[0] + + (pc_start & ~TARGET_PAGE_MASK); gen_opc_ptr = gen_opc_buf; gen_opc_end = gen_opc_buf + OPC_MAX_SIZE; gen_opparam_ptr = gen_opparam_buf; @@ -1210,9 +1213,10 @@ gen_intermediate_code_internal(CPUState fprintf(stderr, "Loading opcode at address 0x%08x\n", ctx.pc); fflush(stderr); #endif - ctx.opcode = lduw_code(ctx.pc); + ctx.opcode = lduw_raw(phys_pc); decode_opc(&ctx); ctx.pc += 2; + phys_pc += 2; if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0) break; if (env->singlestep_enabled) --=-1knY4cScbh2nLbfYL06+--