Index: dyngen.c =================================================================== RCS file: /cvsroot/qemu/qemu/dyngen.c,v retrieving revision 1.38 diff -u -p -r1.38 dyngen.c --- dyngen.c 21 Feb 2005 19:53:34 -0000 1.38 +++ dyngen.c 11 Mar 2005 00:53:05 -0000 @@ -1211,90 +1211,137 @@ int arm_emit_ldr_info(const char *name, { uint8_t *p; uint32_t insn; - int offset, min_offset, pc_offset, data_size; + int offset, min_offset, pc_offset, data_size, spare, max_pool; uint8_t data_allocated[1024]; unsigned int data_index; + int type; memset(data_allocated, 0, sizeof(data_allocated)); p = p_start; min_offset = p_end - p_start; + spare = 0x7fffffff; while (p < p_start + min_offset) { insn = get32((uint32_t *)p); + /* TODO: Armv5e ldrd. */ + /* TODO: VFP load. */ if ((insn & 0x0d5f0000) == 0x051f0000) { /* ldr reg, [pc, #im] */ offset = insn & 0xfff; if (!(insn & 0x00800000)) - offset = -offset; + offset = -offset; + max_pool = 4096; + type = 0; + } else if ((insn & 0x0e5f0f00) == 0x0c1f0100) { + /* FPA ldf. */ + offset = (insn & 0xff) << 2; + if (!(insn & 0x00800000)) + offset = -offset; + max_pool = 1024; + type = 1; + } else if ((insn & 0x0fff0000) == 0x028f0000) { + /* Some versions of gcc load a doubleword immediate with + add regN, pc, #imm + ldmia regN, {regN, regM} + Hope and pray the compiler never generates something like + add reg, pc, #imm1; ldr reg, [reg, #-imm2]; */ + int r; + + r = (insn & 0xf00) >> 7; + offset = ((insn & 0xff) >> r) | ((insn & 0xff) << (32 - r)); + max_pool = 1024; + type = 2; + } else { + max_pool = 0; + type = -1; + } + if (type >= 0) { + /* PC-relative load needs fixing up.
*/ + if (spare > max_pool - offset) + spare = max_pool - offset; if ((offset & 3) !=0) - error("%s:%04x: ldr pc offset must be 32 bit aligned", + error("%s:%04x: pc offset must be 32 bit aligned", + name, start_offset + p - p_start); + if (offset < 0) + error("%s:%04x: Embedded literal value", name, start_offset + p - p_start); pc_offset = p - p_start + offset + 8; if (pc_offset <= (p - p_start) || pc_offset >= (p_end - p_start)) - error("%s:%04x: ldr pc offset must point inside the function code", + error("%s:%04x: pc offset must point inside the function code", name, start_offset + p - p_start); if (pc_offset < min_offset) min_offset = pc_offset; if (outfile) { - /* ldr position */ + /* The instruction position */ fprintf(outfile, " arm_ldr_ptr->ptr = gen_code_ptr + %d;\n", p - p_start); - /* ldr data index */ - data_index = ((p_end - p_start) - pc_offset - 4) >> 2; - fprintf(outfile, " arm_ldr_ptr->data_ptr = arm_data_ptr + %d;\n", + /* The position of the constant pool data. */ + data_index = ((p_end - p_start) - pc_offset) >> 2; + fprintf(outfile, " arm_ldr_ptr->data_ptr = arm_data_ptr - %d;\n", data_index); + fprintf(outfile, " arm_ldr_ptr->type = %d;\n", type); fprintf(outfile, " arm_ldr_ptr++;\n"); - if (data_index >= sizeof(data_allocated)) - error("%s: too many data", name); - if (!data_allocated[data_index]) { - ELF_RELOC *rel; - int i, addend, type; - const char *sym_name, *p; - char relname[1024]; - - data_allocated[data_index] = 1; - - /* data value */ - addend = get32((uint32_t *)(p_start + pc_offset)); - relname[0] = '\0'; - for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) { - if (rel->r_offset == (pc_offset + start_offset)) { - sym_name = get_rel_sym_name(rel); - /* the compiler leave some unnecessary references to the code */ - get_reloc_expr(relname, sizeof(relname), sym_name); - type = ELF32_R_TYPE(rel->r_info); - if (type != R_ARM_ABS32) - error("%s: unsupported data relocation", name); - break; - } - } - fprintf(outfile, " arm_data_ptr[%d] = 
0x%x", - data_index, addend); - if (relname[0] != '\0') - fprintf(outfile, " + %s", relname); - fprintf(outfile, ";\n"); - } } } p += 4; } + + /* Copy and relocate the constant pool data. */ data_size = (p_end - p_start) - min_offset; if (data_size > 0 && outfile) { - fprintf(outfile, " arm_data_ptr += %d;\n", data_size >> 2); + spare += min_offset; + fprintf(outfile, " arm_data_ptr -= %d;\n", data_size >> 2); + fprintf(outfile, " arm_pool_ptr -= %d;\n", data_size); + fprintf(outfile, " if (arm_pool_ptr > gen_code_ptr + %d)\n" + " arm_pool_ptr = gen_code_ptr + %d;\n", + spare, spare); + + data_index = 0; + for (pc_offset = min_offset; + pc_offset < p_end - p_start; + pc_offset += 4) { + + ELF_RELOC *rel; + int i, addend, type; + const char *sym_name; + char relname[1024]; + + /* data value */ + addend = get32((uint32_t *)(p_start + pc_offset)); + relname[0] = '\0'; + for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) { + if (rel->r_offset == (pc_offset + start_offset)) { + sym_name = get_rel_sym_name(rel); + /* the compiler leave some unnecessary references to the code */ + get_reloc_expr(relname, sizeof(relname), sym_name); + type = ELF32_R_TYPE(rel->r_info); + if (type != R_ARM_ABS32) + error("%s: unsupported data relocation", name); + break; + } + } + fprintf(outfile, " arm_data_ptr[%d] = 0x%x", + data_index, addend); + if (relname[0] != '\0') + fprintf(outfile, " + %s", relname); + fprintf(outfile, ";\n"); + + data_index++; + } } - /* the last instruction must be a mov pc, lr */ + /* the last instruction must be ldmfd fp, {..., pc} */ if (p == p_start) goto arm_ret_error; p -= 4; insn = get32((uint32_t *)p); - if ((insn & 0xffff0000) != 0xe91b0000) { + if ((insn & 0xffff8000) != 0xe91b8000) { arm_ret_error: if (!outfile) printf("%s: invalid epilog\n", name); } - return p - p_start; + return p - p_start; } #endif @@ -2221,6 +2268,28 @@ int gen_file(FILE *outfile, int out_type } else { /* generate big code generation switch */ + +#ifdef HOST_ARM + /* We need to 
know the size of all the ops so we can figure out when + to emit constant pools. This must be consistent with opc.h. */ +fprintf(outfile, +"static const uint32_t arm_opc_size[] = {\n" +" 0,\n" /* end */ +" 0,\n" /* nop */ +" 0,\n" /* nop1 */ +" 0,\n" /* nop2 */ +" 0,\n"); /* nop3 */ + for(i = 0, sym = symtab; i < nb_syms; i++, sym++) { + const char *name; + name = get_sym_name(sym); + if (strstart(name, OP_PREFIX, NULL)) { + fprintf(outfile, " %d,\n", sym->st_size); + } + } +fprintf(outfile, +"};\n"); +#endif + fprintf(outfile, "int dyngen_code(uint8_t *gen_code_buf,\n" " uint16_t *label_offsets, uint16_t *jmp_offsets,\n" @@ -2231,10 +2300,36 @@ fprintf(outfile, " const uint32_t *opparam_ptr;\n"); #ifdef HOST_ARM +/* Arm is tricky because it uses constant pools for loading immediate values. + We assume (and require) each function is code followed by a constant pool. + All the ops are small so this should be ok. For each op we figure + out how much "spare" range we have in the load instructions. This allows + us to insert subsequent ops in between the op and the constant pool, + eliminating the need to jump around the pool. + + We currently generate: + + [ For this example we assume merging would move op1_pool out of range. + In practice we should be able to combine many ops before the offset + limits are reached. ] + op1_code; + op2_code; + goto op3; + op2_pool; + op1_pool; +op3: + op3_code; + ret; + op3_pool; + + Ideally we'd put op1_pool before op2_pool, but that requires two passes. + */ fprintf(outfile, " uint8_t *last_gen_code_ptr = gen_code_buf;\n" " LDREntry *arm_ldr_ptr = arm_ldr_table;\n" -" uint32_t *arm_data_ptr = arm_data_table;\n"); +" uint32_t *arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n" +/* Initialise the permissible pool offset to an arbitrarily large value. */ +" uint8_t *arm_pool_ptr = gen_code_buf + 0x1000000;\n"); #endif fprintf(outfile, @@ -2246,9 +2341,23 @@ fprintf(outfile, /* Generate prologue, if needed. 
*/ fprintf(outfile, -" for(;;) {\n" -" switch(*opc_ptr++) {\n" -); +" for(;;) {\n"); + +#ifdef HOST_ARM +/* Generate constant pool if needed */ +fprintf(outfile, +" if (gen_code_ptr + arm_opc_size[*opc_ptr] >= arm_pool_ptr) {\n" +" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, " +"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 1);\n" +" last_gen_code_ptr = gen_code_ptr;\n" +" arm_ldr_ptr = arm_ldr_table;\n" +" arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n" +" arm_pool_ptr = gen_code_ptr + 0x1000000;\n" +" }\n"); +#endif + +fprintf(outfile, +" switch(*opc_ptr++) {\n"); for(i = 0, sym = symtab; i < nb_syms; i++, sym++) { const char *name; @@ -2282,17 +2391,6 @@ fprintf(outfile, " goto the_end;\n" " }\n"); -#ifdef HOST_ARM -/* generate constant table if needed */ -fprintf(outfile, -" if ((gen_code_ptr - last_gen_code_ptr) >= (MAX_FRAG_SIZE - MAX_OP_SIZE)) {\n" -" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 1);\n" -" last_gen_code_ptr = gen_code_ptr;\n" -" arm_ldr_ptr = arm_ldr_table;\n" -" arm_data_ptr = arm_data_table;\n" -" }\n"); -#endif - fprintf(outfile, " }\n" @@ -2301,7 +2399,10 @@ fprintf(outfile, /* generate some code patching */ #ifdef HOST_ARM -fprintf(outfile, "gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 0);\n"); +fprintf(outfile, +"if (arm_data_ptr != arm_data_table + ARM_LDR_TABLE_SIZE)\n" +" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, " +"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 0);\n"); #endif /* flush instruction cache */ fprintf(outfile, "flush_icache_range((unsigned long)gen_code_buf, (unsigned long)gen_code_ptr);\n"); Index: dyngen.h =================================================================== RCS file: /cvsroot/qemu/qemu/dyngen.h,v retrieving revision 1.7 diff -u -p -r1.7 dyngen.h --- dyngen.h 3 Jan 2005 23:40:55 -0000 1.7 +++ dyngen.h 11 Mar 2005 00:53:05 
-0000 @@ -134,18 +134,16 @@ void fix_bsr(void *p, int offset) { #ifdef __arm__ -#define MAX_OP_SIZE (128 * 4) /* in bytes */ -/* max size of the code that can be generated without calling arm_flush_ldr */ -#define MAX_FRAG_SIZE (1024 * 4) -//#define MAX_FRAG_SIZE (135 * 4) /* for testing */ +#define ARM_LDR_TABLE_SIZE 1024 typedef struct LDREntry { uint8_t *ptr; uint32_t *data_ptr; + unsigned type:2; } LDREntry; static LDREntry arm_ldr_table[1024]; -static uint32_t arm_data_table[1024]; +static uint32_t arm_data_table[ARM_LDR_TABLE_SIZE]; extern char exec_loop; @@ -164,8 +162,9 @@ static uint8_t *arm_flush_ldr(uint8_t *g int offset, data_size, target; uint8_t *data_ptr; uint32_t insn; + uint32_t mask; - data_size = (uint8_t *)data_end - (uint8_t *)data_start; + data_size = (data_end - data_start) << 2; if (gen_jmp) { /* generate branch to skip the data */ @@ -187,17 +186,48 @@ static uint8_t *arm_flush_ldr(uint8_t *g offset = ((unsigned long)(le->data_ptr) - (unsigned long)data_start) + (unsigned long)data_ptr - (unsigned long)ptr - 8; - insn = *ptr & ~(0xfff | 0x00800000); if (offset < 0) { - offset = - offset; - } else { - insn |= 0x00800000; - } - if (offset > 0xfff) { - fprintf(stderr, "Error ldr offset\n"); + fprintf(stderr, "Negative constant pool offset\n"); abort(); } - insn |= offset; + switch (le->type) { + case 0: /* ldr */ + mask = ~0x00800fff; + if (offset >= 4096) { + fprintf(stderr, "Bad ldr offset\n"); + abort(); + } + break; + case 1: /* ldc */ + mask = ~0x008000ff; + if (offset >= 1024 ) { + fprintf(stderr, "Bad ldc offset\n"); + abort(); + } + break; + case 2: /* add */ + mask = ~0xfff; + if (offset >= 1024 ) { + fprintf(stderr, "Bad add offset\n"); + abort(); + } + break; + default: + fprintf(stderr, "Bad pc relative fixup\n"); + abort(); + } + insn = *ptr & mask; + switch (le->type) { + case 0: /* ldr */ + insn |= offset | 0x00800000; + break; + case 1: /* ldc */ + insn |= (offset >> 2) | 0x00800000; + break; + case 2: /* add */ + insn |= 
(offset >> 2) | 0xf00; + break; + } *ptr = insn; } return gen_code_ptr;