From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1Kqt87-0001md-4R for qemu-devel@nongnu.org; Fri, 17 Oct 2008 13:28:51 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1Kqt83-0001mR-F9 for qemu-devel@nongnu.org; Fri, 17 Oct 2008 13:28:49 -0400 Received: from [199.232.76.173] (port=60601 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Kqt83-0001mO-9g for qemu-devel@nongnu.org; Fri, 17 Oct 2008 13:28:47 -0400 Received: from moutng.kundenserver.de ([212.227.126.186]:53071) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1Kqt82-0004v7-I2 for qemu-devel@nongnu.org; Fri, 17 Oct 2008 13:28:47 -0400 Message-ID: <48F8CB4C.30800@mail.berlios.de> Date: Fri, 17 Oct 2008 19:28:44 +0200 From: Stefan Weil MIME-Version: 1.0 Subject: Re: [Qemu-devel] [PATCH] Improve symbol lookup for system and user mode References: <48E52067.6080408@mail.berlios.de> <48F79CB9.4090602@mail.berlios.de> <48F7A4D6.8070405@mail.berlios.de> In-Reply-To: <48F7A4D6.8070405@mail.berlios.de> Content-Type: multipart/mixed; boundary="------------020201080508020700000603" Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org This is a multi-part message in MIME format. --------------020201080508020700000603 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit > Stefan Weil schrieb: >> Here is a short summary of my new patch: >> >> * Use function pointers for symbol lookup (currently for elf32 and >> elf64, could be expanded). >> This also fixes the bug with mips elf64 symbols in current Qemu trunk. >> >> * Use quicksort and binary search for symbol lookup. >> >> * Remove unneeded entries from symbol table. >> This reduced a typical table size (linux mips kernel) from 1764487 to >> 11656 entries. >> >> * In disas.c, the patch also fixes some warnings from old fashioned >> function prototypes. >> >> In loader.c, two defines control some compile time options (could be >> removed in >> production code): >> #define CONFIG_BINARY_SYMBOL_SEARCH >> #define CONFIG_REDUCE_SYMBOL_TABLE >> >> I tested the new code using 32 bit and 64 bit linux mips kernels and >> Qemu logging (-d in_asm). >> The speed improvement is extremely large - both because of the much >> smaller table and >> the binary search. >> >> Stefan >> > Please note: > > The current patch only supports system emulation. > User emulation needs more fixes to compile again. > > Stefan > > Here is an updated patch which now supports binary symbol lookup for both system emulation and user emulation. User emulation was tested using qemu-x86_64 and a simple application with symbol information. The new patch no longer includes compile time options for the old linear symbol search. I hope this new patch will be included in Qemu trunk. Regards Stefan --------------020201080508020700000603 Content-Type: text/x-diff; name="lookup3.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="lookup3.patch" * Use function pointers for symbol lookup (currently for elf32 and elf64, could be expanded). This also fixes the bug with mips elf64 symbols in current Qemu trunk. * Use quicksort and binary search for symbol lookup. * Remove unneeded entries from symbol table. This reduced a typical table size (linux mips kernel) from 1764487 to 11656 entries. * In disas.c, the patch also fixes some warnings from old fashioned function prototypes. Signed-off-by: Stefan Weil Index: linux-user/elfload.c =================================================================== --- linux-user/elfload.c (Revision 5497) +++ linux-user/elfload.c (Arbeitskopie) @@ -984,80 +984,130 @@ return ((abi_ulong) interp_elf_ex->e_entry) + load_addr; } +static int symfind(const void *s0, const void *s1) +{ + struct elf_sym *key = (struct elf_sym *)s0; + struct elf_sym *sym = (struct elf_sym *)s1; + int result = 0; + if (key->st_value < sym->st_value) { + result = -1; + } else if (key->st_value > sym->st_value + sym->st_size) { + result = 1; + } + return result; +} + +static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr) +{ +#if ELF_CLASS == ELFCLASS32 + struct elf_sym *syms = s->disas_symtab.elf32; +#else + struct elf_sym *syms = s->disas_symtab.elf64; +#endif + + // binary search + struct elf_sym key; + struct elf_sym *sym; + + key.st_value = orig_addr; + + sym = bsearch(&key, syms, s->disas_num_syms, sizeof(*syms), symfind); + if (sym != 0) { + return s->disas_strtab + sym->st_name; + } + + return ""; +} + +static int symcmp(const void *s0, const void *s1) +{ + struct elf_sym *sym0 = (struct elf_sym *)s0; + struct elf_sym *sym1 = (struct elf_sym *)s1; + return (sym0->st_value < sym1->st_value) + ? -1 + : ((sym0->st_value > sym1->st_value) ? 1 : 0); +} + /* Best attempt to load symbols from this ELF object. */ static void load_symbols(struct elfhdr *hdr, int fd) { - unsigned int i; + unsigned int i, nsyms; struct elf_shdr sechdr, symtab, strtab; char *strings; struct syminfo *s; -#if (ELF_CLASS == ELFCLASS64) - // Disas uses 32 bit symbols - struct elf32_sym *syms32 = NULL; - struct elf_sym *sym; -#endif + struct elf_sym *syms; lseek(fd, hdr->e_shoff, SEEK_SET); for (i = 0; i < hdr->e_shnum; i++) { - if (read(fd, &sechdr, sizeof(sechdr)) != sizeof(sechdr)) - return; + if (read(fd, &sechdr, sizeof(sechdr)) != sizeof(sechdr)) + return; #ifdef BSWAP_NEEDED - bswap_shdr(&sechdr); + bswap_shdr(&sechdr); #endif - if (sechdr.sh_type == SHT_SYMTAB) { - symtab = sechdr; - lseek(fd, hdr->e_shoff - + sizeof(sechdr) * sechdr.sh_link, SEEK_SET); - if (read(fd, &strtab, sizeof(strtab)) - != sizeof(strtab)) - return; + if (sechdr.sh_type == SHT_SYMTAB) { + symtab = sechdr; + lseek(fd, hdr->e_shoff + + sizeof(sechdr) * sechdr.sh_link, SEEK_SET); + if (read(fd, &strtab, sizeof(strtab)) + != sizeof(strtab)) + return; #ifdef BSWAP_NEEDED - bswap_shdr(&strtab); + bswap_shdr(&strtab); #endif - goto found; - } + goto found; + } } return; /* Shouldn't happen... */ found: /* Now know where the strtab and symtab are. Snarf them. */ s = malloc(sizeof(*s)); - s->disas_symtab = malloc(symtab.sh_size); -#if (ELF_CLASS == ELFCLASS64) - syms32 = malloc(symtab.sh_size / sizeof(struct elf_sym) - * sizeof(struct elf32_sym)); -#endif + syms = malloc(symtab.sh_size); + if (!syms) + return; s->disas_strtab = strings = malloc(strtab.sh_size); - if (!s->disas_symtab || !s->disas_strtab) - return; + if (!s->disas_strtab) + return; lseek(fd, symtab.sh_offset, SEEK_SET); - if (read(fd, s->disas_symtab, symtab.sh_size) != symtab.sh_size) - return; + if (read(fd, syms, symtab.sh_size) != symtab.sh_size) + return; - for (i = 0; i < symtab.sh_size / sizeof(struct elf_sym); i++) { + nsyms = symtab.sh_size / sizeof(struct elf_sym); + + for (i = 0; i < nsyms; i++) { #ifdef BSWAP_NEEDED - bswap_sym(s->disas_symtab + sizeof(struct elf_sym)*i); + bswap_sym(syms + i); #endif -#if (ELF_CLASS == ELFCLASS64) - sym = s->disas_symtab + sizeof(struct elf_sym)*i; - syms32[i].st_name = sym->st_name; - syms32[i].st_info = sym->st_info; - syms32[i].st_other = sym->st_other; - syms32[i].st_shndx = sym->st_shndx; - syms32[i].st_value = sym->st_value & 0xffffffff; - syms32[i].st_size = sym->st_size & 0xffffffff; + // Throw away entries which we do not need. + while ((syms[i].st_shndx == SHN_UNDEF || + syms[i].st_shndx >= SHN_LORESERVE || + ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) && i < --nsyms) { + syms[i] = syms[nsyms]; +#ifdef BSWAP_NEEDED + bswap_sym(syms + i); #endif + } +#if defined(TARGET_ARM) || defined (TARGET_MIPS) + /* The bottom address bit marks a Thumb or MIPS16 symbol. */ + syms[i].st_value &= ~(target_ulong)1; +#endif } + syms = realloc(syms, nsyms * sizeof(*syms)); -#if (ELF_CLASS == ELFCLASS64) - free(s->disas_symtab); - s->disas_symtab = syms32; -#endif + qsort(syms, nsyms, sizeof(*syms), symcmp); + lseek(fd, strtab.sh_offset, SEEK_SET); if (read(fd, strings, strtab.sh_size) != strtab.sh_size) - return; - s->disas_num_syms = symtab.sh_size / sizeof(struct elf_sym); + return; + s->disas_num_syms = nsyms; +#if ELF_CLASS == ELFCLASS32 + s->disas_symtab.elf32 = syms; + s->lookup_symbol = lookup_symbolxx; +#else + s->disas_symtab.elf64 = syms; + s->lookup_symbol = lookup_symbolxx; +#endif s->next = syminfos; syminfos = s; } Index: disas.c =================================================================== --- disas.c (Revision 5497) +++ disas.c (Arbeitskopie) @@ -13,12 +13,8 @@ /* Get LENGTH bytes from info's buffer, at target address memaddr. Transfer them to myaddr. */ -int -buffer_read_memory (memaddr, myaddr, length, info) - bfd_vma memaddr; - bfd_byte *myaddr; - int length; - struct disassemble_info *info; +int buffer_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length, + struct disassemble_info *info) { if (memaddr < info->buffer_vma || memaddr + length > info->buffer_vma + info->buffer_length) @@ -46,10 +42,7 @@ /* Print an error message. We can assume that this is in response to an error return from buffer_read_memory. */ void -perror_memory (status, memaddr, info) - int status; - bfd_vma memaddr; - struct disassemble_info *info; +perror_memory (int status, bfd_vma memaddr, struct disassemble_info *info) { if (status != EIO) /* Can't happen. */ @@ -69,9 +62,7 @@ addresses). */ void -generic_print_address (addr, info) - bfd_vma addr; - struct disassemble_info *info; +generic_print_address (bfd_vma addr, struct disassemble_info *info) { (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr); } @@ -79,9 +70,7 @@ /* Just return the given address. */ int -generic_symbol_at_address (addr, info) - bfd_vma addr; - struct disassemble_info * info; +generic_symbol_at_address (bfd_vma addr, struct disassemble_info *info) { return 1; } @@ -303,33 +292,17 @@ /* Look up symbol for debugging purpose. Returns "" if unknown. */ const char *lookup_symbol(target_ulong orig_addr) { - unsigned int i; - /* Hack, because we know this is x86. */ - Elf32_Sym *sym; + const char *symbol = ""; struct syminfo *s; - target_ulong addr; for (s = syminfos; s; s = s->next) { - sym = s->disas_symtab; - for (i = 0; i < s->disas_num_syms; i++) { - if (sym[i].st_shndx == SHN_UNDEF - || sym[i].st_shndx >= SHN_LORESERVE) - continue; - - if (ELF_ST_TYPE(sym[i].st_info) != STT_FUNC) - continue; - - addr = sym[i].st_value; -#if defined(TARGET_ARM) || defined (TARGET_MIPS) - /* The bottom address bit marks a Thumb or MIPS16 symbol. */ - addr &= ~(target_ulong)1; -#endif - if (orig_addr >= addr - && orig_addr < addr + sym[i].st_size) - return s->disas_strtab + sym[i].st_name; - } + symbol = s->lookup_symbol(s, orig_addr); + if (symbol[0] != '\0') { + break; + } } - return ""; + + return symbol; } #if !defined(CONFIG_USER_ONLY) Index: disas.h =================================================================== --- disas.h (Revision 5497) +++ disas.h (Arbeitskopie) @@ -10,12 +10,24 @@ /* Look up symbol for debugging purpose. Returns "" if unknown. */ const char *lookup_symbol(target_ulong orig_addr); -/* Filled in by elfload.c. Simplistic, but will do for now. */ -extern struct syminfo { +struct syminfo; +struct elf32_sym; +struct elf64_sym; + +typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr); + +struct syminfo { + lookup_symbol_t lookup_symbol; unsigned int disas_num_syms; - void *disas_symtab; + union { + struct elf32_sym *elf32; + struct elf64_sym *elf64; + } disas_symtab; const char *disas_strtab; struct syminfo *next; -} *syminfos; +}; +/* Filled in by elfload.c. Simplistic, but will do for now. */ +extern struct syminfo *syminfos; + #endif /* _QEMU_DISAS_H */ Index: elf_ops.h =================================================================== --- elf_ops.h (Revision 5497) +++ elf_ops.h (Arbeitskopie) @@ -60,13 +60,50 @@ return NULL; } +static int glue(symfind, SZ)(const void *s0, const void *s1) +{ + struct elf_sym *key = (struct elf_sym *)s0; + struct elf_sym *sym = (struct elf_sym *)s1; + int result = 0; + if (key->st_value < sym->st_value) { + result = -1; + } else if (key->st_value > sym->st_value + sym->st_size) { + result = 1; + } + return result; +} + +static const char *glue(lookup_symbol, SZ)(struct syminfo *s, target_ulong orig_addr) +{ + struct elf_sym *syms = glue(s->disas_symtab.elf, SZ); + + // binary search + struct elf_sym key; + struct elf_sym *sym; + + key.st_value = orig_addr; + + sym = bsearch(&key, syms, s->disas_num_syms, sizeof(*syms), glue(symfind, SZ)); + if (sym != 0) { + return s->disas_strtab + sym->st_name; + } + + return ""; +} + +static int glue(symcmp, SZ)(const void *s0, const void *s1) +{ + struct elf_sym *sym0 = (struct elf_sym *)s0; + struct elf_sym *sym1 = (struct elf_sym *)s1; + return (sym0->st_value < sym1->st_value) + ? -1 + : ((sym0->st_value > sym1->st_value) ? 1 : 0); +} + static int glue(load_symbols, SZ)(struct elfhdr *ehdr, int fd, int must_swab) { struct elf_shdr *symtab, *strtab, *shdr_table = NULL; struct elf_sym *syms = NULL; -#if (SZ == 64) - struct elf32_sym *syms32 = NULL; -#endif struct syminfo *s; int nsyms, i; char *str = NULL; @@ -90,21 +127,28 @@ goto fail; nsyms = symtab->sh_size / sizeof(struct elf_sym); -#if (SZ == 64) - syms32 = qemu_mallocz(nsyms * sizeof(struct elf32_sym)); -#endif + for (i = 0; i < nsyms; i++) { if (must_swab) glue(bswap_sym, SZ)(&syms[i]); -#if (SZ == 64) - syms32[i].st_name = syms[i].st_name; - syms32[i].st_info = syms[i].st_info; - syms32[i].st_other = syms[i].st_other; - syms32[i].st_shndx = syms[i].st_shndx; - syms32[i].st_value = syms[i].st_value & 0xffffffff; - syms32[i].st_size = syms[i].st_size & 0xffffffff; + // Throw away entries which we do not need. + // This reduces a typical table size from 1764487 to 11656 entries. + while ((syms[i].st_shndx == SHN_UNDEF || + syms[i].st_shndx >= SHN_LORESERVE || + ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) && i < --nsyms) { + syms[i] = syms[nsyms]; + if (must_swab) + glue(bswap_sym, SZ)(&syms[i]); + } +#if defined(TARGET_ARM) || defined (TARGET_MIPS) + /* The bottom address bit marks a Thumb or MIPS16 symbol. */ + syms[i].st_value &= ~(target_ulong)1; #endif } + syms = qemu_realloc(syms, nsyms * sizeof(*syms)); + + qsort(syms, nsyms, sizeof(*syms), glue(symcmp, SZ)); + /* String table */ if (symtab->sh_link >= ehdr->e_shnum) goto fail; @@ -112,16 +156,12 @@ str = load_at(fd, strtab->sh_offset, strtab->sh_size); if (!str) - goto fail; + goto fail; /* Commit */ s = qemu_mallocz(sizeof(*s)); -#if (SZ == 64) - s->disas_symtab = syms32; - qemu_free(syms); -#else - s->disas_symtab = syms; -#endif + s->lookup_symbol = glue(lookup_symbol, SZ); + glue(s->disas_symtab.elf, SZ) = syms; s->disas_num_syms = nsyms; s->disas_strtab = str; s->next = syminfos; @@ -129,9 +169,6 @@ qemu_free(shdr_table); return 0; fail: -#if (SZ == 64) - qemu_free(syms32); -#endif qemu_free(syms); qemu_free(str); qemu_free(shdr_table); --------------020201080508020700000603--