qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Stefan Weil <weil@mail.berlios.de>
To: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH] Improve symbol lookup for system and user mode
Date: Fri, 17 Oct 2008 19:28:44 +0200	[thread overview]
Message-ID: <48F8CB4C.30800@mail.berlios.de> (raw)
In-Reply-To: <48F7A4D6.8070405@mail.berlios.de>

[-- Attachment #1: Type: text/plain, Size: 1470 bytes --]


> Stefan Weil schrieb:
>> Here is a short summary of my new patch:
>>
>> * Use function pointers for symbol lookup (currently for elf32 and
>> elf64, could be expanded).
>> This also fixes the bug with mips elf64 symbols in current Qemu trunk.
>>
>> * Use quicksort and binary search for symbol lookup.
>>
>> * Remove unneeded entries from symbol table.
>> This reduced a typical table size (linux mips kernel) from 1764487 to
>> 11656 entries.
>>
>> * In disas.c, the patch also fixes some warnings from old fashioned
>> function prototypes.
>>
>> In loader.c, two defines control some compile time options (could be
>> removed in
>> production code):
>> #define CONFIG_BINARY_SYMBOL_SEARCH
>> #define CONFIG_REDUCE_SYMBOL_TABLE
>>
>> I tested the new code using 32 bit and 64 bit linux mips kernels and
>> Qemu logging (-d in_asm).
>> The speed improvement is extremely large - both because of the much
>> smaller table and
>> the binary search.
>>
>> Stefan
>>
> Please note:
>
> The current patch only supports system emulation.
> User emulation needs more fixes to compile again.
>
> Stefan
>
>


Here is an updated patch which now supports binary symbol lookup
for both system emulation and user emulation.

User emulation was tested using qemu-x86_64 and a simple application
with symbol information.

The new patch no longer includes compile time options for the old
linear symbol search.

I hope this new patch will be included in Qemu trunk.

Regards
Stefan


[-- Attachment #2: lookup3.patch --]
[-- Type: text/x-diff, Size: 13186 bytes --]

* Use function pointers for symbol lookup (currently for elf32 and elf64, could be expanded).
  This also fixes the bug with mips elf64 symbols in current Qemu trunk.

* Use quicksort and binary search for symbol lookup.

* Remove unneeded entries from symbol table.
  This reduced a typical table size (linux mips kernel) from 1764487 to 11656 entries.

* In disas.c, the patch also fixes some warnings from old fashioned function prototypes.

Signed-off-by: Stefan Weil <weil@mail.berlios.de> 

Index: linux-user/elfload.c
===================================================================
--- linux-user/elfload.c	(Revision 5497)
+++ linux-user/elfload.c	(Arbeitskopie)
@@ -984,80 +984,130 @@
 	return ((abi_ulong) interp_elf_ex->e_entry) + load_addr;
 }
 
+static int symfind(const void *s0, const void *s1)
+{
+    struct elf_sym *key = (struct elf_sym *)s0;
+    struct elf_sym *sym = (struct elf_sym *)s1;
+    int result = 0;
+    if (key->st_value < sym->st_value) {
+        result = -1;
+    } else if (key->st_value > sym->st_value + sym->st_size) {
+        result = 1;
+    }
+    return result;
+}
+
+static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
+{
+#if ELF_CLASS == ELFCLASS32
+    struct elf_sym *syms = s->disas_symtab.elf32;
+#else
+    struct elf_sym *syms = s->disas_symtab.elf64;
+#endif
+
+    // binary search
+    struct elf_sym key;
+    struct elf_sym *sym;
+
+    key.st_value = orig_addr;
+
+    sym = bsearch(&key, syms, s->disas_num_syms, sizeof(*syms), symfind);
+    if (sym != 0) {
+        return s->disas_strtab + sym->st_name;
+    }
+
+    return "";
+}
+
+static int symcmp(const void *s0, const void *s1)
+{
+    struct elf_sym *sym0 = (struct elf_sym *)s0;
+    struct elf_sym *sym1 = (struct elf_sym *)s1;
+    return (sym0->st_value < sym1->st_value)
+        ? -1
+        : ((sym0->st_value > sym1->st_value) ? 1 : 0);
+}
+
 /* Best attempt to load symbols from this ELF object. */
 static void load_symbols(struct elfhdr *hdr, int fd)
 {
-    unsigned int i;
+    unsigned int i, nsyms;
     struct elf_shdr sechdr, symtab, strtab;
     char *strings;
     struct syminfo *s;
-#if (ELF_CLASS == ELFCLASS64)
-    // Disas uses 32 bit symbols
-    struct elf32_sym *syms32 = NULL;
-    struct elf_sym *sym;
-#endif
+    struct elf_sym *syms;
 
     lseek(fd, hdr->e_shoff, SEEK_SET);
     for (i = 0; i < hdr->e_shnum; i++) {
-	if (read(fd, &sechdr, sizeof(sechdr)) != sizeof(sechdr))
-	    return;
+        if (read(fd, &sechdr, sizeof(sechdr)) != sizeof(sechdr))
+            return;
 #ifdef BSWAP_NEEDED
-	bswap_shdr(&sechdr);
+        bswap_shdr(&sechdr);
 #endif
-	if (sechdr.sh_type == SHT_SYMTAB) {
-	    symtab = sechdr;
-	    lseek(fd, hdr->e_shoff
-		  + sizeof(sechdr) * sechdr.sh_link, SEEK_SET);
-	    if (read(fd, &strtab, sizeof(strtab))
-		!= sizeof(strtab))
-		return;
+        if (sechdr.sh_type == SHT_SYMTAB) {
+            symtab = sechdr;
+            lseek(fd, hdr->e_shoff
+                  + sizeof(sechdr) * sechdr.sh_link, SEEK_SET);
+            if (read(fd, &strtab, sizeof(strtab))
+                != sizeof(strtab))
+                return;
 #ifdef BSWAP_NEEDED
-	    bswap_shdr(&strtab);
+            bswap_shdr(&strtab);
 #endif
-	    goto found;
-	}
+            goto found;
+        }
     }
     return; /* Shouldn't happen... */
 
  found:
     /* Now know where the strtab and symtab are.  Snarf them. */
     s = malloc(sizeof(*s));
-    s->disas_symtab = malloc(symtab.sh_size);
-#if (ELF_CLASS == ELFCLASS64)
-    syms32 = malloc(symtab.sh_size / sizeof(struct elf_sym)
-                    * sizeof(struct elf32_sym));
-#endif
+    syms = malloc(symtab.sh_size);
+    if (!syms)
+        return;
     s->disas_strtab = strings = malloc(strtab.sh_size);
-    if (!s->disas_symtab || !s->disas_strtab)
-	return;
+    if (!s->disas_strtab)
+        return;
 
     lseek(fd, symtab.sh_offset, SEEK_SET);
-    if (read(fd, s->disas_symtab, symtab.sh_size) != symtab.sh_size)
-	return;
+    if (read(fd, syms, symtab.sh_size) != symtab.sh_size)
+        return;
 
-    for (i = 0; i < symtab.sh_size / sizeof(struct elf_sym); i++) {
+    nsyms = symtab.sh_size / sizeof(struct elf_sym);
+
+    for (i = 0; i < nsyms; i++) {
 #ifdef BSWAP_NEEDED
-	bswap_sym(s->disas_symtab + sizeof(struct elf_sym)*i);
+        bswap_sym(syms + i);
 #endif
-#if (ELF_CLASS == ELFCLASS64)
-        sym = s->disas_symtab + sizeof(struct elf_sym)*i;
-        syms32[i].st_name = sym->st_name;
-        syms32[i].st_info = sym->st_info;
-        syms32[i].st_other = sym->st_other;
-        syms32[i].st_shndx = sym->st_shndx;
-        syms32[i].st_value = sym->st_value & 0xffffffff;
-        syms32[i].st_size = sym->st_size & 0xffffffff;
+        // Throw away entries which we do not need.
+        while ((syms[i].st_shndx == SHN_UNDEF ||
+                syms[i].st_shndx >= SHN_LORESERVE ||
+                ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) && i < --nsyms) {
+            syms[i] = syms[nsyms];
+#ifdef BSWAP_NEEDED
+            bswap_sym(syms + i);
 #endif
+        }
+#if defined(TARGET_ARM) || defined (TARGET_MIPS)
+        /* The bottom address bit marks a Thumb or MIPS16 symbol.  */
+        syms[i].st_value &= ~(target_ulong)1;
+#endif
     }
+    syms = realloc(syms, nsyms * sizeof(*syms));
 
-#if (ELF_CLASS == ELFCLASS64)
-    free(s->disas_symtab);
-    s->disas_symtab = syms32;
-#endif
+    qsort(syms, nsyms, sizeof(*syms), symcmp);
+
     lseek(fd, strtab.sh_offset, SEEK_SET);
     if (read(fd, strings, strtab.sh_size) != strtab.sh_size)
-	return;
-    s->disas_num_syms = symtab.sh_size / sizeof(struct elf_sym);
+        return;
+    s->disas_num_syms = nsyms;
+#if ELF_CLASS == ELFCLASS32
+    s->disas_symtab.elf32 = syms;
+    s->lookup_symbol = lookup_symbolxx;
+#else
+    s->disas_symtab.elf64 = syms;
+    s->lookup_symbol = lookup_symbolxx;
+#endif
     s->next = syminfos;
     syminfos = s;
 }
Index: disas.c
===================================================================
--- disas.c	(Revision 5497)
+++ disas.c	(Arbeitskopie)
@@ -13,12 +13,8 @@
 
 /* Get LENGTH bytes from info's buffer, at target address memaddr.
    Transfer them to myaddr.  */
-int
-buffer_read_memory (memaddr, myaddr, length, info)
-     bfd_vma memaddr;
-     bfd_byte *myaddr;
-     int length;
-     struct disassemble_info *info;
+int buffer_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
+                       struct disassemble_info *info)
 {
     if (memaddr < info->buffer_vma
         || memaddr + length > info->buffer_vma + info->buffer_length)
@@ -46,10 +42,7 @@
 /* Print an error message.  We can assume that this is in response to
    an error return from buffer_read_memory.  */
 void
-perror_memory (status, memaddr, info)
-     int status;
-     bfd_vma memaddr;
-     struct disassemble_info *info;
+perror_memory (int status, bfd_vma memaddr, struct disassemble_info *info)
 {
   if (status != EIO)
     /* Can't happen.  */
@@ -69,9 +62,7 @@
    addresses).  */
 
 void
-generic_print_address (addr, info)
-     bfd_vma addr;
-     struct disassemble_info *info;
+generic_print_address (bfd_vma addr, struct disassemble_info *info)
 {
     (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr);
 }
@@ -79,9 +70,7 @@
 /* Just return the given address.  */
 
 int
-generic_symbol_at_address (addr, info)
-     bfd_vma addr;
-     struct disassemble_info * info;
+generic_symbol_at_address (bfd_vma addr, struct disassemble_info *info)
 {
   return 1;
 }
@@ -303,33 +292,17 @@
 /* Look up symbol for debugging purpose.  Returns "" if unknown. */
 const char *lookup_symbol(target_ulong orig_addr)
 {
-    unsigned int i;
-    /* Hack, because we know this is x86. */
-    Elf32_Sym *sym;
+    const char *symbol = "";
     struct syminfo *s;
-    target_ulong addr;
 
     for (s = syminfos; s; s = s->next) {
-	sym = s->disas_symtab;
-	for (i = 0; i < s->disas_num_syms; i++) {
-	    if (sym[i].st_shndx == SHN_UNDEF
-		|| sym[i].st_shndx >= SHN_LORESERVE)
-		continue;
-
-	    if (ELF_ST_TYPE(sym[i].st_info) != STT_FUNC)
-		continue;
-
-	    addr = sym[i].st_value;
-#if defined(TARGET_ARM) || defined (TARGET_MIPS)
-            /* The bottom address bit marks a Thumb or MIPS16 symbol.  */
-            addr &= ~(target_ulong)1;
-#endif
-	    if (orig_addr >= addr
-		&& orig_addr < addr + sym[i].st_size)
-		return s->disas_strtab + sym[i].st_name;
-	}
+        symbol = s->lookup_symbol(s, orig_addr);
+        if (symbol[0] != '\0') {
+            break;
+        }
     }
-    return "";
+
+    return symbol;
 }
 
 #if !defined(CONFIG_USER_ONLY)
Index: disas.h
===================================================================
--- disas.h	(Revision 5497)
+++ disas.h	(Arbeitskopie)
@@ -10,12 +10,24 @@
 /* Look up symbol for debugging purpose.  Returns "" if unknown. */
 const char *lookup_symbol(target_ulong orig_addr);
 
-/* Filled in by elfload.c.  Simplistic, but will do for now. */
-extern struct syminfo {
+struct syminfo;
+struct elf32_sym;
+struct elf64_sym;
+
+typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr);
+
+struct syminfo {
+    lookup_symbol_t lookup_symbol;
     unsigned int disas_num_syms;
-    void *disas_symtab;
+    union {
+      struct elf32_sym *elf32;
+      struct elf64_sym *elf64;
+    } disas_symtab;
     const char *disas_strtab;
     struct syminfo *next;
-} *syminfos;
+};
 
+/* Filled in by elfload.c.  Simplistic, but will do for now. */
+extern struct syminfo *syminfos;
+
 #endif /* _QEMU_DISAS_H */
Index: elf_ops.h
===================================================================
--- elf_ops.h	(Revision 5497)
+++ elf_ops.h	(Arbeitskopie)
@@ -60,13 +60,50 @@
     return NULL;
 }
 
+static int glue(symfind, SZ)(const void *s0, const void *s1)
+{
+    struct elf_sym *key = (struct elf_sym *)s0;
+    struct elf_sym *sym = (struct elf_sym *)s1;
+    int result = 0;
+    if (key->st_value < sym->st_value) {
+        result = -1;
+    } else if (key->st_value > sym->st_value + sym->st_size) {
+        result = 1;
+    }
+    return result;
+}
+
+static const char *glue(lookup_symbol, SZ)(struct syminfo *s, target_ulong orig_addr)
+{
+    struct elf_sym *syms = glue(s->disas_symtab.elf, SZ);
+
+    // binary search
+    struct elf_sym key;
+    struct elf_sym *sym;
+
+    key.st_value = orig_addr;
+
+    sym = bsearch(&key, syms, s->disas_num_syms, sizeof(*syms), glue(symfind, SZ));
+    if (sym != 0) {
+        return s->disas_strtab + sym->st_name;
+    }
+
+    return "";
+}
+
+static int glue(symcmp, SZ)(const void *s0, const void *s1)
+{
+    struct elf_sym *sym0 = (struct elf_sym *)s0;
+    struct elf_sym *sym1 = (struct elf_sym *)s1;
+    return (sym0->st_value < sym1->st_value)
+        ? -1
+        : ((sym0->st_value > sym1->st_value) ? 1 : 0);
+}
+
 static int glue(load_symbols, SZ)(struct elfhdr *ehdr, int fd, int must_swab)
 {
     struct elf_shdr *symtab, *strtab, *shdr_table = NULL;
     struct elf_sym *syms = NULL;
-#if (SZ == 64)
-    struct elf32_sym *syms32 = NULL;
-#endif
     struct syminfo *s;
     int nsyms, i;
     char *str = NULL;
@@ -90,21 +127,28 @@
         goto fail;
 
     nsyms = symtab->sh_size / sizeof(struct elf_sym);
-#if (SZ == 64)
-    syms32 = qemu_mallocz(nsyms * sizeof(struct elf32_sym));
-#endif
+
     for (i = 0; i < nsyms; i++) {
         if (must_swab)
             glue(bswap_sym, SZ)(&syms[i]);
-#if (SZ == 64)
-	syms32[i].st_name = syms[i].st_name;
-	syms32[i].st_info = syms[i].st_info;
-	syms32[i].st_other = syms[i].st_other;
-	syms32[i].st_shndx = syms[i].st_shndx;
-	syms32[i].st_value = syms[i].st_value & 0xffffffff;
-	syms32[i].st_size = syms[i].st_size & 0xffffffff;
+        // Throw away entries which we do not need.
+        // This reduces a typical table size from 1764487 to 11656 entries.
+        while ((syms[i].st_shndx == SHN_UNDEF ||
+                syms[i].st_shndx >= SHN_LORESERVE ||
+                ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) && i < --nsyms) {
+            syms[i] = syms[nsyms];
+            if (must_swab)
+                glue(bswap_sym, SZ)(&syms[i]);
+        }
+#if defined(TARGET_ARM) || defined (TARGET_MIPS)
+        /* The bottom address bit marks a Thumb or MIPS16 symbol.  */
+        syms[i].st_value &= ~(target_ulong)1;
 #endif
     }
+    syms = qemu_realloc(syms, nsyms * sizeof(*syms));
+
+    qsort(syms, nsyms, sizeof(*syms), glue(symcmp, SZ));
+
     /* String table */
     if (symtab->sh_link >= ehdr->e_shnum)
         goto fail;
@@ -112,16 +156,12 @@
 
     str = load_at(fd, strtab->sh_offset, strtab->sh_size);
     if (!str)
-	goto fail;
+        goto fail;
 
     /* Commit */
     s = qemu_mallocz(sizeof(*s));
-#if (SZ == 64)
-    s->disas_symtab = syms32;
-    qemu_free(syms);
-#else
-    s->disas_symtab = syms;
-#endif
+    s->lookup_symbol = glue(lookup_symbol, SZ);
+    glue(s->disas_symtab.elf, SZ) = syms;
     s->disas_num_syms = nsyms;
     s->disas_strtab = str;
     s->next = syminfos;
@@ -129,9 +169,6 @@
     qemu_free(shdr_table);
     return 0;
  fail:
-#if (SZ == 64)
-    qemu_free(syms32);
-#endif
     qemu_free(syms);
     qemu_free(str);
     qemu_free(shdr_table);

  parent reply	other threads:[~2008-10-17 17:28 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-02 19:26 [Qemu-devel] [PATCH] Fix symbol lookup for mips64* targets Stefan Weil
2008-10-02 19:53 ` Blue Swirl
2008-10-16 19:57   ` [Qemu-devel] [PATCH] Improve symbol lookup (was: Re: [Qemu-devel] [PATCH] Fix symbol lookup for mips64* targets) Stefan Weil
2008-10-16 20:32     ` [Qemu-devel] [PATCH] Improve symbol lookup Stefan Weil
2008-10-16 20:41       ` Laurent Desnogues
2008-10-17 17:28       ` Stefan Weil [this message]
2008-10-20 17:16         ` [Qemu-devel] [PATCH] Improve symbol lookup for system and user mode Blue Swirl
2008-10-16 20:38     ` [Qemu-devel] [PATCH] Improve symbol lookup (was: Re: [Qemu-devel] [PATCH] Fix symbol lookup for mips64* targets) Laurent Desnogues
2008-10-02 21:52 ` [Qemu-devel] [PATCH] Fix symbol lookup for mips64* targets Thiemo Seufer
2008-10-03 10:09   ` Stefan Weil
2008-10-03 10:29     ` Laurent Desnogues
2008-10-03 17:14       ` Blue Swirl
2008-10-03 17:13     ` Blue Swirl

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48F8CB4C.30800@mail.berlios.de \
    --to=weil@mail.berlios.de \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).