qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Luiz Capitulino <lcapitulino@redhat.com>
To: Wen Congyang <wency@cn.fujitsu.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>,
	HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>,
	Dave Anderson <anderson@redhat.com>,
	qemu-devel <qemu-devel@nongnu.org>,
	Eric Blake <eblake@redhat.com>
Subject: Re: [Qemu-devel] [RFC][PATCH 11/14 v9] introduce a new monitor command 'dump' to dump guest's memory
Date: Wed, 14 Mar 2012 14:18:47 -0300	[thread overview]
Message-ID: <20120314141847.4557b4bb@doriath.home> (raw)
In-Reply-To: <4F5FFE57.8010308@cn.fujitsu.com>

On Wed, 14 Mar 2012 10:11:35 +0800
Wen Congyang <wency@cn.fujitsu.com> wrote:

> The command's usage:
>    dump [-p] file
> file should be start with "file:"(the file's path) or "fd:"(the fd's name).
> 
> Note:
>   1. If you want to use gdb to analyse the core, please specify -p option.
>   2. This command doesn't support the fd that is is associated with a pipe,
>      socket, or FIFO(lseek will fail with such fd).
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> ---
>  Makefile.target  |    2 +-
>  dump.c           |  714 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  elf.h            |    5 +
>  hmp-commands.hx  |   21 ++
>  hmp.c            |   10 +
>  hmp.h            |    1 +
>  qapi-schema.json |   14 +
>  qmp-commands.hx  |   34 +++
>  8 files changed, 800 insertions(+), 1 deletions(-)
>  create mode 100644 dump.c
> 
> diff --git a/Makefile.target b/Makefile.target
> index c81c4fa..287fbe7 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -213,7 +213,7 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
>  obj-$(CONFIG_VGA) += vga.o
>  obj-y += memory.o savevm.o
>  obj-y += memory_mapping.o
> -obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o
> +obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o dump.o
>  LIBS+=-lz
>  
>  obj-i386-$(CONFIG_KVM) += hyperv.o
> diff --git a/dump.c b/dump.c
> new file mode 100644
> index 0000000..42e1681
> --- /dev/null
> +++ b/dump.c
> @@ -0,0 +1,714 @@
> +/*
> + * QEMU dump
> + *
> + * Copyright Fujitsu, Corp. 2011
> + *
> + * Authors:
> + *     Wen Congyang <wency@cn.fujitsu.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include <unistd.h>
> +#include "elf.h"
> +#include <sys/procfs.h>
> +#include <glib.h>
> +#include "cpu.h"
> +#include "cpu-all.h"
> +#include "targphys.h"
> +#include "monitor.h"
> +#include "kvm.h"
> +#include "dump.h"
> +#include "sysemu.h"
> +#include "bswap.h"
> +#include "memory_mapping.h"
> +#include "error.h"
> +#include "qmp-commands.h"
> +#include "gdbstub.h"
> +
> +static inline uint16_t cpu_convert_to_target16(uint16_t val, int endian)
> +{
> +    if (endian == ELFDATA2LSB) {
> +        val = cpu_to_le16(val);
> +    } else {
> +        val = cpu_to_be16(val);
> +    }
> +
> +    return val;
> +}
> +
> +static inline uint32_t cpu_convert_to_target32(uint32_t val, int endian)
> +{
> +    if (endian == ELFDATA2LSB) {
> +        val = cpu_to_le32(val);
> +    } else {
> +        val = cpu_to_be32(val);
> +    }
> +
> +    return val;
> +}
> +
> +static inline uint64_t cpu_convert_to_target64(uint64_t val, int endian)
> +{
> +    if (endian == ELFDATA2LSB) {
> +        val = cpu_to_le64(val);
> +    } else {
> +        val = cpu_to_be64(val);
> +    }
> +
> +    return val;
> +}
> +
> +enum {
> +    DUMP_STATE_ERROR,
> +    DUMP_STATE_SETUP,
> +    DUMP_STATE_CANCELLED,
> +    DUMP_STATE_ACTIVE,
> +    DUMP_STATE_COMPLETED,
> +};
> +
> +typedef struct DumpState {
> +    ArchDumpInfo dump_info;
> +    MemoryMappingList list;
> +    uint16_t phdr_num;
> +    uint32_t sh_info;
> +    bool have_section;
> +    int state;
> +    bool resume;
> +    char *error;
> +    target_phys_addr_t memory_offset;
> +    write_core_dump_function f;
> +    void (*cleanup)(void *opaque);
> +    void *opaque;
> +} DumpState;
> +
> +static DumpState *dump_get_current(void)
> +{
> +    static DumpState current_dump = {
> +        .state = DUMP_STATE_SETUP,
> +    };
> +
> +    return &current_dump;
> +}

You just dropped a few asynchronous bits and resent this as a synchronous
command, letting all the asynchronous infrastructure in. This is bad, as the
command is more complex then it should be and doesn't make full use of the
added infrastructure.

For example, does the synchronous version really uses DumpState? If it doesn't,
let's just drop it and everything else which is not necessary.

*However*, note that while it's fine with me to have this as a synchronous
command we need a few more ACKs (from libvirt and Anthony and/or Jan). So, I
wouldn't go too far on making changes before we get those ACKs.

> +
> +static int dump_cleanup(DumpState *s)
> +{
> +    int ret = 0;
> +
> +    memory_mapping_list_free(&s->list);
> +    s->cleanup(s->opaque);
> +    if (s->resume) {
> +        vm_start();
> +    }
> +
> +    return ret;
> +}
> +
> +static void dump_error(DumpState *s, const char *reason)
> +{
> +    s->state = DUMP_STATE_ERROR;
> +    s->error = g_strdup(reason);
> +    dump_cleanup(s);
> +}
> +
> +static int write_elf64_header(DumpState *s)
> +{
> +    Elf64_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);
> +    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
> +    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
> +                                                   endian);
> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
> +    elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
> +                                                     endian);
> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
> +    if (s->have_section) {
> +        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;
> +
> +        elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
> +                                                         endian);
> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
> +    }
> +
> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_header(DumpState *s)
> +{
> +    Elf32_Ehdr elf_header;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
> +    memcpy(&elf_header, ELFMAG, 4);
> +    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
> +    elf_header.e_ident[EI_DATA] = endian;
> +    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
> +    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
> +    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
> +                                                   endian);
> +    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
> +    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
> +    elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
> +    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
> +                                                     endian);
> +    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
> +    if (s->have_section) {
> +        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;
> +
> +        elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
> +        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
> +                                                         endian);
> +        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
> +    }
> +
> +    ret = s->f(0, &elf_header, sizeof(elf_header), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write elf header.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
> +    phdr.p_offset = cpu_convert_to_target64(offset, endian);
> +    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
> +    if (offset == -1) {
> +        phdr.p_filesz = 0;
> +    } else {
> +        phdr.p_filesz = cpu_convert_to_target64(memory_mapping->length, endian);
> +    }
> +    phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
> +    phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);
> +
> +    phdr_offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*phdr_index;
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf64_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
> +                            int phdr_index, target_phys_addr_t offset)
> +{
> +    Elf32_Phdr phdr;
> +    off_t phdr_offset;
> +    int ret;
> +    int endian = s->dump_info.d_endian;
> +
> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
> +    phdr.p_offset = cpu_convert_to_target32(offset, endian);
> +    phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
> +    if (offset == -1) {
> +        phdr.p_filesz = 0;
> +    } else {
> +        phdr.p_filesz = cpu_convert_to_target32(memory_mapping->length, endian);
> +    }
> +    phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
> +    phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);
> +
> +    phdr_offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*phdr_index;
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf32_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf64_notes(DumpState *s, int phdr_index,
> +                             target_phys_addr_t *offset)
> +{
> +    CPUState *env;
> +    int ret;
> +    target_phys_addr_t begin = *offset;
> +    Elf64_Phdr phdr;
> +    off_t phdr_offset;
> +    int id;
> +    int endian = s->dump_info.d_endian;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        id = gdb_id(env);
> +        ret = cpu_write_elf64_note(s->f, env, id, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write elf notes.\n");
> +            return -1;
> +        }
> +    }
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        ret = cpu_write_elf64_qemunote(s->f, env, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write CPU status.\n");
> +            return -1;
> +        }
> +    }
> +
> +    memset(&phdr, 0, sizeof(Elf64_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
> +    phdr.p_offset = cpu_convert_to_target64(begin, endian);
> +    phdr.p_paddr = 0;
> +    phdr.p_filesz = cpu_convert_to_target64(*offset - begin, endian);
> +    phdr.p_memsz = cpu_convert_to_target64(*offset - begin, endian);
> +    phdr.p_vaddr = 0;
> +
> +    phdr_offset = sizeof(Elf64_Ehdr);
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf64_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf32_notes(DumpState *s, int phdr_index,
> +                             target_phys_addr_t *offset)
> +{
> +    CPUState *env;
> +    int ret;
> +    target_phys_addr_t begin = *offset;
> +    Elf32_Phdr phdr;
> +    off_t phdr_offset;
> +    int id;
> +    int endian = s->dump_info.d_endian;
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        id = gdb_id(env);
> +        ret = cpu_write_elf32_note(s->f, env, id, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write elf notes.\n");
> +            return -1;
> +        }
> +    }
> +
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        ret = cpu_write_elf32_qemunote(s->f, env, offset, s->opaque);
> +        if (ret < 0) {
> +            dump_error(s, "dump: failed to write CPU status.\n");
> +            return -1;
> +        }
> +    }
> +
> +    memset(&phdr, 0, sizeof(Elf32_Phdr));
> +    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
> +    phdr.p_offset = cpu_convert_to_target32(begin, endian);
> +    phdr.p_paddr = 0;
> +    phdr.p_filesz = cpu_convert_to_target32(*offset - begin, endian);
> +    phdr.p_memsz = cpu_convert_to_target32(*offset - begin, endian);
> +    phdr.p_vaddr = 0;
> +
> +    phdr_offset = sizeof(Elf32_Ehdr);
> +    ret = s->f(phdr_offset, &phdr, sizeof(Elf32_Phdr), s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write program header table.\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int write_elf_section(DumpState *s, target_phys_addr_t *offset, int type)
> +{
> +    Elf32_Shdr shdr32;
> +    Elf64_Shdr shdr64;
> +    int endian = s->dump_info.d_endian;
> +    int shdr_size;
> +    void *shdr;
> +    int ret;
> +
> +    if (type == 0) {
> +        shdr_size = sizeof(Elf32_Shdr);
> +        memset(&shdr32, 0, shdr_size);
> +        shdr32.sh_info = cpu_convert_to_target32(s->sh_info, endian);
> +        shdr = &shdr32;
> +    } else {
> +        shdr_size = sizeof(Elf64_Shdr);
> +        memset(&shdr64, 0, shdr_size);
> +        shdr64.sh_info = cpu_convert_to_target32(s->sh_info, endian);
> +        shdr = &shdr64;
> +    }
> +
> +    ret = s->f(*offset, &shdr, shdr_size, s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to write section header table.\n");
> +        return -1;
> +    }
> +
> +    *offset += shdr_size;
> +    return 0;
> +}
> +
> +static int write_data(DumpState *s, void *buf, int length,
> +                      target_phys_addr_t *offset)
> +{
> +    int ret;
> +
> +    ret = s->f(*offset, buf, length, s->opaque);
> +    if (ret < 0) {
> +        dump_error(s, "dump: failed to save memory.\n");
> +        return -1;
> +    }
> +
> +    *offset += length;
> +    return 0;
> +}
> +
> +/* write the memroy to vmcore. 1 page per I/O. */
> +static int write_memory(DumpState *s, RAMBlock *block,
> +                        target_phys_addr_t *offset)
> +{
> +    int i, ret;
> +
> +    for (i = 0; i < block->length / TARGET_PAGE_SIZE; i++) {
> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
> +                         TARGET_PAGE_SIZE, offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    if ((block->length % TARGET_PAGE_SIZE) != 0) {
> +        ret = write_data(s, block->host + i * TARGET_PAGE_SIZE,
> +                         block->length % TARGET_PAGE_SIZE, offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/* get the memory's offset in the vmcore */
> +static target_phys_addr_t get_offset(target_phys_addr_t phys_addr,
> +                                     target_phys_addr_t memory_offset)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = memory_offset;
> +
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        if (phys_addr >= block->offset &&
> +            phys_addr < block->offset + block->length) {
> +            return phys_addr - block->offset + offset;
> +        }
> +        offset += block->length;
> +    }
> +
> +    return -1;
> +}
> +
> +/* write elf header, PT_NOTE and elf note to vmcore. */
> +static int dump_begin(DumpState *s)
> +{
> +    target_phys_addr_t offset;
> +    int ret;
> +
> +    s->state = DUMP_STATE_ACTIVE;
> +
> +    /*
> +     * the vmcore's format is:
> +     *   --------------
> +     *   |  elf header |
> +     *   --------------
> +     *   |  PT_NOTE    |
> +     *   --------------
> +     *   |  PT_LOAD    |
> +     *   --------------
> +     *   |  ......     |
> +     *   --------------
> +     *   |  PT_LOAD    |
> +     *   --------------
> +     *   |  sec_hdr    |
> +     *   --------------
> +     *   |  elf note   |
> +     *   --------------
> +     *   |  memory     |
> +     *   --------------
> +     *
> +     * we only know where the memory is saved after we write elf note into
> +     * vmcore.
> +     */
> +
> +    /* write elf header to vmcore */
> +    if (s->dump_info.d_class == ELFCLASS64) {
> +        ret = write_elf64_header(s);
> +    } else {
> +        ret = write_elf32_header(s);
> +    }
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    /* write elf section and notes to vmcore */
> +    if (s->dump_info.d_class == ELFCLASS64) {
> +        if (s->have_section) {
> +            offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->sh_info;
> +            if (write_elf_section(s, &offset, 1) < 0) {
> +                return -1;
> +            }
> +        } else {
> +            offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr)*s->phdr_num;
> +        }
> +        ret = write_elf64_notes(s, 0, &offset);
> +    } else {
> +        if (s->have_section) {
> +            offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->sh_info;
> +            if (write_elf_section(s, &offset, 0) < 0) {
> +                return -1;
> +            }
> +        } else {
> +            offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr)*s->phdr_num;
> +        }
> +        ret = write_elf32_notes(s, 0, &offset);
> +    }
> +
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    s->memory_offset = offset;
> +    return 0;
> +}
> +
> +/* write PT_LOAD to vmcore */
> +static int dump_completed(DumpState *s)
> +{
> +    target_phys_addr_t offset;
> +    MemoryMapping *memory_mapping;
> +    int phdr_index = 1, ret;
> +
> +    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
> +        offset = get_offset(memory_mapping->phys_addr, s->memory_offset);
> +        if (s->dump_info.d_class == ELFCLASS64) {
> +            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
> +        } else {
> +            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
> +        }
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    s->state = DUMP_STATE_COMPLETED;
> +    dump_cleanup(s);
> +    return 0;
> +}
> +
> +/* write all memory to vmcore */
> +static int dump_iterate(DumpState *s)
> +{
> +    RAMBlock *block;
> +    target_phys_addr_t offset = s->memory_offset;
> +    int ret;
> +
> +    /* write all memory to vmcore */
> +    QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        ret = write_memory(s, block, &offset);
> +        if (ret < 0) {
> +            return -1;
> +        }
> +    }
> +
> +    return dump_completed(s);
> +}
> +
> +static int create_vmcore(DumpState *s)
> +{
> +    int ret;
> +
> +    ret = dump_begin(s);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    ret = dump_iterate(s);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static DumpState *dump_init(bool paging, Error **errp)
> +{
> +    CPUState *env;
> +    DumpState *s = dump_get_current();
> +    int ret;
> +
> +    if (runstate_is_running()) {
> +        vm_stop(RUN_STATE_PAUSED);
> +        s->resume = true;

Hmm, you actually stop the VM. Seems obvious now, but when people talked about
making this asynchronous I automatically assumed that what we didn't want was
having the global mutex held for too much time (ie. while this command was
running).

The only disadvantage of having this as a synchronous command is that libvirt
won't be able to cancel it and won't be able to run other commands in parallel.
Doesn't seem that serious to me.

Btw, RUN_STATE_PAUSED is not a good one. Doesn't matter that much, as this
is unlikely to be visible, but you should use RUN_STATE_SAVE_VM or
RUN_STATE_DEBUG.

> +    } else {
> +        s->resume = false;
> +    }
> +    s->state = DUMP_STATE_SETUP;
> +    if (s->error) {
> +        g_free(s->error);
> +        s->error = NULL;
> +    }
> +
> +    /*
> +     * get dump info: endian, class and architecture.
> +     * If the target architecture is not supported, cpu_get_dump_info() will
> +     * return -1.
> +     *
> +     * if we use kvm, we should synchronize the register before we get dump
> +     * info.
> +     */
> +    for (env = first_cpu; env != NULL; env = env->next_cpu) {
> +        cpu_synchronize_state(env);
> +    }
> +
> +    ret = cpu_get_dump_info(&s->dump_info);
> +    if (ret < 0) {
> +        error_set(errp, QERR_UNSUPPORTED);

This will let the VM paused.

> +        return NULL;
> +    }
> +
> +    /* get memory mapping */
> +    memory_mapping_list_init(&s->list);
> +    if (paging) {
> +        qemu_get_guest_memory_mapping(&s->list);
> +    } else {
> +        qemu_get_guest_simple_memory_mapping(&s->list);
> +    }
> +
> +    /*
> +     * calculate phdr_num
> +     *
> +     * the type of phdr->num is uint16_t, so we should avoid overflow
> +     */
> +    s->phdr_num = 1; /* PT_NOTE */
> +    if (s->list.num < (1 << 16) - 2) {
> +        s->phdr_num += s->list.num;
> +        s->have_section = false;
> +    } else {
> +        s->have_section = true;
> +        s->phdr_num = PN_XNUM;
> +
> +        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
> +        if (s->list.num > (1ULL << 32) - 2) {
> +            s->sh_info = 0xffffffff;
> +        } else {
> +            s->sh_info += s->list.num;
> +        }
> +    }
> +
> +    return s;
> +}
> +
> +static int fd_write_vmcore(target_phys_addr_t offset, void *buf, size_t size,
> +                           void *opaque)
> +{
> +    int fd = (int)(intptr_t)opaque;
> +    int ret;
> +
> +    ret = lseek(fd, offset, SEEK_SET);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    ret = write(fd, buf, size);
> +    if (ret != size) {
> +        return -1;
> +    }

I think you should use send_all() instead of plain write().

> +
> +    return 0;
> +}
> +
> +static void fd_cleanup(void *opaque)
> +{
> +    int fd = (int)(intptr_t)opaque;
> +
> +    if (fd != -1) {
> +        close(fd);
> +    }
> +}
> +
> +static DumpState *dump_init_fd(int fd, bool paging, Error **errp)
> +{
> +    DumpState *s = dump_init(paging, errp);
> +
> +    if (s == NULL) {
> +        return NULL;
> +    }
> +
> +    s->f = fd_write_vmcore;
> +    s->cleanup = fd_cleanup;
> +    s->opaque = (void *)(intptr_t)fd;

Do we really need all these indirections?

> +
> +    return s;
> +}
> +
> +void qmp_dump(bool paging, const char *file, Error **errp)
> +{
> +    const char *p;
> +    int fd = -1;
> +    DumpState *s;
> +
> +#if !defined(WIN32)
> +    if (strstart(file, "fd:", &p)) {
> +        fd = qemu_get_fd(p);

qemu_get_fd() won't be merged, you should use monitor_get_fd(cur_mon, p);

> +        if (fd == -1) {
> +            error_set(errp, QERR_FD_NOT_FOUND, p);
> +            return;
> +        }
> +    }
> +#endif
> +
> +    if  (strstart(file, "file:", &p)) {
> +        fd = open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);

This is minor, but I'd use qemu_open() here.

> +        if (fd < 0) {
> +            error_set(errp, QERR_OPEN_FILE_FAILED, p);
> +            return;
> +        }
> +    }
> +
> +    if (fd == -1) {
> +        error_set(errp, QERR_INVALID_PARAMETER, "file");
> +        return;
> +    }
> +
> +    s = dump_init_fd(fd, paging, errp);
> +    if (!s) {
> +        return;
> +    }
> +
> +    if (create_vmcore(s) < 0) {
> +        error_set(errp, QERR_IO_ERROR);
> +    }
> +}
> diff --git a/elf.h b/elf.h
> index 2e05d34..6a10657 100644
> --- a/elf.h
> +++ b/elf.h
> @@ -1000,6 +1000,11 @@ typedef struct elf64_sym {
>  
>  #define EI_NIDENT	16
>  
> +/* Special value for e_phnum.  This indicates that the real number of
> +   program headers is too large to fit into e_phnum.  Instead the real
> +   value is in the field sh_info of section 0.  */
> +#define PN_XNUM         0xffff
> +
>  typedef struct elf32_hdr{
>    unsigned char	e_ident[EI_NIDENT];
>    Elf32_Half	e_type;
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index 6980214..d4cf2e5 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -880,6 +880,27 @@ server will ask the spice/vnc client to automatically reconnect using the
>  new parameters (if specified) once the vm migration finished successfully.
>  ETEXI
>  
> +#if defined(CONFIG_HAVE_CORE_DUMP)
> +    {
> +        .name       = "dump",
> +        .args_type  = "paging:-p,file:s",
> +        .params     = "[-p] file",
> +        .help       = "dump to file",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd = hmp_dump,
> +    },
> +
> +
> +STEXI
> +@item dump [-p] @var{file}
> +@findex dump
> +Dump to @var{file}. The file can be processed with crash or gdb.
> +    file: destination file(started with "file:") or destination file descriptor
> +          (started with "fd:")
> +  paging: do paging to get guest's memory mapping
> +ETEXI
> +#endif
> +
>      {
>          .name       = "snapshot_blkdev",
>          .args_type  = "reuse:-n,device:B,snapshot-file:s?,format:s?",
> diff --git a/hmp.c b/hmp.c
> index 290c43d..e13b793 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -860,3 +860,13 @@ void hmp_block_job_cancel(Monitor *mon, const QDict *qdict)
>  
>      hmp_handle_error(mon, &error);
>  }
> +
> +void hmp_dump(Monitor *mon, const QDict *qdict)
> +{
> +    Error *errp = NULL;
> +    int paging = qdict_get_try_bool(qdict, "paging", 0);
> +    const char *file = qdict_get_str(qdict, "file");
> +
> +    qmp_dump(!!paging, file, &errp);

Why the double negation on 'paging'?

> +    hmp_handle_error(mon, &errp);
> +}
> diff --git a/hmp.h b/hmp.h
> index 5409464..b055e50 100644
> --- a/hmp.h
> +++ b/hmp.h
> @@ -59,5 +59,6 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict);
>  void hmp_block_stream(Monitor *mon, const QDict *qdict);
>  void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
>  void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
> +void hmp_dump(Monitor *mon, const QDict *qdict);
>  
>  #endif
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 04fa84f..81b8c7c 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -1663,3 +1663,17 @@
>  { 'command': 'qom-list-types',
>    'data': { '*implements': 'str', '*abstract': 'bool' },
>    'returns': [ 'ObjectTypeInfo' ] }
> +
> +##
> +# @dump

'dump' is too generic, please call this dump-guest-memory-vmcore or something
more descriptive.

> +#
> +# Dump guest's memory to vmcore.
> +#
> +# @paging: if true, do paging to get guest's memory mapping
> +# @file: the filename or file descriptor of the vmcore.

'file' is not a good name because it can also dump to an fd, maybe 'protocol'?

> +#
> +# Returns: nothing on success
> +#
> +# Since: 1.1
> +##
> +{ 'command': 'dump', 'data': { 'paging': 'bool', 'file': 'str' } }
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index dfe8a5b..9e39bd9 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -586,6 +586,40 @@ Example:
>  
>  EQMP
>  
> +#if defined(CONFIG_HAVE_CORE_DUMP)
> +    {
> +        .name       = "dump",
> +        .args_type  = "paging:-p,file:s",
> +        .params     = "[-p] file",
> +        .help       = "dump to file",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = qmp_marshal_input_dump,
> +    },
> +
> +SQMP
> +dump
> +
> +
> +Dump to file. The file can be processed with crash or gdb.
> +
> +Arguments:
> +
> +- "paging": do paging to get guest's memory mapping (json-bool)
> +- "file": destination file(started with "file:") or destination file descriptor
> +          (started with "fd:") (json-string)
> +
> +Example:
> +
> +-> { "execute": "dump", "arguments": { "file": "fd:dump" } }
> +<- { "return": {} }
> +
> +Notes:
> +
> +(1) All boolean arguments default to false
> +
> +EQMP
> +#endif
> +
>      {
>          .name       = "netdev_add",
>          .args_type  = "netdev:O",

  reply	other threads:[~2012-03-14 17:19 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-14  2:03 [Qemu-devel] [RFC][PATCH 00/14 v9] introducing a new, dedicated memory dump mechanism Wen Congyang
2012-03-14  2:05 ` [Qemu-devel] [RFC][PATCH 01/14 v9] Add API to create memory mapping list Wen Congyang
2012-03-14  2:06 ` [Qemu-devel] [RFC][PATCH 02/14 v9] Add API to check whether a physical address is I/O address Wen Congyang
2012-03-14  9:18   ` [Qemu-devel] [RESEND][PATCH " Wen Congyang
2012-03-14  2:06 ` [Qemu-devel] [RFC][PATCH 03/14 v9] implement cpu_get_memory_mapping() Wen Congyang
2012-03-14  2:07 ` [Qemu-devel] [RFC][PATCH 04/14 v9] Add API to check whether paging mode is enabled Wen Congyang
2012-03-14  2:07 ` [Qemu-devel] [RFC][PATCH 05/14 v9] Add API to get memory mapping Wen Congyang
2012-03-16  3:52   ` HATAYAMA Daisuke
2012-03-16  6:50     ` Wen Congyang
2012-03-16  6:38   ` HATAYAMA Daisuke
2012-03-16  6:59     ` Wen Congyang
2012-03-14  2:08 ` [Qemu-devel] [RFC][PATCH 06/14 v9] Add API to get memory mapping without do paging Wen Congyang
2012-03-14  2:08 ` [Qemu-devel] [RFC][PATCH 07/14 v9] target-i386: Add API to write elf notes to core file Wen Congyang
2012-03-16  1:17   ` HATAYAMA Daisuke
2012-03-14  2:09 ` [Qemu-devel] [RFC][PATCH 08/14 v9] target-i386: Add API to write cpu status " Wen Congyang
2012-03-16  1:48   ` HATAYAMA Daisuke
2012-03-16  6:50     ` Wen Congyang
2012-03-19  1:09       ` HATAYAMA Daisuke
2012-03-14  2:09 ` [Qemu-devel] [RFC][PATCH 09/14 v9] target-i386: add API to get dump info Wen Congyang
2012-03-14  2:10 ` [Qemu-devel] [RFC][PATCH 10/14 v9] make gdb_id() generally avialable Wen Congyang
2012-03-14  2:11 ` [Qemu-devel] [RFC][PATCH 11/14 v9] introduce a new monitor command 'dump' to dump guest's memory Wen Congyang
2012-03-14 17:18   ` Luiz Capitulino [this message]
2012-03-15  2:29     ` Wen Congyang
2012-03-15 14:25     ` Luiz Capitulino
2012-03-16 10:13     ` Wen Congyang
2012-03-19  2:28     ` Wen Congyang
2012-03-19  8:31       ` Wen Congyang
2012-03-19 13:16       ` Luiz Capitulino
2012-03-16  3:23   ` HATAYAMA Daisuke
2012-03-16  6:41     ` Wen Congyang
2012-03-14  2:12 ` [Qemu-devel] [RFC][PATCH 12/14 v9] support to cancel the current dumping Wen Congyang
2012-03-14 17:19   ` Luiz Capitulino
2012-03-14  2:13 ` [Qemu-devel] [RFC][PATCH 13/14 v9] support to query dumping status Wen Congyang
2012-03-14 17:19   ` Luiz Capitulino
2012-03-14  2:13 ` [Qemu-devel] [RFC][PATCH 14/14 v9] allow user to dump a fraction of the memory Wen Congyang
2012-03-14 17:20   ` Luiz Capitulino
2012-03-14 17:26 ` [Qemu-devel] [RFC][PATCH 00/14 v9] introducing a new, dedicated memory dump mechanism Luiz Capitulino
2012-03-14 17:37   ` Eric Blake
2012-03-14 17:49   ` Anthony Liguori
2012-03-14 18:03     ` Luiz Capitulino

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120314141847.4557b4bb@doriath.home \
    --to=lcapitulino@redhat.com \
    --cc=anderson@redhat.com \
    --cc=d.hatayama@jp.fujitsu.com \
    --cc=eblake@redhat.com \
    --cc=jan.kiszka@siemens.com \
    --cc=qemu-devel@nongnu.org \
    --cc=wency@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).