public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Anthony Liguori <aliguori@linux.vnet.ibm.com>
To: Yoshiaki Tamura <tamura.yoshiaki@lab.ntt.co.jp>
Cc: kvm@vger.kernel.org, qemu-devel@nongnu.org, avi@redhat.com,
	Anthony Liguori <aliguori@us.ibm.com>,
	mtosatti@redhat.com, ohmura.kei@lab.ntt.co.jp,
	yoshikawa.takuya@oss.ntt.co.jp
Subject: Re: [RFC PATCH 08/20] Introduce RAMSaveIO and use cpu_physical_memory_get_dirty_range() to check multiple dirty pages.
Date: Thu, 22 Apr 2010 14:31:00 -0500	[thread overview]
Message-ID: <4BD0A3F4.5050708@linux.vnet.ibm.com> (raw)
In-Reply-To: <1271829445-5328-9-git-send-email-tamura.yoshiaki@lab.ntt.co.jp>

On 04/21/2010 12:57 AM, Yoshiaki Tamura wrote:
> Introduce RAMSaveIO to use writev for saving ram blocks, and modify
> ram_save_block() and ram_save_remaining() to use
> cpu_physical_memory_get_dirty_range() to check multiple dirty and
> non-dirty pages at once.
>
> Signed-off-by: Yoshiaki Tamura<tamura.yoshiaki@lab.ntt.co.jp>
> Signed-off-by: OHMURA Kei<ohmura.kei@lab.ntt.co.jp>
>    

Perf data?

Regards,

Anthony Liguori

> ---
>   vl.c |  221 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
>   1 files changed, 197 insertions(+), 24 deletions(-)
>
> diff --git a/vl.c b/vl.c
> index 729c955..9c3dc4c 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -2774,12 +2774,167 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
>       return 1;
>   }
>
> -static int ram_save_block(QEMUFile *f)
> +typedef struct RAMSaveIO RAMSaveIO;
> +
> +struct RAMSaveIO {
> +    QEMUFile *f;
> +    QEMUIOVector *qiov;
> +
> +    uint8_t *ram_store;
> +    size_t nalloc, nused;
> +    uint8_t io_mode;
> +
> +    void (*put_buffer)(RAMSaveIO *s, uint8_t *buf, size_t len);
> +    void (*put_byte)(RAMSaveIO *s, int v);
> +    void (*put_be64)(RAMSaveIO *s, uint64_t v);
> +
> +};
> +
> +static inline void ram_saveio_flush(RAMSaveIO *s, int prepare)
> +{
> +    qemu_put_vector(s->f, s->qiov);
> +    if (prepare)
> +        qemu_put_vector_prepare(s->f);
> +
> +    /* reset stored data */
> +    qemu_iovec_reset(s->qiov);
> +    s->nused = 0;
> +}
> +
> +static inline void ram_saveio_put_buffer(RAMSaveIO *s, uint8_t *buf, size_t len)
> +{
> +    s->put_buffer(s, buf, len);
> +}
> +
> +static inline void ram_saveio_put_byte(RAMSaveIO *s, int v)
> +{
> +    s->put_byte(s, v);
> +}
> +
> +static inline void ram_saveio_put_be64(RAMSaveIO *s, uint64_t v)
> +{
> +    s->put_be64(s, v);
> +}
> +
> +static inline void ram_saveio_set_error(RAMSaveIO *s)
> +{
> +    qemu_file_set_error(s->f);
> +}
> +
> +static void ram_saveio_put_buffer_vector(RAMSaveIO *s, uint8_t *buf, size_t len)
> +{
> +    qemu_iovec_add(s->qiov, buf, len);
> +}
> +
> +static void ram_saveio_put_buffer_direct(RAMSaveIO *s, uint8_t *buf, size_t len)
> +{
> +    qemu_put_buffer(s->f, buf, len);
> +}
> +
> +static void ram_saveio_put_byte_vector(RAMSaveIO *s, int v)
> +{
> +    uint8_t *to_save;
> +
> +    if (s->nalloc - s->nused<  sizeof(int))
> +        ram_saveio_flush(s, 1);
> +
> +    to_save =&s->ram_store[s->nused];
> +    to_save[0] = v&  0xff;
> +    s->nused++;
> +
> +    qemu_iovec_add(s->qiov, to_save, 1);
> +}
> +
> +static void ram_saveio_put_byte_direct(RAMSaveIO *s, int v)
> +{
> +    qemu_put_byte(s->f, v);
> +}
> +
> +static void ram_saveio_put_be64_vector(RAMSaveIO *s, uint64_t v)
> +{
> +    uint8_t *to_save;
> +
> +    if (s->nalloc - s->nused<  sizeof(uint64_t))
> +        ram_saveio_flush(s, 1);
> +
> +    to_save =&s->ram_store[s->nused];
> +    to_save[0] = (v>>  56)&  0xff;
> +    to_save[1] = (v>>  48)&  0xff;
> +    to_save[2] = (v>>  40)&  0xff;
> +    to_save[3] = (v>>  32)&  0xff;
> +    to_save[4] = (v>>  24)&  0xff;
> +    to_save[5] = (v>>  16)&  0xff;
> +    to_save[6] = (v>>   8)&  0xff;
> +    to_save[7] = (v>>   0)&  0xff;
> +    s->nused += sizeof(uint64_t);
> +
> +    qemu_iovec_add(s->qiov, to_save, sizeof(uint64_t));
> +}
> +
> +static void ram_saveio_put_be64_direct(RAMSaveIO *s, uint64_t v)
> +{
> +
> +    qemu_put_be64(s->f, v);
> +}
> +
> +static RAMSaveIO *ram_saveio_new(QEMUFile *f, size_t max_store)
> +{
> +    RAMSaveIO *s;
> +
> +    s = qemu_mallocz(sizeof(*s));
> +
> +    if (qemu_file_get_rate_limit(f) == 0) {/* non buffer mode */
> +        /* When a QEMUFile does not have get_rate_limit,
> +         * qemu_file_get_rate_limit will return 0.
> +         * However, we believe that all kinds of QEMUFile
> +         * except non-block mode have a rate limit function.
> +         */
> +        s->io_mode = 1;
> +        s->ram_store = qemu_mallocz(max_store);
> +        s->nalloc = max_store;
> +        s->nused = 0;
> +
> +        s->qiov = qemu_mallocz(sizeof(*s->qiov));
> +        qemu_iovec_init(s->qiov, max_store);
> +
> +        s->put_buffer = ram_saveio_put_buffer_vector;
> +        s->put_byte = ram_saveio_put_byte_vector;
> +        s->put_be64 = ram_saveio_put_be64_vector;
> +
> +        qemu_put_vector_prepare(f);
> +    } else {
> +        s->io_mode = 0;
> +        s->put_buffer = ram_saveio_put_buffer_direct;
> +        s->put_byte = ram_saveio_put_byte_direct;
> +        s->put_be64 = ram_saveio_put_be64_direct;
> +    }
> +
> +    s->f = f;
> +
> +    return s;
> +}
> +
> +static void ram_saveio_destroy(RAMSaveIO *s)
> +{
> +    if (s->qiov != NULL) { /* means using put_vector */
> +        ram_saveio_flush(s, 0);
> +        qemu_iovec_destroy(s->qiov);
> +        qemu_free(s->qiov);
> +        qemu_free(s->ram_store);
> +    }
> +    qemu_free(s);
> +}
> +
> +/*
> + * RAMSaveIO will manage I/O.
> + */
> +static int ram_save_block(RAMSaveIO *s)
>   {
>       static ram_addr_t current_addr = 0;
>       ram_addr_t saved_addr = current_addr;
>       ram_addr_t addr = 0;
> -    int found = 0;
> +    ram_addr_t dirty_rams[HOST_LONG_BITS];
> +    int i, found = 0;
>
>       while (addr<  last_ram_offset) {
>           if (kvm_enabled()&&  current_addr == 0) {
> @@ -2787,32 +2942,38 @@ static int ram_save_block(QEMUFile *f)
>               r = kvm_update_dirty_pages_log();
>               if (r) {
>                   fprintf(stderr, "%s: update dirty pages log failed %d\n", __FUNCTION__, r);
> -                qemu_file_set_error(f);
> +                ram_saveio_set_error(s);
>                   return 0;
>               }
>           }
> -        if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
> +        if ((found = cpu_physical_memory_get_dirty_range(
> +                 current_addr, last_ram_offset, dirty_rams, HOST_LONG_BITS,
> +                 MIGRATION_DIRTY_FLAG))) {
>               uint8_t *p;
>
> -            cpu_physical_memory_reset_dirty(current_addr,
> -                                            current_addr + TARGET_PAGE_SIZE,
> -                                            MIGRATION_DIRTY_FLAG);
> +            for (i = 0; i<  found; i++) {
> +                ram_addr_t page_addr = dirty_rams[i];
> +                cpu_physical_memory_reset_dirty(page_addr,
> +                                                page_addr + TARGET_PAGE_SIZE,
> +                                                MIGRATION_DIRTY_FLAG);
>
> -            p = qemu_get_ram_ptr(current_addr);
> +                p = qemu_get_ram_ptr(page_addr);
>
> -            if (is_dup_page(p, *p)) {
> -                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
> -                qemu_put_byte(f, *p);
> -            } else {
> -                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
> -                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
> +                if (is_dup_page(p, *p)) {
> +                    ram_saveio_put_be64(s,
> +                                        (page_addr) | RAM_SAVE_FLAG_COMPRESS);
> +                    ram_saveio_put_byte(s, *p);
> +                } else {
> +                    ram_saveio_put_be64(s, (page_addr) | RAM_SAVE_FLAG_PAGE);
> +                    ram_saveio_put_buffer(s, p, TARGET_PAGE_SIZE);
> +                }
>               }
>
> -            found = 1;
>               break;
> +        } else {
> +            addr += dirty_rams[0];
> +            current_addr = (saved_addr + addr) % last_ram_offset;
>           }
> -        addr += TARGET_PAGE_SIZE;
> -        current_addr = (saved_addr + addr) % last_ram_offset;
>       }
>
>       return found;
> @@ -2822,12 +2983,19 @@ static uint64_t bytes_transferred;
>
>   static ram_addr_t ram_save_remaining(void)
>   {
> -    ram_addr_t addr;
> +    ram_addr_t addr = 0;
>       ram_addr_t count = 0;
> +    ram_addr_t dirty_rams[HOST_LONG_BITS];
> +    int found = 0;
>
> -    for (addr = 0; addr<  last_ram_offset; addr += TARGET_PAGE_SIZE) {
> -        if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
> -            count++;
> +    while (addr<  last_ram_offset) {
> +        if ((found = cpu_physical_memory_get_dirty_range(addr, last_ram_offset,
> +            dirty_rams, HOST_LONG_BITS, MIGRATION_DIRTY_FLAG))) {
> +            count += found;
> +            addr = dirty_rams[found - 1] + TARGET_PAGE_SIZE;
> +        } else {
> +            addr += dirty_rams[0];
> +        }
>       }
>
>       return count;
> @@ -2854,6 +3022,7 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>       uint64_t bytes_transferred_last;
>       double bwidth = 0;
>       uint64_t expected_time = 0;
> +    RAMSaveIO *s;
>
>       if (stage<  0) {
>           cpu_physical_memory_set_dirty_tracking(0);
> @@ -2883,10 +3052,12 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>       bytes_transferred_last = bytes_transferred;
>       bwidth = qemu_get_clock_ns(rt_clock);
>
> -    while (!qemu_file_rate_limit(f)) {
> +    s = ram_saveio_new(f, IOV_MAX);
> +
> +     while (!qemu_file_rate_limit(f)) {
>           int ret;
>
> -        ret = ram_save_block(f);
> +        ret = ram_save_block(s);
>           bytes_transferred += ret * TARGET_PAGE_SIZE;
>           if (ret == 0) /* no more blocks */
>               break;
> @@ -2903,12 +3074,14 @@ static int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>       /* try transferring iterative blocks of memory */
>       if (stage == 3) {
>           /* flush all remaining blocks regardless of rate limiting */
> -        while (ram_save_block(f) != 0) {
> +        while (ram_save_block(s) != 0) {
>               bytes_transferred += TARGET_PAGE_SIZE;
>           }
>           cpu_physical_memory_set_dirty_tracking(0);
>       }
>
> +    ram_saveio_destroy(s);
> +
>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>
>       expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>    


  reply	other threads:[~2010-04-22 19:31 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-21  5:57 [RFC PATCH 00/20] Kemari for KVM v0.1 Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 01/20] Modify DIRTY_FLAG value and introduce DIRTY_IDX to use as indexes of bit-based phys_ram_dirty Yoshiaki Tamura
2010-04-22 19:26   ` Anthony Liguori
2010-04-23  2:09     ` Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 02/20] Introduce cpu_physical_memory_get_dirty_range() Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 03/20] Use cpu_physical_memory_set_dirty_range() to update phys_ram_dirty Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 04/20] Make QEMUFile buf expandable, and introduce qemu_realloc_buffer() and qemu_clear_buffer() Yoshiaki Tamura
2010-04-21  8:03   ` Stefan Hajnoczi
2010-04-21  8:27     ` Yoshiaki Tamura
2010-04-23  9:53   ` Avi Kivity
2010-04-23  9:59     ` Yoshiaki Tamura
2010-04-23 13:14       ` Avi Kivity
2010-04-26 10:43         ` Yoshiaki Tamura
2010-04-23 13:26     ` Anthony Liguori
2010-04-21  5:57 ` [RFC PATCH 05/20] Introduce put_vector() and get_vector to QEMUFile and qemu_fopen_ops() Yoshiaki Tamura
2010-04-22 19:28   ` Anthony Liguori
2010-04-23  3:37     ` Yoshiaki Tamura
2010-04-23 13:22       ` Anthony Liguori
2010-04-23 13:48         ` Avi Kivity
2010-05-03  9:32           ` Yoshiaki Tamura
2010-05-03 12:05             ` Anthony Liguori
2010-05-03 15:36               ` Yoshiaki Tamura
2010-05-03 16:07                 ` Anthony Liguori
2010-04-26 10:43         ` Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 06/20] Introduce iovec util functions, qemu_iovec_to_vector() and qemu_iovec_to_size() Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 07/20] Introduce qemu_put_vector() and qemu_put_vector_prepare() to use put_vector() in QEMUFile Yoshiaki Tamura
2010-04-22 19:29   ` Anthony Liguori
2010-04-23  4:02     ` Yoshiaki Tamura
2010-04-23 13:23       ` Anthony Liguori
2010-04-26 10:43         ` Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 08/20] Introduce RAMSaveIO and use cpu_physical_memory_get_dirty_range() to check multiple dirty pages Yoshiaki Tamura
2010-04-22 19:31   ` Anthony Liguori [this message]
2010-04-21  5:57 ` [RFC PATCH 09/20] Introduce writev and read to FdMigrationState Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 10/20] Introduce skip_header parameter to qemu_loadvm_state() so that it can be called iteratively without reading the header Yoshiaki Tamura
2010-04-22 19:34   ` Anthony Liguori
2010-04-23  4:25     ` Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 11/20] Introduce some socket util functions Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 12/20] Introduce fault tolerant VM transaction QEMUFile and ft_mode Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 13/20] Introduce util functions to control ft_transaction from savevm layer Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 14/20] Upgrade QEMU_FILE_VERSION from 3 to 4, and introduce qemu_savevm_state_all() Yoshiaki Tamura
2010-04-22 19:37   ` Anthony Liguori
2010-04-23  3:29     ` Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 15/20] Introduce FT mode support to configure Yoshiaki Tamura
2010-04-22 19:38   ` Anthony Liguori
2010-04-23  3:09     ` Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 16/20] Introduce event_tap fucntions and ft_tranx_ready() Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 17/20] Modify migrate_fd_put_ready() when ft_mode is on Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 18/20] Modify tcp_accept_incoming_migration() to handle ft_mode, and add a hack not to close fd when ft_mode is enabled Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 19/20] Insert do_event_tap() to virtio-{blk,net}, comment out assert() on cpu_single_env temporally Yoshiaki Tamura
2010-04-22 19:39   ` [RFC PATCH 19/20] Insert do_event_tap() to virtio-{blk, net}, " Anthony Liguori
2010-04-23  4:51     ` Yoshiaki Tamura
2010-04-21  5:57 ` [RFC PATCH 20/20] Introduce -k option to enable FT migration mode (Kemari) Yoshiaki Tamura
2010-04-22  8:58 ` [Qemu-devel] [RFC PATCH 00/20] Kemari for KVM v0.1 Dor Laor
2010-04-22 10:35   ` Yoshiaki Tamura
2010-04-22 11:36     ` Takuya Yoshikawa
2010-04-22 12:35       ` Yoshiaki Tamura
2010-04-22 12:19     ` Dor Laor
2010-04-22 13:16       ` Yoshiaki Tamura
2010-04-22 20:33         ` Anthony Liguori
2010-04-23  1:53           ` Yoshiaki Tamura
2010-04-23 13:20             ` Anthony Liguori
2010-04-26 10:44               ` Yoshiaki Tamura
2010-04-22 20:38         ` Dor Laor
2010-04-23  5:17           ` Yoshiaki Tamura
2010-04-23  7:36             ` Fernando Luis Vázquez Cao
2010-04-25 21:52               ` Dor Laor
2010-04-22 16:15     ` Jamie Lokier
2010-04-23  0:20       ` Yoshiaki Tamura
2010-04-23 15:07         ` Jamie Lokier
2010-04-22 19:42 ` Anthony Liguori
2010-04-23  0:45   ` Yoshiaki Tamura
2010-04-23 13:10     ` Anthony Liguori
2010-04-23 13:24 ` Avi Kivity
2010-04-26 10:44   ` Yoshiaki Tamura

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4BD0A3F4.5050708@linux.vnet.ibm.com \
    --to=aliguori@linux.vnet.ibm.com \
    --cc=aliguori@us.ibm.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=ohmura.kei@lab.ntt.co.jp \
    --cc=qemu-devel@nongnu.org \
    --cc=tamura.yoshiaki@lab.ntt.co.jp \
    --cc=yoshikawa.takuya@oss.ntt.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox