All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Kiszka <jan.kiszka@siemens.com>
To: Liran Schour <lirans@il.ibm.com>
Cc: qemu-devel@nongnu.org
Subject: [Qemu-devel] Re: [PATCH 2/4] Tranfer dirty blocks during iterative phase
Date: Tue, 12 Jan 2010 12:50:46 +0100	[thread overview]
Message-ID: <4B4C6216.2090606@siemens.com> (raw)
In-Reply-To: <12632848343008-git-send-email-lirans@il.ibm.com>

Liran Schour wrote:
> Start transfer dirty blocks during the iterative stage. That will
> reduce the time that the guest will be suspended
> 
> Signed-off-by: Liran Schour <lirans@il.ibm.com>
> ---
>  block-migration.c |  158 +++++++++++++++++++++++++++++++++++++++--------------
>  1 files changed, 116 insertions(+), 42 deletions(-)
> 
> diff --git a/block-migration.c b/block-migration.c
> index 6957909..90c84b1 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -29,6 +29,7 @@
>  #define MAX_BLOCKS_READ 10000
>  #define BLOCKS_READ_CHANGE 100
>  #define INITIAL_BLOCKS_READ 100
> +#define MAX_DIRTY_ITERATIONS 100
>  
>  //#define DEBUG_BLK_MIGRATION
>  
> @@ -45,6 +46,7 @@ typedef struct BlkMigDevState {
>      int bulk_completed;
>      int shared_base;
>      int64_t cur_sector;
> +    int64_t cur_dirty;
>      int64_t completed_sectors;
>      int64_t total_sectors;
>      int64_t dirty;
> @@ -73,6 +75,7 @@ typedef struct BlkMigState {
>      int64_t total_sector_sum;
>      int prev_progress;                                     
>      int bulk_completed;
> +    int dirty_iterations;
>  } BlkMigState;
>  
>  static BlkMigState block_mig_state;
> @@ -221,6 +224,7 @@ static void init_blk_migration(Monitor *mon, QEMUFile *f)
>      block_mig_state.total_sector_sum = 0;
>      block_mig_state.prev_progress = -1;
>      block_mig_state.bulk_completed = 0;
> +    block_mig_state.dirty_iterations = 0;
>  
>      for (bs = bdrv_first; bs != NULL; bs = bs->next) {
>          if (bs->type == BDRV_TYPE_HD) {
> @@ -285,39 +289,88 @@ static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
>      return ret;
>  }
>  
> -#define MAX_NUM_BLOCKS 4
> -
> -static void blk_mig_save_dirty_blocks(Monitor *mon, QEMUFile *f)
> +static void blk_mig_reset_dirty_curser(void)

My translation help makes me believe that 'curser' is not what you
intended to call this thing. :)

>  {
>      BlkMigDevState *bmds;
> -    BlkMigBlock blk;
> -    int64_t sector;
> +    
> +    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> +        bmds->cur_dirty = 0;
> +    }
> +}
>  
> -    blk.buf = qemu_malloc(BLOCK_SIZE);
> +static int mig_save_device_dirty(Monitor *mon, QEMUFile *f, 
> +                                 BlkMigDevState *bmds, int is_async)
> +{   
> +    BlkMigBlock *blk;
> +    int64_t total_sectors = bmds->total_sectors;
> +    int64_t sector;
> +    int nr_sectors;
>  
> -    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> -        for (sector = 0; sector < bmds->cur_sector;) {
> -            if (bdrv_get_dirty(bmds->bs, sector)) {
> -                if (bdrv_read(bmds->bs, sector, blk.buf,
> -                              BDRV_SECTORS_PER_DIRTY_CHUNK) < 0) {
> -                    monitor_printf(mon, "Error reading sector %" PRId64 "\n",
> -                                   sector);
> -                    qemu_file_set_error(f);
> -                    qemu_free(blk.buf);
> -                    return;
> +    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
> +        if (bdrv_get_dirty(bmds->bs, sector)) {
> +            
> +            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
> +                nr_sectors = total_sectors - sector;
> +            } else {
> +                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
> +            }
> +            blk = qemu_malloc(sizeof(BlkMigBlock));
> +            blk->buf = qemu_malloc(BLOCK_SIZE);
> +            blk->bmds = bmds;
> +            blk->sector = sector;
> +            
> +            if(is_async) {
> +                blk->iov.iov_base = blk->buf;
> +                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
> +                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
> +                
> +                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
> +                                            nr_sectors, blk_mig_read_cb, blk);
> +                if (!blk->aiocb) {
> +                    goto error;
>                  }
> -                blk.bmds = bmds;
> -                blk.sector = sector;
> -                blk_send(f, &blk);
> -
> -                bdrv_reset_dirty(bmds->bs, sector,
> -                                 BDRV_SECTORS_PER_DIRTY_CHUNK);
> +                block_mig_state.submitted++;
> +            } else {
> +                if (bdrv_read(bmds->bs, sector, blk->buf,
> +                              nr_sectors) < 0) {
> +                    goto error;
> +                }
> +                blk_send(f, blk);
> +            
> +                qemu_free(blk->buf);
> +                qemu_free(blk);
>              }
> -            sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
> +                
> +            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
> +            break;
>          }
> +        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
> +        bmds->cur_dirty = sector;
>      }
> +    
> +    return (bmds->cur_dirty >= bmds->total_sectors);
> +
> +    error:

Please do not indent the label.

> +    monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
> +    qemu_file_set_error(f);
> +    qemu_free(blk->buf);
> +    qemu_free(blk);
> +    return 0;
> +}
> +
> +static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
> +{
> +    BlkMigDevState *bmds;
> +    int ret = 0;
>  
> -    qemu_free(blk.buf);
> +    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> +        if(mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
> +            ret = 1;
> +            break;
> +        } 
> +    }
> +    
> +    return ret;
>  }
>  
>  static void flush_blks(QEMUFile* f)
> @@ -386,6 +439,8 @@ static void blk_mig_cleanup(Monitor *mon)
>  
>  static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>  {
> +    int dirty_iteration = 0;
> +    
>      dprintf("Enter save live stage %d submitted %d transferred %d\n",
>              stage, block_mig_state.submitted, block_mig_state.transferred);
>  
> @@ -413,29 +468,48 @@ static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>          blk_mig_cleanup(mon);
>          return 0;
>      }
> -
> -    /* control the rate of transfer */
> -    while ((block_mig_state.submitted +
> -            block_mig_state.read_done) * BLOCK_SIZE <
> -           qemu_file_get_rate_limit(f)) {
> -        if (blk_mig_save_bulked_block(mon, f) == 0) {
> -            /* finish saving bulk on all devices */
> -            block_mig_state.bulk_completed = 1;
> -            break;
> +    
> +    blk_mig_reset_dirty_curser();
> +    
> +    if(stage == 2) {
> +        /* control the rate of transfer */
> +        while ((block_mig_state.submitted +
> +                block_mig_state.read_done) * BLOCK_SIZE <
> +               qemu_file_get_rate_limit(f)) {
> +            if (block_mig_state.bulk_completed == 0) {
> +                /* first finish the bulk phase */
> +                if (blk_mig_save_bulked_block(mon, f) == 0) {
> +                    /* finish saving bulk on all devices */
> +                    block_mig_state.bulk_completed = 1;
> +                }
> +            } else if (block_mig_state.dirty_iterations < MAX_DIRTY_ITERATIONS) {
> +                if (dirty_iteration == 0) {
> +                    /* increment dirty iteration only once per round */
> +                    dirty_iteration = 1;
> +                    block_mig_state.dirty_iterations++;
> +                }
> +                if (blk_mig_save_dirty_block(mon, f, 1) == 0) {
> +                    /* no more dirty blocks */
> +                    break;
> +                }
> +            } else {
> +                /* if we got here stop the loop */
> +                break;
> +            }

I do not yet understand the purpose and effect of the iteration limit.
Can you explain it?

> +        }
> +        
> +        flush_blks(f);
> +        
> +        if (qemu_file_has_error(f)) {
> +            blk_mig_cleanup(mon);
> +            return 0;
>          }
>      }
> -
> -    flush_blks(f);
> -
> -    if (qemu_file_has_error(f)) {
> -        blk_mig_cleanup(mon);
> -        return 0;
> -    }
> -
> +    
>      if (stage == 3) {
>          /* we now for sure that save bulk is completed */
>  
> -        blk_mig_save_dirty_blocks(mon, f);
> +        while(blk_mig_save_dirty_block(mon, f, 0) != 0);

Coding style: please put a space after the keyword, i.e. while (...

>          blk_mig_cleanup(mon);
>  
>          /* report completion */

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

  parent reply	other threads:[~2010-01-12 11:52 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-01-12  8:27 [Qemu-devel] [PATCH 0/4] Reduce down time during migration without shared storage Liran Schour
2010-01-12  8:27 ` [Qemu-devel] [PATCH 1/4] Remove unused code Liran Schour
2010-01-12  8:27   ` [Qemu-devel] [PATCH 2/4] Tranfer dirty blocks during iterative phase Liran Schour
2010-01-12  8:27     ` [Qemu-devel] [PATCH 3/4] Count dirty blocks and expose an API to get dirty count Liran Schour
2010-01-12  8:27       ` [Qemu-devel] [PATCH 4/4] Try not to exceed max downtime on stage3 Liran Schour
2010-01-12  9:52         ` Pierre Riteau
2010-01-12 11:56           ` Liran Schour
2010-01-12 11:51         ` [Qemu-devel] " Jan Kiszka
2010-01-12 15:07           ` Anthony Liguori
2010-01-12 15:07           ` Liran Schour
2010-01-12 11:50       ` [Qemu-devel] Re: [PATCH 3/4] Count dirty blocks and expose an API to get dirty count Jan Kiszka
2010-01-12 11:50     ` Jan Kiszka [this message]
2010-01-12 11:50   ` [Qemu-devel] Re: [PATCH 1/4] Remove unused code Jan Kiszka

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B4C6216.2090606@siemens.com \
    --to=jan.kiszka@siemens.com \
    --cc=lirans@il.ibm.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.