All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: Fam Zheng <famz@redhat.com>
Cc: kwolf@redhat.com, qemu-devel@nongnu.org, stefanha@redhat.com
Subject: Re: [Qemu-devel] [PATCH 3/3] block: per caller dirty bitmap
Date: Wed, 30 Oct 2013 08:49:36 +0100	[thread overview]
Message-ID: <5270BA10.2090904@redhat.com> (raw)
In-Reply-To: <1383116892-11047-4-git-send-email-famz@redhat.com>

Il 30/10/2013 08:08, Fam Zheng ha scritto:
> Previously a BlockDriverState had only one dirty bitmap, so only one
> caller (e.g. a block job) could keep track of writes. This changes the
> dirty bitmap to a list and creates one HBitmap for each caller. The
> lifecycle of each bitmap is managed with these new functions:
> 
>     bdrv_create_dirty_bitmap
>     bdrv_release_dirty_bitmap
> 
> In place of this one:
> 
>     bdrv_set_dirty_tracking
> 
> An HBitmap pointer argument is added to these functions, since each
> caller has its own dirty bitmap:
> 
>     bdrv_get_dirty
>     bdrv_dirty_iter_init
>     bdrv_get_dirty_count
> 
> The prototypes of bdrv_set_dirty and bdrv_reset_dirty are unchanged, but
> internally they now walk the list of all dirty bitmaps and update each one.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block-migration.c         | 22 ++++++++++----
>  block.c                   | 74 ++++++++++++++++++++++++++---------------------
>  block/mirror.c            | 23 ++++++++-------
>  block/qapi.c              |  8 -----
>  include/block/block.h     | 11 ++++---
>  include/block/block_int.h |  2 +-
>  6 files changed, 78 insertions(+), 62 deletions(-)
> 
> diff --git a/block-migration.c b/block-migration.c
> index daf9ec1..08df056 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -58,6 +58,7 @@ typedef struct BlkMigDevState {
>      /* Protected by block migration lock.  */
>      unsigned long *aio_bitmap;
>      int64_t completed_sectors;
> +    HBitmap *dirty_bitmap;
>  } BlkMigDevState;
>  
>  typedef struct BlkMigBlock {
> @@ -309,12 +310,21 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>  
>  /* Called with iothread lock taken.  */
>  
> -static void set_dirty_tracking(int enable)
> +static void set_dirty_tracking(void)
>  {
>      BlkMigDevState *bmds;
>  
>      QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> -        bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
> +        bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE);
> +    }
> +}
> +
> +static void unset_dirty_tracking(void)
> +{
> +    BlkMigDevState *bmds;
> +
> +    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> +        bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
>      }
>  }
>  
> @@ -432,7 +442,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
>          } else {
>              blk_mig_unlock();
>          }
> -        if (bdrv_get_dirty(bmds->bs, sector)) {
> +        if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
>  
>              if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
>                  nr_sectors = total_sectors - sector;
> @@ -554,7 +564,7 @@ static int64_t get_remaining_dirty(void)
>      int64_t dirty = 0;
>  
>      QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> -        dirty += bdrv_get_dirty_count(bmds->bs);
> +        dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
>      }
>  
>      return dirty << BDRV_SECTOR_BITS;
> @@ -569,7 +579,7 @@ static void blk_mig_cleanup(void)
>  
>      bdrv_drain_all();
>  
> -    set_dirty_tracking(0);
> +    unset_dirty_tracking();
>  
>      blk_mig_lock();
>      while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
> @@ -604,7 +614,7 @@ static int block_save_setup(QEMUFile *f, void *opaque)
>      init_blk_migration(f);
>  
>      /* start track dirty blocks */
> -    set_dirty_tracking(1);
> +    set_dirty_tracking();
>      qemu_mutex_unlock_iothread();
>  
>      ret = flush_blks(f);
> diff --git a/block.c b/block.c
> index fd05a80..9975428 100644
> --- a/block.c
> +++ b/block.c
> @@ -323,6 +323,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>      BlockDriverState *bs;
>  
>      bs = g_malloc0(sizeof(BlockDriverState));
> +    QLIST_INIT(&bs->dirty_bitmaps);
>      pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
>      if (device_name[0] != '\0') {
>          QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
> @@ -1614,7 +1615,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
>      bs_dest->iostatus           = bs_src->iostatus;
>  
>      /* dirty bitmap */
> -    bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;
> +    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
>  
>      /* reference count */
>      bs_dest->refcnt             = bs_src->refcnt;
> @@ -1647,7 +1648,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
>  
>      /* bs_new must be anonymous and shouldn't have anything fancy enabled */
>      assert(bs_new->device_name[0] == '\0');
> -    assert(bs_new->dirty_bitmap == NULL);
> +    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
>      assert(bs_new->job == NULL);
>      assert(bs_new->dev == NULL);
>      assert(bs_new->in_use == 0);
> @@ -1708,6 +1709,7 @@ static void bdrv_delete(BlockDriverState *bs)
>      assert(!bs->job);
>      assert(!bs->in_use);
>      assert(!bs->refcnt);
> +    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
>  
>      bdrv_close(bs);
>  
> @@ -2784,9 +2786,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
>          ret = bdrv_co_flush(bs);
>      }
>  
> -    if (bs->dirty_bitmap) {
>          bdrv_set_dirty(bs, sector_num, nb_sectors);
> -    }
>  
>      if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
>          bs->wr_highest_sector = sector_num + nb_sectors - 1;
> @@ -3321,7 +3321,7 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
>      if (bdrv_check_request(bs, sector_num, nb_sectors))
>          return -EIO;
>  
> -    assert(!bs->dirty_bitmap);
> +    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
>  
>      return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
>  }
> @@ -4181,9 +4181,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
>          return -EROFS;
>      }
>  
> -    if (bs->dirty_bitmap) {
> -        bdrv_reset_dirty(bs, sector_num, nb_sectors);
> -    }
> +    bdrv_reset_dirty(bs, sector_num, nb_sectors);
>  
>      /* Do nothing if disabled.  */
>      if (!(bs->open_flags & BDRV_O_UNMAP)) {
> @@ -4345,58 +4343,68 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
>      return true;
>  }
>  
> -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
> +HBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
>  {
>      int64_t bitmap_size;
> +    HBitmap *bitmap;
>  
>      assert((granularity & (granularity - 1)) == 0);
>  
> -    if (granularity) {
> -        granularity >>= BDRV_SECTOR_BITS;
> -        assert(!bs->dirty_bitmap);
> -        bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
> -        bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
> -    } else {
> -        if (bs->dirty_bitmap) {
> -            hbitmap_free(bs->dirty_bitmap);
> -            bs->dirty_bitmap = NULL;
> +    granularity >>= BDRV_SECTOR_BITS;
> +    assert(granularity);
> +    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
> +    bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
> +    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
> +    return bitmap;
> +}
> +
> +void bdrv_release_dirty_bitmap(BlockDriverState *bs, HBitmap *bitmap)
> +{
> +    HBitmap *bm, *next;
> +    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
> +        if (bm == bitmap) {
> +            QLIST_REMOVE(bitmap, list);
> +            hbitmap_free(bitmap);
> +            return;
>          }
>      }
>  }
>  
> -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
> +int bdrv_get_dirty(BlockDriverState *bs, HBitmap *bitmap, int64_t sector)
>  {
> -    if (bs->dirty_bitmap) {
> -        return hbitmap_get(bs->dirty_bitmap, sector);
> +    if (bitmap) {
> +        return hbitmap_get(bitmap, sector);
>      } else {
>          return 0;
>      }
>  }
>  
> -void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
> +void bdrv_dirty_iter_init(BlockDriverState *bs,
> +                          HBitmap *bitmap, HBitmapIter *hbi)
>  {
> -    hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
> +    hbitmap_iter_init(hbi, bitmap, 0);
>  }
>  
>  void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
>                      int nr_sectors)
>  {
> -    hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
> +    HBitmap *bitmap;
> +    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
> +        hbitmap_set(bitmap, cur_sector, nr_sectors);
> +    }
>  }
>  
> -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
> -                      int nr_sectors)
> +void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
>  {
> -    hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
> +    HBitmap *bitmap;
> +    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
> +        hbitmap_reset(bitmap, cur_sector, nr_sectors);
> +    }
>  }

I think callers outside block.c should only call
hbitmap_set/hbitmap_reset on their own bitmap; resetting is typically
done before processing sectors and setting after an error, and both of
these are private to each task.

Thus you should probably add a fourth patch which makes
bdrv_(re)set_dirty static and removes
bdrv_get_dirty/bdrv_dirty_iter_init/bdrv_get_dirty_count.

Otherwise, this is a very nice patch.  Great work!

Paolo

> -int64_t bdrv_get_dirty_count(BlockDriverState *bs)
> +int64_t bdrv_get_dirty_count(BlockDriverState *bs, HBitmap *bitmap)
>  {
> -    if (bs->dirty_bitmap) {
> -        return hbitmap_count(bs->dirty_bitmap);
> -    } else {
> -        return 0;
> -    }
> +    return hbitmap_count(bitmap);
>  }
>  
>  /* Get a reference to bs */
> diff --git a/block/mirror.c b/block/mirror.c
> index 7b95acf..65e274b 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -39,6 +39,7 @@ typedef struct MirrorBlockJob {
>      int64_t granularity;
>      size_t buf_size;
>      unsigned long *cow_bitmap;
> +    HBitmap *dirty_bitmap;
>      HBitmapIter hbi;
>      uint8_t *buf;
>      QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
> @@ -145,9 +146,10 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
>  
>      s->sector_num = hbitmap_iter_next(&s->hbi);
>      if (s->sector_num < 0) {
> -        bdrv_dirty_iter_init(source, &s->hbi);
> +        bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
>          s->sector_num = hbitmap_iter_next(&s->hbi);
> -        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
> +        trace_mirror_restart_iter(s,
> +                                  bdrv_get_dirty_count(source, s->dirty_bitmap));
>          assert(s->sector_num >= 0);
>      }
>  
> @@ -183,7 +185,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
>      do {
>          int added_sectors, added_chunks;
>  
> -        if (!bdrv_get_dirty(source, next_sector) ||
> +        if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
>              test_bit(next_chunk, s->in_flight_bitmap)) {
>              assert(nb_sectors > 0);
>              break;
> @@ -249,7 +251,8 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
>          /* Advance the HBitmapIter in parallel, so that we do not examine
>           * the same sector twice.
>           */
> -        if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
> +        if (next_sector > hbitmap_next_sector
> +            && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
>              hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
>          }
>  
> @@ -355,7 +358,7 @@ static void coroutine_fn mirror_run(void *opaque)
>          }
>      }
>  
> -    bdrv_dirty_iter_init(bs, &s->hbi);
> +    bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
>      last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>      for (;;) {
>          uint64_t delay_ns;
> @@ -367,7 +370,7 @@ static void coroutine_fn mirror_run(void *opaque)
>              goto immediate_exit;
>          }
>  
> -        cnt = bdrv_get_dirty_count(bs);
> +        cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
>  
>          /* Note that even when no rate limit is applied we need to yield
>           * periodically with no pending I/O so that qemu_aio_flush() returns.
> @@ -409,7 +412,7 @@ static void coroutine_fn mirror_run(void *opaque)
>  
>                  should_complete = s->should_complete ||
>                      block_job_is_cancelled(&s->common);
> -                cnt = bdrv_get_dirty_count(bs);
> +                cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
>              }
>          }
>  
> @@ -424,7 +427,7 @@ static void coroutine_fn mirror_run(void *opaque)
>               */
>              trace_mirror_before_drain(s, cnt);
>              bdrv_drain_all();
> -            cnt = bdrv_get_dirty_count(bs);
> +            cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
>          }
>  
>          ret = 0;
> @@ -471,7 +474,7 @@ immediate_exit:
>      qemu_vfree(s->buf);
>      g_free(s->cow_bitmap);
>      g_free(s->in_flight_bitmap);
> -    bdrv_set_dirty_tracking(bs, 0);
> +    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
>      bdrv_iostatus_disable(s->target);
>      if (s->should_complete && ret == 0) {
>          if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> @@ -575,7 +578,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>      s->granularity = granularity;
>      s->buf_size = MAX(buf_size, granularity);
>  
> -    bdrv_set_dirty_tracking(bs, granularity);
> +    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity);
>      bdrv_set_enable_write_cache(s->target, true);
>      bdrv_set_on_error(s->target, on_target_error, on_target_error);
>      bdrv_iostatus_enable(s->target);
> diff --git a/block/qapi.c b/block/qapi.c
> index 5880b3e..6b0cdcf 100644
> --- a/block/qapi.c
> +++ b/block/qapi.c
> @@ -204,14 +204,6 @@ void bdrv_query_info(BlockDriverState *bs,
>          info->io_status = bs->iostatus;
>      }
>  
> -    if (bs->dirty_bitmap) {
> -        info->has_dirty = true;
> -        info->dirty = g_malloc0(sizeof(*info->dirty));
> -        info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
> -        info->dirty->granularity =
> -         ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
> -    }
> -
>      if (bs->drv) {
>          info->has_inserted = true;
>          info->inserted = g_malloc0(sizeof(*info->inserted));
> diff --git a/include/block/block.h b/include/block/block.h
> index 3560deb..bfc494a 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -388,12 +388,15 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size);
>  bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
>  
>  struct HBitmapIter;
> -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
> -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
> +typedef struct HBitmap HBitmap;
> +HBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity);
> +void bdrv_release_dirty_bitmap(BlockDriverState *bs, HBitmap *bitmap);
> +int bdrv_get_dirty(BlockDriverState *bs, HBitmap *bitmap, int64_t sector);
>  void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
>  void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
> -void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
> -int64_t bdrv_get_dirty_count(BlockDriverState *bs);
> +void bdrv_dirty_iter_init(BlockDriverState *bs,
> +                          HBitmap *bitmap, struct HBitmapIter *hbi);
> +int64_t bdrv_get_dirty_count(BlockDriverState *bs, HBitmap *bitmap);
>  
>  void bdrv_enable_copy_on_read(BlockDriverState *bs);
>  void bdrv_disable_copy_on_read(BlockDriverState *bs);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index a48731d..1dc5bb7 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -298,7 +298,7 @@ struct BlockDriverState {
>      bool iostatus_enabled;
>      BlockDeviceIoStatus iostatus;
>      char device_name[32];
> -    HBitmap *dirty_bitmap;
> +    QLIST_HEAD(, HBitmap) dirty_bitmaps;
>      int refcnt;
>      int in_use; /* users other than guest access, eg. block migration */
>      QTAILQ_ENTRY(BlockDriverState) list;
> 

  parent reply	other threads:[~2013-10-30  7:49 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-30  7:08 [Qemu-devel] [PATCH 0/3] block: per caller dirty bitmap Fam Zheng
2013-10-30  7:08 ` [Qemu-devel] [PATCH 1/3] HBitmap: move struct HBitmap to header Fam Zheng
2013-10-30  7:08 ` [Qemu-devel] [PATCH 2/3] HBitmap: add QLIST_ENTRY to HBitmap Fam Zheng
2013-10-30  7:49   ` Paolo Bonzini
2013-10-30  7:08 ` [Qemu-devel] [PATCH 3/3] block: per caller dirty bitmap Fam Zheng
2013-10-30  7:26   ` Fam Zheng
2013-10-30  7:49   ` Paolo Bonzini [this message]
2013-11-04  6:59     ` Fam Zheng
2013-11-04 10:34       ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5270BA10.2090904@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=famz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.