* [Qemu-devel] [PATCH v8 0/2] mirror: Improve zero write and discard
@ 2015-12-24  3:15 Fam Zheng
  2015-12-24  3:15 ` [Qemu-devel] [PATCH v8 1/2] mirror: Rewrite mirror_iteration Fam Zheng
  2015-12-24  3:15 ` [Qemu-devel] [PATCH v8 2/2] mirror: Add mirror_wait_for_io Fam Zheng
  0 siblings, 2 replies; 5+ messages in thread
From: Fam Zheng @ 2015-12-24  3:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, pbonzini, Jeff Cody, qemu-block, mreitz

v8: Rebase onto master (didn't pick up Max's rev-by due to non-trivial code
    change).

    The conflict is around the lines about "max_iov" and "IOV_MAX" removed by
    commit 3515727f3, which also reveals that this series forgot to carry that
    check over. So this revision adds it back:
    - Two new fields in MirrorBlockJob are added to cache size info:
      target_cluster_sectors and max_iov.
    - mirror_cow_align compares nb_chunks with max_iov, and cuts the tail
      appropriately.
    - The orphan trace point is removed from trace-events.

Patch 1 rewrites mirror_iteration. Patch 2 is a small DRY clean-up.

The main benefit is that copying unallocated sectors (both zeroed and
discarded) no longer goes through the iov setup loop, which they don't need.
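
In other words, where the old code always built a qiov and only then chose
between readv, write_zeroes and discard, the new mirror_iteration classifies
each extent up front. Roughly (condensed from patch 1; the target cluster
alignment check and the chunk accounting are omitted here):

    ret = bdrv_get_block_status_above(source, NULL, sector_num,
                                      nb_chunks * sectors_per_chunk,
                                      &io_sectors);
    if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
        /* Zeroed or discarded extent: no buffer or qiov is needed. */
        mirror_do_zero_or_discard(s, sector_num, io_sectors,
                                  !(ret & BDRV_BLOCK_ZERO) /* discard */);
    } else {
        /* Allocated data: mirror_do_read sets up the qiov and reads. */
        io_sectors = mirror_do_read(s, sector_num, io_sectors);
    }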


Fam Zheng (2):
  mirror: Rewrite mirror_iteration
  mirror: Add mirror_wait_for_io

 block/mirror.c | 362 +++++++++++++++++++++++++++++++++++----------------------
 trace-events   |   1 -
 2 files changed, 222 insertions(+), 141 deletions(-)

-- 
2.4.3


* [Qemu-devel] [PATCH v8 1/2] mirror: Rewrite mirror_iteration
  2015-12-24  3:15 [Qemu-devel] [PATCH v8 0/2] mirror: Improve zero write and discard Fam Zheng
@ 2015-12-24  3:15 ` Fam Zheng
  2016-01-04 19:27   ` Max Reitz
  2015-12-24  3:15 ` [Qemu-devel] [PATCH v8 2/2] mirror: Add mirror_wait_for_io Fam Zheng
  1 sibling, 1 reply; 5+ messages in thread
From: Fam Zheng @ 2015-12-24  3:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, pbonzini, Jeff Cody, qemu-block, mreitz

The "pnum < nb_sectors" condition in deciding whether to actually copy
data is unnecessarily strict, and the qiov initialization is
unnecessary for bdrv_aio_write_zeroes and bdrv_aio_discard.

Rewrite mirror_iteration to fix both flaws.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/mirror.c | 344 +++++++++++++++++++++++++++++++++++----------------------
 trace-events   |   1 -
 2 files changed, 213 insertions(+), 132 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index f201f2b..0081c2e 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -46,7 +46,6 @@ typedef struct MirrorBlockJob {
     BlockdevOnError on_source_error, on_target_error;
     bool synced;
     bool should_complete;
-    int64_t sector_num;
     int64_t granularity;
     size_t buf_size;
     int64_t bdev_length;
@@ -63,6 +62,8 @@ typedef struct MirrorBlockJob {
     int ret;
     bool unmap;
     bool waiting_for_io;
+    int target_cluster_sectors;
+    int max_iov;
 } MirrorBlockJob;
 
 typedef struct MirrorOp {
@@ -158,115 +159,90 @@ static void mirror_read_complete(void *opaque, int ret)
                     mirror_write_complete, op);
 }
 
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+/* Round sector_num and/or nb_sectors to the target cluster if COW is needed,
+ * and return the offset of the adjusted tail sector against the original. */
+static int mirror_cow_align(MirrorBlockJob *s,
+                            int64_t *sector_num,
+                            int *nb_sectors)
+{
+    bool head_need_cow, tail_need_cow;
+    int diff = 0;
+    int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
+
+    head_need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap);
+    tail_need_cow = !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
+                              s->cow_bitmap);
+    if (head_need_cow || tail_need_cow) {
+        int64_t align_sector_num;
+        int align_nb_sectors;
+        bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
+                               &align_sector_num, &align_nb_sectors);
+        if (tail_need_cow) {
+            diff = align_sector_num + align_nb_sectors
+                   - (*sector_num + *nb_sectors);
+            assert(diff >= 0);
+            *nb_sectors += diff;
+        }
+        if (head_need_cow) {
+            int d = *sector_num - align_sector_num;
+            assert(d >= 0);
+            *sector_num = align_sector_num;
+            *nb_sectors += d;
+        }
+    }
+
+    /* If the resulting chunk count is more than max_iov, we have to shrink
+     * it under the alignment restriction. */
+    if (*nb_sectors / chunk_sectors > s->max_iov) {
+        int shrink = *nb_sectors / chunk_sectors - s->max_iov;
+        if (tail_need_cow) {
+            shrink -= shrink % s->target_cluster_sectors;
+        }
+        diff -= shrink;
+        *nb_sectors -= shrink;
+    }
+
+    assert(*nb_sectors > 0);
+    return diff;
+}
+
+/* Submit async read while handling COW.
+ * Returns: nb_sectors if no alignment is necessary, or
+ *          (new_end - sector_num) if tail is rounded up or down due to
+ *          alignment or buffer limit.
+ */
+static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
+                          int nb_sectors)
 {
     BlockDriverState *source = s->common.bs;
-    int nb_sectors, sectors_per_chunk, nb_chunks, max_iov;
-    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
-    uint64_t delay_ns = 0;
+    int sectors_per_chunk, nb_chunks;
+    int ret = nb_sectors;
     MirrorOp *op;
-    int pnum;
-    int64_t ret;
 
-    max_iov = MIN(source->bl.max_iov, s->target->bl.max_iov);
-
-    s->sector_num = hbitmap_iter_next(&s->hbi);
-    if (s->sector_num < 0) {
-        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
-        s->sector_num = hbitmap_iter_next(&s->hbi);
-        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
-        assert(s->sector_num >= 0);
-    }
-
-    hbitmap_next_sector = s->sector_num;
-    sector_num = s->sector_num;
     sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
-    end = s->bdev_length / BDRV_SECTOR_SIZE;
 
-    /* Extend the QEMUIOVector to include all adjacent blocks that will
-     * be copied in this operation.
-     *
-     * We have to do this if we have no backing file yet in the destination,
-     * and the cluster size is very large.  Then we need to do COW ourselves.
-     * The first time a cluster is copied, copy it entirely.  Note that,
-     * because both the granularity and the cluster size are powers of two,
-     * the number of sectors to copy cannot exceed one cluster.
-     *
-     * We also want to extend the QEMUIOVector to include more adjacent
-     * dirty blocks if possible, to limit the number of I/O operations and
-     * run efficiently even with a small granularity.
-     */
-    nb_chunks = 0;
-    nb_sectors = 0;
-    next_sector = sector_num;
-    next_chunk = sector_num / sectors_per_chunk;
+    /* We can only handle as much as buf_size at a time. */
+    nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
+    assert(nb_sectors);
 
-    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
-    while (test_bit(next_chunk, s->in_flight_bitmap)) {
+    if (s->cow_bitmap) {
+        ret += mirror_cow_align(s, &sector_num, &nb_sectors);
+    }
+    assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size);
+    /* The sector range must be aligned to the granularity because:
+     * 1) the caller passes in aligned values;
+     * 2) mirror_cow_align is used only when the target cluster is larger. */
+    assert(!(nb_sectors % sectors_per_chunk));
+    assert(!(sector_num % sectors_per_chunk));
+    nb_chunks = nb_sectors / sectors_per_chunk;
+
+    while (s->buf_free_count < nb_chunks) {
         trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
         s->waiting_for_io = true;
         qemu_coroutine_yield();
         s->waiting_for_io = false;
     }
 
-    do {
-        int added_sectors, added_chunks;
-
-        if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
-            test_bit(next_chunk, s->in_flight_bitmap)) {
-            assert(nb_sectors > 0);
-            break;
-        }
-
-        added_sectors = sectors_per_chunk;
-        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
-            bdrv_round_to_clusters(s->target,
-                                   next_sector, added_sectors,
-                                   &next_sector, &added_sectors);
-
-            /* On the first iteration, the rounding may make us copy
-             * sectors before the first dirty one.
-             */
-            if (next_sector < sector_num) {
-                assert(nb_sectors == 0);
-                sector_num = next_sector;
-                next_chunk = next_sector / sectors_per_chunk;
-            }
-        }
-
-        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
-        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
-
-        /* When doing COW, it may happen that there is not enough space for
-         * a full cluster.  Wait if that is the case.
-         */
-        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
-            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
-            s->waiting_for_io = true;
-            qemu_coroutine_yield();
-            s->waiting_for_io = false;
-        }
-        if (s->buf_free_count < nb_chunks + added_chunks) {
-            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
-            break;
-        }
-        if (max_iov < nb_chunks + added_chunks) {
-            trace_mirror_break_iov_max(s, nb_chunks, added_chunks);
-            break;
-        }
-
-        /* We have enough free space to copy these sectors.  */
-        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
-
-        nb_sectors += added_sectors;
-        nb_chunks += added_chunks;
-        next_sector += added_sectors;
-        next_chunk += added_chunks;
-        if (!s->synced && s->common.speed) {
-            delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
-        }
-    } while (delay_ns == 0 && next_sector < end);
-
     /* Allocate a MirrorOp that is used as an AIO callback.  */
     op = g_new(MirrorOp, 1);
     op->s = s;
@@ -277,47 +253,152 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
      * from s->buf_free.
      */
     qemu_iovec_init(&op->qiov, nb_chunks);
-    next_sector = sector_num;
     while (nb_chunks-- > 0) {
         MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
-        size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;
+        size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size;
 
         QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
         s->buf_free_count--;
         qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
-
-        /* Advance the HBitmapIter in parallel, so that we do not examine
-         * the same sector twice.
-         */
-        if (next_sector > hbitmap_next_sector
-            && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
-            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
-        }
-
-        next_sector += sectors_per_chunk;
     }
 
-    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);
-
     /* Copy the dirty cluster.  */
     s->in_flight++;
     s->sectors_in_flight += nb_sectors;
     trace_mirror_one_iteration(s, sector_num, nb_sectors);
 
-    ret = bdrv_get_block_status_above(source, NULL, sector_num,
-                                      nb_sectors, &pnum);
-    if (ret < 0 || pnum < nb_sectors ||
-            (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
-        bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
-                       mirror_read_complete, op);
-    } else if (ret & BDRV_BLOCK_ZERO) {
+    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+                   mirror_read_complete, op);
+    return ret;
+}
+
+static void mirror_do_zero_or_discard(MirrorBlockJob *s,
+                                      int64_t sector_num,
+                                      int nb_sectors,
+                                      bool is_discard)
+{
+    MirrorOp *op;
+
+    /* Allocate a MirrorOp that is used as an AIO callback. The qiov is
+     * zeroed, so the freeing in mirror_iteration_done is a nop. */
+    op = g_new0(MirrorOp, 1);
+    op->s = s;
+    op->sector_num = sector_num;
+    op->nb_sectors = nb_sectors;
+
+    s->in_flight++;
+    s->sectors_in_flight += nb_sectors;
+    if (is_discard) {
+        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
+                         mirror_write_complete, op);
+    } else {
         bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
                               s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                               mirror_write_complete, op);
-    } else {
-        assert(!(ret & BDRV_BLOCK_DATA));
-        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
-                         mirror_write_complete, op);
+    }
+}
+
+static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+    BlockDriverState *source = s->common.bs;
+    int64_t sector_num;
+    uint64_t delay_ns = 0;
+    /* At least the first dirty chunk is mirrored in one iteration. */
+    int nb_chunks = 1;
+    int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
+    int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+
+    sector_num = hbitmap_iter_next(&s->hbi);
+    if (sector_num < 0) {
+        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+        sector_num = hbitmap_iter_next(&s->hbi);
+        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
+        assert(sector_num >= 0);
+    }
+
+    /* Find the number of consecutive dirty chunks following the first dirty
+     * one, and wait for in-flight requests in them. */
+    while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
+        int64_t hbitmap_next;
+        int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
+        int64_t next_chunk = next_sector / sectors_per_chunk;
+        if (next_sector >= end ||
+            !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
+            break;
+        }
+        if (test_bit(next_chunk, s->in_flight_bitmap)) {
+            if (nb_chunks > 0) {
+                break;
+            }
+            trace_mirror_yield_in_flight(s, next_sector, s->in_flight);
+            s->waiting_for_io = true;
+            qemu_coroutine_yield();
+            s->waiting_for_io = false;
+            /* Now retry.  */
+        } else {
+            hbitmap_next = hbitmap_iter_next(&s->hbi);
+            assert(hbitmap_next == next_sector);
+            nb_chunks++;
+        }
+    }
+
+    /* Clear dirty bits before querying the block status, because
+     * calling bdrv_get_block_status_above could yield - if some blocks are
+     * marked dirty in this window, we need to know.
+     */
+    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
+                            nb_chunks * sectors_per_chunk);
+    bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
+    while (nb_chunks > 0 && sector_num < end) {
+        int ret;
+        int io_sectors;
+        enum MirrorMethod {
+            MIRROR_METHOD_COPY,
+            MIRROR_METHOD_ZERO,
+            MIRROR_METHOD_DISCARD
+        } mirror_method = MIRROR_METHOD_COPY;
+
+        assert(!(sector_num % sectors_per_chunk));
+        ret = bdrv_get_block_status_above(source, NULL, sector_num,
+                                          nb_chunks * sectors_per_chunk,
+                                          &io_sectors);
+        if (ret < 0) {
+            io_sectors = nb_chunks * sectors_per_chunk;
+        }
+
+        io_sectors -= io_sectors % sectors_per_chunk;
+        if (io_sectors < sectors_per_chunk) {
+            io_sectors = sectors_per_chunk;
+        } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
+            int64_t target_sector_num;
+            int target_nb_sectors;
+            bdrv_round_to_clusters(s->target, sector_num, io_sectors,
+                                   &target_sector_num, &target_nb_sectors);
+            if (target_sector_num == sector_num &&
+                target_nb_sectors == io_sectors) {
+                mirror_method = ret & BDRV_BLOCK_ZERO ?
+                                    MIRROR_METHOD_ZERO :
+                                    MIRROR_METHOD_DISCARD;
+            }
+        }
+
+        switch (mirror_method) {
+        case MIRROR_METHOD_COPY:
+            io_sectors = mirror_do_read(s, sector_num, io_sectors);
+            break;
+        case MIRROR_METHOD_ZERO:
+            mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
+            break;
+        case MIRROR_METHOD_DISCARD:
+            mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
+            break;
+        default:
+            abort();
+        }
+        assert(io_sectors);
+        sector_num += io_sectors;
+        nb_chunks -= io_sectors / sectors_per_chunk;
+        delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
     }
     return delay_ns;
 }
@@ -447,16 +528,17 @@ static void coroutine_fn mirror_run(void *opaque)
      */
     bdrv_get_backing_filename(s->target, backing_filename,
                               sizeof(backing_filename));
-    if (backing_filename[0] && !s->target->backing) {
-        ret = bdrv_get_info(s->target, &bdi);
-        if (ret < 0) {
-            goto immediate_exit;
-        }
-        if (s->granularity < bdi.cluster_size) {
-            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
-            s->cow_bitmap = bitmap_new(length);
-        }
+    ret = bdrv_get_info(s->target, &bdi);
+    if (ret < 0) {
+        goto immediate_exit;
     }
+    if (backing_filename[0] && !s->target->backing
+        && s->granularity < bdi.cluster_size) {
+        s->buf_size = MAX(s->buf_size, bdi.cluster_size);
+        s->cow_bitmap = bitmap_new(length);
+    }
+    s->target_cluster_sectors = bdi.cluster_size >> BDRV_SECTOR_BITS;
+    s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov);
 
     end = s->bdev_length / BDRV_SECTOR_SIZE;
     s->buf = qemu_try_blockalign(bs, s->buf_size);
diff --git a/trace-events b/trace-events
index 6f03638..597b140 100644
--- a/trace-events
+++ b/trace-events
@@ -95,7 +95,6 @@ mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirt
 mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
 mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
 mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
-mirror_break_iov_max(void *s, int nb_chunks, int added_chunks) "s %p requested chunks %d added_chunks %d"
 
 # block/backup.c
 backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"
-- 
2.4.3


* [Qemu-devel] [PATCH v8 2/2] mirror: Add mirror_wait_for_io
  2015-12-24  3:15 [Qemu-devel] [PATCH v8 0/2] mirror: Improve zero write and discard Fam Zheng
  2015-12-24  3:15 ` [Qemu-devel] [PATCH v8 1/2] mirror: Rewrite mirror_iteration Fam Zheng
@ 2015-12-24  3:15 ` Fam Zheng
  1 sibling, 0 replies; 5+ messages in thread
From: Fam Zheng @ 2015-12-24  3:15 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, pbonzini, Jeff Cody, qemu-block, mreitz

The three lines are duplicated a number of times now; refactor them into a
function.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block/mirror.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 0081c2e..07ad068 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -206,6 +206,14 @@ static int mirror_cow_align(MirrorBlockJob *s,
     return diff;
 }
 
+static inline void mirror_wait_for_io(MirrorBlockJob *s)
+{
+    assert(!s->waiting_for_io);
+    s->waiting_for_io = true;
+    qemu_coroutine_yield();
+    s->waiting_for_io = false;
+}
+
 /* Submit async read while handling COW.
  * Returns: nb_sectors if no alignment is necessary, or
  *          (new_end - sector_num) if tail is rounded up or down due to
@@ -238,9 +246,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
 
     while (s->buf_free_count < nb_chunks) {
         trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
-        s->waiting_for_io = true;
-        qemu_coroutine_yield();
-        s->waiting_for_io = false;
+        mirror_wait_for_io(s);
     }
 
     /* Allocate a MirrorOp that is used as an AIO callback.  */
@@ -331,9 +337,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
                 break;
             }
             trace_mirror_yield_in_flight(s, next_sector, s->in_flight);
-            s->waiting_for_io = true;
-            qemu_coroutine_yield();
-            s->waiting_for_io = false;
+            mirror_wait_for_io(s);
             /* Now retry.  */
         } else {
             hbitmap_next = hbitmap_iter_next(&s->hbi);
@@ -423,9 +427,7 @@ static void mirror_free_init(MirrorBlockJob *s)
 static void mirror_drain(MirrorBlockJob *s)
 {
     while (s->in_flight > 0) {
-        s->waiting_for_io = true;
-        qemu_coroutine_yield();
-        s->waiting_for_io = false;
+        mirror_wait_for_io(s);
     }
 }
 
@@ -613,9 +615,7 @@ static void coroutine_fn mirror_run(void *opaque)
             if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                 (cnt == 0 && s->in_flight > 0)) {
                 trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
-                s->waiting_for_io = true;
-                qemu_coroutine_yield();
-                s->waiting_for_io = false;
+                mirror_wait_for_io(s);
                 continue;
             } else if (cnt != 0) {
                 delay_ns = mirror_iteration(s);
-- 
2.4.3


* Re: [Qemu-devel] [PATCH v8 1/2] mirror: Rewrite mirror_iteration
  2015-12-24  3:15 ` [Qemu-devel] [PATCH v8 1/2] mirror: Rewrite mirror_iteration Fam Zheng
@ 2016-01-04 19:27   ` Max Reitz
  2016-01-05  8:18     ` Fam Zheng
  0 siblings, 1 reply; 5+ messages in thread
From: Max Reitz @ 2016-01-04 19:27 UTC (permalink / raw)
  To: Fam Zheng, qemu-devel; +Cc: Kevin Wolf, pbonzini, Jeff Cody, qemu-block


On 24.12.2015 04:15, Fam Zheng wrote:
> The "pnum < nb_sectors" condition in deciding whether to actually copy
> data is unnecessarily strict, and the qiov initialization is
> unnecessary for bdrv_aio_write_zeroes and bdrv_aio_discard.
> 
> Rewrite mirror_iteration to fix both flaws.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block/mirror.c | 344 +++++++++++++++++++++++++++++++++++----------------------
>  trace-events   |   1 -
>  2 files changed, 213 insertions(+), 132 deletions(-)
> 
> diff --git a/block/mirror.c b/block/mirror.c
> index f201f2b..0081c2e 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -46,7 +46,6 @@ typedef struct MirrorBlockJob {
>      BlockdevOnError on_source_error, on_target_error;
>      bool synced;
>      bool should_complete;
> -    int64_t sector_num;
>      int64_t granularity;
>      size_t buf_size;
>      int64_t bdev_length;
> @@ -63,6 +62,8 @@ typedef struct MirrorBlockJob {
>      int ret;
>      bool unmap;
>      bool waiting_for_io;
> +    int target_cluster_sectors;
> +    int max_iov;
>  } MirrorBlockJob;
>  
>  typedef struct MirrorOp {
> @@ -158,115 +159,90 @@ static void mirror_read_complete(void *opaque, int ret)
>                      mirror_write_complete, op);
>  }
>  
> -static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
> +/* Round sector_num and/or nb_sectors to the target cluster if COW is needed,
> + * and return the offset of the adjusted tail sector against the original. */
> +static int mirror_cow_align(MirrorBlockJob *s,
> +                            int64_t *sector_num,
> +                            int *nb_sectors)
> +{
> +    bool head_need_cow, tail_need_cow;
> +    int diff = 0;
> +    int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
> +
> +    head_need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap);
> +    tail_need_cow = !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
> +                              s->cow_bitmap);
> +    if (head_need_cow || tail_need_cow) {
> +        int64_t align_sector_num;
> +        int align_nb_sectors;
> +        bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
> +                               &align_sector_num, &align_nb_sectors);
> +        if (tail_need_cow) {
> +            diff = align_sector_num + align_nb_sectors
> +                   - (*sector_num + *nb_sectors);
> +            assert(diff >= 0);
> +            *nb_sectors += diff;
> +        }
> +        if (head_need_cow) {
> +            int d = *sector_num - align_sector_num;
> +            assert(d >= 0);
> +            *sector_num = align_sector_num;
> +            *nb_sectors += d;
> +        }
> +    }
> +
> +    /* If the resulting chunk count is more than max_iov, we have to shrink
> +     * it under the alignment restriction. */
> +    if (*nb_sectors / chunk_sectors > s->max_iov) {
> +        int shrink = *nb_sectors / chunk_sectors - s->max_iov;

Isn't this missing a "shrink *= chunk_sectors"? Because after this line,
shrink's unit seems to be chunks, but the following code seems to presume
it to be in sectors.

> +        if (tail_need_cow) {
> +            shrink -= shrink % s->target_cluster_sectors;
> +        }
> +        diff -= shrink;
> +        *nb_sectors -= shrink;
> +    }
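
To make the unit mismatch concrete (the numbers are mine, not from the
patch): with a granularity of 64k, chunk_sectors is 128; say s->max_iov is 8
and *nb_sectors is 2048, i.e. 16 chunks. Then

    int shrink = 2048 / 128 - 8;   /* == 8, counted in chunks */
    *nb_sectors -= shrink;         /* subtracts 8 sectors, not 8 * 128 */

leaves *nb_sectors at 2040 instead of the intended 1024, and the result is
not even chunk-aligned any more.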

Max

(The rest looks fine.)




* Re: [Qemu-devel] [PATCH v8 1/2] mirror: Rewrite mirror_iteration
  2016-01-04 19:27   ` Max Reitz
@ 2016-01-05  8:18     ` Fam Zheng
  0 siblings, 0 replies; 5+ messages in thread
From: Fam Zheng @ 2016-01-05  8:18 UTC (permalink / raw)
  To: Max Reitz; +Cc: Kevin Wolf, pbonzini, Jeff Cody, qemu-devel, qemu-block

On Mon, 01/04 20:27, Max Reitz wrote:
> On 24.12.2015 04:15, Fam Zheng wrote:
> > The "pnum < nb_sectors" condition in deciding whether to actually copy
> > data is unnecessarily strict, and the qiov initialization is
> > unnecessary for bdrv_aio_write_zeroes and bdrv_aio_discard.
> > 
> > Rewrite mirror_iteration to fix both flaws.
> > 
> > Signed-off-by: Fam Zheng <famz@redhat.com>
> > ---
> >  block/mirror.c | 344 +++++++++++++++++++++++++++++++++++----------------------
> >  trace-events   |   1 -
> >  2 files changed, 213 insertions(+), 132 deletions(-)
> > 
> > diff --git a/block/mirror.c b/block/mirror.c
> > index f201f2b..0081c2e 100644
> > --- a/block/mirror.c
> > +++ b/block/mirror.c
> > @@ -46,7 +46,6 @@ typedef struct MirrorBlockJob {
> >      BlockdevOnError on_source_error, on_target_error;
> >      bool synced;
> >      bool should_complete;
> > -    int64_t sector_num;
> >      int64_t granularity;
> >      size_t buf_size;
> >      int64_t bdev_length;
> > @@ -63,6 +62,8 @@ typedef struct MirrorBlockJob {
> >      int ret;
> >      bool unmap;
> >      bool waiting_for_io;
> > +    int target_cluster_sectors;
> > +    int max_iov;
> >  } MirrorBlockJob;
> >  
> >  typedef struct MirrorOp {
> > @@ -158,115 +159,90 @@ static void mirror_read_complete(void *opaque, int ret)
> >                      mirror_write_complete, op);
> >  }
> >  
> > -static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
> > +/* Round sector_num and/or nb_sectors to the target cluster if COW is needed,
> > + * and return the offset of the adjusted tail sector against the original. */
> > +static int mirror_cow_align(MirrorBlockJob *s,
> > +                            int64_t *sector_num,
> > +                            int *nb_sectors)
> > +{
> > +    bool head_need_cow, tail_need_cow;
> > +    int diff = 0;
> > +    int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
> > +
> > +    head_need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap);
> > +    tail_need_cow = !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
> > +                              s->cow_bitmap);
> > +    if (head_need_cow || tail_need_cow) {
> > +        int64_t align_sector_num;
> > +        int align_nb_sectors;
> > +        bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
> > +                               &align_sector_num, &align_nb_sectors);
> > +        if (tail_need_cow) {
> > +            diff = align_sector_num + align_nb_sectors
> > +                   - (*sector_num + *nb_sectors);
> > +            assert(diff >= 0);
> > +            *nb_sectors += diff;
> > +        }
> > +        if (head_need_cow) {
> > +            int d = *sector_num - align_sector_num;
> > +            assert(d >= 0);
> > +            *sector_num = align_sector_num;
> > +            *nb_sectors += d;
> > +        }
> > +    }
> > +
> > +    /* If the resulting chunk count is more than max_iov, we have to shrink
> > +     * it under the alignment restriction. */
> > +    if (*nb_sectors / chunk_sectors > s->max_iov) {
> > +        int shrink = *nb_sectors / chunk_sectors - s->max_iov;
> 
> Isn't this missing a "shrink *= chunk_sectors"? Because after this line,
> shrink's unit seems to be chunks, but the following code seems to presume it

You are right, and the if condition above should be careful about partial
chunks too. Will fix.
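
For reference, a minimal sketch of the direction the fix could take
(untested, and not necessarily what the next revision will do):

    if (*nb_sectors / chunk_sectors > s->max_iov) {
        /* Convert the excess from chunks to sectors before applying it. */
        int shrink = (*nb_sectors / chunk_sectors - s->max_iov)
                     * chunk_sectors;
        if (tail_need_cow) {
            shrink -= shrink % s->target_cluster_sectors;
        }
        diff -= shrink;
        *nb_sectors -= shrink;
    }

with the if condition additionally adjusted so that a partial trailing chunk
cannot slip past the integer division.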

Fam



Thread overview: 5+ messages
2015-12-24  3:15 [Qemu-devel] [PATCH v8 0/2] mirror: Improve zero write and discard Fam Zheng
2015-12-24  3:15 ` [Qemu-devel] [PATCH v8 1/2] mirror: Rewrite mirror_iteration Fam Zheng
2016-01-04 19:27   ` Max Reitz
2016-01-05  8:18     ` Fam Zheng
2015-12-24  3:15 ` [Qemu-devel] [PATCH v8 2/2] mirror: Add mirror_wait_for_io Fam Zheng
