qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Chunqiang Tang <ctang@us.ibm.com>
To: qemu-devel@nongnu.org
Cc: Chunqiang Tang <ctang@us.ibm.com>
Subject: [Qemu-devel] [PATCH 18/26] FVD: add support for base image prefetching
Date: Fri, 25 Feb 2011 17:37:58 -0500	[thread overview]
Message-ID: <1298673486-3573-18-git-send-email-ctang@us.ibm.com> (raw)
In-Reply-To: <1298673486-3573-1-git-send-email-ctang@us.ibm.com>

This patch is part of the Fast Virtual Disk (FVD) proposal.
See http://wiki.qemu.org/Features/FVD.

This patch adds adaptive prefetching of the base image to FVD.  FVD supports
both copy-on-write and copy-on-read of the base image. Adaptive prefetching is
similar to copy-on-read except that it is initiated by the FVD driver rather
than triggered by the VM's read requests. FVD's prefetching is conservative in
that, if it detects resource contention, it will back off and temporarily
pause prefetching.

Signed-off-by: Chunqiang Tang <ctang@us.ibm.com>
---
 block/fvd-prefetch.c |  600 +++++++++++++++++++++++++++++++++++++++++++++++++-
 block/fvd-read.c     |    1 +
 qemu-io-sim.c        |   13 +
 3 files changed, 613 insertions(+), 1 deletions(-)

diff --git a/block/fvd-prefetch.c b/block/fvd-prefetch.c
index 5844aa7..b8be98c 100644
--- a/block/fvd-prefetch.c
+++ b/block/fvd-prefetch.c
@@ -11,7 +11,605 @@
  *
  */
 
+static void prefetch_read_cb(void *opaque, int ret);
+static void resume_prefetch(BlockDriverState * bs);
+static void do_next_prefetch_read(BlockDriverState * bs, int64_t current_time);
+
 void fvd_init_prefetch(void *opaque)
 {
-    /* To be implemented. */
+    BlockDriverState *bs = opaque;
+    BDRVFvdState *s = bs->opaque;
+    FvdAIOCB *acb;
+    int i;
+
+    QDEBUG("Start prefetching\n");
+
+    if (!s->data_region_prepared) {
+        init_data_region(s);
+    }
+
+    s->prefetch_acb = my_qemu_malloc(sizeof(FvdAIOCB *)*s->num_prefetch_slots);
+
+    for (i = 0; i < s->num_prefetch_slots; i++) {
+        acb = my_qemu_aio_get(&fvd_aio_pool, bs, prefetch_null_cb, NULL);
+        s->prefetch_acb[i] = acb;
+        if (!acb) {
+            int j;
+            for (j = 0; j < i; j++) {
+                my_qemu_aio_release(s->prefetch_acb[j]);
+                s->prefetch_acb[j] = NULL;
+            }
+
+            my_qemu_free(s->prefetch_acb);
+            s->prefetch_acb = NULL;
+            fprintf(stderr, "No acb and cannot start prefetching.\n");
+            return;
+        }
+
+        acb->type = OP_COPY;
+        acb->cancel_in_progress = false;
+    }
+
+    s->prefetch_state = PREFETCH_STATE_RUNNING;
+
+    for (i = 0; i < s->num_prefetch_slots; i++) {
+        acb = s->prefetch_acb[i];
+        acb->copy.buffered_sector_begin = acb->copy.buffered_sector_end = 0;
+        QLIST_INIT(&acb->copy_lock.dependent_writes);
+        acb->copy_lock.next.le_prev = NULL;
+        acb->copy.hd_acb = NULL;
+        acb->sector_num = 0;
+        acb->nb_sectors = 0;
+        acb->copy.iov.iov_len = s->sectors_per_prefetch * 512;
+        acb->copy.buf = acb->copy.iov.iov_base =
+            my_qemu_blockalign(bs->backing_hd, acb->copy.iov.iov_len);
+        qemu_iovec_init_external(&acb->copy.qiov, &acb->copy.iov, 1);
+    }
+
+    if (s->prefetch_timer) {
+        qemu_free_timer(s->prefetch_timer);
+        s->prefetch_timer =
+            qemu_new_timer(rt_clock, (QEMUTimerCB *) resume_prefetch, bs);
+    }
+
+    s->pause_prefetch_requested = false;
+    s->unclaimed_prefetch_region_start = 0;
+    s->prefetch_read_throughput = -1;   /* Indicate not initialized. */
+    s->prefetch_write_throughput = -1;  /* Indicate not initialized. */
+    s->prefetch_read_time = 0;
+    s->prefetch_write_time = 0;
+    s->prefetch_data_read = 0;
+    s->prefetch_data_written = 0;
+    s->next_prefetch_read_slot = 0;
+    s->num_filled_prefetch_slots = 0;
+    s->prefetch_read_active = false;
+
+    do_next_prefetch_read(bs, qemu_get_clock(rt_clock));
+}
+
+static void pause_prefetch(BDRVFvdState * s)
+{
+    int64_t ms = 1 + (int64_t) ((rand() / ((double)RAND_MAX))
+                                * s->prefetch_throttle_time);
+    QDEBUG("Pause prefetch for %" PRId64 " milliseconds\n", ms);
+    /* When the timer expires, it goes to resume_prefetch(). */
+    qemu_mod_timer(s->prefetch_timer, qemu_get_clock(rt_clock) + ms);
+}
+
+/* Return true if every bit of fresh_bitmap is set to 1. */
+static bool all_data_prefetched(BDRVFvdState *s)
+{
+    uint64_t n = s->base_img_sectors / s->block_size / sizeof(uint64_t) / 8;
+    uint64_t *p = (uint64_t*)s->fresh_bitmap;
+    uint64_t i;
+
+    for (i = 0; i < n; i++, p++) {  /* Whole 64-bit words first. */
+        if (*p != UINT64_C(0xFFFFFFFFFFFFFFFF)) {
+            return false;
+        }
+    }
+
+    uint64_t sec = n * sizeof(uint64_t) * 8 * s->block_size;
+    while (sec < s->base_img_sectors) {  /* Then the tail, block by block. */
+        if (fresh_bitmap_show_sector_in_base_img(sec, s)) {
+            return false;
+        }
+        sec += s->block_size;
+    }
+
+    return true;
+}
+
+static void terminate_prefetch(BlockDriverState * bs, int final_state)
+{
+    BDRVFvdState *s = bs->opaque;
+    int i;
+
+    ASSERT(!s->prefetch_read_active && s->num_filled_prefetch_slots == 0);
+
+    for (i = 0; i < s->num_prefetch_slots; i++) {
+        if (s->prefetch_acb) {
+            my_qemu_vfree(s->prefetch_acb[i]->copy.buf);
+            my_qemu_aio_release(s->prefetch_acb[i]);
+            s->prefetch_acb[i] = NULL;
+        }
+    }
+    my_qemu_free(s->prefetch_acb);
+    s->prefetch_acb = NULL;
+
+    if (s->prefetch_timer) {
+        qemu_del_timer(s->prefetch_timer);
+        qemu_free_timer(s->prefetch_timer);
+        s->prefetch_timer = NULL;
+    }
+
+    if (final_state == PREFETCH_STATE_FINISHED) {
+        if (all_data_prefetched(s)) {
+            s->prefetch_state = PREFETCH_STATE_FINISHED;
+            s->copy_on_read = false;
+        } else {
+            s->prefetch_state = PREFETCH_STATE_DISABLED;
+        }
+    } else {
+        s->prefetch_state = final_state;
+    }
+
+    if (s->prefetch_state == PREFETCH_STATE_FINISHED) {
+        flush_metadata_to_disk(bs, false/*journal*/, true/*all_prefetched*/);
+        QDEBUG("FVD prefetching finished successfully.\n");
+    } else {
+        flush_metadata_to_disk(bs, false/*journal*/, false/*all_prefetched*/);
+        QDEBUG("FVD prefetching disabled.\n");
+    }
+}
+
+static void do_next_prefetch_read(BlockDriverState * bs, int64_t current_time)
+{
+    FvdAIOCB *acb;
+    BDRVFvdState *s = bs->opaque;
+    int64_t begin, end;
+
+    ASSERT(!s->prefetch_read_active
+           && s->num_filled_prefetch_slots < s->num_prefetch_slots
+           && !s->pause_prefetch_requested);
+
+    /* Find the next region to prefetch. */
+    begin = s->unclaimed_prefetch_region_start;
+    while (1) {
+        /* Past the end of the base image: nothing is left to claim. Once no
+         * slot is still filled, terminate_prefetch() picks the final state. */
+
+        if (begin >= s->base_img_sectors) {
+            s->unclaimed_prefetch_region_start = s->base_img_sectors;
+            if (s->num_filled_prefetch_slots == 0) {
+                terminate_prefetch(bs, PREFETCH_STATE_FINISHED);
+            }
+            return;
+        }
+        end = begin + s->sectors_per_prefetch;
+        if (end > s->base_img_sectors) {
+            end = s->base_img_sectors;
+        }
+        if (find_region_in_base_img(s, &begin, &end)) {
+            break;
+        }
+        begin = end;
+    }
+
+    ASSERT(begin % s->block_size == 0 && (end % s->block_size == 0
+           || end == s->base_img_sectors));
+
+    acb = s->prefetch_acb[s->next_prefetch_read_slot];
+    acb->copy.buffered_sector_begin = acb->sector_num = begin;
+    acb->copy.buffered_sector_end = s->unclaimed_prefetch_region_start = end;
+    acb->nb_sectors = end - begin;
+    acb->copy.qiov.size = acb->copy.iov.iov_len = acb->nb_sectors * 512;
+    acb->copy.iov.iov_base = acb->copy.buf;
+    acb->copy.last_prefetch_op_start_time = current_time;
+    acb->copy.hd_acb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
+                                      &acb->copy.qiov, acb->nb_sectors,
+                                      prefetch_read_cb, acb);
+
+
+    if (acb->copy.hd_acb == NULL) {
+        QDEBUG("PREFETCH: error when starting read for sector_num=%" PRId64
+               " nb_sectors=%d\n", acb->sector_num, acb->nb_sectors);
+        s->prefetch_state = PREFETCH_STATE_DISABLED;
+        if (s->num_filled_prefetch_slots == 0) {
+            terminate_prefetch(bs, PREFETCH_STATE_DISABLED);
+        }
+    } else {
+        s->prefetch_read_active = true;
+        QDEBUG("PREFETCH: start read for sector_num=%" PRId64
+               " nb_sectors=%d total_prefetched_bytes=%" PRId64 "\n",
+               acb->sector_num, acb->nb_sectors, s->total_prefetch_data);
+#ifdef FVD_DEBUG
+        s->total_prefetch_data += acb->copy.iov.iov_len;
+#endif
+    }
+}
+
+static void prefetch_write_cb(void *opaque, int ret)
+{
+    FvdAIOCB *acb = (FvdAIOCB *) opaque;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVFvdState *s = bs->opaque;
+    int64_t begin, end;
+    const int64_t current_time = qemu_get_clock(rt_clock);
+
+    if (acb->cancel_in_progress) {
+        return;
+    }
+
+    ASSERT(acb->nb_sectors > 0 && s->num_filled_prefetch_slots > 0);
+
+    if (ret == 0) {
+        /* No need to update the on-disk bitmap or the stale bitmap.
+         * See Section 3.3.4 of the FVD-cow paper. */
+        update_fresh_bitmap(acb->sector_num, acb->nb_sectors, s);
+    }
+
+    QLIST_REMOVE(acb, copy_lock.next);
+    restart_dependent_writes(acb);
+    acb->copy.hd_acb = NULL;
+    QLIST_INIT(&acb->copy_lock.dependent_writes);
+
+    if (ret != 0) {
+        QDEBUG("PREFETCH: finished write with error for sector_num=%" PRId64
+               " nb_sectors=%d\n", acb->sector_num, acb->nb_sectors);
+        s->num_filled_prefetch_slots = 0;
+        s->prefetch_state = PREFETCH_STATE_DISABLED;
+        if (!s->prefetch_read_active) {
+            terminate_prefetch(bs, PREFETCH_STATE_DISABLED);
+        }
+        return;
+    }
+
+    const int64_t write_time =
+        current_time - acb->copy.last_prefetch_op_start_time;
+    s->prefetch_write_time += write_time;
+    s->prefetch_data_written += acb->nb_sectors * 512;
+
+    QDEBUG("PREFETCH: write_finished  sector_num=%" PRId64
+           " nb_sectors=%d  write_time=%"PRId64" (ms)\n", acb->sector_num,
+           acb->nb_sectors, write_time);
+
+    /* Calculate throughput and determine if it needs to pause prefetching due
+     * to low throughput. */
+    if (s->prefetch_timer && s->prefetch_throttle_time > 0
+        && !s->pause_prefetch_requested
+        && s->prefetch_write_time > s->prefetch_write_throughput_measure_time) {
+        const double this_round_throughput =
+            s->prefetch_data_written / (double)s->prefetch_write_time;
+        if (s->prefetch_write_throughput < 0) {
+            /* Previously not initialized. */
+            s->prefetch_write_throughput = this_round_throughput;
+        } else {
+            s->prefetch_write_throughput =
+                PREFETCH_PERF_CALC_ALPHA * s->prefetch_write_throughput +
+                (1 - PREFETCH_PERF_CALC_ALPHA) * this_round_throughput;
+        }
+        if (s->prefetch_write_throughput < s->prefetch_min_write_throughput) {
+            QDEBUG("PREFETCH: slow_write  this_write=%"PRId64" (ms)  "
+                   "this_write_throughput=%.3lf (MB/s)   "
+                   "avg_write_throughput=%.3lf (MB/s)\n",
+                   write_time, this_round_throughput / 1048576 * 1000,
+                   s->prefetch_write_throughput / 1048576 * 1000);
+
+            /* Make a randomized decision to pause prefetching. This avoids
+             * pausing all contending FVD drivers. See Section 3.4.2 of the
+             * FVD-cow paper. */
+            if (rand() > (RAND_MAX / 2)) {
+                QDEBUG("PREFETCH: pause requested.\n");
+                s->pause_prefetch_requested = true;
+            } else {
+                QDEBUG("PREFETCH: continue due to 50%% probability, despite "
+                       "slow write.\n");
+                s->prefetch_write_throughput = -1; /*Indicate not initialized*/
+            }
+        } else {
+            QDEBUG("PREFETCH: this_write_throughput=%.3lf (MB/s)   "
+                   "avg_write_throughput=%.3lf (MB/s)\n",
+                   this_round_throughput / 1048576 * 1000,
+                   s->prefetch_write_throughput / 1048576 * 1000);
+        }
+
+        /* Preparing for measuring the next round of throughput. */
+        s->prefetch_data_written = 0;
+        s->prefetch_write_time = 0;
+    }
+
+    /* Find in this prefetch slot the next section of prefetched but
+     * not-yet-written data. */
+    begin = acb->sector_num + acb->nb_sectors;
+    if (begin < acb->copy.buffered_sector_end) {
+        end = acb->copy.buffered_sector_end;
+        if (find_region_in_base_img(s, &begin, &end)) {
+            acb->sector_num = begin;
+            acb->nb_sectors = end - begin;
+            acb->copy.iov.iov_base = acb->copy.buf +
+                (begin - acb->copy.buffered_sector_begin) * 512;
+            acb->copy.qiov.size = acb->copy.iov.iov_len = acb->nb_sectors * 512;
+            QDEBUG("PREFETCH: write_data  sector_num=%" PRId64
+                   " nb_sectors=%d\n", acb->sector_num, acb->nb_sectors);
+            acb->copy.hd_acb = store_data(true, acb, bs, acb->sector_num,
+                                          &acb->copy.qiov, acb->nb_sectors,
+                                          prefetch_write_cb, acb);
+            if (acb->copy.hd_acb == NULL) {
+                QDEBUG("PREFETCH: error in starting bdrv_aio_writev().\n");
+                s->num_filled_prefetch_slots = 0;
+                s->prefetch_state = PREFETCH_STATE_DISABLED;
+                if (!s->prefetch_read_active) {
+                    terminate_prefetch(bs, PREFETCH_STATE_DISABLED);
+                }
+            } else {
+                acb->copy_lock.begin = begin;
+                acb->copy_lock.end = end;
+                QLIST_INSERT_HEAD(&s->copy_locks, acb, copy_lock.next);
+            }
+
+            return;
+        }
+    }
+
+    s->num_filled_prefetch_slots--;
+
+    if (s->prefetch_state == PREFETCH_STATE_DISABLED) {
+        if (s->num_filled_prefetch_slots == 0 && !s->prefetch_read_active) {
+            terminate_prefetch(bs, PREFETCH_STATE_DISABLED);
+        }
+        return;
+    }
+
+    if (begin >= s->base_img_sectors) {
+        /* Prefetching finished. */
+        ASSERT(s->num_filled_prefetch_slots == 0 && !s->prefetch_read_active);
+        terminate_prefetch(bs, PREFETCH_STATE_FINISHED);
+        return;
+    }
+
+    if (s->pause_prefetch_requested) {
+        if (s->num_filled_prefetch_slots == 0) {
+            if (!s->prefetch_read_active) {
+                pause_prefetch(s);
+            } else {
+                QDEBUG("PREFETCH: wait for the read operation to finish in "
+                       "order to pause prefetch.\n");
+            }
+            return;
+        }
+    }
+
+    /* Write out data in the next prefetched slot. */
+    while (s->num_filled_prefetch_slots > 0) {
+        int k = s->next_prefetch_read_slot - s->num_filled_prefetch_slots;
+        if (k < 0) {
+            k += s->num_prefetch_slots;
+        }
+        acb = s->prefetch_acb[k];
+
+        int64_t begin = acb->copy.buffered_sector_begin;
+        int64_t end = acb->copy.buffered_sector_end;
+        if (find_region_in_base_img(s, &begin, &end)) {
+            acb->copy.last_prefetch_op_start_time = current_time;
+            acb->sector_num = begin;
+            acb->nb_sectors = end - begin;
+            acb->copy.iov.iov_base =
+                acb->copy.buf + (begin - acb->copy.buffered_sector_begin) * 512;
+            acb->copy.qiov.size = acb->copy.iov.iov_len = acb->nb_sectors * 512;
+            QDEBUG("PREFETCH: writes data: sector_num=%" PRId64
+                   " nb_sectors=%d\n", acb->sector_num, acb->nb_sectors);
+            acb->copy.hd_acb = store_data(true, acb, bs, acb->sector_num,
+                                          &acb->copy.qiov, acb->nb_sectors,
+                                          prefetch_write_cb, acb);
+
+            if (acb->copy.hd_acb == NULL) {
+                QDEBUG("PREFETCH: error cannot get a control block to write "
+                       "a prefetched block.\n");
+                s->prefetch_state = PREFETCH_STATE_DISABLED;
+                s->num_filled_prefetch_slots = 0;
+                if (!s->prefetch_read_active) {
+                    terminate_prefetch(bs, PREFETCH_STATE_DISABLED);
+                }
+                return;
+            }
+
+            acb->copy_lock.begin = begin;
+            acb->copy_lock.end = end;
+            QLIST_INSERT_HEAD(&s->copy_locks, acb, copy_lock.next);
+            break;
+        } else {
+            QDEBUG("PREFETCH: discard prefetched data as they have been "
+                   "covered: sector_num=%" PRId64 " nb_sectors=%d\n",
+                   acb->sector_num, acb->nb_sectors);
+            s->num_filled_prefetch_slots--;
+        }
+    }
+
+    /* If the reader was stopped due to lack of slots, start the reader. */
+    if (!s->prefetch_read_active && !s->pause_prefetch_requested) {
+        do_next_prefetch_read(bs, current_time);
+    }
+}
+
+static void prefetch_read_cb(void *opaque, int ret)
+{
+    FvdAIOCB *acb = (FvdAIOCB *) opaque;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVFvdState *s = bs->opaque;
+
+    if (acb->cancel_in_progress) {
+        return;
+    }
+
+    ASSERT(s->prefetch_read_active && s->num_filled_prefetch_slots >= 0
+           && s->num_filled_prefetch_slots < s->num_prefetch_slots);
+
+    s->prefetch_read_active = false;
+    acb->copy.hd_acb = NULL;
+
+    if (s->prefetch_state == PREFETCH_STATE_DISABLED) {
+        if (s->num_filled_prefetch_slots == 0) {
+            terminate_prefetch(bs, PREFETCH_STATE_DISABLED);
+        }
+        return;
+    }
+
+    if (ret != 0) {
+        QDEBUG("PREFETCH: read_error  sector_num=%" PRId64 " nb_sectors=%d.\n",
+               acb->sector_num, acb->nb_sectors);
+        s->prefetch_state = PREFETCH_STATE_DISABLED;
+        if (s->num_filled_prefetch_slots == 0) {
+            terminate_prefetch(bs, PREFETCH_STATE_DISABLED);
+        }
+        return;
+    }
+
+    const int64_t current_time = qemu_get_clock(rt_clock);
+    const int64_t read_time = current_time -
+        acb->copy.last_prefetch_op_start_time;
+    s->prefetch_read_time += read_time;
+    s->prefetch_data_read += acb->nb_sectors * 512;
+
+    QDEBUG("PREFETCH: read_finished  sector_num=%" PRId64
+           " nb_sectors=%d  read_time=%"PRId64" (ms)\n", acb->sector_num,
+           acb->nb_sectors, read_time);
+
+    /* Calculate throughput and determine if it needs to pause prefetching due
+     * to low throughput. */
+    if (s->prefetch_timer && s->prefetch_throttle_time > 0
+        && !s->pause_prefetch_requested
+        && s->prefetch_read_time > s->prefetch_read_throughput_measure_time) {
+        const double this_round_throughput =
+            s->prefetch_data_read / (double)s->prefetch_read_time;
+        if (s->prefetch_read_throughput < 0) {
+            /* Previously not initialized. */
+            s->prefetch_read_throughput = this_round_throughput;
+        } else {
+            s->prefetch_read_throughput = PREFETCH_PERF_CALC_ALPHA *
+                s->prefetch_read_throughput +
+                (1 - PREFETCH_PERF_CALC_ALPHA) * this_round_throughput;
+        }
+        if (s->prefetch_read_throughput < s->prefetch_min_read_throughput) {
+            QDEBUG("PREFETCH: slow_read read_time=%"PRId64" (ms)   "
+                   "this_read_throughput=%.3lf (MB/s) "
+                   "avg_read_throughput=%.3lf (MB/s)\n",
+                   read_time, this_round_throughput / 1048576 * 1000,
+                   s->prefetch_read_throughput / 1048576 * 1000);
+
+            /* Make a randomized decision to pause prefetching. This avoids
+             * pausing all contending FVD drivers. See Section 3.4.2 of the
+             * FVD-cow paper. */
+            if (rand() > (RAND_MAX / 2)) {
+                QDEBUG("PREFETCH: pause requested.\n");
+                s->pause_prefetch_requested = true;
+            } else {
+                QDEBUG("PREFETCH: continue due to 50%% probability, "
+                       "despite slow read.\n");
+                s->prefetch_read_throughput = -1;  /*Indicate not initialized*/
+            }
+        } else {
+            QDEBUG("PREFETCH: this_read_throughput=%.3lf (MB/s)    "
+                   "avg_read_throughput=%.3lf (MB/s)\n",
+                   this_round_throughput / 1048576 * 1000,
+                   s->prefetch_read_throughput / 1048576 * 1000);
+        }
+
+        /* Preparing for measuring the next round of throughput. */
+        s->prefetch_data_read = 0;
+        s->prefetch_read_time = 0;
+    }
+
+    if (s->num_filled_prefetch_slots > 0) {
+        /* There is one ongoing write for prefetched data. This slot will be
+         * written out later. */
+        s->num_filled_prefetch_slots++;
+        s->next_prefetch_read_slot++;
+        if (s->next_prefetch_read_slot >= s->num_prefetch_slots) {
+            s->next_prefetch_read_slot = 0;
+        }
+    } else {
+        /* The writer is not active. Start the writer. */
+        int64_t begin = acb->copy.buffered_sector_begin;
+        int64_t end = acb->copy.buffered_sector_end;
+        if (find_region_in_base_img(s, &begin, &end)) {
+            acb->copy.last_prefetch_op_start_time = current_time;
+            acb->sector_num = begin;
+            acb->nb_sectors = end - begin;
+            acb->copy.iov.iov_base =
+                acb->copy.buf + (begin - acb->copy.buffered_sector_begin) * 512;
+            acb->copy.qiov.size = acb->copy.iov.iov_len = acb->nb_sectors * 512;
+            QDEBUG("PREFETCH: writes_data sector_num=%" PRId64
+                   " nb_sectors=%d\n", acb->sector_num, acb->nb_sectors);
+            acb->copy.hd_acb = store_data(true, acb, bs, acb->sector_num,
+                                          &acb->copy.qiov, acb->nb_sectors,
+                                          prefetch_write_cb, acb);
+
+            if (acb->copy.hd_acb == NULL) {
+                QDEBUG("PREFETCH: error cannot get control block to write a "
+                       "prefetched block.\n");
+                s->prefetch_state = PREFETCH_STATE_DISABLED;
+                if (s->num_filled_prefetch_slots == 0) {
+                    terminate_prefetch(bs, PREFETCH_STATE_DISABLED);
+                }
+                return;
+            }
+
+            acb->copy_lock.begin = begin;
+            acb->copy_lock.end = end;
+            QLIST_INSERT_HEAD(&s->copy_locks, acb, copy_lock.next);
+            s->num_filled_prefetch_slots++;
+            s->next_prefetch_read_slot++;
+            if (s->next_prefetch_read_slot >= s->num_prefetch_slots) {
+                s->next_prefetch_read_slot = 0;
+            }
+        } else {
+            /* The current prefetch slot will be reused to prefetch the next
+             * bunch of data. */
+            QDEBUG("PREFETCH: discard prefetched data as they have been "
+                   "covered: sector_num=%" PRId64 " nb_sectors=%d\n",
+                   acb->sector_num, acb->nb_sectors);
+        }
+    }
+
+    if (s->num_filled_prefetch_slots >= s->num_prefetch_slots) {
+        QDEBUG("PREFETCH: halt read because no slot is available.\n");
+    } else {
+        if (s->pause_prefetch_requested) {
+            if (s->num_filled_prefetch_slots == 0) {
+                pause_prefetch(s);
+            }
+        } else {
+            do_next_prefetch_read(bs, current_time);
+        }
+    }
+}
+
+static void resume_prefetch(BlockDriverState * bs)
+{
+    BDRVFvdState *s = bs->opaque;
+
+    if (s->prefetch_state != PREFETCH_STATE_RUNNING) {
+        return;
+    }
+
+    ASSERT(s->num_filled_prefetch_slots == 0 && !s->prefetch_read_active);
+    QDEBUG("PREFETCH: resume.\n");
+
+    s->pause_prefetch_requested = false;
+    s->prefetch_read_throughput = -1;   /* Indicate not initialized. */
+    s->prefetch_write_throughput = -1;  /* Indicate not initialized. */
+    s->prefetch_read_time = 0;
+    s->prefetch_write_time = 0;
+    s->prefetch_data_read = 0;
+    s->prefetch_data_written = 0;
+
+    do_next_prefetch_read(bs, qemu_get_clock(rt_clock));
+}
+
+static void prefetch_null_cb(void *opaque, int ret)
+{
+    /* Nothing to do and will never be invoked. Only need it to distinguish
+     * copy-on-read from prefetch. */
+    ASSERT(false);
 }
diff --git a/block/fvd-read.c b/block/fvd-read.c
index cd041e5..675af9e 100644
--- a/block/fvd-read.c
+++ b/block/fvd-read.c
@@ -11,6 +11,7 @@
  *
  */
 
+static void prefetch_null_cb(void *opaque, int ret);
 static void read_backing_for_copy_on_read_cb(void *opaque, int ret);
 static void read_fvd_cb(void *opaque, int ret);
 static inline void calc_read_region(BDRVFvdState * s, int64_t sector_num,
diff --git a/qemu-io-sim.c b/qemu-io-sim.c
index 923c1b8..d420fdb 100644
--- a/qemu-io-sim.c
+++ b/qemu-io-sim.c
@@ -77,6 +77,17 @@ wrote 1024/1024 bytes at offset 65536
 *=============================================================================*/
 
 #include "block/blksim.h"
+#include "block/fvd-ext.h"
+
+static void sim_start_prefetch(void)
+{
+    if (!bs->drv->format_name || strncmp(bs->drv->format_name, "fvd", 3)) {
+        printf("This image does not support prefetching.\n");
+        return;  /* Non-zero strncmp(): the driver is not FVD. */
+    }
+    fvd_init_prefetch(bs);
+    printf("Prefetching started\n");
+}
 
 static void sim_help(void)
 {
@@ -101,6 +112,8 @@ static int sim_f(int argc, char **argv)
 
     if (strcmp(argv[1], "list") == 0) {
         blksim_list_tasks();
+    } else if (strcmp(argv[1], "prefetch") == 0) {
+        sim_start_prefetch();
     } else if (strcmp(argv[1], "all") == 0) {
         blksim_set_disk_io_return_code(ret);
         int n = blksim_run_all_tasks();
-- 
1.7.0.4

  parent reply	other threads:[~2011-02-25 22:48 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-25 22:37 [Qemu-devel] [PATCH 01/26] FVD: add simulated block driver 'blksim' Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 02/26] FVD: extend qemu-io to do fully automated testing Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 03/26] FVD: add fully automated test-qcow2.sh Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 04/26] FVD: add fully automated test-vdi.sh Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 05/26] FVD: add the 'qemu-img update' command Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 06/26] FVD: skeleton of Fast Virtual Disk Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 07/26] FVD: extend FVD header fvd.h to be more complete Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 08/26] FVD: add debugging utilities Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 09/26] FVD: add impl of interface bdrv_create() Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 10/26] FVD: add impl of interface bdrv_file_open() Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 11/26] FVD: add impl of interface bdrv_aio_writev() Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 12/26] FVD: add impl of interface bdrv_aio_readv() Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 13/26] FVD: add impl of storing data in compact image Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 14/26] FVD: add impl of loading data from " Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 15/26] FVD: add basic journal functionality Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 16/26] FVD: add impl for buffered journal updates Chunqiang Tang
2011-02-25 22:37 ` [Qemu-devel] [PATCH 17/26] FVD: add impl of bdrv_flush() and bdrv_aio_flush() Chunqiang Tang
2011-02-25 22:37 ` Chunqiang Tang [this message]
2011-02-25 22:37 ` [Qemu-devel] [PATCH 19/26] FVD: add support for aio_cancel Chunqiang Tang
2011-02-25 22:38 ` [Qemu-devel] [PATCH 20/26] FVD: add impl of interface bdrv_get_info() Chunqiang Tang
2011-02-25 22:38 ` [Qemu-devel] [PATCH 21/26] FVD: add impl of interface bdrv_close() Chunqiang Tang
2011-02-25 22:38 ` [Qemu-devel] [PATCH 22/26] FVD: add impl of interface bdrv_update() Chunqiang Tang
2011-02-25 22:38 ` [Qemu-devel] [PATCH 23/26] FVD: add impl of interface bdrv_is_allocated() Chunqiang Tang
2011-02-25 22:38 ` [Qemu-devel] [PATCH 24/26] FVD: add impl of interface bdrv_has_zero_init() Chunqiang Tang
2011-02-25 22:38 ` [Qemu-devel] [PATCH 25/26] FVD: add impl of interface bdrv_probe() Chunqiang Tang
2011-02-25 22:38 ` [Qemu-devel] [PATCH 26/26] FVD: add fully automated test-fvd.sh Chunqiang Tang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1298673486-3573-18-git-send-email-ctang@us.ibm.com \
    --to=ctang@us.ibm.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).