[Qemu-devel] [PATCH 16/20] qcow2: Reduce number of I/O requests

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Kevin Wolf <kwolf@redhat.com>
To: anthony@codemonkey.ws
Cc: kwolf@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 16/20] qcow2: Reduce number of I/O requests
Date: Mon, 12 Mar 2012 16:19:47 +0100	[thread overview]
Message-ID: <1331565591-8414-17-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1331565591-8414-1-git-send-email-kwolf@redhat.com>

If the first part of a write request is allocated, but the second isn't
and it can be allocated so that the resulting area is contiguous, handle
it at once. This is a common case for sequential writes.

After this patch, alloc_cluster_offset() only checks if the clusters are
already allocated or how many new clusters can be allocated contigouosly.
The actual cluster allocation is split off into a new function
do_alloc_cluster_offset().

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 block/qcow2-cluster.c |  243 +++++++++++++++++++++++++++++++++----------------
 block/qcow2.h         |    1 +
 trace-events          |    1 +
 3 files changed, 168 insertions(+), 77 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 903454d..e0fb907 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -589,7 +589,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     BDRVQcowState *s = bs->opaque;
     int i, j = 0, l2_index, ret;
     uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
-    uint64_t cluster_offset = m->cluster_offset;
+    uint64_t cluster_offset = m->alloc_offset;
     bool cow = false;
 
     trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
@@ -712,12 +712,94 @@ static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
 }
 
 /*
+ * Allocates new clusters for the given guest_offset.
+ *
+ * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
+ * contain the number of clusters that have been allocated and are contiguous
+ * in the image file.
+ *
+ * If *host_offset is non-zero, it specifies the offset in the image file at
+ * which the new clusters must start. *nb_clusters can be 0 on return in this
+ * case if the cluster at host_offset is already in use. If *host_offset is
+ * zero, the clusters can be allocated anywhere in the image file.
+ *
+ * *host_offset is updated to contain the offset into the image file at which
+ * the first allocated cluster starts.
+ *
+ * Return 0 on success and -errno in error cases. -EAGAIN means that the
+ * function has been waiting for another request and the allocation must be
+ * restarted, but the whole request should not be failed.
+ */
+static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+    uint64_t *host_offset, unsigned int *nb_clusters, uint64_t *l2_table)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t cluster_offset;
+    QCowL2Meta *old_alloc;
+
+    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
+                                         *host_offset, *nb_clusters);
+
+    /*
+     * Check if there already is an AIO write request in flight which allocates
+     * the same cluster. In this case we need to wait until the previous
+     * request has completed and updated the L2 table accordingly.
+     */
+    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
+
+        uint64_t start = guest_offset >> s->cluster_bits;
+        uint64_t end = start + *nb_clusters;
+        uint64_t old_start = old_alloc->offset >> s->cluster_bits;
+        uint64_t old_end = old_start + old_alloc->nb_clusters;
+
+        if (end < old_start || start > old_end) {
+            /* No intersection */
+        } else {
+            if (start < old_start) {
+                /* Stop at the start of a running allocation */
+                *nb_clusters = old_start - start;
+            } else {
+                *nb_clusters = 0;
+            }
+
+            if (*nb_clusters == 0) {
+                /* Wait for the dependency to complete. We need to recheck
+                 * the free/allocated clusters when we continue. */
+                qemu_co_mutex_unlock(&s->lock);
+                qemu_co_queue_wait(&old_alloc->dependent_requests);
+                qemu_co_mutex_lock(&s->lock);
+                return -EAGAIN;
+            }
+        }
+    }
+
+    if (!*nb_clusters) {
+        abort();
+    }
+
+    /* Allocate new clusters */
+    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
+    if (*host_offset == 0) {
+        cluster_offset = qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
+    } else {
+        cluster_offset = *host_offset;
+        *nb_clusters = qcow2_alloc_clusters_at(bs, cluster_offset, *nb_clusters);
+    }
+
+    if (cluster_offset < 0) {
+        return cluster_offset;
+    }
+    *host_offset = cluster_offset;
+    return 0;
+}
+
+/*
  * alloc_cluster_offset
  *
- * For a given offset of the disk image, return cluster offset in qcow2 file.
- * If the offset is not found, allocate a new cluster.
+ * For a given offset on the virtual disk, find the cluster offset in qcow2
+ * file. If the offset is not found, allocate a new cluster.
  *
- * If the cluster was already allocated, m->nb_clusters is set to 0,
+ * If the cluster was already allocated, m->nb_clusters is set to 0 and
  * other fields in m are meaningless.
  *
  * If the cluster is newly allocated, m->nb_clusters is set to the number of
@@ -734,119 +816,126 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     int n_start, int n_end, int *num, QCowL2Meta *m)
 {
     BDRVQcowState *s = bs->opaque;
-    int l2_index, ret;
+    int l2_index, ret, sectors;
     uint64_t l2_offset, *l2_table;
-    int64_t cluster_offset;
-    unsigned int nb_clusters, i = 0;
-    QCowL2Meta *old_alloc;
+    unsigned int nb_clusters, keep_clusters;
+    uint64_t cluster_offset;
 
     trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
                                       n_start, n_end);
 
+    /* Find L2 entry for the first involved cluster */
     ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
     if (ret < 0) {
         return ret;
     }
 
+    /*
+     * Calculate the number of clusters to look for. We stop at L2 table
+     * boundaries to keep things simple.
+     */
 again:
-    nb_clusters = size_to_clusters(s, n_end << 9);
-
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+    nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS),
+                      s->l2_size - l2_index);
 
     cluster_offset = be64_to_cpu(l2_table[l2_index]);
 
-    /* We keep all QCOW_OFLAG_COPIED clusters */
-
+    /*
+     * Check how many clusters are already allocated and don't need COW, and how
+     * many need a new allocation.
+     */
     if (cluster_offset & QCOW_OFLAG_COPIED) {
-        nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], 0, 0);
-
-        cluster_offset &= ~QCOW_OFLAG_COPIED;
-        m->nb_clusters = 0;
+        /* We keep all QCOW_OFLAG_COPIED clusters */
+        keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
+                                                  &l2_table[l2_index], 0, 0);
+        assert(keep_clusters <= nb_clusters);
+        nb_clusters -= keep_clusters;
+    } else {
+        /* For the moment, overwrite compressed clusters one by one */
+        if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+            nb_clusters = 1;
+        } else {
+            nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
+        }
 
-        goto out;
+        keep_clusters = 0;
+        cluster_offset = 0;
     }
 
-    /* for the moment, multiple compressed clusters are not managed */
+    cluster_offset &= ~QCOW_OFLAG_COPIED;
 
-    if (cluster_offset & QCOW_OFLAG_COMPRESSED)
-        nb_clusters = 1;
+    /* If there is something left to allocate, do that now */
+    *m = (QCowL2Meta) {
+        .cluster_offset     = cluster_offset,
+        .nb_clusters        = 0,
+    };
+    qemu_co_queue_init(&m->dependent_requests);
 
-    /* how many available clusters ? */
+    if (nb_clusters > 0) {
+        uint64_t alloc_offset;
+        uint64_t alloc_cluster_offset;
+        uint64_t keep_bytes = keep_clusters * s->cluster_size;
 
-    i = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
-    assert(i <= nb_clusters);
-    nb_clusters = i;
+        /* Calculate start and size of allocation */
+        alloc_offset = offset + keep_bytes;
 
-    /*
-     * Check if there already is an AIO write request in flight which allocates
-     * the same cluster. In this case we need to wait until the previous
-     * request has completed and updated the L2 table accordingly.
-     */
-    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
-
-        uint64_t start = offset >> s->cluster_bits;
-        uint64_t end = start + nb_clusters;
-        uint64_t old_start = old_alloc->offset >> s->cluster_bits;
-        uint64_t old_end = old_start + old_alloc->nb_clusters;
-
-        if (end < old_start || start > old_end) {
-            /* No intersection */
+        if (keep_clusters == 0) {
+            alloc_cluster_offset = 0;
         } else {
-            if (start < old_start) {
-                /* Stop at the start of a running allocation */
-                nb_clusters = old_start - start;
-            } else {
-                nb_clusters = 0;
-            }
-
-            if (nb_clusters == 0) {
-                /* Wait for the dependency to complete. We need to recheck
-                 * the free/allocated clusters when we continue. */
-                qemu_co_mutex_unlock(&s->lock);
-                qemu_co_queue_wait(&old_alloc->dependent_requests);
-                qemu_co_mutex_lock(&s->lock);
-                goto again;
-            }
+            alloc_cluster_offset = cluster_offset + keep_bytes;
         }
-    }
 
-    if (!nb_clusters) {
-        abort();
-    }
-
-    /* save info needed for meta data update */
-    m->offset = offset;
-    m->n_start = n_start;
-    m->nb_clusters = nb_clusters;
-
-    QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+        /* Allocate, if necessary at a given offset in the image file */
+        ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset,
+                                      &nb_clusters, l2_table);
+        if (ret == -EAGAIN) {
+            goto again;
+        } else if (ret < 0) {
+            goto fail;
+        }
 
-    /* allocate a new cluster */
-    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
-    cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
-    if (cluster_offset < 0) {
-        ret = cluster_offset;
-        goto fail;
+        /* save info needed for meta data update */
+        if (nb_clusters > 0) {
+            int requested_sectors = n_end - keep_clusters * s->cluster_sectors;
+            int avail_sectors = (keep_clusters + nb_clusters)
+                                << (s->cluster_bits - BDRV_SECTOR_BITS);
+
+            *m = (QCowL2Meta) {
+                .cluster_offset = keep_clusters == 0 ?
+                                  alloc_cluster_offset : cluster_offset,
+                .alloc_offset   = alloc_cluster_offset,
+                .offset         = alloc_offset,
+                .n_start        = keep_clusters == 0 ? n_start : 0,
+                .nb_clusters    = nb_clusters,
+                .nb_available   = MIN(requested_sectors, avail_sectors),
+            };
+            qemu_co_queue_init(&m->dependent_requests);
+            QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+        }
     }
 
-out:
+    /* Some cleanup work */
     ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
     if (ret < 0) {
         goto fail_put;
     }
 
-    m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
-    m->cluster_offset = cluster_offset;
+    sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9);
+    if (sectors > n_end) {
+        sectors = n_end;
+    }
 
-    *num = m->nb_available - n_start;
+    assert(sectors > n_start);
+    *num = sectors - n_start;
 
     return 0;
 
 fail:
     qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
 fail_put:
-    QLIST_REMOVE(m, next_in_flight);
+    if (nb_clusters > 0) {
+        QLIST_REMOVE(m, next_in_flight);
+    }
     return ret;
 }
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 5129e3e..e4ac366 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -155,6 +155,7 @@ typedef struct QCowL2Meta
 {
     uint64_t offset;
     uint64_t cluster_offset;
+    uint64_t alloc_offset;
     int n_start;
     int nb_available;
     int nb_clusters;
diff --git a/trace-events b/trace-events
index d818ff1..606d903 100644
--- a/trace-events
+++ b/trace-events
@@ -320,6 +320,7 @@ qcow2_writev_done_part(void *co, int cur_nr_sectors) "co %p cur_nr_sectors %d"
 qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
 
 qcow2_alloc_clusters_offset(void *co, uint64_t offset, int n_start, int n_end) "co %p offet %" PRIx64 " n_start %d n_end %d"
+qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offet %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d"
 qcow2_cluster_alloc_phys(void *co) "co %p"
 qcow2_cluster_link_l2(void *co, int nb_clusters) "co %p nb_clusters %d"
 
-- 
1.7.6.5

next prev parent reply	other threads:[~2012-03-12 15:17 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-12 15:19 [Qemu-devel] [PULL 00/20] Block patches Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 01/20] Group snapshot: Fix format name for backing file Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 02/20] qed: do not evict in-use L2 table cache entries Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 03/20] qcow2: Add some tracing Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 04/20] block: handle -EBUSY in bdrv_commit_all() Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 05/20] qcow2: Add error messages in qcow2_truncate Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 06/20] qemu-iotests: Mark some tests as quick Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 07/20] make check: Add qemu-iotests subset Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 08/20] Add 'make check-block' Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 09/20] use QSIMPLEQ_FOREACH_SAFE when freeing list elements Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 10/20] qapi: complete implementation of unions Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 11/20] rename blockdev-group-snapshot-sync Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 12/20] add mode field to blockdev-snapshot-sync transaction item Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 13/20] qmp: convert blockdev-snapshot-sync to a wrapper around transactions Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 14/20] qcow2: Factor out count_cow_clusters Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 15/20] qcow2: Add qcow2_alloc_clusters_at() Kevin Wolf
2012-03-12 15:19 ` Kevin Wolf [this message]
2012-03-12 15:19 ` [Qemu-devel] [PATCH 17/20] coroutine: adding sigaltstack method (.c source) Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 18/20] coroutine: adding configure choose mechanism for coroutine backend Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 19/20] coroutine: adding configure option for sigaltstack " Kevin Wolf
2012-03-12 15:19 ` [Qemu-devel] [PATCH 20/20] test-coroutine: add performance test for nesting Kevin Wolf
2012-03-13  2:23 ` [Qemu-devel] [PULL 00/20] Block patches Anthony Liguori

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:903454d dfblob:e0fb907 dfblob:5129e3e dfblob:e4ac366
dfblob:d818ff1 dfblob:606d903 )
 OR (
bs:"[Qemu-devel] [PATCH 16/20] qcow2: Reduce number of I/O requests" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1331565591-8414-17-git-send-email-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=anthony@codemonkey.ws \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).