qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
	Anthony Liguori <aliguori@us.ibm.com>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Subject: [Qemu-devel] [PATCH 2/3] qed: add zero write detection support
Date: Fri,  7 Oct 2011 16:49:48 +0100	[thread overview]
Message-ID: <1318002589-11315-3-git-send-email-stefanha@linux.vnet.ibm.com> (raw)
In-Reply-To: <1318002589-11315-1-git-send-email-stefanha@linux.vnet.ibm.com>

The QED image format is able to efficiently represent clusters
containing zeroes with a magic offset value.  This patch implements zero
write detection for allocating writes so that image streaming can copy
over zero clusters from a backing file without expanding the image file
unnecessarily.

This is based on code by Anthony Liguori <aliguori@us.ibm.com>.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 block/qed.c |   81 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index e87dc4d..ec3113b 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -947,9 +947,8 @@ static void qed_aio_write_l1_update(void *opaque, int ret)
 /**
  * Update L2 table with new cluster offsets and write them out
  */
-static void qed_aio_write_l2_update(void *opaque, int ret)
+static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
 {
-    QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
     bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
     int index;
@@ -965,7 +964,7 @@ static void qed_aio_write_l2_update(void *opaque, int ret)
 
     index = qed_l2_index(s, acb->cur_pos);
     qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
-                         acb->cur_cluster);
+                         offset);
 
     if (need_alloc) {
         /* Write out the whole new L2 table */
@@ -982,6 +981,51 @@ err:
     qed_aio_complete(acb, ret);
 }
 
+static void qed_aio_write_l2_update_cb(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
+}
+
+/**
+ * Determine if we have a zero write to a block of clusters
+ *
+ * We validate that the write is aligned to a cluster boundary, and that it's
+ * a multiple of cluster size with all zeros.
+ */
+static bool qed_is_zero_write(QEDAIOCB *acb)
+{
+    BDRVQEDState *s = acb_to_s(acb);
+    int i;
+
+    if (!qed_offset_is_cluster_aligned(s, acb->cur_pos)) {
+        return false;
+    }
+
+    if (!qed_offset_is_cluster_aligned(s, acb->cur_qiov.size)) {
+        return false;
+    }
+
+    for (i = 0; i < acb->cur_qiov.niov; i++) {
+        struct iovec *iov = &acb->cur_qiov.iov[i];
+        uint64_t *v;
+        int j;
+
+        if ((iov->iov_len & 0x07)) {
+            return false;
+        }
+
+        v = iov->iov_base;
+        for (j = 0; j < iov->iov_len; j += sizeof(v[0])) {
+            if (v[j >> 3]) {
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
 /**
  * Flush new data clusters before updating the L2 table
  *
@@ -996,7 +1040,7 @@ static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
     QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
 
-    if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update, opaque)) {
+    if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
         qed_aio_complete(acb, -EIO);
     }
 }
@@ -1026,7 +1070,7 @@ static void qed_aio_write_main(void *opaque, int ret)
         if (s->bs->backing_hd) {
             next_fn = qed_aio_write_flush_before_l2_update;
         } else {
-            next_fn = qed_aio_write_l2_update;
+            next_fn = qed_aio_write_l2_update_cb;
         }
     }
 
@@ -1092,6 +1136,18 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
     return !(s->header.features & QED_F_NEED_CHECK);
 }
 
+static void qed_aio_write_zero_cluster(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+
+    if (ret) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+
+    qed_aio_write_l2_update(acb, 0, 1);
+}
+
 /**
  * Write new data cluster
  *
@@ -1103,6 +1159,7 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
 static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
+    BlockDriverCompletionFunc *cb;
 
     /* Cancel timer when the first allocating request comes in */
     if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
@@ -1120,14 +1177,21 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 
     acb->cur_nclusters = qed_bytes_to_clusters(s,
             qed_offset_into_cluster(s, acb->cur_pos) + len);
-    acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
     qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
+    /* Zero write detection */
+    if (s->bs->use_zero_detection && qed_is_zero_write(acb)) {
+        cb = qed_aio_write_zero_cluster;
+    } else {
+        cb = qed_aio_write_prefill;
+        acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
+    }
+
     if (qed_should_set_need_check(s)) {
         s->header.features |= QED_F_NEED_CHECK;
-        qed_write_header(s, qed_aio_write_prefill, acb);
+        qed_write_header(s, cb, acb);
     } else {
-        qed_aio_write_prefill(acb, 0);
+        cb(acb, 0);
     }
 }
 
@@ -1474,6 +1538,7 @@ static BlockDriver bdrv_qed = {
     .format_name              = "qed",
     .instance_size            = sizeof(BDRVQEDState),
     .create_options           = qed_create_options,
+    .has_zero_detection       = true,
 
     .bdrv_probe               = bdrv_qed_probe,
     .bdrv_open                = bdrv_qed_open,
-- 
1.7.6.3

  parent reply	other threads:[~2011-10-07 15:50 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-10-07 15:49 [Qemu-devel] [PATCH 0/3] block: zero write detection Stefan Hajnoczi
2011-10-07 15:49 ` [Qemu-devel] [PATCH 1/3] block: add zero write detection interface Stefan Hajnoczi
2011-10-07 15:49 ` Stefan Hajnoczi [this message]
2011-10-07 15:49 ` [Qemu-devel] [PATCH 3/3] qemu-io: add zero write detection option Stefan Hajnoczi
2011-10-09  9:52 ` [Qemu-devel] [PATCH 0/3] block: zero write detection Mars.cao
2011-10-11 13:46 ` Kevin Wolf
2011-10-12 10:39   ` Stefan Hajnoczi
2011-10-12 11:03     ` Kevin Wolf
2011-10-12 11:59       ` Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1318002589-11315-3-git-send-email-stefanha@linux.vnet.ibm.com \
    --to=stefanha@linux.vnet.ibm.com \
    --cc=aliguori@us.ibm.com \
    --cc=kwolf@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).