From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
Anthony Liguori <aliguori@us.ibm.com>,
Stefan Hajnoczi <stefanha@redhat.com>
Subject: [Qemu-devel] [PATCH 21/23] qcow2: Batch discards
Date: Mon, 24 Jun 2013 11:10:33 +0200 [thread overview]
Message-ID: <1372065035-19601-22-git-send-email-stefanha@redhat.com> (raw)
In-Reply-To: <1372065035-19601-1-git-send-email-stefanha@redhat.com>
From: Kevin Wolf <kwolf@redhat.com>
This optimises the discard operation for freed clusters by batching
discard requests (both snapshot deletion and bdrv_discard end up
updating the refcounts cluster by cluster).
Note that we don't discard asynchronously, but keep s->lock held. This
is to avoid that a freed cluster is reallocated and written to while the
discard is still in flight.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/qcow2-cluster.c | 22 +++++++++++---
block/qcow2-refcount.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++--
block/qcow2.c | 1 +
block/qcow2.h | 11 +++++++
4 files changed, 109 insertions(+), 7 deletions(-)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 3191d6b..cca76d4 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1377,18 +1377,25 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
nb_clusters = size_to_clusters(s, end_offset - offset);
+ s->cache_discards = true;
+
/* Each L2 table is handled by its own loop iteration */
while (nb_clusters > 0) {
ret = discard_single_l2(bs, offset, nb_clusters);
if (ret < 0) {
- return ret;
+ goto fail;
}
nb_clusters -= ret;
offset += (ret * s->cluster_size);
}
- return 0;
+ ret = 0;
+fail:
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
+ return ret;
}
/*
@@ -1450,15 +1457,22 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
/* Each L2 table is handled by its own loop iteration */
nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
+ s->cache_discards = true;
+
while (nb_clusters > 0) {
ret = zero_single_l2(bs, offset, nb_clusters);
if (ret < 0) {
- return ret;
+ goto fail;
}
nb_clusters -= ret;
offset += (ret * s->cluster_size);
}
- return 0;
+ ret = 0;
+fail:
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
+ return ret;
}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 7488988..1244693 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -420,6 +420,74 @@ fail_block:
return ret;
}
+void qcow2_process_discards(BlockDriverState *bs, int ret)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2DiscardRegion *d, *next;
+
+ QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
+ QTAILQ_REMOVE(&s->discards, d, next);
+
+ /* Discard is optional, ignore the return value */
+ if (ret >= 0) {
+ bdrv_discard(bs->file,
+ d->offset >> BDRV_SECTOR_BITS,
+ d->bytes >> BDRV_SECTOR_BITS);
+ }
+
+ g_free(d);
+ }
+}
+
+static void update_refcount_discard(BlockDriverState *bs,
+ uint64_t offset, uint64_t length)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2DiscardRegion *d, *p, *next;
+
+ QTAILQ_FOREACH(d, &s->discards, next) {
+ uint64_t new_start = MIN(offset, d->offset);
+ uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
+
+ if (new_end - new_start <= length + d->bytes) {
+ /* There can't be any overlap, areas ending up here have no
+ * references any more and therefore shouldn't get freed another
+ * time. */
+ assert(d->bytes + length == new_end - new_start);
+ d->offset = new_start;
+ d->bytes = new_end - new_start;
+ goto found;
+ }
+ }
+
+ d = g_malloc(sizeof(*d));
+ *d = (Qcow2DiscardRegion) {
+ .bs = bs,
+ .offset = offset,
+ .bytes = length,
+ };
+ QTAILQ_INSERT_TAIL(&s->discards, d, next);
+
+found:
+ /* Merge discard requests if they are adjacent now */
+ QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
+ if (p == d
+ || p->offset > d->offset + d->bytes
+ || d->offset > p->offset + p->bytes)
+ {
+ continue;
+ }
+
+ /* Still no overlap possible */
+ assert(p->offset == d->offset + d->bytes
+ || d->offset == p->offset + p->bytes);
+
+ QTAILQ_REMOVE(&s->discards, p, next);
+ d->offset = MIN(d->offset, p->offset);
+ d->bytes += p->bytes;
+ }
+}
+
/* XXX: cache several refcount block clusters ? */
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
@@ -488,15 +556,18 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
s->free_cluster_index = cluster_index;
}
refcount_block[block_index] = cpu_to_be16(refcount);
+
if (refcount == 0 && s->discard_passthrough[type]) {
- /* Try discarding, ignore errors */
- /* FIXME Doing this cluster by cluster will be painfully slow */
- bdrv_discard(bs->file, cluster_offset, 1);
+ update_refcount_discard(bs, cluster_offset, s->cluster_size);
}
}
ret = 0;
fail:
+ if (!s->cache_discards) {
+ qcow2_process_discards(bs, ret);
+ }
+
/* Write last changed block to disk */
if (refcount_block) {
int wret;
@@ -755,6 +826,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
l1_table = NULL;
l1_size2 = l1_size * sizeof(uint64_t);
+ s->cache_discards = true;
+
/* WARNING: qcow2_snapshot_goto relies on this function not using the
* l1_table_offset when it is the current s->l1_table_offset! Be careful
* when changing this! */
@@ -867,6 +940,9 @@ fail:
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
}
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
/* Update L1 only if it isn't deleted anyway (addend = -1) */
if (ret == 0 && addend >= 0 && l1_modified) {
for (i = 0; i < l1_size; i++) {
diff --git a/block/qcow2.c b/block/qcow2.c
index ef8a2ca..9383990 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -486,6 +486,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
}
QLIST_INIT(&s->cluster_allocs);
+ QTAILQ_INIT(&s->discards);
/* read qcow2 extensions */
if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
diff --git a/block/qcow2.h b/block/qcow2.h
index 6f91b9a..3b2d5cd 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -147,6 +147,13 @@ typedef struct Qcow2Feature {
char name[46];
} QEMU_PACKED Qcow2Feature;
+typedef struct Qcow2DiscardRegion {
+ BlockDriverState *bs;
+ uint64_t offset;
+ uint64_t bytes;
+ QTAILQ_ENTRY(Qcow2DiscardRegion) next;
+} Qcow2DiscardRegion;
+
typedef struct BDRVQcowState {
int cluster_bits;
int cluster_size;
@@ -199,6 +206,8 @@ typedef struct BDRVQcowState {
size_t unknown_header_fields_size;
void* unknown_header_fields;
QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
+ QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
+ bool cache_discards;
} BDRVQcowState;
/* XXX: use std qcow open function ? */
@@ -374,6 +383,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
+void qcow2_process_discards(BlockDriverState *bs, int ret);
+
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
--
1.8.1.4
next prev parent reply other threads:[~2013-06-24 9:11 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-06-24 9:10 [Qemu-devel] [PULL 00/23] Block patches Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 01/23] ide: Add handler to ide_cmd_table Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 02/23] ide: Convert WIN_DSM to ide_cmd_table handler Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 03/23] ide: Convert WIN_IDENTIFY " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 04/23] ide: Convert cmd_nop commands " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 05/23] ide: Convert verify " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 06/23] ide: Convert read/write multiple " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 07/23] ide: Convert PIO read/write " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 08/23] ide: Convert DMA " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 09/23] ide: Convert READ NATIVE MAX ADDRESS " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 10/23] ide: Convert CHECK POWER MDOE " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 11/23] ide: Convert SET FEATURES " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 12/23] ide: Convert FLUSH CACHE " Stefan Hajnoczi
2013-07-03 21:41 ` Alex Williamson
2013-07-03 21:51 ` Alex Williamson
2013-07-04 7:58 ` Kevin Wolf
2013-06-24 9:10 ` [Qemu-devel] [PATCH 13/23] ide: Convert SEEK " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 14/23] ide: Convert ATAPI commands " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 15/23] ide: Convert CF-ATA " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 16/23] ide: Convert SMART " Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 17/23] ide: Clean up ide_exec_cmd() Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 18/23] Revert "block: Disable driver-specific options for 1.5" Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 19/23] qcow2: Add refcount update reason to all callers Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 20/23] qcow2: Options to enable discard for freed clusters Stefan Hajnoczi
2013-06-24 9:10 ` Stefan Hajnoczi [this message]
2013-06-24 9:10 ` [Qemu-devel] [PATCH 22/23] block: Always enable discard on the protocol level Stefan Hajnoczi
2013-06-24 9:10 ` [Qemu-devel] [PATCH 23/23] vmdk: refuse to open higher version than supported Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1372065035-19601-22-git-send-email-stefanha@redhat.com \
--to=stefanha@redhat.com \
--cc=aliguori@us.ibm.com \
--cc=kwolf@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).