From: Kevin Wolf <kwolf@redhat.com>
To: anthony@codemonkey.ws
Cc: kwolf@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 09/18] qed: Periodically flush and clear need check bit
Date: Thu, 19 May 2011 14:33:23 +0200 [thread overview]
Message-ID: <1305808412-16994-10-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1305808412-16994-1-git-send-email-kwolf@redhat.com>
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
One strategy to limit the startup delay of the consistency check when
opening image files is to ensure that the file is marked dirty for as
little time as possible.
QED currently marks the image dirty when the first allocating write
request is issued and clears the dirty bit again when the image is
cleanly closed. In practice that means the image is marked dirty for
most of a guest's lifetime and prone to being in a dirty state upon
crash or power failure.
It is safe to clear the dirty bit after all allocating write requests
have completed and a flush has been performed. This patch adds a timer
that is started after the last allocating write request completes. When
the timer fires, it flushes the image and then clears the dirty bit. The
timer is set to 5 seconds and is cancelled upon arrival of a new
allocating write request.
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/qed.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
block/qed.h | 7 ++++
trace-events | 3 ++
3 files changed, 112 insertions(+), 2 deletions(-)
diff --git a/block/qed.c b/block/qed.c
index c8c5930..d8d6ea2 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -12,6 +12,7 @@
*
*/
+#include "qemu-timer.h"
#include "trace.h"
#include "qed.h"
#include "qerror.h"
@@ -291,6 +292,88 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
static void qed_aio_next_io(void *opaque, int ret);
+static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+{
+ assert(!s->allocating_write_reqs_plugged);
+
+ s->allocating_write_reqs_plugged = true;
+}
+
+static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
+{
+ QEDAIOCB *acb;
+
+ assert(s->allocating_write_reqs_plugged);
+
+ s->allocating_write_reqs_plugged = false;
+
+ acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+ if (acb) {
+ qed_aio_next_io(acb, 0);
+ }
+}
+
+static void qed_finish_clear_need_check(void *opaque, int ret)
+{
+ /* Do nothing */
+}
+
+static void qed_flush_after_clear_need_check(void *opaque, int ret)
+{
+ BDRVQEDState *s = opaque;
+
+ bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
+
+ /* No need to wait until flush completes */
+ qed_unplug_allocating_write_reqs(s);
+}
+
+static void qed_clear_need_check(void *opaque, int ret)
+{
+ BDRVQEDState *s = opaque;
+
+ if (ret) {
+ qed_unplug_allocating_write_reqs(s);
+ return;
+ }
+
+ s->header.features &= ~QED_F_NEED_CHECK;
+ qed_write_header(s, qed_flush_after_clear_need_check, s);
+}
+
+static void qed_need_check_timer_cb(void *opaque)
+{
+ BDRVQEDState *s = opaque;
+
+ /* The timer should only fire when allocating writes have drained */
+ assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
+
+ trace_qed_need_check_timer_cb(s);
+
+ qed_plug_allocating_write_reqs(s);
+
+ /* Ensure writes are on disk before clearing flag */
+ bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+}
+
+static void qed_start_need_check_timer(BDRVQEDState *s)
+{
+ trace_qed_start_need_check_timer(s);
+
+ /* Use vm_clock so we don't alter the image file while suspended for
+ * migration.
+ */
+ qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
+ get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
+}
+
+/* It's okay to call this multiple times or when no timer is started */
+static void qed_cancel_need_check_timer(BDRVQEDState *s)
+{
+ trace_qed_cancel_need_check_timer(s);
+ qemu_del_timer(s->need_check_timer);
+}
+
static int bdrv_qed_open(BlockDriverState *bs, int flags)
{
BDRVQEDState *s = bs->opaque;
@@ -406,7 +489,10 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
BdrvCheckResult result = {0};
ret = qed_check(s, &result, true);
- if (!ret && !result.corruptions && !result.check_errors) {
+ if (ret) {
+ goto out;
+ }
+ if (!result.corruptions && !result.check_errors) {
/* Ensure fixes reach storage before clearing check bit */
bdrv_flush(s->bs);
@@ -416,6 +502,9 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
}
}
+ s->need_check_timer = qemu_new_timer_ns(vm_clock,
+ qed_need_check_timer_cb, s);
+
out:
if (ret) {
qed_free_l2_cache(&s->l2_cache);
@@ -428,6 +517,9 @@ static void bdrv_qed_close(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
+ qed_cancel_need_check_timer(s);
+ qemu_free_timer(s->need_check_timer);
+
/* Ensure writes reach stable storage */
bdrv_flush(bs->file);
@@ -809,6 +901,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
if (acb) {
qed_aio_next_io(acb, 0);
+ } else if (s->header.features & QED_F_NEED_CHECK) {
+ qed_start_need_check_timer(s);
}
}
}
@@ -1014,11 +1108,17 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
{
BDRVQEDState *s = acb_to_s(acb);
+ /* Cancel timer when the first allocating request comes in */
+ if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
+ qed_cancel_need_check_timer(s);
+ }
+
/* Freeze this request if another allocating write is in progress */
if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
}
- if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+ if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
+ s->allocating_write_reqs_plugged) {
return; /* wait for existing request to finish */
}
diff --git a/block/qed.h b/block/qed.h
index 1d1421f..388fdb3 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -78,6 +78,9 @@ enum {
QED_MIN_TABLE_SIZE = 1, /* in clusters */
QED_MAX_TABLE_SIZE = 16,
QED_DEFAULT_TABLE_SIZE = 4,
+
+ /* Delay to flush and clean image after last allocating write completes */
+ QED_NEED_CHECK_TIMEOUT = 5, /* in seconds */
};
typedef struct {
@@ -157,6 +160,10 @@ typedef struct {
/* Allocating write request queue */
QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
+ bool allocating_write_reqs_plugged;
+
+ /* Periodic flush and clear need check flag */
+ QEMUTimer *need_check_timer;
} BDRVQEDState;
enum {
diff --git a/trace-events b/trace-events
index a00b63c..385cb00 100644
--- a/trace-events
+++ b/trace-events
@@ -220,6 +220,9 @@ disable qed_write_table(void *s, uint64_t offset, void *table, unsigned int inde
disable qed_write_table_cb(void *s, void *table, int flush, int ret) "s %p table %p flush %d ret %d"
# block/qed.c
+disable qed_need_check_timer_cb(void *s) "s %p"
+disable qed_start_need_check_timer(void *s) "s %p"
+disable qed_cancel_need_check_timer(void *s) "s %p"
disable qed_aio_complete(void *s, void *acb, int ret) "s %p acb %p ret %d"
disable qed_aio_setup(void *s, void *acb, int64_t sector_num, int nb_sectors, void *opaque, int is_write) "s %p acb %p sector_num %"PRId64" nb_sectors %d opaque %p is_write %d"
disable qed_aio_next_io(void *s, void *acb, int ret, uint64_t cur_pos) "s %p acb %p ret %d cur_pos %"PRIu64""
--
1.7.2.3
next prev parent reply other threads:[~2011-05-19 12:31 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-05-19 12:33 [Qemu-devel] [PULL 00/18] Block patches Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 01/18] ide: cleanup warnings Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 02/18] posix-aio-compat: Fix idle_threads counter Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 03/18] qemu-img.c: Remove superfluous parenthesis Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 04/18] hw/xen_disk: Remove unused local variable Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 05/18] ide: Turn debug messages into assertions Kevin Wolf
2011-05-26 21:12 ` Luiz Capitulino
2011-05-27 6:39 ` Kevin Wolf
2011-05-27 13:12 ` Luiz Capitulino
2011-06-01 13:44 ` Luiz Capitulino
2011-06-01 14:02 ` Kevin Wolf
2011-06-01 14:07 ` Luiz Capitulino
2011-06-01 15:32 ` Markus Armbruster
2011-06-06 9:08 ` Kevin Wolf
2011-06-06 11:57 ` Markus Armbruster
2011-06-06 12:56 ` Kevin Wolf
2011-06-06 13:52 ` Markus Armbruster
2011-06-06 15:54 ` Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 06/18] Add documentation for qemu_progress_{init, print}() Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 07/18] ahci: Fix crashes on duplicate BH registration Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 08/18] qemu-tool: Stub out qemu-timer functions Kevin Wolf
2011-05-19 12:33 ` Kevin Wolf [this message]
2011-05-19 12:33 ` [Qemu-devel] [PATCH 10/18] qemu_img: is_not_zero() optimization Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 11/18] qed: support for growing images Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 12/18] ide: Split qdev "ide-drive" into "ide-hd" and "ide-cd" Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 13/18] scsi: Split qdev "scsi-disk" into "scsi-hd" and "scsi-cd" Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 14/18] defaults: ide-cd, ide-hd and scsi-cd devices suppress default CD-ROM Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 15/18] block QMP: Deprecate query-block's "type", drop info block's "type=" Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 16/18] blockdev: Store -drive option media in DriveInfo Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 17/18] block: Remove type hint, it's guest matter, doesn't belong here Kevin Wolf
2011-05-19 12:33 ` [Qemu-devel] [PATCH 18/18] ahci: Fix non-NCQ accesses for LBA > 16bits Kevin Wolf
2011-05-19 15:09 ` [Qemu-devel] [PULL 00/18] Block patches Anthony Liguori
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1305808412-16994-10-git-send-email-kwolf@redhat.com \
--to=kwolf@redhat.com \
--cc=anthony@codemonkey.ws \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).