From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 24/48] replay: introduce block devices record/replay
Date: Tue, 29 Mar 2016 17:08:24 +0200 [thread overview]
Message-ID: <1459264128-12761-25-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1459264128-12761-1-git-send-email-kwolf@redhat.com>
From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
This patch introduces a block driver that implements recording
and replaying of block device operations.
All block completion operations are added to a queue.
The queue is flushed at checkpoints, and information about the processed
requests is recorded in the log. In the replay phase the queue is matched
against the events read from the log. Therefore block device requests are
processed deterministically.
Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
[ kwolf: Rebased onto modified and already applied part of the series ]
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/Makefile.objs | 2 +-
block/blkreplay.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++
docs/replay.txt | 20 ++++++
include/sysemu/replay.h | 2 +
replay/replay-events.c | 20 ++++++
replay/replay-internal.h | 1 +
replay/replay.c | 2 +-
stubs/replay.c | 4 ++
8 files changed, 208 insertions(+), 2 deletions(-)
create mode 100755 block/blkreplay.c
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 3426a15..44a5416 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -4,7 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
block-obj-y += quorum.o
-block-obj-y += parallels.o blkdebug.o blkverify.o
+block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
block-obj-y += block-backend.o snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkreplay.c b/block/blkreplay.c
new file mode 100755
index 0000000..df81de0
--- /dev/null
+++ b/block/blkreplay.c
@@ -0,0 +1,159 @@
+/*
+ * Block protocol for record/replay
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ * of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "sysemu/replay.h"
+
+typedef struct Request {
+ Coroutine *co;
+ QEMUBH *bh;
+} Request;
+
+/* Next request id.
+ This counter is global, because requests from different
+ block devices should not get overlapping ids. */
+static uint64_t request_id;
+
+static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ Error *local_err = NULL;
+ int ret;
+
+ /* Open the image file */
+ bs->file = bdrv_open_child(NULL, options, "image",
+ bs, &child_file, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ ret = 0;
+fail:
+ if (ret < 0) {
+ bdrv_unref_child(bs, bs->file);
+ }
+ return ret;
+}
+
+static void blkreplay_close(BlockDriverState *bs)
+{
+}
+
+static int64_t blkreplay_getlength(BlockDriverState *bs)
+{
+ return bdrv_getlength(bs->file->bs);
+}
+
+/* This BH is used to synchronize the return from coroutines.
+ It re-enters the yielded coroutine, which then finishes its execution.
+ The BH is run aligned to a replay checkpoint, therefore
+ record and replay will always finish coroutines deterministically.
+*/
+static void blkreplay_bh_cb(void *opaque)
+{
+ Request *req = opaque;
+ qemu_coroutine_enter(req->co, NULL);
+ qemu_bh_delete(req->bh);
+ g_free(req);
+}
+
+static void block_request_create(uint64_t reqid, BlockDriverState *bs,
+ Coroutine *co)
+{
+ Request *req = g_new(Request, 1);
+ *req = (Request) {
+ .co = co,
+ .bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req),
+ };
+ replay_block_event(req->bh, reqid);
+}
+
+static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_flush(bs->file->bs);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static BlockDriver bdrv_blkreplay = {
+ .format_name = "blkreplay",
+ .protocol_name = "blkreplay",
+ .instance_size = 0,
+
+ .bdrv_file_open = blkreplay_open,
+ .bdrv_close = blkreplay_close,
+ .bdrv_getlength = blkreplay_getlength,
+
+ .bdrv_co_readv = blkreplay_co_readv,
+ .bdrv_co_writev = blkreplay_co_writev,
+
+ .bdrv_co_write_zeroes = blkreplay_co_write_zeroes,
+ .bdrv_co_discard = blkreplay_co_discard,
+ .bdrv_co_flush = blkreplay_co_flush,
+};
+
+static void bdrv_blkreplay_init(void)
+{
+ bdrv_register(&bdrv_blkreplay);
+}
+
+block_init(bdrv_blkreplay_init);
diff --git a/docs/replay.txt b/docs/replay.txt
index 3cedc25..779c6c0 100644
--- a/docs/replay.txt
+++ b/docs/replay.txt
@@ -175,3 +175,23 @@ Sometimes the block layer uses asynchronous callbacks for its internal purposes
(like reading or writing VM snapshots or disk image cluster tables). In this
case bottom halves are not marked as "replayable" and do not saved
into the log.
+
+Block devices
+-------------
+
+The block device record/replay module intercepts calls to
+bdrv coroutine functions at the top of the block driver stack.
+To record and replay block operations, the drive must be configured
+as follows:
+ -drive file=disk.qcow,if=none,id=img-direct
+ -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay
+ -device ide-hd,drive=img-blkreplay
+
+The blkreplay driver should be inserted between the disk image and the
+virtual driver controller, so all disk requests can be recorded and replayed.
+
+All block completion operations are added to the queue in the coroutines.
+The queue is flushed at checkpoints, and information about the processed
+requests is recorded in the log. In the replay phase the queue is matched
+against the events read from the log. Therefore block device requests are
+processed deterministically.
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index e798919..57492da 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -114,6 +114,8 @@ void replay_bh_schedule_event(QEMUBH *bh);
void replay_input_event(QemuConsole *src, InputEvent *evt);
/*! Adds input sync event to the queue */
void replay_input_sync_event(void);
+/*! Adds block layer event to the queue */
+void replay_block_event(QEMUBH *bh, uint64_t id);
/* Character device */
diff --git a/replay/replay-events.c b/replay/replay-events.c
index 873e435..3807245 100644
--- a/replay/replay-events.c
+++ b/replay/replay-events.c
@@ -51,6 +51,9 @@ static void replay_run_event(Event *event)
case REPLAY_ASYNC_EVENT_CHAR_READ:
replay_event_char_read_run(event->opaque);
break;
+ case REPLAY_ASYNC_EVENT_BLOCK:
+ aio_bh_call(event->opaque);
+ break;
default:
error_report("Replay: invalid async event ID (%d) in the queue",
event->event_kind);
@@ -153,6 +156,15 @@ void replay_add_input_sync_event(void)
replay_add_event(REPLAY_ASYNC_EVENT_INPUT_SYNC, NULL, NULL, 0);
}
+void replay_block_event(QEMUBH *bh, uint64_t id)
+{
+ if (replay_mode != REPLAY_MODE_NONE && events_enabled) {
+ replay_add_event(REPLAY_ASYNC_EVENT_BLOCK, bh, NULL, id);
+ } else {
+ qemu_bh_schedule(bh);
+ }
+}
+
static void replay_save_event(Event *event, int checkpoint)
{
if (replay_mode != REPLAY_MODE_PLAY) {
@@ -174,6 +186,9 @@ static void replay_save_event(Event *event, int checkpoint)
case REPLAY_ASYNC_EVENT_CHAR_READ:
replay_event_char_read_save(event->opaque);
break;
+ case REPLAY_ASYNC_EVENT_BLOCK:
+ replay_put_qword(event->id);
+ break;
default:
error_report("Unknown ID %" PRId64 " of replay event", event->id);
exit(1);
@@ -232,6 +247,11 @@ static Event *replay_read_event(int checkpoint)
event->event_kind = read_event_kind;
event->opaque = replay_event_char_read_load();
return event;
+ case REPLAY_ASYNC_EVENT_BLOCK:
+ if (read_id == -1) {
+ read_id = replay_get_qword();
+ }
+ break;
default:
error_report("Unknown ID %d of replay event", read_event_kind);
exit(1);
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index 11f9a85..efbf14c 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -49,6 +49,7 @@ enum ReplayAsyncEventKind {
REPLAY_ASYNC_EVENT_INPUT,
REPLAY_ASYNC_EVENT_INPUT_SYNC,
REPLAY_ASYNC_EVENT_CHAR_READ,
+ REPLAY_ASYNC_EVENT_BLOCK,
REPLAY_ASYNC_COUNT
};
diff --git a/replay/replay.c b/replay/replay.c
index fcfde4f..810db14 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -20,7 +20,7 @@
/* Current version of the replay mechanism.
Increase it when file format changes. */
-#define REPLAY_VERSION 0xe02003
+#define REPLAY_VERSION 0xe02004
/* Size of replay log header */
#define HEADER_SIZE (sizeof(uint32_t) + sizeof(uint64_t))
diff --git a/stubs/replay.c b/stubs/replay.c
index 2f1a6dc..de9fa1e 100644
--- a/stubs/replay.c
+++ b/stubs/replay.c
@@ -63,3 +63,7 @@ void replay_char_read_all_save_buf(uint8_t *buf, int offset)
{
abort();
}
+
+void replay_block_event(QEMUBH *bh, uint64_t id)
+{
+}
--
1.8.3.1
next prev parent reply other threads:[~2016-03-29 15:10 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-29 15:08 [Qemu-devel] [PULL 00/48] Block layer patches Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 01/48] block: Remove bdrv_make_anon() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 02/48] block: Remove copy-on-read from bdrv_move_feature_fields() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 03/48] block: Remove dirty bitmaps " Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 04/48] block: Remove cache.writeback from blockdev-add Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 05/48] block: Make backing files always writeback Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 06/48] block: Reject writethrough mode except at the root Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 07/48] block/raw-posix.c: Make physical devices usable in QEMU under Mac OS X host Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 08/48] block: Remove blk_set_bs() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 09/48] block/qapi: make two printf() formats literal Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 10/48] block/qapi: fix unbounded stack for dump_qdict Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 11/48] block/qapi: Set s->device in bdrv_query_stats() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 12/48] block/qapi: Pass bdrv_query_blk_stats() s->stats Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 13/48] block: add flag to indicate that no I/O will be performed Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 14/48] qemu-img/qemu-io: don't prompt for passwords if not required Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 15/48] tests: redirect stderr to stdout for iotests Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 16/48] tests: refactor python I/O tests helper main method Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 17/48] tests: add output filter to python I/O tests helper Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 18/48] block: add generic full disk encryption driver Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 19/48] block: move encryption deprecation warning into qcow code Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 20/48] block: an interoperability test for luks vs dm-crypt/cryptsetup Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 21/48] block: add flush callback Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 22/48] replay: bh scheduling fix Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 23/48] replay: fix error message Kevin Wolf
2016-03-29 15:08 ` Kevin Wolf [this message]
2016-03-29 15:08 ` [Qemu-devel] [PULL 25/48] block: Add bdrv_parse_cache_mode() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 26/48] qemu-nbd: Call blk_set_enable_write_cache() explicitly Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 27/48] qemu-io: " Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 28/48] qemu-img: Expand all BDRV_O_FLAGS uses Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 29/48] qemu-img: Call blk_set_enable_write_cache() explicitly Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 30/48] xen_disk: " Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 31/48] block: blockdev_init(): " Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 32/48] block: Always set writeback mode in blk_new_open() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 33/48] block: Handle flush error in bdrv_pwrite_sync() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 34/48] block: Move enable_write_cache to BB level Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 35/48] block/qapi: Use blk_enable_write_cache() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 36/48] block: Introduce bdrv_co_writev_flags() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 37/48] iscsi: Support BDRV_REQ_FUA Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 38/48] nbd: " Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 39/48] raw: " Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 40/48] block: Use bdrv_parse_cache_mode() in drive_init() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 41/48] qemu-io: Use bdrv_parse_cache_mode() in reopen_f() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 42/48] block: Remove bdrv_parse_cache_flags() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 43/48] block: Remove BDRV_O_CACHE_WB Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 44/48] block: Remove bdrv_(set_)enable_write_cache() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 45/48] qemu-img: Fix preallocation with -S 0 for convert Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 46/48] block/null-{co, aio}: Allow reading zeroes Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 47/48] block/null-{co, aio}: Implement get_block_status() Kevin Wolf
2016-03-29 15:08 ` [Qemu-devel] [PULL 48/48] iotests: Test qemu-img convert -S 0 behavior Kevin Wolf
2016-04-07 14:40 ` Paolo Bonzini
2016-04-08 1:18 ` Fam Zheng
2016-04-08 10:21 ` Kevin Wolf
2016-04-08 10:42 ` Fam Zheng
2016-03-29 19:56 ` [Qemu-devel] [PULL 00/48] Block layer patches Peter Maydell
2016-03-30 8:57 ` Kevin Wolf
2016-03-30 11:29 ` Peter Maydell
2016-03-30 12:07 ` Kevin Wolf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1459264128-12761-25-git-send-email-kwolf@redhat.com \
--to=kwolf@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).