From: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
To: "qemu-devel@nongnu.org" <qemu-devel@nongnu.org>,
"qemu-block@nongnu.org" <qemu-block@nongnu.org>
Cc: "jcody@redhat.com" <jcody@redhat.com>,
"kwolf@redhat.com" <kwolf@redhat.com>,
"mreitz@redhat.com" <mreitz@redhat.com>,
Denis Lunev <den@virtuozzo.com>,
Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Subject: Re: [Qemu-devel] [PATCH RFC 1/1] Stream block job involves copy-on-read filter
Date: Thu, 31 Jan 2019 14:02:14 +0000 [thread overview]
Message-ID: <7ccbcc44-9d41-2df9-e1af-88715f81c99a@virtuozzo.com> (raw)
In-Reply-To: <1548244464-633186-1-git-send-email-andrey.shinkevich@virtuozzo.com>
PINGing
Please help!
On 23/01/2019 14:54, Andrey Shinkevich wrote:
> The copy-on-read filter driver is applied to block-stream operations.
> The 'test_stream_parallel' in the file tests/qemu-iotests/030 runs
> jobs that use nodes for streaming in parallel through the backing chain.
> We've got filters being inserted to and removed from the backing chain
> while jobs are running. As a result, a filter node may be passed as the
> 'base' parameter to the stream_start() function when the base node name
> is not specified (the base node is identified by its file name which is
> the same to the related filter node).
> Another issue is that a function keeps the pointer to the filter BDS
> object that can be replaced and deleted after the co-routine switch.
> For example, the function backing_bs() returns the pointer to the
> backing BDS and the BDS reference counter is not incremented.
> A solution (or workaround) made with the given patch for block-stream
> job helps to pass all the iotests in the file tests/qemu-iotests/030.
> Any piece of advice how to amend the solution will be appreciated.
> I am looking forward to hearing from you.
>
> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
> ---
> block/stream.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 143 insertions(+), 11 deletions(-)
>
> diff --git a/block/stream.c b/block/stream.c
> index 7a49ac0..18e51b3 100644
> --- a/block/stream.c
> +++ b/block/stream.c
> @@ -16,6 +16,7 @@
> #include "block/block_int.h"
> #include "block/blockjob_int.h"
> #include "qapi/error.h"
> +#include "qapi/qmp/qdict.h"
> #include "qapi/qmp/qerror.h"
> #include "qemu/ratelimit.h"
> #include "sysemu/block-backend.h"
> @@ -35,8 +36,26 @@ typedef struct StreamBlockJob {
> BlockdevOnError on_error;
> char *backing_file_str;
> bool bs_read_only;
> + BlockDriverState *cor_filter_bs;
> + BlockDriverState *target_bs;
> } StreamBlockJob;
>
> +static BlockDriverState *child_file_bs(BlockDriverState *bs)
> +{
> + return bs->file ? bs->file->bs : NULL;
> +}
> +
> +static BlockDriverState *skip_filter(BlockDriverState *bs)
> +{
> + BlockDriverState *ret_bs = bs;
> +
> + while (ret_bs && ret_bs->drv && ret_bs->drv->is_filter) {
> + ret_bs = child_file_bs(ret_bs);
> + }
> +
> + return ret_bs;
> +}
> +
> static int coroutine_fn stream_populate(BlockBackend *blk,
> int64_t offset, uint64_t bytes,
> void *buf)
> @@ -51,14 +70,12 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
> qemu_iovec_init_external(&qiov, &iov, 1);
>
> /* Copy-on-read the unallocated clusters */
> - return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
> + return blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
> }
>
> -static int stream_prepare(Job *job)
> +static int stream_change_backing_file(StreamBlockJob *s)
> {
> - StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
> - BlockJob *bjob = &s->common;
> - BlockDriverState *bs = blk_bs(bjob->blk);
> + BlockDriverState *bs = s->target_bs;
> BlockDriverState *base = s->base;
> Error *local_err = NULL;
> int ret = 0;
> @@ -82,11 +99,53 @@ static int stream_prepare(Job *job)
> return ret;
> }
>
> +static int stream_exit(Job *job, bool abort)
> +{
> + StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
> + BlockJob *bjob = &s->common;
> + BlockDriverState *target_bs = s->target_bs;
> + int ret = 0;
> +
> + /* Retain the BDS until we complete the graph change. */
> + bdrv_ref(target_bs);
> + /* Hold a guest back from writing while permissions are being reset. */
> + bdrv_drained_begin(target_bs);
> + /* Drop permissions before the graph change. */
> + bdrv_child_try_set_perm(s->cor_filter_bs->file, 0, BLK_PERM_ALL,
> + &error_abort);
> + if (!abort) {
> + ret = stream_change_backing_file(s);
> + }
> +
> + bdrv_replace_node(s->cor_filter_bs, target_bs, &error_abort);
> + /* Switch the BB back to the filter so that job terminated properly. */
> + blk_remove_bs(bjob->blk);
> + blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
> + blk_insert_bs(bjob->blk, s->cor_filter_bs, &error_abort);
> +
> + bdrv_drained_end(target_bs);
> + bdrv_unref(target_bs);
> + /* Submit control over filter to the job instance. */
> + bdrv_unref(s->cor_filter_bs);
> +
> + return ret;
> +}
> +
> +static int stream_prepare(Job *job)
> +{
> + return stream_exit(job, false);
> +}
> +
> +static void stream_abort(Job *job)
> +{
> + stream_exit(job, job->ret < 0);
> +}
> +
> static void stream_clean(Job *job)
> {
> StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
> BlockJob *bjob = &s->common;
> - BlockDriverState *bs = blk_bs(bjob->blk);
> + BlockDriverState *bs = s->target_bs;
>
> /* Reopen the image back in read-only mode if necessary */
> if (s->bs_read_only) {
> @@ -102,7 +161,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
> {
> StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
> BlockBackend *blk = s->common.blk;
> - BlockDriverState *bs = blk_bs(blk);
> + BlockDriverState *bs = s->target_bs;
> BlockDriverState *base = s->base;
> int64_t len;
> int64_t offset = 0;
> @@ -213,12 +272,64 @@ static const BlockJobDriver stream_job_driver = {
> .free = block_job_free,
> .run = stream_run,
> .prepare = stream_prepare,
> + .abort = stream_abort,
> .clean = stream_clean,
> .user_resume = block_job_user_resume,
> .drain = block_job_drain,
> },
> };
>
> +static BlockDriverState *create_filter_node(BlockDriverState *bs,
> + Error **errp)
> +{
> + QDict *opts = qdict_new();
> +
> + qdict_put_str(opts, "driver", "copy-on-read");
> + qdict_put_str(opts, "file", bdrv_get_node_name(bs));
> +
> + return bdrv_open(NULL, NULL, opts, BDRV_O_RDWR, errp);
> +}
> +
> +static void remove_filter(BlockDriverState *cor_filter_bs)
> +{
> + BlockDriverState *bs = child_file_bs(cor_filter_bs);
> +
> + /* Hold a guest back from writing until we remove the filter */
> + bdrv_drained_begin(bs);
> + bdrv_child_try_set_perm(cor_filter_bs->file, 0, BLK_PERM_ALL,
> + &error_abort);
> + bdrv_replace_node(cor_filter_bs, bs, &error_abort);
> + bdrv_drained_end(bs);
> +
> + bdrv_unref(cor_filter_bs);
> +}
> +
> +static BlockDriverState *insert_filter(BlockDriverState *bs, Error **errp)
> +{
> + BlockDriverState *cor_filter_bs;
> + Error *local_err = NULL;
> +
> + cor_filter_bs = create_filter_node(bs, errp);
> + if (cor_filter_bs == NULL) {
> + error_prepend(errp, "Could not create filter node: ");
> + return NULL;
> + }
> +
> + bdrv_set_aio_context(cor_filter_bs, bdrv_get_aio_context(bs));
> +
> + bdrv_drained_begin(bs);
> + bdrv_replace_node(bs, cor_filter_bs, &local_err);
> + bdrv_drained_end(bs);
> +
> + if (local_err) {
> + bdrv_unref(cor_filter_bs);
> + error_propagate(errp, local_err);
> + return NULL;
> + }
> +
> + return cor_filter_bs;
> +}
> +
> void stream_start(const char *job_id, BlockDriverState *bs,
> BlockDriverState *base, const char *backing_file_str,
> int creation_flags, int64_t speed,
> @@ -227,6 +338,14 @@ void stream_start(const char *job_id, BlockDriverState *bs,
> StreamBlockJob *s;
> BlockDriverState *iter;
> bool bs_read_only;
> + BlockDriverState *cor_filter_bs;
> +
> + /*
> + * The base node might be identified by its file name rather than
> + * by its node name. In that case, we can encounter a filter node
> + * instead which has other BS pointer and the same file name.
> + */
> + base = skip_filter(base);
>
> /* Make sure that the image is opened in read-write mode */
> bs_read_only = bdrv_is_read_only(bs);
> @@ -236,10 +355,15 @@ void stream_start(const char *job_id, BlockDriverState *bs,
> }
> }
>
> + cor_filter_bs = insert_filter(bs, errp);
> + if (cor_filter_bs == NULL) {
> + goto fail;
> + }
> +
> /* Prevent concurrent jobs trying to modify the graph structure here, we
> * already have our own plans. Also don't allow resize as the image size is
> * queried only at the job start and then cached. */
> - s = block_job_create(job_id, &stream_job_driver, NULL, bs,
> + s = block_job_create(job_id, &stream_job_driver, NULL, cor_filter_bs,
> BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
> BLK_PERM_GRAPH_MOD,
> BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
> @@ -249,16 +373,21 @@ void stream_start(const char *job_id, BlockDriverState *bs,
> goto fail;
> }
>
> - /* Block all intermediate nodes between bs and base, because they will
> + /*
> + * Block all intermediate nodes between bs and base, because they will
> * disappear from the chain after this operation. The streaming job reads
> * every block only once, assuming that it doesn't change, so block writes
> - * and resizes. */
> - for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
> + * and resizes. We account a filter node may be a part of the graph.
> + */
> + for (iter = skip_filter(backing_bs(bs)); iter && iter != base;
> + iter = skip_filter(backing_bs(iter))) {
> block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
> BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
> &error_abort);
> }
>
> + s->cor_filter_bs = cor_filter_bs;
> + s->target_bs = bs;
> s->base = base;
> s->backing_file_str = g_strdup(backing_file_str);
> s->bs_read_only = bs_read_only;
> @@ -269,6 +398,9 @@ void stream_start(const char *job_id, BlockDriverState *bs,
> return;
>
> fail:
> + if (cor_filter_bs) {
> + remove_filter(cor_filter_bs);
> + }
> if (bs_read_only) {
> bdrv_reopen_set_read_only(bs, true, NULL);
> }
>
--
With the best regards,
Andrey Shinkevich
next prev parent reply other threads:[~2019-01-31 14:02 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-01-23 11:54 [Qemu-devel] [PATCH RFC 1/1] Stream block job involves copy-on-read filter Andrey Shinkevich
2019-01-23 13:10 ` Vladimir Sementsov-Ogievskiy
2019-01-31 14:02 ` Andrey Shinkevich [this message]
2019-02-08 13:13 ` [Qemu-devel] [Qemu-block] " Alberto Garcia
2019-02-08 15:29 ` Andrey Shinkevich
2019-02-11 14:07 ` Alberto Garcia
2019-02-11 14:51 ` Vladimir Sementsov-Ogievskiy
2019-02-11 15:52 ` Alberto Garcia
2019-02-11 16:58 ` Vladimir Sementsov-Ogievskiy
2019-02-12 11:35 ` Alberto Garcia
2019-02-14 13:43 ` Andrey Shinkevich
2019-02-18 10:08 ` Vladimir Sementsov-Ogievskiy
2019-02-20 9:10 ` Andrey Shinkevich
2019-02-11 14:54 ` Andrey Shinkevich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7ccbcc44-9d41-2df9-e1af-88715f81c99a@virtuozzo.com \
--to=andrey.shinkevich@virtuozzo.com \
--cc=den@virtuozzo.com \
--cc=jcody@redhat.com \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=vsementsov@virtuozzo.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).