From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, vsementsov@virtuozzo.com,
qemu-devel@nongnu.org, mreitz@redhat.com,
andrey.shinkevich@virtuozzo.com, jsnow@redhat.com
Subject: [PATCH v3 8/9] block/block-copy: reduce intersecting request lock
Date: Fri, 6 Mar 2020 10:38:30 +0300 [thread overview]
Message-ID: <20200306073831.7737-9-vsementsov@virtuozzo.com> (raw)
In-Reply-To: <20200306073831.7737-1-vsementsov@virtuozzo.com>
Currently, block_copy operation lock the whole requested region. But
there is no reason to lock clusters, which are already copied, it will
disturb other parallel block_copy requests for no reason.
Let's instead do the following:
Lock only sub-region, which we are going to operate on. Then, after
copying all dirty sub-regions, we should wait for intersecting
requests block-copy, if they failed, we should retry these new dirty
clusters.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
---
block/block-copy.c | 128 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 104 insertions(+), 24 deletions(-)
diff --git a/block/block-copy.c b/block/block-copy.c
index 2b29131653..d66b8eb691 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -39,29 +39,71 @@ static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
return NULL;
}
-static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
- int64_t offset,
- int64_t bytes)
+/*
+ * If there are no intersecting requests return false. Otherwise, wait for the
+ * first found intersecting request to finish and return true.
+ */
+static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
+ int64_t bytes)
{
- BlockCopyInFlightReq *req;
+ BlockCopyInFlightReq *req = find_conflicting_inflight_req(s, offset, bytes);
- while ((req = find_conflicting_inflight_req(s, offset, bytes))) {
- qemu_co_queue_wait(&req->wait_queue, NULL);
+ if (!req) {
+ return false;
}
+
+ qemu_co_queue_wait(&req->wait_queue, NULL);
+
+ return true;
}
+/* Called only on full-dirty region */
static void block_copy_inflight_req_begin(BlockCopyState *s,
BlockCopyInFlightReq *req,
int64_t offset, int64_t bytes)
{
+ assert(!find_conflicting_inflight_req(s, offset, bytes));
+
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
+ s->in_flight_bytes += bytes;
+
req->offset = offset;
req->bytes = bytes;
qemu_co_queue_init(&req->wait_queue);
QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
}
-static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
+/*
+ * block_copy_inflight_req_shrink
+ *
+ * Drop the tail of the request to be handled later. Set dirty bits back and
+ * wake up all requests waiting for us (may be some of them are not intersecting
+ * with shrunk request)
+ */
+static void coroutine_fn block_copy_inflight_req_shrink(BlockCopyState *s,
+ BlockCopyInFlightReq *req, int64_t new_bytes)
{
+ if (new_bytes == req->bytes) {
+ return;
+ }
+
+ assert(new_bytes > 0 && new_bytes < req->bytes);
+
+ bdrv_set_dirty_bitmap(s->copy_bitmap,
+ req->offset + new_bytes, req->bytes - new_bytes);
+
+ req->bytes = new_bytes;
+ qemu_co_queue_restart_all(&req->wait_queue);
+}
+
+static void coroutine_fn block_copy_inflight_req_end(BlockCopyState *s,
+ BlockCopyInFlightReq *req,
+ int ret)
+{
+ s->in_flight_bytes -= req->bytes;
+ if (ret < 0) {
+ bdrv_set_dirty_bitmap(s->copy_bitmap, req->offset, req->bytes);
+ }
QLIST_REMOVE(req, list);
qemu_co_queue_restart_all(&req->wait_queue);
}
@@ -357,12 +399,19 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
return ret;
}
-int coroutine_fn block_copy(BlockCopyState *s,
- int64_t offset, int64_t bytes,
- bool *error_is_read)
+/*
+ * block_copy_dirty_clusters
+ *
+ * Copy dirty clusters in @offset/@bytes range.
+ * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
+ * clusters found and -errno on failure.
+ */
+static int coroutine_fn block_copy_dirty_clusters(BlockCopyState *s,
+ int64_t offset, int64_t bytes,
+ bool *error_is_read)
{
int ret = 0;
- BlockCopyInFlightReq req;
+ bool found_dirty = false;
/*
* block_copy() user is responsible for keeping source and target in same
@@ -374,10 +423,8 @@ int coroutine_fn block_copy(BlockCopyState *s,
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
- block_copy_wait_inflight_reqs(s, offset, bytes);
- block_copy_inflight_req_begin(s, &req, offset, bytes);
-
while (bytes) {
+ BlockCopyInFlightReq req;
int64_t next_zero, cur_bytes, status_bytes;
if (!bdrv_dirty_bitmap_get(s->copy_bitmap, offset)) {
@@ -387,6 +434,8 @@ int coroutine_fn block_copy(BlockCopyState *s,
continue; /* already copied */
}
+ found_dirty = true;
+
cur_bytes = MIN(bytes, s->copy_size);
next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, offset,
@@ -396,10 +445,14 @@ int coroutine_fn block_copy(BlockCopyState *s,
assert(next_zero < offset + cur_bytes); /* no need to do MIN() */
cur_bytes = next_zero - offset;
}
+ block_copy_inflight_req_begin(s, &req, offset, cur_bytes);
ret = block_copy_block_status(s, offset, cur_bytes, &status_bytes);
+ assert(ret >= 0); /* never fail */
+ cur_bytes = MIN(cur_bytes, status_bytes);
+ block_copy_inflight_req_shrink(s, &req, cur_bytes);
if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
- bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, status_bytes);
+ block_copy_inflight_req_end(s, &req, 0);
progress_set_remaining(s->progress,
bdrv_get_dirty_count(s->copy_bitmap) +
s->in_flight_bytes);
@@ -409,21 +462,15 @@ int coroutine_fn block_copy(BlockCopyState *s,
continue;
}
- cur_bytes = MIN(cur_bytes, status_bytes);
-
trace_block_copy_process(s, offset);
- bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
- s->in_flight_bytes += cur_bytes;
-
co_get_from_shres(s->mem, cur_bytes);
ret = block_copy_do_copy(s, offset, cur_bytes, ret & BDRV_BLOCK_ZERO,
error_is_read);
co_put_to_shres(s->mem, cur_bytes);
- s->in_flight_bytes -= cur_bytes;
+ block_copy_inflight_req_end(s, &req, ret);
if (ret < 0) {
- bdrv_set_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
- break;
+ return ret;
}
progress_work_done(s->progress, cur_bytes);
@@ -432,7 +479,40 @@ int coroutine_fn block_copy(BlockCopyState *s,
bytes -= cur_bytes;
}
- block_copy_inflight_req_end(&req);
+ return found_dirty;
+}
+
+/*
+ * block_copy
+ *
+ * Copy requested region, accordingly to dirty bitmap.
+ * Collaborate with parallel block_copy requests: if they success it help us. If
+ * they fail, we retry not-copied regions. So, if we return error, it means that
+ * io operation failed in context of _this_ block_copy call, not some parallel
+ * operation.
+ */
+int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
+ bool *error_is_read)
+{
+ int ret;
+
+ do {
+ ret = block_copy_dirty_clusters(s, offset, bytes, error_is_read);
+
+ if (ret == 0) {
+ ret = block_copy_wait_one(s, offset, bytes);
+ }
+
+ /*
+ * We retry in two cases:
+ * 1. Some progress done
+ * Something was copied, which means that there were yield points
+ * and some new dirty bits may have appeared (due to failed parallel
+ * block-copy requests).
+ * 2. We have waited for some intersecting block-copy request
+ * It may have failed and produced new dirty bits.
+ */
+ } while (ret > 0);
return ret;
}
--
2.21.0
next prev parent reply other threads:[~2020-03-06 7:42 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-06 7:38 [PATCH v3 0/9] block-copy improvements: part I Vladimir Sementsov-Ogievskiy
2020-03-06 7:38 ` [PATCH v3 1/9] job: refactor progress to separate object Vladimir Sementsov-Ogievskiy
2020-03-10 12:22 ` Andrey Shinkevich
2020-03-10 12:40 ` Max Reitz
2020-03-06 7:38 ` [PATCH v3 2/9] block/block-copy: fix progress calculation Vladimir Sementsov-Ogievskiy
2020-03-10 13:32 ` Max Reitz
2020-03-06 7:38 ` [PATCH v3 3/9] block/block-copy: specialcase first copy_range request Vladimir Sementsov-Ogievskiy
2020-03-10 13:42 ` Max Reitz
2020-03-06 7:38 ` [PATCH v3 4/9] block/block-copy: use block_status Vladimir Sementsov-Ogievskiy
2020-03-10 14:21 ` Max Reitz
2020-03-06 7:38 ` [PATCH v3 5/9] block/block-copy: factor out find_conflicting_inflight_req Vladimir Sementsov-Ogievskiy
2020-03-10 14:27 ` Max Reitz
2020-03-06 7:38 ` [PATCH v3 6/9] block/block-copy: refactor interfaces to use bytes instead of end Vladimir Sementsov-Ogievskiy
2020-03-10 14:44 ` Max Reitz
2020-03-06 7:38 ` [PATCH v3 7/9] block/block-copy: rename start to offset in interfaces Vladimir Sementsov-Ogievskiy
2020-03-10 14:50 ` Max Reitz
2020-03-10 14:55 ` Andrey Shinkevich
2020-03-10 15:14 ` Max Reitz
2020-03-10 16:15 ` Andrey Shinkevich
2020-03-06 7:38 ` Vladimir Sementsov-Ogievskiy [this message]
2020-03-10 15:32 ` [PATCH v3 8/9] block/block-copy: reduce intersecting request lock Max Reitz
2020-03-11 9:39 ` Vladimir Sementsov-Ogievskiy
2020-03-06 7:38 ` [PATCH v3 9/9] block/block-copy: hide structure definitions Vladimir Sementsov-Ogievskiy
2020-03-10 14:55 ` Andrey Shinkevich
2020-03-10 15:38 ` Max Reitz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200306073831.7737-9-vsementsov@virtuozzo.com \
--to=vsementsov@virtuozzo.com \
--cc=andrey.shinkevich@virtuozzo.com \
--cc=jsnow@redhat.com \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.