From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
To: qemu-block@nongnu.org
Cc: fam@euphon.net, kwolf@redhat.com, vsementsov@virtuozzo.com,
Anton.Nefedov@acronis.com, armbru@redhat.com,
qemu-devel@nongnu.org, mreitz@redhat.com, stefanha@redhat.com,
den@openvz.org
Subject: [PATCH 2/5] block/io: introduce bdrv_co_range_try_lock
Date: Sat, 20 Jun 2020 17:36:46 +0300 [thread overview]
Message-ID: <20200620143649.225852-3-vsementsov@virtuozzo.com> (raw)
In-Reply-To: <20200620143649.225852-1-vsementsov@virtuozzo.com>
Introduce an interface to make a "critical section" on top of
serialising requests feature. This is needed to avoid intersecting with
other requests during a set of operations. Will be used in a next
commit to implement preallocate filter.
To keep assertions during intersecting requests handling, we need to
add _locked versions of read/write requests, to be used inside locked
section. The following commit will need only locked version of
write-zeroes, so add only it.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
include/block/block.h | 9 ++++
include/block/block_int.h | 16 ++++++++
include/qemu/typedefs.h | 1 +
block/io.c | 86 +++++++++++++++++++++++++++++++--------
4 files changed, 95 insertions(+), 17 deletions(-)
diff --git a/include/block/block.h b/include/block/block.h
index 25e299605e..c4b2a493b4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -391,6 +391,15 @@ int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
*/
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags);
+
+/*
+ * Version of bdrv_co_pwrite_zeroes to be used inside bdrv_co_range_try_lock()
+ * .. bdrv_co_range_unlock() pair.
+ */
+int coroutine_fn bdrv_co_pwrite_zeroes_locked(BdrvChild *child,
+ int64_t offset, int bytes, BdrvRequestFlags flags,
+ BdrvTrackedRequest *lock);
+
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
void bdrv_refresh_filename(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 791de6a59c..9ec56f1825 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -66,6 +66,7 @@ enum BdrvTrackedRequestType {
BDRV_TRACKED_WRITE,
BDRV_TRACKED_DISCARD,
BDRV_TRACKED_TRUNCATE,
+ BDRV_TRACKED_LOCK,
};
typedef struct BdrvTrackedRequest {
@@ -83,6 +84,12 @@ typedef struct BdrvTrackedRequest {
CoQueue wait_queue; /* coroutines blocked on this request */
struct BdrvTrackedRequest *waiting_for;
+
+ /*
+ * If non-zero, the request is under lock, so it's allowed to intersect
+ * (actully it must be inside) the @lock request.
+ */
+ struct BdrvTrackedRequest *lock;
} BdrvTrackedRequest;
struct BlockDriver {
@@ -994,6 +1001,15 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
int64_t offset, unsigned int bytes,
QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
+/*
+ * Creates serializing BDRV_TRACKED_LOCK request if no conflicts, on success
+ * return pointer to it, to be used in bdrv_co_range_unlock().
+ */
+BdrvTrackedRequest *coroutine_fn bdrv_co_range_try_lock(BlockDriverState *bs,
+ int64_t offset,
+ int64_t bytes);
+void coroutine_fn bdrv_co_range_unlock(BdrvTrackedRequest *req);
+
static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
{
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index ecf3cde26c..e2bbe3af96 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -28,6 +28,7 @@ typedef struct Aml Aml;
typedef struct AnnounceTimer AnnounceTimer;
typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
typedef struct BdrvDirtyBitmapIter BdrvDirtyBitmapIter;
+typedef struct BdrvTrackedRequest BdrvTrackedRequest;
typedef struct BlockBackend BlockBackend;
typedef struct BlockBackendRootState BlockBackendRootState;
typedef struct BlockDriverState BlockDriverState;
diff --git a/block/io.c b/block/io.c
index 60cee1d759..0de72cdb4c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -691,7 +691,8 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
BlockDriverState *bs,
int64_t offset,
uint64_t bytes,
- enum BdrvTrackedRequestType type)
+ enum BdrvTrackedRequestType type,
+ BdrvTrackedRequest *lock)
{
assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes);
@@ -704,6 +705,7 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
.serialising = false,
.overlap_offset = offset,
.overlap_bytes = bytes,
+ .lock = lock,
};
qemu_co_queue_init(&req->wait_queue);
@@ -745,15 +747,26 @@ static bool coroutine_fn wait_or_find_conflicts(BdrvTrackedRequest *self,
if (tracked_request_overlaps(req, self->overlap_offset,
self->overlap_bytes))
{
- /* Hitting this means there was a reentrant request, for
- * example, a block driver issuing nested requests. This must
- * never happen since it means deadlock.
+ if (self->lock == req) {
+ /* This is atomic request under range_lock */
+ assert(req->type == BDRV_TRACKED_LOCK);
+ assert(self->offset >= req->offset);
+ assert(self->bytes <= req->bytes);
+ continue;
+ }
+ /*
+ * Hitting this means there was a reentrant request (except for
+ * range_lock, handled above), for example, a block driver
+ * issuing nested requests. This must never happen since it
+ * means deadlock.
*/
assert(qemu_coroutine_self() != req->co);
- /* If the request is already (indirectly) waiting for us, or
+ /*
+ * If the request is already (indirectly) waiting for us, or
* will wait for us as soon as it wakes up, then just go on
- * (instead of producing a deadlock in the former case). */
+ * (instead of producing a deadlock in the former case).
+ */
if (!req->waiting_for) {
if (!wait) {
return true;
@@ -821,7 +834,6 @@ bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
}
/* Return true on success, false if there are some conflicts */
-__attribute__ ((unused))
static bool bdrv_try_mark_request_serialising(BdrvTrackedRequest *req,
uint64_t align)
{
@@ -1796,7 +1808,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad);
- tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
+ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ, NULL);
ret = bdrv_aligned_preadv(child, &req, offset, bytes,
bs->bl.request_alignment,
qiov, qiov_offset, flags);
@@ -2169,9 +2181,9 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
}
-int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+static int coroutine_fn bdrv_co_pwritev_part_locked(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags)
+ BdrvRequestFlags flags, BdrvTrackedRequest *lock)
{
BlockDriverState *bs = child->bs;
BdrvTrackedRequest req;
@@ -2215,7 +2227,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
* Pad qiov with the read parts and be sure to have a tracked request not
* only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
*/
- tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
+ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE, lock);
if (flags & BDRV_REQ_ZERO_WRITE) {
ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
@@ -2239,8 +2251,23 @@ out:
return ret;
}
+int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
+{
+ return bdrv_co_pwritev_part_locked(child, offset, bytes, qiov, qiov_offset,
+ flags, NULL);
+}
+
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags)
+{
+ return bdrv_co_pwrite_zeroes_locked(child, offset, bytes, flags, NULL);
+}
+
+int coroutine_fn bdrv_co_pwrite_zeroes_locked(BdrvChild *child, int64_t offset,
+ int bytes, BdrvRequestFlags flags,
+ BdrvTrackedRequest *lock)
{
trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
@@ -2248,8 +2275,8 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
flags &= ~BDRV_REQ_MAY_UNMAP;
}
- return bdrv_co_pwritev(child, offset, bytes, NULL,
- BDRV_REQ_ZERO_WRITE | flags);
+ return bdrv_co_pwritev_part_locked(child, offset, bytes, NULL, 0,
+ BDRV_REQ_ZERO_WRITE | flags, lock);
}
/*
@@ -2980,7 +3007,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
tail = (offset + bytes) % align;
bdrv_inc_in_flight(bs);
- tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);
+ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD, NULL);
ret = bdrv_co_write_req_prepare(child, offset, bytes, &req, 0);
if (ret < 0) {
@@ -3252,7 +3279,7 @@ static int coroutine_fn bdrv_co_copy_range_internal(
if (recurse_src) {
bdrv_inc_in_flight(src->bs);
tracked_request_begin(&req, src->bs, src_offset, bytes,
- BDRV_TRACKED_READ);
+ BDRV_TRACKED_READ, NULL);
/* BDRV_REQ_SERIALISING is only for write operation */
assert(!(read_flags & BDRV_REQ_SERIALISING));
@@ -3269,7 +3296,7 @@ static int coroutine_fn bdrv_co_copy_range_internal(
} else {
bdrv_inc_in_flight(dst->bs);
tracked_request_begin(&req, dst->bs, dst_offset, bytes,
- BDRV_TRACKED_WRITE);
+ BDRV_TRACKED_WRITE, NULL);
ret = bdrv_co_write_req_prepare(dst, dst_offset, bytes, &req,
write_flags);
if (!ret) {
@@ -3381,7 +3408,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
bdrv_inc_in_flight(bs);
tracked_request_begin(&req, bs, offset - new_bytes, new_bytes,
- BDRV_TRACKED_TRUNCATE);
+ BDRV_TRACKED_TRUNCATE, NULL);
/* If we are growing the image and potentially using preallocation for the
* new area, we need to make sure that no write requests are made to it
@@ -3494,3 +3521,28 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
return bdrv_run_co(child->bs, bdrv_truncate_co_entry, &tco);
}
+
+BdrvTrackedRequest *coroutine_fn bdrv_co_range_try_lock(BlockDriverState *bs,
+ int64_t offset,
+ int64_t bytes)
+{
+ BdrvTrackedRequest *req = g_new(BdrvTrackedRequest, 1);
+ bool success;
+
+ tracked_request_begin(req, bs, offset, bytes, BDRV_TRACKED_LOCK, NULL);
+ success = bdrv_try_mark_request_serialising(req, bdrv_get_cluster_size(bs));
+ if (!success) {
+ g_free(req);
+ req = NULL;
+ }
+
+ return req;
+}
+
+void coroutine_fn bdrv_co_range_unlock(BdrvTrackedRequest *req)
+{
+ assert(req->type == BDRV_TRACKED_LOCK);
+
+ tracked_request_end(req);
+ g_free(req);
+}
--
2.18.0
next prev parent reply other threads:[~2020-06-20 14:38 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-06-20 14:36 [PATCH 0/5] preallocate filter Vladimir Sementsov-Ogievskiy
2020-06-20 14:36 ` [PATCH 1/5] block/io: introduce bdrv_try_mark_request_serialising Vladimir Sementsov-Ogievskiy
2020-07-07 15:56 ` Stefan Hajnoczi
2020-07-08 15:51 ` Vladimir Sementsov-Ogievskiy
2020-07-13 11:23 ` Stefan Hajnoczi
2020-06-20 14:36 ` Vladimir Sementsov-Ogievskiy [this message]
2020-07-07 16:10 ` [PATCH 2/5] block/io: introduce bdrv_co_range_try_lock Stefan Hajnoczi
2020-07-08 15:56 ` Vladimir Sementsov-Ogievskiy
2020-07-13 11:23 ` Stefan Hajnoczi
2020-06-20 14:36 ` [PATCH 3/5] block: introduce preallocate filter Vladimir Sementsov-Ogievskiy
2020-07-08 12:07 ` Stefan Hajnoczi
2020-07-08 16:17 ` Vladimir Sementsov-Ogievskiy
2020-07-13 11:27 ` Stefan Hajnoczi
2020-06-20 14:36 ` [PATCH 4/5] iotests: QemuIoInteractive: use qemu_io_args_no_fmt Vladimir Sementsov-Ogievskiy
2020-07-08 12:08 ` Stefan Hajnoczi
2020-06-20 14:36 ` [PATCH 5/5] iotests: add 298 to test new preallocate filter driver Vladimir Sementsov-Ogievskiy
2020-07-08 12:09 ` Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200620143649.225852-3-vsementsov@virtuozzo.com \
--to=vsementsov@virtuozzo.com \
--cc=Anton.Nefedov@acronis.com \
--cc=armbru@redhat.com \
--cc=den@openvz.org \
--cc=fam@euphon.net \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).