From: "Benoît Canet" <benoit.canet@irqsave.net>
To: Kevin Wolf <kwolf@redhat.com>
Cc: pl@kamp.de, qemu-devel@nongnu.org, mreitz@redhat.com,
stefanha@redhat.com, pbonzini@redhat.com,
xiawenc@linux.vnet.ibm.com
Subject: Re: [Qemu-devel] [PATCH v3 18/29] block: Make overlap range for serialisation dynamic
Date: Wed, 22 Jan 2014 21:15:10 +0100 [thread overview]
Message-ID: <20140122201510.GD3053@irqsave.net> (raw)
In-Reply-To: <1389968119-24771-19-git-send-email-kwolf@redhat.com>
Le Friday 17 Jan 2014 à 15:15:08 (+0100), Kevin Wolf a écrit :
> Copy on Read wants to serialise with all requests touching the same
> cluster, so wait_serialising_requests() rounded to cluster boundaries.
> Other users like alignment RMW will have different requirements, though
> (requests touching the same sector), so make it dynamic.
>
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> Reviewed-by: Max Reitz <mreitz@redhat.com>
> ---
> block.c | 53 ++++++++++++++++++++++++-----------------------
> include/block/block_int.h | 4 ++++
> 2 files changed, 31 insertions(+), 26 deletions(-)
>
> diff --git a/block.c b/block.c
> index efa8979..e72966a 100644
> --- a/block.c
> +++ b/block.c
> @@ -2051,6 +2051,8 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
> .is_write = is_write,
> .co = qemu_coroutine_self(),
> .serialising = false,
> + .overlap_offset = offset,
> + .overlap_bytes = bytes,
> };
>
> qemu_co_queue_init(&req->wait_queue);
> @@ -2058,12 +2060,19 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
> QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
> }
>
> -static void mark_request_serialising(BdrvTrackedRequest *req)
> +static void mark_request_serialising(BdrvTrackedRequest *req, size_t align)
> {
> + int64_t overlap_offset = req->offset & ~(align - 1);
> + int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
> + - overlap_offset;
> +
> if (!req->serialising) {
> req->bs->serialising_in_flight++;
> req->serialising = true;
> }
> +
> + req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
> + req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
> }
>
> /**
> @@ -2087,20 +2096,16 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
> }
> }
>
> -static void round_bytes_to_clusters(BlockDriverState *bs,
> - int64_t offset, unsigned int bytes,
> - int64_t *cluster_offset,
> - unsigned int *cluster_bytes)
> +static int bdrv_get_cluster_size(BlockDriverState *bs)
> {
> BlockDriverInfo bdi;
> + int ret;
>
> - if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
> - *cluster_offset = offset;
> - *cluster_bytes = bytes;
> + ret = bdrv_get_info(bs, &bdi);
> + if (ret < 0 || bdi.cluster_size == 0) {
> + return bs->request_alignment;
> } else {
> - *cluster_offset = QEMU_ALIGN_DOWN(offset, bdi.cluster_size);
> - *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes,
> - bdi.cluster_size);
> + return bdi.cluster_size;
> }
> }
>
> @@ -2108,11 +2113,11 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
> int64_t offset, unsigned int bytes)
> {
> /* aaaa bbbb */
> - if (offset >= req->offset + req->bytes) {
> + if (offset >= req->overlap_offset + req->overlap_bytes) {
> return false;
> }
> /* bbbb aaaa */
> - if (req->offset >= offset + bytes) {
> + if (req->overlap_offset >= offset + bytes) {
> return false;
> }
> return true;
> @@ -2122,30 +2127,21 @@ static void coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
> {
> BlockDriverState *bs = self->bs;
> BdrvTrackedRequest *req;
> - int64_t cluster_offset;
> - unsigned int cluster_bytes;
> bool retry;
>
> if (!bs->serialising_in_flight) {
> return;
> }
>
> - /* If we touch the same cluster it counts as an overlap. This guarantees
> - * that allocating writes will be serialized and not race with each other
> - * for the same cluster. For example, in copy-on-read it ensures that the
> - * CoR read and write operations are atomic and guest writes cannot
> - * interleave between them.
> - */
> - round_bytes_to_clusters(bs, self->offset, self->bytes,
> - &cluster_offset, &cluster_bytes);
> -
> do {
> retry = false;
> QLIST_FOREACH(req, &bs->tracked_requests, list) {
> if (req == self || (!req->serialising && !self->serialising)) {
> continue;
> }
> - if (tracked_request_overlaps(req, cluster_offset, cluster_bytes)) {
> + if (tracked_request_overlaps(req, self->overlap_offset,
> + self->overlap_bytes))
> + {
> /* Hitting this means there was a reentrant request, for
> * example, a block driver issuing nested requests. This must
> * never happen since it means deadlock.
> @@ -2761,7 +2757,12 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
>
> /* Handle Copy on Read and associated serialisation */
> if (flags & BDRV_REQ_COPY_ON_READ) {
> - mark_request_serialising(req);
> + /* If we touch the same cluster it counts as an overlap. This
> + * guarantees that allocating writes will be serialized and not race
> + * with each other for the same cluster. For example, in copy-on-read
> + * it ensures that the CoR read and write operations are atomic and
> + * guest writes cannot interleave between them. */
> + mark_request_serialising(req, bdrv_get_cluster_size(bs));
> }
>
> wait_serialising_requests(req);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index d8443df..ccd2c68 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -60,7 +60,11 @@ typedef struct BdrvTrackedRequest {
> int64_t offset;
> unsigned int bytes;
> bool is_write;
> +
> bool serialising;
> + int64_t overlap_offset;
> + unsigned int overlap_bytes;
> +
> QLIST_ENTRY(BdrvTrackedRequest) list;
> Coroutine *co; /* owner, used for deadlock detection */
> CoQueue wait_queue; /* coroutines blocked on this request */
> --
> 1.8.1.4
>
>
Reviewed-by: Benoit Canet <benoit@irqsave.net>
next prev parent reply other threads:[~2014-01-22 20:15 UTC|newest]
Thread overview: 70+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-01-17 14:14 [Qemu-devel] [PATCH v3 00/29] block: Support for 512b-on-4k emulation Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 01/29] block: Move initialisation of BlockLimits to bdrv_refresh_limits() Kevin Wolf
2014-01-17 22:39 ` Benoît Canet
2014-01-20 9:31 ` Kevin Wolf
2014-01-20 9:49 ` Peter Lieven
2014-01-21 12:49 ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 02/29] block: Inherit opt_transfer_length Kevin Wolf
2014-01-17 22:42 ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 03/29] block: Update BlockLimits when they might have changed Kevin Wolf
2014-01-17 22:47 ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 04/29] qemu_memalign: Allow small alignments Kevin Wolf
2014-01-17 22:49 ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 05/29] block: Detect unaligned length in bdrv_qiov_is_aligned() Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 06/29] block: Don't use guest sector size for qemu_blockalign() Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 07/29] block: rename buffer_alignment to guest_block_size Kevin Wolf
2014-01-21 12:54 ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 08/29] raw: Probe required direct I/O alignment Kevin Wolf
2014-01-21 13:03 ` Benoît Canet
2014-01-21 13:29 ` Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 09/29] block: Introduce bdrv_aligned_preadv() Kevin Wolf
2014-01-21 13:13 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 10/29] block: Introduce bdrv_co_do_preadv() Kevin Wolf
2014-01-17 23:59 ` Max Reitz
2014-01-21 13:29 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 11/29] block: Introduce bdrv_aligned_pwritev() Kevin Wolf
2014-01-21 13:31 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 12/29] block: write: Handle COR dependency after I/O throttling Kevin Wolf
2014-01-21 13:33 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 13/29] block: Introduce bdrv_co_do_pwritev() Kevin Wolf
2014-01-18 0:00 ` Max Reitz
2014-01-21 13:36 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 14/29] block: Switch BdrvTrackedRequest to byte granularity Kevin Wolf
2014-01-17 23:19 ` Max Reitz
2014-01-21 13:49 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 15/29] block: Allow waiting for overlapping requests between begin/end Kevin Wolf
2014-01-22 19:46 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 16/29] block: Make zero-after-EOF work with larger alignment Kevin Wolf
2014-01-17 23:21 ` Max Reitz
2014-01-22 19:50 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 17/29] block: Generalise and optimise COR serialisation Kevin Wolf
2014-01-22 20:00 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 18/29] block: Make overlap range for serialisation dynamic Kevin Wolf
2014-01-22 20:15 ` Benoît Canet [this message]
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 19/29] block: Allow wait_serialising_requests() at any point Kevin Wolf
2014-01-22 20:21 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 20/29] block: Align requests in bdrv_co_do_pwritev() Kevin Wolf
2014-01-22 20:29 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 21/29] block: Assert serialisation assumptions in pwritev Kevin Wolf
2014-01-17 23:42 ` Max Reitz
2014-01-24 16:09 ` Benoît Canet
2014-01-24 16:18 ` Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 22/29] block: Change coroutine wrapper to byte granularity Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 23/29] block: Make bdrv_pread() a bdrv_prwv_co() wrapper Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 24/29] block: Make bdrv_pwrite() " Kevin Wolf
2014-01-17 23:43 ` Max Reitz
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 25/29] iscsi: Set bs->request_alignment Kevin Wolf
2014-01-24 16:29 ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 26/29] blkdebug: Make required alignment configurable Kevin Wolf
2014-01-17 23:50 ` Max Reitz
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 27/29] qemu-io: New command 'sleep' Kevin Wolf
2014-01-17 23:55 ` Max Reitz
2014-01-20 9:58 ` Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 28/29] qemu-iotests: Test pwritev RMW logic Kevin Wolf
2014-01-18 16:01 ` Max Reitz
2014-01-20 9:44 ` Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 29/29] block: Switch bdrv_io_limits_intercept() to byte granularity Kevin Wolf
2014-01-17 23:59 ` Max Reitz
2014-01-22 20:30 ` [Qemu-devel] [PATCH v3 00/29] block: Support for 512b-on-4k emulation Christian Borntraeger
2014-01-23 10:29 ` Kevin Wolf
2014-01-23 11:12 ` Christian Borntraeger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140122201510.GD3053@irqsave.net \
--to=benoit.canet@irqsave.net \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=pbonzini@redhat.com \
--cc=pl@kamp.de \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
--cc=xiawenc@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of its NNTP newsgroup(s).