qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Benoît Canet" <benoit.canet@irqsave.net>
To: Kevin Wolf <kwolf@redhat.com>
Cc: pl@kamp.de, qemu-devel@nongnu.org, mreitz@redhat.com,
	stefanha@redhat.com, pbonzini@redhat.com,
	xiawenc@linux.vnet.ibm.com
Subject: Re: [Qemu-devel] [PATCH v3 18/29] block: Make overlap range for serialisation dynamic
Date: Wed, 22 Jan 2014 21:15:10 +0100	[thread overview]
Message-ID: <20140122201510.GD3053@irqsave.net> (raw)
In-Reply-To: <1389968119-24771-19-git-send-email-kwolf@redhat.com>

Le Friday 17 Jan 2014 à 15:15:08 (+0100), Kevin Wolf a écrit :
> Copy on Read wants to serialise with all requests touching the same
> cluster, so wait_serialising_requests() rounded to cluster boundaries.
> Other users like alignment RMW will have different requirements, though
> (requests touching the same sector), so make it dynamic.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> Reviewed-by: Max Reitz <mreitz@redhat.com>
> ---
>  block.c                   | 53 ++++++++++++++++++++++++-----------------------
>  include/block/block_int.h |  4 ++++
>  2 files changed, 31 insertions(+), 26 deletions(-)
> 
> diff --git a/block.c b/block.c
> index efa8979..e72966a 100644
> --- a/block.c
> +++ b/block.c
> @@ -2051,6 +2051,8 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
>          .is_write       = is_write,
>          .co             = qemu_coroutine_self(),
>          .serialising    = false,
> +        .overlap_offset = offset,
> +        .overlap_bytes  = bytes,
>      };
>  
>      qemu_co_queue_init(&req->wait_queue);
> @@ -2058,12 +2060,19 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
>      QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
>  }
>  
> -static void mark_request_serialising(BdrvTrackedRequest *req)
> +static void mark_request_serialising(BdrvTrackedRequest *req, size_t align)
>  {
> +    int64_t overlap_offset = req->offset & ~(align - 1);
> +    int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
> +                      - overlap_offset;
> +
>      if (!req->serialising) {
>          req->bs->serialising_in_flight++;
>          req->serialising = true;
>      }
> +
> +    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
> +    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
>  }
>  
>  /**
> @@ -2087,20 +2096,16 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
>      }
>  }
>  
> -static void round_bytes_to_clusters(BlockDriverState *bs,
> -                                    int64_t offset, unsigned int bytes,
> -                                    int64_t *cluster_offset,
> -                                    unsigned int *cluster_bytes)
> +static int bdrv_get_cluster_size(BlockDriverState *bs)
>  {
>      BlockDriverInfo bdi;
> +    int ret;
>  
> -    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
> -        *cluster_offset = offset;
> -        *cluster_bytes = bytes;
> +    ret = bdrv_get_info(bs, &bdi);
> +    if (ret < 0 || bdi.cluster_size == 0) {
> +        return bs->request_alignment;
>      } else {
> -        *cluster_offset = QEMU_ALIGN_DOWN(offset, bdi.cluster_size);
> -        *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes,
> -                                       bdi.cluster_size);
> +        return bdi.cluster_size;
>      }
>  }
>  
> @@ -2108,11 +2113,11 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
>                                       int64_t offset, unsigned int bytes)
>  {
>      /*        aaaa   bbbb */
> -    if (offset >= req->offset + req->bytes) {
> +    if (offset >= req->overlap_offset + req->overlap_bytes) {
>          return false;
>      }
>      /* bbbb   aaaa        */
> -    if (req->offset >= offset + bytes) {
> +    if (req->overlap_offset >= offset + bytes) {
>          return false;
>      }
>      return true;
> @@ -2122,30 +2127,21 @@ static void coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>  {
>      BlockDriverState *bs = self->bs;
>      BdrvTrackedRequest *req;
> -    int64_t cluster_offset;
> -    unsigned int cluster_bytes;
>      bool retry;
>  
>      if (!bs->serialising_in_flight) {
>          return;
>      }
>  
> -    /* If we touch the same cluster it counts as an overlap.  This guarantees
> -     * that allocating writes will be serialized and not race with each other
> -     * for the same cluster.  For example, in copy-on-read it ensures that the
> -     * CoR read and write operations are atomic and guest writes cannot
> -     * interleave between them.
> -     */
> -    round_bytes_to_clusters(bs, self->offset, self->bytes,
> -                            &cluster_offset, &cluster_bytes);
> -
>      do {
>          retry = false;
>          QLIST_FOREACH(req, &bs->tracked_requests, list) {
>              if (req == self || (!req->serialising && !self->serialising)) {
>                  continue;
>              }
> -            if (tracked_request_overlaps(req, cluster_offset, cluster_bytes)) {
> +            if (tracked_request_overlaps(req, self->overlap_offset,
> +                                         self->overlap_bytes))
> +            {
>                  /* Hitting this means there was a reentrant request, for
>                   * example, a block driver issuing nested requests.  This must
>                   * never happen since it means deadlock.
> @@ -2761,7 +2757,12 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
>  
>      /* Handle Copy on Read and associated serialisation */
>      if (flags & BDRV_REQ_COPY_ON_READ) {
> -        mark_request_serialising(req);
> +        /* If we touch the same cluster it counts as an overlap.  This
> +         * guarantees that allocating writes will be serialized and not race
> +         * with each other for the same cluster.  For example, in copy-on-read
> +         * it ensures that the CoR read and write operations are atomic and
> +         * guest writes cannot interleave between them. */
> +        mark_request_serialising(req, bdrv_get_cluster_size(bs));
>      }
>  
>      wait_serialising_requests(req);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index d8443df..ccd2c68 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -60,7 +60,11 @@ typedef struct BdrvTrackedRequest {
>      int64_t offset;
>      unsigned int bytes;
>      bool is_write;
> +
>      bool serialising;
> +    int64_t overlap_offset;
> +    unsigned int overlap_bytes;
> +
>      QLIST_ENTRY(BdrvTrackedRequest) list;
>      Coroutine *co; /* owner, used for deadlock detection */
>      CoQueue wait_queue; /* coroutines blocked on this request */
> -- 
> 1.8.1.4
> 
> 
Reviewed-by: Benoit Canet <benoit@irqsave.net>

  reply	other threads:[~2014-01-22 20:15 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-17 14:14 [Qemu-devel] [PATCH v3 00/29] block: Support for 512b-on-4k emulation Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 01/29] block: Move initialisation of BlockLimits to bdrv_refresh_limits() Kevin Wolf
2014-01-17 22:39   ` Benoît Canet
2014-01-20  9:31     ` Kevin Wolf
2014-01-20  9:49       ` Peter Lieven
2014-01-21 12:49   ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 02/29] block: Inherit opt_transfer_length Kevin Wolf
2014-01-17 22:42   ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 03/29] block: Update BlockLimits when they might have changed Kevin Wolf
2014-01-17 22:47   ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 04/29] qemu_memalign: Allow small alignments Kevin Wolf
2014-01-17 22:49   ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 05/29] block: Detect unaligned length in bdrv_qiov_is_aligned() Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 06/29] block: Don't use guest sector size for qemu_blockalign() Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 07/29] block: rename buffer_alignment to guest_block_size Kevin Wolf
2014-01-21 12:54   ` Benoît Canet
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 08/29] raw: Probe required direct I/O alignment Kevin Wolf
2014-01-21 13:03   ` Benoît Canet
2014-01-21 13:29     ` Kevin Wolf
2014-01-17 14:14 ` [Qemu-devel] [PATCH v3 09/29] block: Introduce bdrv_aligned_preadv() Kevin Wolf
2014-01-21 13:13   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 10/29] block: Introduce bdrv_co_do_preadv() Kevin Wolf
2014-01-17 23:59   ` Max Reitz
2014-01-21 13:29   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 11/29] block: Introduce bdrv_aligned_pwritev() Kevin Wolf
2014-01-21 13:31   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 12/29] block: write: Handle COR dependency after I/O throttling Kevin Wolf
2014-01-21 13:33   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 13/29] block: Introduce bdrv_co_do_pwritev() Kevin Wolf
2014-01-18  0:00   ` Max Reitz
2014-01-21 13:36   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 14/29] block: Switch BdrvTrackedRequest to byte granularity Kevin Wolf
2014-01-17 23:19   ` Max Reitz
2014-01-21 13:49   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 15/29] block: Allow waiting for overlapping requests between begin/end Kevin Wolf
2014-01-22 19:46   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 16/29] block: Make zero-after-EOF work with larger alignment Kevin Wolf
2014-01-17 23:21   ` Max Reitz
2014-01-22 19:50   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 17/29] block: Generalise and optimise COR serialisation Kevin Wolf
2014-01-22 20:00   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 18/29] block: Make overlap range for serialisation dynamic Kevin Wolf
2014-01-22 20:15   ` Benoît Canet [this message]
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 19/29] block: Allow wait_serialising_requests() at any point Kevin Wolf
2014-01-22 20:21   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 20/29] block: Align requests in bdrv_co_do_pwritev() Kevin Wolf
2014-01-22 20:29   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 21/29] block: Assert serialisation assumptions in pwritev Kevin Wolf
2014-01-17 23:42   ` Max Reitz
2014-01-24 16:09   ` Benoît Canet
2014-01-24 16:18     ` Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 22/29] block: Change coroutine wrapper to byte granularity Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 23/29] block: Make bdrv_pread() a bdrv_prwv_co() wrapper Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 24/29] block: Make bdrv_pwrite() " Kevin Wolf
2014-01-17 23:43   ` Max Reitz
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 25/29] iscsi: Set bs->request_alignment Kevin Wolf
2014-01-24 16:29   ` Benoît Canet
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 26/29] blkdebug: Make required alignment configurable Kevin Wolf
2014-01-17 23:50   ` Max Reitz
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 27/29] qemu-io: New command 'sleep' Kevin Wolf
2014-01-17 23:55   ` Max Reitz
2014-01-20  9:58     ` Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 28/29] qemu-iotests: Test pwritev RMW logic Kevin Wolf
2014-01-18 16:01   ` Max Reitz
2014-01-20  9:44     ` Kevin Wolf
2014-01-17 14:15 ` [Qemu-devel] [PATCH v3 29/29] block: Switch bdrv_io_limits_intercept() to byte granularity Kevin Wolf
2014-01-17 23:59   ` Max Reitz
2014-01-22 20:30 ` [Qemu-devel] [PATCH v3 00/29] block: Support for 512b-on-4k emulation Christian Borntraeger
2014-01-23 10:29   ` Kevin Wolf
2014-01-23 11:12     ` Christian Borntraeger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140122201510.GD3053@irqsave.net \
    --to=benoit.canet@irqsave.net \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=pl@kamp.de \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=xiawenc@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).