qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Fam Zheng <famz@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: stefanha@redhat.com, qemu-devel@nongnu.org, qemu-block@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 06/16] block: add BDS field to count in-flight requests
Date: Wed, 9 Mar 2016 11:35:04 +0800	[thread overview]
Message-ID: <20160309033504.GE17947@ad.usersys.redhat.com> (raw)
In-Reply-To: <1455645388-32401-7-git-send-email-pbonzini@redhat.com>

On Tue, 02/16 18:56, Paolo Bonzini wrote:
> Unlike tracked_requests, this field also counts throttled requests,
> and remains non-zero if an AIO operation needs a BH to be "really"
> completed.
> 
> With this change, it is no longer necessary to have a dummy
> BdrvTrackedRequest for requests that are never serialising, and
> it is no longer necessary to poll the AioContext once after
> bdrv_requests_pending(bs) returns false.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  block/io.c                | 71 +++++++++++++++++++++++++++++++----------------
>  block/mirror.c            |  2 ++
>  include/block/block_int.h |  8 ++++--
>  3 files changed, 54 insertions(+), 27 deletions(-)
> 
> diff --git a/block/io.c b/block/io.c
> index d8d50b7..a9a23a6 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -224,13 +224,7 @@ bool bdrv_requests_pending(BlockDriverState *bs)
>  {
>      BdrvChild *child;
>  
> -    if (!QLIST_EMPTY(&bs->tracked_requests)) {
> -        return true;
> -    }
> -    if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
> -        return true;
> -    }
> -    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
> +    if (atomic_read(&bs->in_flight)) {
>          return true;
>      }
>  
> @@ -268,15 +262,12 @@ static void bdrv_drain_recurse(BlockDriverState *bs)
>   */
>  void bdrv_drain(BlockDriverState *bs)
>  {
> -    bool busy = true;
> -
>      bdrv_no_throttling_begin(bs);
>      bdrv_io_unplugged_begin(bs);
>      bdrv_drain_recurse(bs);
> -    while (busy) {
> +    while (bdrv_requests_pending(bs)) {
>          /* Keep iterating */
> -         busy = bdrv_requests_pending(bs);
> -         busy |= aio_poll(bdrv_get_aio_context(bs), busy);
> +         aio_poll(bdrv_get_aio_context(bs), true);
>      }
>      bdrv_io_unplugged_end(bs);
>      bdrv_no_throttling_end(bs);
> @@ -291,7 +282,7 @@ void bdrv_drain(BlockDriverState *bs)
>  void bdrv_drain_all(void)
>  {
>      /* Always run first iteration so any pending completion BHs run */
> -    bool busy = true;
> +    bool waited = true;
>      BlockDriverState *bs = NULL;
>      GSList *aio_ctxs = NULL, *ctx;
>  
> @@ -318,8 +309,8 @@ void bdrv_drain_all(void)
>       * request completion.  Therefore we must keep looping until there was no
>       * more activity rather than simply draining each device independently.
>       */
> -    while (busy) {
> -        busy = false;
> +    while (waited) {
> +        waited = false;
>  
>          for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
>              AioContext *aio_context = ctx->data;
> @@ -329,12 +320,11 @@ void bdrv_drain_all(void)
>              while ((bs = bdrv_next(bs))) {
>                  if (aio_context == bdrv_get_aio_context(bs)) {
>                      if (bdrv_requests_pending(bs)) {
> -                        busy = true;
> -                        aio_poll(aio_context, busy);
> +                        aio_poll(aio_context, true);
> +                        waited = true;
>                      }
>                  }
>              }
> -            busy |= aio_poll(aio_context, false);
>              aio_context_release(aio_context);
>          }
>      }
> @@ -457,6 +447,16 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req,
>      return true;
>  }
>  
> +void bdrv_inc_in_flight(BlockDriverState *bs)
> +{
> +    atomic_inc(&bs->in_flight);
> +}
> +
> +void bdrv_dec_in_flight(BlockDriverState *bs)
> +{
> +    atomic_dec(&bs->in_flight);
> +}
> +
>  static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>  {
>      BlockDriverState *bs = self->bs;
> @@ -963,6 +963,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
>          return ret;
>      }
>  
> +    bdrv_inc_in_flight(bs);
> +
>      /* Don't do copy-on-read if we read data before write operation */
>      if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
>          flags |= BDRV_REQ_COPY_ON_READ;
> @@ -1003,6 +1005,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
>                                use_local_qiov ? &local_qiov : qiov,
>                                flags);
>      tracked_request_end(&req);
> +    bdrv_dec_in_flight(bs);
>  
>      if (use_local_qiov) {
>          qemu_iovec_destroy(&local_qiov);
> @@ -1310,6 +1313,8 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
>          return ret;
>      }
>  
> +    bdrv_inc_in_flight(bs);
> +
>      /* throttling disk I/O */
>      if (bs->throttle_state) {
>          throttle_group_co_io_limits_intercept(bs, bytes, true);
> @@ -1408,6 +1413,7 @@ fail:
>      qemu_vfree(tail_buf);
>  out:
>      tracked_request_end(&req);
> +    bdrv_dec_in_flight(bs);
>      return ret;
>  }
>  
> @@ -2065,6 +2071,7 @@ static const AIOCBInfo bdrv_em_aiocb_info = {
>  static void bdrv_aio_bh_cb(void *opaque)
>  {
>      BlockAIOCBSync *acb = opaque;
> +    BlockDriverState *bs = acb->common.bs;
>  
>      if (!acb->is_write && acb->ret >= 0) {
>          qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
> @@ -2072,6 +2079,7 @@ static void bdrv_aio_bh_cb(void *opaque)
>      qemu_vfree(acb->bounce);
>      acb->common.cb(acb->common.opaque, acb->ret);
>      qemu_bh_delete(acb->bh);
> +    bdrv_dec_in_flight(bs);
>      acb->bh = NULL;
>      qemu_aio_unref(acb);
>  }
> @@ -2087,6 +2095,7 @@ static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
>  {
>      BlockAIOCBSync *acb;
>  
> +    bdrv_inc_in_flight(bs);
>      acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
>      acb->is_write = is_write;
>      acb->qiov = qiov;
> @@ -2139,6 +2148,7 @@ static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
>  {
>      if (!acb->need_bh) {
>          acb->common.cb(acb->common.opaque, acb->req.error);
> +        bdrv_dec_in_flight(acb->common.bs);
>          qemu_aio_unref(acb);
>      }
>  }
> @@ -2192,6 +2202,9 @@ static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
>      Coroutine *co;
>      BlockAIOCBCoroutine *acb;
>  
> +    /* Matched by bdrv_co_complete's bdrv_dec_in_flight.  */
> +    bdrv_inc_in_flight(bs);
> +
>      acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
>      acb->need_bh = true;
>      acb->req.error = -EINPROGRESS;
> @@ -2225,6 +2238,9 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
>      Coroutine *co;
>      BlockAIOCBCoroutine *acb;
>  
> +    /* Matched by bdrv_co_complete's bdrv_dec_in_flight.  */
> +    bdrv_inc_in_flight(bs);
> +
>      acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
>      acb->need_bh = true;
>      acb->req.error = -EINPROGRESS;
> @@ -2254,6 +2270,9 @@ BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
>  
>      trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
>  
> +    /* Matched by bdrv_co_complete's bdrv_dec_in_flight.  */
> +    bdrv_inc_in_flight(bs);
> +
>      acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
>      acb->need_bh = true;
>      acb->req.error = -EINPROGRESS;
> @@ -2361,14 +2380,14 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)
>  int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
>  {
>      int ret;
> -    BdrvTrackedRequest req;
>  
>      if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
>          bdrv_is_sg(bs)) {
>          return 0;
>      }
>  
> -    tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
> +    bdrv_inc_in_flight(bs);
> +
>      /* Write back cached data to the OS even with cache=unsafe */
>      BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
>      if (bs->drv->bdrv_co_flush_to_os) {
> @@ -2423,7 +2442,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
>  flush_parent:
>      ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
>  out:
> -    tracked_request_end(&req);
> +    bdrv_dec_in_flight(bs);
>      return ret;
>  }
>  
> @@ -2491,6 +2510,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
>          return 0;
>      }
>  
> +    bdrv_inc_in_flight(bs);
>      tracked_request_begin(&req, bs, sector_num, nb_sectors,
>                            BDRV_TRACKED_DISCARD);
>      bdrv_set_dirty(bs, sector_num, nb_sectors);
> @@ -2543,6 +2563,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
>      ret = 0;
>  out:
>      tracked_request_end(&req);
> +    bdrv_dec_in_flight(bs);
>      return ret;
>  }
>  
> @@ -2588,13 +2609,12 @@ static void bdrv_ioctl_bh_cb(void *opaque)
>  static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
>  {
>      BlockDriver *drv = bs->drv;
> -    BdrvTrackedRequest tracked_req;
>      CoroutineIOCompletion co = {
>          .coroutine = qemu_coroutine_self(),
>      };
>      BlockAIOCB *acb;
>  
> -    tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL);
> +    bdrv_inc_in_flight(bs);
>      if (!drv || !drv->bdrv_aio_ioctl) {
>          co.ret = -ENOTSUP;
>          goto out;
> @@ -2610,7 +2630,7 @@ static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
>      }
>      qemu_coroutine_yield();
>  out:
> -    tracked_request_end(&tracked_req);
> +    bdrv_dec_in_flight(bs);
>      return co.ret;
>  }
>  
> @@ -2667,6 +2687,9 @@ BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
>                                              bs, cb, opaque);
>      Coroutine *co;
>  
> +    /* Matched by bdrv_co_complete's bdrv_dec_in_flight.  */
> +    bdrv_inc_in_flight(bs);
> +
>      acb->need_bh = true;
>      acb->req.error = -EINPROGRESS;
>      acb->req.req = req;
> diff --git a/block/mirror.c b/block/mirror.c
> index 793c20c..3f163b8 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -94,6 +94,7 @@ static void mirror_bh_cb(void *opaque)
>  
>      qemu_bh_delete(op->co_enter_bh);
>      g_free(op);
> +    bdrv_dec_in_flight(s->common.bs);
>      if (s->waiting_for_io) {
>          qemu_coroutine_enter(s->common.co, NULL);
>      }
> @@ -134,6 +135,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
>       * If we call qemu_coroutine_enter here, there is the possibility
>       * of a deadlock when the coroutine calls bdrv_drained_begin.
>       */
> +    bdrv_inc_in_flight(s->common.bs);
>      op->co_enter_bh = qemu_bh_new(mirror_bh_cb, op);
>      qemu_bh_schedule(op->co_enter_bh);
>  }
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 2838814..89c38c0 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -63,8 +63,6 @@
>  enum BdrvTrackedRequestType {
>      BDRV_TRACKED_READ,
>      BDRV_TRACKED_WRITE,
> -    BDRV_TRACKED_FLUSH,
> -    BDRV_TRACKED_IOCTL,
>      BDRV_TRACKED_DISCARD,

Okay, so flush and ioctl are not needed, but why is discard different?

Fam

>  };
>  
> @@ -406,7 +404,8 @@ struct BlockDriverState {
>      /* Callback before write request is processed */
>      NotifierWithReturnList before_write_notifiers;
>  
> -    /* number of in-flight serialising requests */
> +    /* number of in-flight requests; overall and serialising */
> +    unsigned int in_flight;
>      unsigned int serialising_in_flight;
>  
>      /* I/O throttling.
> @@ -713,6 +712,9 @@ bool bdrv_requests_pending(BlockDriverState *bs);
>  void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
>  void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);
>  
> +void bdrv_inc_in_flight(BlockDriverState *bs);
> +void bdrv_dec_in_flight(BlockDriverState *bs);
> +
>  void blockdev_close_all_bdrv_states(void);
>  
>  #endif /* BLOCK_INT_H */
> -- 
> 2.5.0
> 
> 
> 

  reply	other threads:[~2016-03-09  3:35 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-16 17:56 [Qemu-devel] [PATCH 00/16] AioContext fine-grained locking, part 1 of 3, including bdrv_drain rewrite Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 01/16] block: make bdrv_start_throttled_reqs return void Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 02/16] block: move restarting of throttled reqs to block/throttle-groups.c Paolo Bonzini
2016-03-09  1:26   ` Fam Zheng
2016-03-09  7:37     ` Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 03/16] block: introduce bdrv_no_throttling_begin/end Paolo Bonzini
2016-03-09  1:45   ` Fam Zheng
2016-03-09  7:40     ` Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 04/16] block: plug whole tree at once, introduce bdrv_io_unplugged_begin/end Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 05/16] mirror: use bottom half to re-enter coroutine Paolo Bonzini
2016-03-09  3:19   ` Fam Zheng
2016-03-09  7:41     ` Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 06/16] block: add BDS field to count in-flight requests Paolo Bonzini
2016-03-09  3:35   ` Fam Zheng [this message]
2016-03-09  7:43     ` Paolo Bonzini
2016-03-09  8:00       ` Fam Zheng
2016-03-09  8:22         ` Paolo Bonzini
2016-03-09  8:33           ` Fam Zheng
2016-02-16 17:56 ` [Qemu-devel] [PATCH 07/16] block: change drain to look only at one child at a time Paolo Bonzini
2016-03-09  3:41   ` Fam Zheng
2016-03-09  7:49     ` Paolo Bonzini
2016-03-16 16:39   ` Stefan Hajnoczi
2016-03-16 17:41     ` Paolo Bonzini
2016-03-17  0:57       ` Fam Zheng
2016-02-16 17:56 ` [Qemu-devel] [PATCH 08/16] blockjob: introduce .drain callback for jobs Paolo Bonzini
2016-03-16 17:56   ` Stefan Hajnoczi
2016-02-16 17:56 ` [Qemu-devel] [PATCH 09/16] block: wait for all pending I/O when doing synchronous requests Paolo Bonzini
2016-03-09  8:13   ` Fam Zheng
2016-03-09  8:23     ` Paolo Bonzini
2016-03-16 18:04   ` Stefan Hajnoczi
2016-02-16 17:56 ` [Qemu-devel] [PATCH 10/16] nfs: replace aio_poll with bdrv_drain Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 11/16] sheepdog: disable dataplane Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 12/16] aio: introduce aio_context_in_iothread Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 13/16] block: only call aio_poll from iothread Paolo Bonzini
2016-03-09  8:30   ` Fam Zheng
2016-03-09  8:55     ` Paolo Bonzini
2016-03-09  9:10     ` Paolo Bonzini
2016-03-09  9:27       ` Fam Zheng
2016-02-16 17:56 ` [Qemu-devel] [PATCH 14/16] iothread: release AioContext around aio_poll Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 15/16] qemu-thread: introduce QemuRecMutex Paolo Bonzini
2016-02-16 17:56 ` [Qemu-devel] [PATCH 16/16] aio: convert from RFifoLock to QemuRecMutex Paolo Bonzini
2016-03-08 17:51 ` [Qemu-devel] [PATCH 00/16] AioContext fine-grained locking, part 1 of 3, including bdrv_drain rewrite Paolo Bonzini
2016-03-09  8:46 ` Fam Zheng
2016-03-16 18:18 ` Stefan Hajnoczi
2016-03-16 22:29   ` Paolo Bonzini
2016-03-17 13:44     ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2016-03-17 13:48       ` Paolo Bonzini
2016-03-18 15:49         ` Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160309033504.GE17947@ad.usersys.redhat.com \
    --to=famz@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).