All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v4 2/6] block: add .bdrv_co_write_zeroes() interface
Date: Tue, 24 Jan 2012 16:16:07 +0100	[thread overview]
Message-ID: <4F1ECB37.10701@redhat.com> (raw)
In-Reply-To: <1326898793-20331-3-git-send-email-stefanha@linux.vnet.ibm.com>

Am 18.01.2012 15:59, schrieb Stefan Hajnoczi:
> The ability to zero regions of an image file is a useful primitive for
> higher-level features such as image streaming or zero write detection.
> 
> Image formats may support an optimized metadata representation instead
> of writing zeroes into the image file.  This allows zero writes to be
> potentially faster than regular write operations and also to preserve
> the sparseness of the image file.
> 
> The .bdrv_co_write_zeroes() interface should be implemented by block
> drivers that wish to provide efficient zeroing.
> 
> Note that this operation is different from the discard operation, which
> may leave the contents of the region indeterminate.  That means
> discarded blocks are not guaranteed to contain zeroes and may contain
> junk data instead.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> ---
>  block.c      |   53 +++++++++++++++++++++++++++++++++++++++++++++++------
>  block.h      |    7 +++++++
>  block_int.h  |    8 ++++++++
>  trace-events |    1 +
>  4 files changed, 63 insertions(+), 6 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 3621d11..c9fa5c1 100644
> --- a/block.c
> +++ b/block.c
> @@ -50,6 +50,7 @@
>  
>  typedef enum {
>      BDRV_REQ_COPY_ON_READ = 0x1,
> +    BDRV_REQ_ZERO_WRITE   = 0x2,
>  } BdrvRequestFlags;
>  
>  static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
> @@ -69,7 +70,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
>      int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
>      BdrvRequestFlags flags);
>  static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
> -    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
> +    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> +    BdrvRequestFlags flags);
>  static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
>                                                 int64_t sector_num,
>                                                 QEMUIOVector *qiov,
> @@ -1300,7 +1302,7 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
>                                       rwco->nb_sectors, rwco->qiov, 0);
>      } else {
>          rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
> -                                      rwco->nb_sectors, rwco->qiov);
> +                                      rwco->nb_sectors, rwco->qiov, 0);
>      }
>  }
>  
> @@ -1639,11 +1641,37 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
>                              BDRV_REQ_COPY_ON_READ);
>  }
>  
> +static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
> +    int64_t sector_num, int nb_sectors)
> +{
> +    BlockDriver *drv = bs->drv;
> +    QEMUIOVector qiov;
> +    struct iovec iov;
> +    int ret;
> +
> +    /* First try the efficient write zeroes operation */
> +    if (drv->bdrv_co_write_zeroes) {
> +        return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
> +    }
> +
> +    /* Fall back to bounce buffer if write zeroes is unsupported */
> +    iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
> +    iov.iov_base = qemu_blockalign(bs, iov.iov_len);
> +    memset(iov.iov_base, 0, iov.iov_len);
> +    qemu_iovec_init_external(&qiov, &iov, 1);
> +
> +    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
> +
> +    qemu_vfree(iov.iov_base);
> +    return ret;
> +}
> +
>  /*
>   * Handle a write request in coroutine context
>   */
>  static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
> -    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
> +    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
> +    BdrvRequestFlags flags)
>  {
>      BlockDriver *drv = bs->drv;
>      BdrvTrackedRequest req;
> @@ -1670,7 +1698,11 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
>  
>      tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
>  
> -    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
> +    if (flags & BDRV_REQ_ZERO_WRITE) {
> +        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
> +    } else {
> +        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
> +    }
>  
>      if (bs->dirty_bitmap) {
>          set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
> @@ -1690,7 +1722,16 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
>  {
>      trace_bdrv_co_writev(bs, sector_num, nb_sectors);
>  
> -    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
> +    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
> +}
> +
> +int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
> +                                      int64_t sector_num, int nb_sectors)
> +{
> +    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
> +
> +    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
> +                             BDRV_REQ_ZERO_WRITE);
>  }
>  
>  /**
> @@ -3192,7 +3233,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
>              acb->req.nb_sectors, acb->req.qiov, 0);
>      } else {
>          acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
> -            acb->req.nb_sectors, acb->req.qiov);
> +            acb->req.nb_sectors, acb->req.qiov, 0);
>      }
>  
>      acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
> diff --git a/block.h b/block.h
> index cae289b..9f3aad3 100644
> --- a/block.h
> +++ b/block.h
> @@ -146,6 +146,13 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
>      int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
>  int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
>      int nb_sectors, QEMUIOVector *qiov);
> +/*
> + * Efficiently zero a region of the disk image.  Note that this is a regular
> + * I/O request like read or write and should have a reasonable size.  This
> + * function is not suitable for zeroing the entire image in a single request.
> + */

The reason for this size restriction is that in the fallback case you
allocate memory for the whole request, right? So what about just limiting
the allocation in bdrv_co_do_write_zeroes() to a reasonable size and
putting a loop there for large requests?

Kevin

  reply	other threads:[~2012-01-24 15:13 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-18 14:59 [Qemu-devel] [PATCH v4 0/6] block: zero writes Stefan Hajnoczi
2012-01-18 14:59 ` [Qemu-devel] [PATCH v4 1/6] cutils: extract buffer_is_zero() from qemu-img.c Stefan Hajnoczi
2012-01-24 15:03   ` Kevin Wolf
2012-02-06 15:37     ` Stefan Hajnoczi
2012-01-18 14:59 ` [Qemu-devel] [PATCH v4 2/6] block: add .bdrv_co_write_zeroes() interface Stefan Hajnoczi
2012-01-24 15:16   ` Kevin Wolf [this message]
2012-02-06 15:50     ` Stefan Hajnoczi
2012-02-06 16:00       ` Kevin Wolf
2012-02-06 16:16         ` Stefan Hajnoczi
2012-01-18 14:59 ` [Qemu-devel] [PATCH v4 3/6] block: perform zero-detection during copy-on-read Stefan Hajnoczi
2012-01-18 14:59 ` [Qemu-devel] [PATCH v4 4/6] qed: replace is_write with flags field Stefan Hajnoczi
2012-01-18 14:59 ` [Qemu-devel] [PATCH v4 5/6] qed: add .bdrv_co_write_zeroes() support Stefan Hajnoczi
2012-01-18 14:59 ` [Qemu-devel] [PATCH v4 6/6] qemu-io: add write -z option for bdrv_co_write_zeroes Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F1ECB37.10701@redhat.com \
    --to=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.