From: Fam Zheng <famz@redhat.com>
To: John Snow <jsnow@redhat.com>
Cc: kwolf@redhat.com, jcody@redhat.com, stefanha@redhat.com,
qemu-devel@nongnu.org, qemu-block@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v2 1/3] block/backup: make backup cluster size configurable
Date: Tue, 23 Feb 2016 13:06:30 +0800
Message-ID: <20160223050630.GA19080@ad.usersys.redhat.com>
In-Reply-To: <1456178827-6419-2-git-send-email-jsnow@redhat.com>
On Mon, 02/22 17:07, John Snow wrote:
> 64K might not always be appropriate; make this a runtime value.
>
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
> block/backup.c | 64 +++++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 36 insertions(+), 28 deletions(-)
>
> diff --git a/block/backup.c b/block/backup.c
> index 00cafdb..76addef 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -21,10 +21,7 @@
> #include "qemu/ratelimit.h"
> #include "sysemu/block-backend.h"
>
> -#define BACKUP_CLUSTER_BITS 16
> -#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
> -#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
> -
> +#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
> #define SLICE_TIME 100000000ULL /* ns */
>
> typedef struct CowRequest {
> @@ -46,9 +43,16 @@ typedef struct BackupBlockJob {
> CoRwlock flush_rwlock;
> uint64_t sectors_read;
> HBitmap *bitmap;
> + int64_t cluster_size;
> QLIST_HEAD(, CowRequest) inflight_reqs;
> } BackupBlockJob;
>
> +/* Size of a cluster in sectors, instead of bytes. */
> +static inline int64_t cluster_size_sectors(BackupBlockJob *job)
> +{
> + return job->cluster_size / BDRV_SECTOR_SIZE;
> +}
> +
> /* See if in-flight requests overlap and wait for them to complete */
> static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
> int64_t start,
> @@ -97,13 +101,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
> QEMUIOVector bounce_qiov;
> void *bounce_buffer = NULL;
> int ret = 0;
> + int64_t sectors_per_cluster = cluster_size_sectors(job);
> int64_t start, end;
> int n;
>
> qemu_co_rwlock_rdlock(&job->flush_rwlock);
>
> - start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
> - end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
> + start = sector_num / sectors_per_cluster;
> + end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
>
> trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
>
> @@ -118,12 +123,12 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
>
> trace_backup_do_cow_process(job, start);
>
> - n = MIN(BACKUP_SECTORS_PER_CLUSTER,
> + n = MIN(sectors_per_cluster,
> job->common.len / BDRV_SECTOR_SIZE -
> - start * BACKUP_SECTORS_PER_CLUSTER);
> + start * sectors_per_cluster);
>
> if (!bounce_buffer) {
> - bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
> + bounce_buffer = qemu_blockalign(bs, job->cluster_size);
> }
> iov.iov_base = bounce_buffer;
> iov.iov_len = n * BDRV_SECTOR_SIZE;
> @@ -131,10 +136,10 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
>
> if (is_write_notifier) {
> ret = bdrv_co_readv_no_serialising(bs,
> - start * BACKUP_SECTORS_PER_CLUSTER,
> + start * sectors_per_cluster,
> n, &bounce_qiov);
> } else {
> - ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
> + ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
> &bounce_qiov);
> }
> if (ret < 0) {
> @@ -147,11 +152,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
>
> if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
> ret = bdrv_co_write_zeroes(job->target,
> - start * BACKUP_SECTORS_PER_CLUSTER,
> + start * sectors_per_cluster,
> n, BDRV_REQ_MAY_UNMAP);
> } else {
> ret = bdrv_co_writev(job->target,
> - start * BACKUP_SECTORS_PER_CLUSTER, n,
> + start * sectors_per_cluster, n,
> &bounce_qiov);
> }
> if (ret < 0) {
> @@ -322,21 +327,22 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
> int64_t cluster;
> int64_t end;
> int64_t last_cluster = -1;
> + int64_t sectors_per_cluster = cluster_size_sectors(job);
> BlockDriverState *bs = job->common.bs;
> HBitmapIter hbi;
>
> granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
> - clusters_per_iter = MAX((granularity / BACKUP_CLUSTER_SIZE), 1);
> + clusters_per_iter = MAX((granularity / job->cluster_size), 1);
> bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
>
> /* Find the next dirty sector(s) */
> while ((sector = hbitmap_iter_next(&hbi)) != -1) {
> - cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
> + cluster = sector / sectors_per_cluster;
>
> /* Fake progress updates for any clusters we skipped */
> if (cluster != last_cluster + 1) {
> job->common.offset += ((cluster - last_cluster - 1) *
> - BACKUP_CLUSTER_SIZE);
> + job->cluster_size);
> }
>
> for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
> @@ -344,8 +350,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
> if (yield_and_check(job)) {
> return ret;
> }
> - ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
> - BACKUP_SECTORS_PER_CLUSTER, &error_is_read,
> + ret = backup_do_cow(bs, cluster * sectors_per_cluster,
> + sectors_per_cluster, &error_is_read,
> false);
> if ((ret < 0) &&
> backup_error_action(job, error_is_read, -ret) ==
> @@ -357,17 +363,17 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
>
> /* If the bitmap granularity is smaller than the backup granularity,
> * we need to advance the iterator pointer to the next cluster. */
> - if (granularity < BACKUP_CLUSTER_SIZE) {
> - bdrv_set_dirty_iter(&hbi, cluster * BACKUP_SECTORS_PER_CLUSTER);
> + if (granularity < job->cluster_size) {
> + bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster);
> }
>
> last_cluster = cluster - 1;
> }
>
> /* Play some final catchup with the progress meter */
> - end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
> + end = DIV_ROUND_UP(job->common.len, job->cluster_size);
> if (last_cluster + 1 < end) {
> - job->common.offset += ((end - last_cluster - 1) * BACKUP_CLUSTER_SIZE);
> + job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
> }
>
> return ret;
> @@ -384,13 +390,14 @@ static void coroutine_fn backup_run(void *opaque)
> .notify = backup_before_write_notify,
> };
> int64_t start, end;
> + int64_t sectors_per_cluster = cluster_size_sectors(job);
> int ret = 0;
>
> QLIST_INIT(&job->inflight_reqs);
> qemu_co_rwlock_init(&job->flush_rwlock);
>
> start = 0;
> - end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
> + end = DIV_ROUND_UP(job->common.len, job->cluster_size);
>
> job->bitmap = hbitmap_alloc(end, 0);
>
> @@ -427,7 +434,7 @@ static void coroutine_fn backup_run(void *opaque)
> /* Check to see if these blocks are already in the
> * backing file. */
>
> - for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
> + for (i = 0; i < sectors_per_cluster;) {
> /* bdrv_is_allocated() only returns true/false based
> * on the first set of sectors it comes across that
> * are all in the same state.
> @@ -436,8 +443,8 @@ static void coroutine_fn backup_run(void *opaque)
> * needed but at some point that is always the case. */
> alloced =
> bdrv_is_allocated(bs,
> - start * BACKUP_SECTORS_PER_CLUSTER + i,
> - BACKUP_SECTORS_PER_CLUSTER - i, &n);
> + start * sectors_per_cluster + i,
> + sectors_per_cluster - i, &n);
> i += n;
>
> if (alloced == 1 || n == 0) {
> @@ -452,8 +459,8 @@ static void coroutine_fn backup_run(void *opaque)
> }
> }
> /* FULL sync mode we copy the whole drive. */
> - ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
> - BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false);
> + ret = backup_do_cow(bs, start * sectors_per_cluster,
> + sectors_per_cluster, &error_is_read, false);
> if (ret < 0) {
> /* Depending on error action, fail now or retry cluster */
> BlockErrorAction action =
> @@ -571,6 +578,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
> job->sync_mode = sync_mode;
> job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
> sync_bitmap : NULL;
> + job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
> job->common.len = len;
> job->common.co = qemu_coroutine_create(backup_run);
> block_job_txn_add_job(txn, &job->common);
> --
> 2.4.3
>
Reviewed-by: Fam Zheng <famz@redhat.com>
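
As an aside: with cluster_size now a per-job field, a follow-up can size
it from the target image instead of hard-coding 64K. A minimal sketch of
what that might look like in backup_start() -- the bdrv_get_info() probe
and the MAX() policy here are illustrative, not part of this patch:

    BlockDriverInfo bdi;

    if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size > 0) {
        /* Round up to the target's cluster size so copy-on-write
         * requests never write partial target clusters. */
        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT,
                                bdi.cluster_size);
    } else {
        /* No usable target info; keep the 64K default. */
        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
    }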