From: Max Reitz <mreitz@redhat.com>
To: Alberto Garcia <berto@igalia.com>, qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
qemu-block@nongnu.org, Stefan Hajnoczi <stefanha@redhat.com>
Subject: Re: [Qemu-devel] [PATCH 2/3] qcow2: add option to clean unused cache entries after some time
Date: Thu, 28 May 2015 16:56:34 +0200 [thread overview]
Message-ID: <55672CA2.10105@redhat.com> (raw)
In-Reply-To: <8007efe81120cd72f7c4145b8bbc3f4bc558e62d.1432719752.git.berto@igalia.com>
On 27.05.2015 11:46, Alberto Garcia wrote:
> This adds a new 'cache-clean-interval' option that cleans all qcow2
> cache entries that haven't been used in a certain interval, given in
> seconds.
>
> This allows setting a large L2 cache size so it can handle scenarios
> with lots of I/O and at the same time use little memory during periods
> of inactivity.
>
> This feature currently relies on MADV_DONTNEED to free that memory, so
> it is not useful in systems that don't follow that behavior.
>
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> Cc: Max Reitz <mreitz@redhat.com>
> ---
> block/qcow2-cache.c | 35 ++++++++++++++++++++++++++++
> block/qcow2.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> block/qcow2.h | 4 ++++
> qapi/block-core.json | 13 +++++++++--
> 4 files changed, 116 insertions(+), 2 deletions(-)
>
> diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
> index ed14a92..a215f5b 100644
> --- a/block/qcow2-cache.c
> +++ b/block/qcow2-cache.c
> @@ -43,6 +43,7 @@ struct Qcow2Cache {
> bool depends_on_flush;
> void *table_array;
> uint64_t lru_counter;
> + uint64_t cache_clean_lru_counter;
> };
>
> static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
> @@ -78,6 +79,40 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
> #endif
> }
>
> +static inline bool can_clean_entry(Qcow2Cache *c, int i)
> +{
> + Qcow2CachedTable *t = &c->entries[i];
> + return t->ref == 0 && !t->dirty && t->offset != 0 &&
> + t->lru_counter <= c->cache_clean_lru_counter;
> +}
> +
> +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
> +{
> + int i = 0;
> + while (i < c->size) {
> + int to_clean = 0;
> +
> + /* Skip the entries that we don't need to clean */
> + while (i < c->size && !can_clean_entry(c, i)) {
> + i++;
> + }
> +
> + /* And count how many we can clean in a row */
> + while (i < c->size && can_clean_entry(c, i)) {
> + c->entries[i].offset = 0;
> + c->entries[i].lru_counter = 0;
> + i++;
> + to_clean++;
> + }
> +
> + if (to_clean > 0) {
> + qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
> + }
> + }
> +
> + c->cache_clean_lru_counter = c->lru_counter;
> +}
> +
> Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
> {
> BDRVQcowState *s = bs->opaque;
> diff --git a/block/qcow2.c b/block/qcow2.c
> index f7b4cc6..abafcdf 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -468,6 +468,11 @@ static QemuOptsList qcow2_runtime_opts = {
> .type = QEMU_OPT_SIZE,
> .help = "Maximum refcount block cache size",
> },
> + {
> + .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
> + .type = QEMU_OPT_NUMBER,
> + .help = "Clean unused cache entries after this time (in seconds)",
> + },
> { /* end of list */ }
> },
> };
> @@ -483,6 +488,49 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
> [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
> };
>
> +static void cache_clean_timer_cb(void *opaque)
> +{
> + BlockDriverState *bs = opaque;
> + BDRVQcowState *s = bs->opaque;
> + qcow2_cache_clean_unused(bs, s->l2_table_cache);
> + qcow2_cache_clean_unused(bs, s->refcount_block_cache);
> + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
> + (int64_t) s->cache_clean_interval * 1000);
> +}
> +
> +static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
> +{
> + BDRVQcowState *s = bs->opaque;
> + if (s->cache_clean_interval > 0) {
> + s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
> + SCALE_MS, cache_clean_timer_cb,
> + bs);
> + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
> + (int64_t) s->cache_clean_interval * 1000);
> + }
> +}
> +
> +static void cache_clean_timer_del(BlockDriverState *bs)
> +{
> + BDRVQcowState *s = bs->opaque;
> + if (s->cache_clean_timer) {
> + timer_del(s->cache_clean_timer);
> + timer_free(s->cache_clean_timer);
> + s->cache_clean_timer = NULL;
> + }
> +}
> +
> +static void qcow2_detach_aio_context(BlockDriverState *bs)
> +{
> + cache_clean_timer_del(bs);
> +}
> +
> +static void qcow2_attach_aio_context(BlockDriverState *bs,
> + AioContext *new_context)
> +{
> + cache_clean_timer_init(bs, new_context);
> +}
> +
> static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size,
> uint64_t *refcount_cache_size, Error **errp)
> {
> @@ -552,6 +600,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
> const char *opt_overlap_check, *opt_overlap_check_template;
> int overlap_check_template = 0;
> uint64_t l2_cache_size, refcount_cache_size;
> + uint64_t cache_clean_interval;
>
> ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
> if (ret < 0) {
> @@ -839,6 +888,16 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
> goto fail;
> }
>
> + cache_clean_interval =
> + qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 0);
> + if (cache_clean_interval > UINT_MAX) {
> + error_setg(errp, "Cache clean interval too big");
> + ret = -EINVAL;
> + goto fail;
> + }
> + s->cache_clean_interval = cache_clean_interval;
> + cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
> +
> s->cluster_cache = g_malloc(s->cluster_size);
> /* one more sector for decompressed data alignment */
> s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
> @@ -1004,6 +1063,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
> qemu_vfree(s->l1_table);
> /* else pre-write overlap checks in cache_destroy may crash */
> s->l1_table = NULL;
> + cache_clean_timer_del(bs);
> if (s->l2_table_cache) {
> qcow2_cache_destroy(bs, s->l2_table_cache);
> }
> @@ -1458,6 +1518,7 @@ static void qcow2_close(BlockDriverState *bs)
> }
> }
>
> + cache_clean_timer_del(bs);
> qcow2_cache_destroy(bs, s->l2_table_cache);
> qcow2_cache_destroy(bs, s->refcount_block_cache);
>
> @@ -2552,6 +2613,8 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
> };
> }
>
> + spec_info->qcow2->cache_clean_interval = s->cache_clean_interval;
> +
> return spec_info;
> }
>
> @@ -2970,6 +3033,9 @@ BlockDriver bdrv_qcow2 = {
> .create_opts = &qcow2_create_opts,
> .bdrv_check = qcow2_check,
> .bdrv_amend_options = qcow2_amend_options,
> +
> + .bdrv_detach_aio_context = qcow2_detach_aio_context,
> + .bdrv_attach_aio_context = qcow2_attach_aio_context,
> };
>
> static void bdrv_qcow2_init(void)
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 0076512..2c45eb2 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -93,6 +93,7 @@
> #define QCOW2_OPT_CACHE_SIZE "cache-size"
> #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
> #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
> +#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
>
> typedef struct QCowHeader {
> uint32_t magic;
> @@ -236,6 +237,8 @@ typedef struct BDRVQcowState {
>
> Qcow2Cache* l2_table_cache;
> Qcow2Cache* refcount_block_cache;
> + QEMUTimer *cache_clean_timer;
> + unsigned cache_clean_interval;
>
> uint8_t *cluster_cache;
> uint8_t *cluster_data;
> @@ -581,6 +584,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
> Qcow2Cache *dependency);
> void qcow2_cache_depends_on_flush(Qcow2Cache *c);
>
> +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
> int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
>
> int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 863ffea..3a9b590 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -41,6 +41,10 @@
> # @corrupt: #optional true if the image has been marked corrupt; only valid for
> # compat >= 1.1 (since 2.2)
> #
> +# @cache-clean-interval: interval in seconds after which unused L2 and
> +# refcount cache entries are removed. If 0 then
> +# this feature is not enabled (since 2.4)
> +#
> # @refcount-bits: width of a refcount entry in bits (since 2.3)
> #
> # Since: 1.7
> @@ -50,7 +54,8 @@
> 'compat': 'str',
> '*lazy-refcounts': 'bool',
> '*corrupt': 'bool',
> - 'refcount-bits': 'int'
> + 'refcount-bits': 'int',
> + 'cache-clean-interval': 'int'
> } }
I'm not too happy about making this part of ImageInfoSpecificQCow2. Two
reasons for this: First, it's eventually part of ImageInfo, which is
defined as "Information about a QEMU image file", but this option cannot
be set in the image file itself but is only a run-time option.
Second, my original goal for ImageInfoSpecific was to provide more
information through qemu-img info, and this value will look pretty
strange there.
I don't know how to resolve this quandary, though. Since qemu cannot
change this interval by itself, I think not providing an interface for
reading it is fine. On the other hand, if Eric finds such an interface
absolutely mandatory, I can't think of a better place to return it than
here.
The only solution which would satisfy both requirements would be another
structure which contains "online" flags, and thus is not evaluated by
qemu-img info, but only by QMP commands. But that's ugly.
Max
> ##
> @@ -1538,6 +1543,9 @@
> # @refcount-cache-size: #optional the maximum size of the refcount block cache
> # in bytes (since 2.2)
> #
> +# @cache-clean-interval: #optional clean unused entries in the L2 and refcount
> +# caches. The interval is in seconds (since 2.4)
> +#
> # Since: 1.7
> ##
> { 'struct': 'BlockdevOptionsQcow2',
> @@ -1549,7 +1557,8 @@
> '*overlap-check': 'Qcow2OverlapChecks',
> '*cache-size': 'int',
> '*l2-cache-size': 'int',
> - '*refcount-cache-size': 'int' } }
> + '*refcount-cache-size': 'int',
> + '*cache-clean-interval': 'int' } }
>
>
> ##
next prev parent reply other threads:[~2015-05-28 14:56 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-05-27 9:46 [Qemu-devel] [PATCH v3 0/3] Clean unused entries in the qcow2 L2/refcount cache Alberto Garcia
2015-05-27 9:46 ` [Qemu-devel] [PATCH 1/3] qcow2: mark the memory as no longer needed after qcow2_cache_empty() Alberto Garcia
2015-05-27 9:46 ` [Qemu-devel] [PATCH 2/3] qcow2: add option to clean unused cache entries after some time Alberto Garcia
2015-05-27 12:27 ` Eric Blake
2015-05-27 14:34 ` Alberto Garcia
2015-05-28 14:47 ` Max Reitz
2015-05-28 14:56 ` Max Reitz [this message]
2015-05-28 15:02 ` Alberto Garcia
2015-05-28 15:04 ` Max Reitz
2015-05-28 15:06 ` Max Reitz
2015-05-28 15:13 ` Eric Blake
2015-05-28 15:14 ` Max Reitz
2015-05-28 15:19 ` Alberto Garcia
2015-05-28 15:23 ` Max Reitz
2015-05-28 15:30 ` Alberto Garcia
2015-05-28 19:41 ` Eric Blake
2015-05-29 8:32 ` Kevin Wolf
2015-05-28 16:44 ` Kevin Wolf
2015-05-28 17:03 ` Alberto Garcia
2015-05-27 9:46 ` [Qemu-devel] [PATCH 3/3] qcow2: reorder fields in Qcow2CachedTable to reduce padding Alberto Garcia
-- strict thread matches above, loose matches on Subject: below --
2015-05-29 9:24 [Qemu-devel] [PATCH v4 0/3] Clean unused entries in the qcow2 L2/refcount cache Alberto Garcia
2015-05-29 9:24 ` [Qemu-devel] [PATCH 2/3] qcow2: add option to clean unused cache entries after some time Alberto Garcia
2015-06-02 11:04 ` Kevin Wolf
2015-05-26 17:14 [Qemu-devel] [PATCH v2 0/3] Clean unused entries in the qcow2 L2/refcount cache Alberto Garcia
2015-05-26 17:14 ` [Qemu-devel] [PATCH 2/3] qcow2: add option to clean unused cache entries after some time Alberto Garcia
2015-05-26 17:26 ` Max Reitz
2015-05-26 18:52 ` Eric Blake
2015-05-26 19:10 ` Alberto Garcia
2015-05-26 19:15 ` Eric Blake
2015-05-18 16:48 [Qemu-devel] [PATCH 0/3] Clean unused entries in the qcow2 L2/refcount cache Alberto Garcia
2015-05-18 16:48 ` [Qemu-devel] [PATCH 2/3] qcow2: add option to clean unused cache entries after some time Alberto Garcia
2015-05-26 16:07 ` Max Reitz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=55672CA2.10105@redhat.com \
--to=mreitz@redhat.com \
--cc=berto@igalia.com \
--cc=kwolf@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.