qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Max Reitz <mreitz@redhat.com>
To: Alberto Garcia <berto@igalia.com>, qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
	qemu-block@nongnu.org, Stefan Hajnoczi <stefanha@redhat.com>
Subject: Re: [Qemu-devel] [PATCH 2/3] qcow2: add option to clean unused cache entries after some time
Date: Thu, 28 May 2015 16:56:34 +0200	[thread overview]
Message-ID: <55672CA2.10105@redhat.com> (raw)
In-Reply-To: <8007efe81120cd72f7c4145b8bbc3f4bc558e62d.1432719752.git.berto@igalia.com>

On 27.05.2015 11:46, Alberto Garcia wrote:
> This adds a new 'cache-clean-interval' option that cleans all qcow2
> cache entries that haven't been used in a certain interval, given in
> seconds.
>
> This allows setting a large L2 cache size so it can handle scenarios
> with lots of I/O and at the same time use little memory during periods
> of inactivity.
>
> This feature currently relies on MADV_DONTNEED to free that memory, so
> it is not useful in systems that don't follow that behavior.
>
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> Cc: Max Reitz <mreitz@redhat.com>
> ---
>   block/qcow2-cache.c  | 35 ++++++++++++++++++++++++++++
>   block/qcow2.c        | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>   block/qcow2.h        |  4 ++++
>   qapi/block-core.json | 13 +++++++++--
>   4 files changed, 116 insertions(+), 2 deletions(-)
>
> diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
> index ed14a92..a215f5b 100644
> --- a/block/qcow2-cache.c
> +++ b/block/qcow2-cache.c
> @@ -43,6 +43,7 @@ struct Qcow2Cache {
>       bool                    depends_on_flush;
>       void                   *table_array;
>       uint64_t                lru_counter;
> +    uint64_t                cache_clean_lru_counter;
>   };
>   
>   static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
> @@ -78,6 +79,40 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
>   #endif
>   }
>   
> +static inline bool can_clean_entry(Qcow2Cache *c, int i)
> +{
> +    Qcow2CachedTable *t = &c->entries[i];
> +    return t->ref == 0 && !t->dirty && t->offset != 0 &&
> +        t->lru_counter <= c->cache_clean_lru_counter;
> +}
> +
> +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
> +{
> +    int i = 0;
> +    while (i < c->size) {
> +        int to_clean = 0;
> +
> +        /* Skip the entries that we don't need to clean */
> +        while (i < c->size && !can_clean_entry(c, i)) {
> +            i++;
> +        }
> +
> +        /* And count how many we can clean in a row */
> +        while (i < c->size && can_clean_entry(c, i)) {
> +            c->entries[i].offset = 0;
> +            c->entries[i].lru_counter = 0;
> +            i++;
> +            to_clean++;
> +        }
> +
> +        if (to_clean > 0) {
> +            qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
> +        }
> +    }
> +
> +    c->cache_clean_lru_counter = c->lru_counter;
> +}
> +
>   Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
>   {
>       BDRVQcowState *s = bs->opaque;
> diff --git a/block/qcow2.c b/block/qcow2.c
> index f7b4cc6..abafcdf 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -468,6 +468,11 @@ static QemuOptsList qcow2_runtime_opts = {
>               .type = QEMU_OPT_SIZE,
>               .help = "Maximum refcount block cache size",
>           },
> +        {
> +            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "Clean unused cache entries after this time (in seconds)",
> +        },
>           { /* end of list */ }
>       },
>   };
> @@ -483,6 +488,49 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
>       [QCOW2_OL_INACTIVE_L2_BITNR]    = QCOW2_OPT_OVERLAP_INACTIVE_L2,
>   };
>   
> +static void cache_clean_timer_cb(void *opaque)
> +{
> +    BlockDriverState *bs = opaque;
> +    BDRVQcowState *s = bs->opaque;
> +    qcow2_cache_clean_unused(bs, s->l2_table_cache);
> +    qcow2_cache_clean_unused(bs, s->refcount_block_cache);
> +    timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
> +              (int64_t) s->cache_clean_interval * 1000);
> +}
> +
> +static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    if (s->cache_clean_interval > 0) {
> +        s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
> +                                             SCALE_MS, cache_clean_timer_cb,
> +                                             bs);
> +        timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
> +                  (int64_t) s->cache_clean_interval * 1000);
> +    }
> +}
> +
> +static void cache_clean_timer_del(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    if (s->cache_clean_timer) {
> +        timer_del(s->cache_clean_timer);
> +        timer_free(s->cache_clean_timer);
> +        s->cache_clean_timer = NULL;
> +    }
> +}
> +
> +static void qcow2_detach_aio_context(BlockDriverState *bs)
> +{
> +    cache_clean_timer_del(bs);
> +}
> +
> +static void qcow2_attach_aio_context(BlockDriverState *bs,
> +                                     AioContext *new_context)
> +{
> +    cache_clean_timer_init(bs, new_context);
> +}
> +
>   static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size,
>                                uint64_t *refcount_cache_size, Error **errp)
>   {
> @@ -552,6 +600,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
>       const char *opt_overlap_check, *opt_overlap_check_template;
>       int overlap_check_template = 0;
>       uint64_t l2_cache_size, refcount_cache_size;
> +    uint64_t cache_clean_interval;
>   
>       ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
>       if (ret < 0) {
> @@ -839,6 +888,16 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
>           goto fail;
>       }
>   
> +    cache_clean_interval =
> +        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 0);
> +    if (cache_clean_interval > UINT_MAX) {
> +        error_setg(errp, "Cache clean interval too big");
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +    s->cache_clean_interval = cache_clean_interval;
> +    cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
> +
>       s->cluster_cache = g_malloc(s->cluster_size);
>       /* one more sector for decompressed data alignment */
>       s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
> @@ -1004,6 +1063,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
>       qemu_vfree(s->l1_table);
>       /* else pre-write overlap checks in cache_destroy may crash */
>       s->l1_table = NULL;
> +    cache_clean_timer_del(bs);
>       if (s->l2_table_cache) {
>           qcow2_cache_destroy(bs, s->l2_table_cache);
>       }
> @@ -1458,6 +1518,7 @@ static void qcow2_close(BlockDriverState *bs)
>           }
>       }
>   
> +    cache_clean_timer_del(bs);
>       qcow2_cache_destroy(bs, s->l2_table_cache);
>       qcow2_cache_destroy(bs, s->refcount_block_cache);
>   
> @@ -2552,6 +2613,8 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
>           };
>       }
>   
> +    spec_info->qcow2->cache_clean_interval = s->cache_clean_interval;
> +
>       return spec_info;
>   }
>   
> @@ -2970,6 +3033,9 @@ BlockDriver bdrv_qcow2 = {
>       .create_opts         = &qcow2_create_opts,
>       .bdrv_check          = qcow2_check,
>       .bdrv_amend_options  = qcow2_amend_options,
> +
> +    .bdrv_detach_aio_context  = qcow2_detach_aio_context,
> +    .bdrv_attach_aio_context  = qcow2_attach_aio_context,
>   };
>   
>   static void bdrv_qcow2_init(void)
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 0076512..2c45eb2 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -93,6 +93,7 @@
>   #define QCOW2_OPT_CACHE_SIZE "cache-size"
>   #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
>   #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
> +#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
>   
>   typedef struct QCowHeader {
>       uint32_t magic;
> @@ -236,6 +237,8 @@ typedef struct BDRVQcowState {
>   
>       Qcow2Cache* l2_table_cache;
>       Qcow2Cache* refcount_block_cache;
> +    QEMUTimer *cache_clean_timer;
> +    unsigned cache_clean_interval;
>   
>       uint8_t *cluster_cache;
>       uint8_t *cluster_data;
> @@ -581,6 +584,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
>       Qcow2Cache *dependency);
>   void qcow2_cache_depends_on_flush(Qcow2Cache *c);
>   
> +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
>   int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
>   
>   int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 863ffea..3a9b590 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -41,6 +41,10 @@
>   # @corrupt: #optional true if the image has been marked corrupt; only valid for
>   #           compat >= 1.1 (since 2.2)
>   #
> +# @cache-clean-interval: interval in seconds after which unused L2 and
> +#                        refcount cache entries are removed. If 0 then
> +#                        this feature is not enabled (since 2.4)
> +#
>   # @refcount-bits: width of a refcount entry in bits (since 2.3)
>   #
>   # Since: 1.7
> @@ -50,7 +54,8 @@
>         'compat': 'str',
>         '*lazy-refcounts': 'bool',
>         '*corrupt': 'bool',
> -      'refcount-bits': 'int'
> +      'refcount-bits': 'int',
> +      'cache-clean-interval': 'int'
>     } }

I'm not too happy about making this part of ImageInfoSpecificQCow2. Two 
reasons for this: First, it's eventually part of ImageInfo, which is 
defined as "Information about a QEMU image file", but this option cannot 
be set in the image file itself but is only a run-time option.

Second, my original goal for ImageInfoSpecific was to provide more 
information through qemu-img info, and this value will look pretty 
strange there.

I don't know how to resolve this quandary, though. Since qemu cannot 
change this interval by itself, I think not providing an interface for 
reading it is fine. On the other hand, if Eric finds such an interface 
absolutely mandatory, I can't think of a better place to return it than 
here.

The only solution which would satisfy both requirements would be another 
structure which contains "online" flags, and thus is not evaluated by 
qemu-img info, but only by QMP commands. But that's ugly.

Max

>   ##
> @@ -1538,6 +1543,9 @@
>   # @refcount-cache-size:   #optional the maximum size of the refcount block cache
>   #                         in bytes (since 2.2)
>   #
> +# @cache-clean-interval:  #optional clean unused entries in the L2 and refcount
> +#                         caches. The interval is in seconds (since 2.4)
> +#
>   # Since: 1.7
>   ##
>   { 'struct': 'BlockdevOptionsQcow2',
> @@ -1549,7 +1557,8 @@
>               '*overlap-check': 'Qcow2OverlapChecks',
>               '*cache-size': 'int',
>               '*l2-cache-size': 'int',
> -            '*refcount-cache-size': 'int' } }
> +            '*refcount-cache-size': 'int',
> +            '*cache-clean-interval': 'int' } }
>   
>   
>   ##

  parent reply	other threads:[~2015-05-28 14:56 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-27  9:46 [Qemu-devel] [PATCH v3 0/3] Clean unused entries in the qcow2 L2/refcount cache Alberto Garcia
2015-05-27  9:46 ` [Qemu-devel] [PATCH 1/3] qcow2: mark the memory as no longer needed after qcow2_cache_empty() Alberto Garcia
2015-05-27  9:46 ` [Qemu-devel] [PATCH 2/3] qcow2: add option to clean unused cache entries after some time Alberto Garcia
2015-05-27 12:27   ` Eric Blake
2015-05-27 14:34     ` Alberto Garcia
2015-05-28 14:47     ` Max Reitz
2015-05-28 14:56   ` Max Reitz [this message]
2015-05-28 15:02     ` Alberto Garcia
2015-05-28 15:04       ` Max Reitz
2015-05-28 15:06         ` Max Reitz
2015-05-28 15:13     ` Eric Blake
2015-05-28 15:14       ` Max Reitz
2015-05-28 15:19         ` Alberto Garcia
2015-05-28 15:23           ` Max Reitz
2015-05-28 15:30             ` Alberto Garcia
2015-05-28 19:41             ` Eric Blake
2015-05-29  8:32               ` Kevin Wolf
2015-05-28 16:44       ` Kevin Wolf
2015-05-28 17:03         ` Alberto Garcia
2015-05-27  9:46 ` [Qemu-devel] [PATCH 3/3] qcow2: reorder fields in Qcow2CachedTable to reduce padding Alberto Garcia
  -- strict thread matches above, loose matches on Subject: below --
2015-05-29  9:24 [Qemu-devel] [PATCH v4 0/3] Clean unused entries in the qcow2 L2/refcount cache Alberto Garcia
2015-05-29  9:24 ` [Qemu-devel] [PATCH 2/3] qcow2: add option to clean unused cache entries after some time Alberto Garcia
2015-06-02 11:04   ` Kevin Wolf
2015-05-26 17:14 [Qemu-devel] [PATCH v2 0/3] Clean unused entries in the qcow2 L2/refcount cache Alberto Garcia
2015-05-26 17:14 ` [Qemu-devel] [PATCH 2/3] qcow2: add option to clean unused cache entries after some time Alberto Garcia
2015-05-26 17:26   ` Max Reitz
2015-05-26 18:52   ` Eric Blake
2015-05-26 19:10     ` Alberto Garcia
2015-05-26 19:15       ` Eric Blake
2015-05-18 16:48 [Qemu-devel] [PATCH 0/3] Clean unused entries in the qcow2 L2/refcount cache Alberto Garcia
2015-05-18 16:48 ` [Qemu-devel] [PATCH 2/3] qcow2: add option to clean unused cache entries after some time Alberto Garcia
2015-05-26 16:07   ` Max Reitz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55672CA2.10105@redhat.com \
    --to=mreitz@redhat.com \
    --cc=berto@igalia.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).