qemu-devel.nongnu.org archive mirror
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Pranith Kumar <bobby.prani@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <rth@twiddle.net>,
	Sergey Fedorov <serge.fdrv@gmail.com>,
	qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [RFC v3 PATCH 2/5] cpus-common: Cache allocated work items
Date: Tue, 05 Sep 2017 13:28:43 +0100	[thread overview]
Message-ID: <87k21dmiro.fsf@linaro.org> (raw)
In-Reply-To: <20170829063313.10237-2-bobby.prani@gmail.com>


Pranith Kumar <bobby.prani@gmail.com> writes:

> Using heaptrack, I found that quite a few of our temporary allocations
> come from allocating work items. Instead of continuously allocating
> and freeing them, we can cache the allocated items and reuse them.
>
> Stats from an ARM64 guest (boot+shutdown):
>
> heaptrack stats(before):
>         allocations:            1471317
>         leaked allocations:     73824
>         temporary allocations:  651293
>
> heaptrack stats(after):
>         allocations:            1143130
>         leaked allocations:     73693
>         temporary allocations:  487342
>
> The speedup is minor and within error margins; however, I think the
> patch is still worthwhile. We could also explore atomics instead of
> taking a lock for the work item pool.
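
On the "atomics instead of taking a lock" idea: the free list is
essentially a Treiber stack, so it maps onto a compare-and-swap loop.
Here is a minimal sketch using plain C11 atomics rather than QEMU's
atomic_* helpers, with illustrative names and the ABA problem on pop
glossed over:

  #include <stdatomic.h>

  /* assumes the qemu_work_item type from the patch below */
  static _Atomic(qemu_work_item *) wi_free_list;

  static void wi_pool_push(qemu_work_item *item)
  {
      qemu_work_item *head = atomic_load(&wi_free_list);
      do {
          item->next = head;
          /* on failure, head is reloaded with the current list head */
      } while (!atomic_compare_exchange_weak(&wi_free_list, &head, item));
  }

  static qemu_work_item *wi_pool_pop(void)
  {
      qemu_work_item *head = atomic_load(&wi_free_list);
      while (head &&
             !atomic_compare_exchange_weak(&wi_free_list, &head,
                                           head->next)) {
          /* retry with the updated head */
      }
      return head;   /* NULL: pool empty, caller falls back to g_malloc0 */
  }

Since pooled items are never actually freed, the lifetime side of pop is
less scary than usual, but a real implementation would still have to
think about ABA.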

When we were doing the original MTTCG work I looked at using a GArray for
the work queue, see:

  http://lists.gnu.org/archive/html/qemu-devel/2016-08/msg00367.html

specifically:

  Subject: [PATCH v5 13/13] cpu-exec: replace cpu->queued_work with GArray
  Date: Tue,  2 Aug 2016 18:27:44 +0100
  Message-Id: <1470158864-17651-14-git-send-email-alex.bennee@linaro.org>

which I personally think might yield better results than messing around
with custom allocators, GSlice and the like. You still get the dynamic
sizing of a malloc-based array, and operations like insertion and
iterating through the work queue should be cache friendly.

Once the array has (transparently) grown to a size that can service all
allocations in the usual servicing period, the same memory can be used
over and over again ;-)
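
To make that concrete, here is a rough sketch of the shape it could take
(field and helper names are illustrative, not the actual 2016 patch; it
assumes a hypothetical "GArray *queued_work" member in CPUState guarded
by cpu->work_mutex):

  #include <glib.h>

  static void queue_work_on_cpu(CPUState *cpu, qemu_work_item *wi)
  {
      qemu_mutex_lock(&cpu->work_mutex);
      if (!cpu->queued_work) {
          /* the array grows transparently; 16 is just a starting guess */
          cpu->queued_work = g_array_sized_new(FALSE, TRUE,
                                               sizeof(qemu_work_item), 16);
      }
      g_array_append_val(cpu->queued_work, *wi);   /* copied by value */
      qemu_mutex_unlock(&cpu->work_mutex);
      qemu_cpu_kick(cpu);
  }

  void process_queued_cpu_work(CPUState *cpu)
  {
      guint i;

      qemu_mutex_lock(&cpu->work_mutex);
      for (i = 0; cpu->queued_work && i < cpu->queued_work->len; i++) {
          /* copy the item out so the array can be reallocated safely
           * while the callback runs without the lock held */
          qemu_work_item wi = g_array_index(cpu->queued_work,
                                            qemu_work_item, i);
          qemu_mutex_unlock(&cpu->work_mutex);
          wi.func(cpu, wi.data);
          qemu_mutex_lock(&cpu->work_mutex);
      }
      if (cpu->queued_work) {
          /* drop the items but keep the capacity for next time */
          g_array_set_size(cpu->queued_work, 0);
      }
      qemu_mutex_unlock(&cpu->work_mutex);
  }

No per-item malloc/free at all once the array has warmed up; the
exclusive/done handling is elided here for brevity.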

My fondness for arrays is informed by comments by Bjarne Stroustrup:

  https://www.youtube.com/watch?v=YQs6IC-vgmo

Obviously this patch would need to be re-worked given how much the code
has changed since it was merged.

>
> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
> ---
>  cpus-common.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 60 insertions(+), 15 deletions(-)
>
> diff --git a/cpus-common.c b/cpus-common.c
> index 59f751ecf9..ccf5f50e4e 100644
> --- a/cpus-common.c
> +++ b/cpus-common.c
> @@ -24,6 +24,7 @@
>  #include "sysemu/cpus.h"
>
>  static QemuMutex qemu_cpu_list_lock;
> +static QemuMutex qemu_wi_pool_lock;
>  static QemuCond exclusive_cond;
>  static QemuCond exclusive_resume;
>  static QemuCond qemu_work_cond;
> @@ -33,6 +34,49 @@ static QemuCond qemu_work_cond;
>   */
>  static int pending_cpus;
>
> +typedef struct qemu_work_item {
> +    struct qemu_work_item *next;
> +    run_on_cpu_func func;
> +    run_on_cpu_data data;
> +    bool free, exclusive, done;
> +} qemu_work_item;
> +
> +typedef struct qemu_wi_pool {
> +    qemu_work_item *head;
> +    int num_items;
> +} qemu_wi_pool;
> +
> +qemu_wi_pool *wi_free_pool;
> +
> +static void qemu_init_workitem_pool(void)
> +{
> +    wi_free_pool = g_malloc0(sizeof(qemu_wi_pool));
> +}
> +
> +static void qemu_wi_pool_insert(qemu_work_item *item)
> +{
> +    qemu_mutex_lock(&qemu_wi_pool_lock);
> +    qemu_work_item *curr = atomic_read(&wi_free_pool->head);
> +    item->next = curr;
> +    wi_free_pool->head = item;
> +    qemu_mutex_unlock(&qemu_wi_pool_lock);
> +}
> +
> +static qemu_work_item *qemu_wi_pool_remove(void)
> +{
> +    qemu_mutex_lock(&qemu_wi_pool_lock);
> +    qemu_work_item *curr = atomic_read(&wi_free_pool->head);
> +    if (curr == NULL) {
> +        goto out;
> +    }
> +    wi_free_pool->head = curr->next;
> +    curr->next = NULL;
> +
> + out:
> +    qemu_mutex_unlock(&qemu_wi_pool_lock);
> +    return curr;
> +}
> +
>  void qemu_init_cpu_list(void)
>  {
>      /* This is needed because qemu_init_cpu_list is also called by the
> @@ -43,6 +87,9 @@ void qemu_init_cpu_list(void)
>      qemu_cond_init(&exclusive_cond);
>      qemu_cond_init(&exclusive_resume);
>      qemu_cond_init(&qemu_work_cond);
> +
> +    qemu_init_workitem_pool();
> +    qemu_mutex_init(&qemu_wi_pool_lock);
>  }
>
>  void cpu_list_lock(void)
> @@ -106,14 +153,7 @@ void cpu_list_remove(CPUState *cpu)
>      qemu_mutex_unlock(&qemu_cpu_list_lock);
>  }
>
> -struct qemu_work_item {
> -    struct qemu_work_item *next;
> -    run_on_cpu_func func;
> -    run_on_cpu_data data;
> -    bool free, exclusive, done;
> -};
> -
> -static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
> +static void queue_work_on_cpu(CPUState *cpu, qemu_work_item *wi)
>  {
>      qemu_mutex_lock(&cpu->work_mutex);
>      if (cpu->queued_work_first == NULL) {
> @@ -132,7 +172,7 @@ static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
>  void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
>                     QemuMutex *mutex)
>  {
> -    struct qemu_work_item wi;
> +    qemu_work_item wi;
>
>      if (qemu_cpu_is_self(cpu)) {
>          func(cpu, data);
> @@ -156,9 +196,11 @@ void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
>
>  void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
>  {
> -    struct qemu_work_item *wi;
> +    qemu_work_item *wi = qemu_wi_pool_remove();
>
> -    wi = g_malloc0(sizeof(struct qemu_work_item));
> +    if (!wi) {
> +        wi = g_malloc0(sizeof(qemu_work_item));
> +    }
>      wi->func = func;
>      wi->data = data;
>      wi->free = true;
> @@ -299,9 +341,11 @@ void cpu_exec_end(CPUState *cpu)
>  void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
>                             run_on_cpu_data data)
>  {
> -    struct qemu_work_item *wi;
> +    qemu_work_item *wi = qemu_wi_pool_remove();
>
> -    wi = g_malloc0(sizeof(struct qemu_work_item));
> +    if (!wi) {
> +        wi = g_malloc0(sizeof(qemu_work_item));
> +    }
>      wi->func = func;
>      wi->data = data;
>      wi->free = true;
> @@ -312,7 +356,7 @@ void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
>
>  void process_queued_cpu_work(CPUState *cpu)
>  {
> -    struct qemu_work_item *wi;
> +    qemu_work_item *wi;
>
>      if (cpu->queued_work_first == NULL) {
>          return;
> @@ -343,7 +387,8 @@ void process_queued_cpu_work(CPUState *cpu)
>          }
>          qemu_mutex_lock(&cpu->work_mutex);
>          if (wi->free) {
> -            g_free(wi);
> +            memset(wi, 0, sizeof(qemu_work_item));
> +            qemu_wi_pool_insert(wi);
>          } else {
>              atomic_mb_set(&wi->done, true);
>          }


--
Alex Bennée

Thread overview: 14+ messages
2017-08-29  6:33 [Qemu-devel] [PATCH 1/5] target/arm: Remove stale comment Pranith Kumar
2017-08-29  6:33 ` [Qemu-devel] [RFC v3 PATCH 2/5] cpus-common: Cache allocated work items Pranith Kumar
2017-09-05 12:28   ` Alex Bennée [this message]
2017-08-29  6:33 ` [Qemu-devel] [RFC v3 PATCH 3/5] mttcg: Add tcg target default memory ordering Pranith Kumar
2017-08-29 14:51   ` Richard Henderson
2017-08-29  6:33 ` [Qemu-devel] [RFC v3 PATCH 4/5] mttcg: Implement implicit ordering semantics Pranith Kumar
2017-08-29 14:53   ` Richard Henderson
2017-09-02  1:44   ` Emilio G. Cota
2017-08-29  6:33 ` [Qemu-devel] [RFC v3 PATCH 5/5] tcg/softmmu: Increase size of TLB caches Pranith Kumar
2017-08-29 15:01   ` Richard Henderson
2017-08-29 16:23     ` Pranith Kumar
2017-08-29 15:03   ` Richard Henderson
2017-09-05 12:02 ` [Qemu-devel] [PATCH 1/5] target/arm: Remove stale comment Alex Bennée
2017-09-06  0:35   ` Pranith Kumar
