From: Anthony Liguori <anthony@codemonkey.ws>
To: Avi Kivity <avi@redhat.com>
Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org
Subject: Re: [Qemu-devel] [PATCH] posix-aio-compat: fix latency issues
Date: Mon, 08 Aug 2011 07:34:47 -0500
Message-ID: <4E3FD7E7.4090509@codemonkey.ws>
In-Reply-To: <1312803458-2272-1-git-send-email-avi@redhat.com>

On 08/08/2011 06:37 AM, Avi Kivity wrote:
> In certain circumstances, posix-aio-compat can incur a lot of latency:
>   - threads are created by vcpu threads, so if vcpu affinity is set,
>     aio threads inherit vcpu affinity.  This can cause many aio threads
>     to compete for one cpu.
>   - we can create up to max_threads (64) aio threads in one go; since a
>     pthread_create can take around 30μs, we have up to 2ms of cpu time
>     under a global lock.
>
> Fix by:
>   - moving thread creation to the main thread, so we inherit the main
>     thread's affinity instead of the vcpu thread's affinity.
>   - if a thread is currently being created, and we need to create yet
>     another thread, let the thread being born create the new thread,
>     reducing the amount of time we spend in the main thread.
>   - drop the local lock while creating a thread (we may still hold the
>     global mutex, though)
>
> Note this doesn't eliminate latency completely; scheduler artifacts or
> lack of host cpu resources can still cause it.  We may want pre-allocated
> threads when this cannot be tolerated.
>
> Thanks to Uli Obergfell of Red Hat for his excellent analysis and suggestions.
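
As a point of reference, the per-pthread_create cost quoted above is easy to
sanity-check with a throwaway harness along these lines (illustrative only,
not part of the patch; the numbers depend heavily on host, load and glibc
version).  Build with: cc -O2 -pthread create_bench.c

    /* Rough microbenchmark: time NTHREADS pthread_create() calls back to
     * back, mirroring the worst case of filling the pool in one go. */
    #include <pthread.h>
    #include <stdio.h>
    #include <time.h>

    #define NTHREADS 64

    static void *worker(void *arg)
    {
        (void)arg;
        return NULL;
    }

    int main(void)
    {
        pthread_t tid[NTHREADS];
        struct timespec t0, t1;
        double us;
        int i;

        clock_gettime(CLOCK_MONOTONIC, &t0);
        for (i = 0; i < NTHREADS; i++) {
            pthread_create(&tid[i], NULL, worker, NULL);
        }
        clock_gettime(CLOCK_MONOTONIC, &t1);

        for (i = 0; i < NTHREADS; i++) {
            pthread_join(tid[i], NULL);
        }

        us = (t1.tv_sec - t0.tv_sec) * 1e6 + (t1.tv_nsec - t0.tv_nsec) / 1e3;
        printf("%d creations in %.0f us (%.1f us each)\n",
               NTHREADS, us, us / NTHREADS);
        return 0;
    }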

Do you have a scenario where you can measure the benefits of this
change?  The idle time in the thread pool is rather large, so it
surprises me that it'd be an issue in practice.

Regards,

Anthony Liguori

>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
>   posix-aio-compat.c |   48 ++++++++++++++++++++++++++++++++++++++++++++++--
>   1 files changed, 46 insertions(+), 2 deletions(-)
>
> diff --git a/posix-aio-compat.c b/posix-aio-compat.c
> index 8dc00cb..aa30673 100644
> --- a/posix-aio-compat.c
> +++ b/posix-aio-compat.c
> @@ -30,6 +30,7 @@
>
>   #include "block/raw-posix-aio.h"
>
> +static void do_spawn_thread(void);
>
>   struct qemu_paiocb {
>       BlockDriverAIOCB common;
> @@ -64,6 +65,9 @@ static pthread_attr_t attr;
>   static int max_threads = 64;
>   static int cur_threads = 0;
>   static int idle_threads = 0;
> +static int new_threads = 0;     /* backlog of threads we need to create */
> +static int pending_threads = 0; /* threads created but not running yet */
> +static QEMUBH *new_thread_bh;
>   static QTAILQ_HEAD(, qemu_paiocb) request_list;
>
>   #ifdef CONFIG_PREADV
> @@ -311,6 +315,13 @@ static void *aio_thread(void *unused)
>
>       pid = getpid();
>
> +    mutex_lock(&lock);
> +    if (new_threads) {
> +        do_spawn_thread();
> +    }
> +    pending_threads--;
> +    mutex_unlock(&lock);
> +
>       while (1) {
>           struct qemu_paiocb *aiocb;
>           ssize_t ret = 0;
> @@ -381,11 +392,18 @@ static void *aio_thread(void *unused)
>       return NULL;
>   }
>
> -static void spawn_thread(void)
> +static void do_spawn_thread(void)
>   {
>       sigset_t set, oldset;
>
> -    cur_threads++;
> +    if (!new_threads) {
> +        return;
> +    }
> +
> +    new_threads--;
> +    pending_threads++;
> +
> +    mutex_unlock(&lock);
>
>       /* block all signals */
>       if (sigfillset(&set)) die("sigfillset");
> @@ -394,6 +412,31 @@ static void spawn_thread(void)
>       thread_create(&thread_id, &attr, aio_thread, NULL);
>
>       if (sigprocmask(SIG_SETMASK, &oldset, NULL)) die("sigprocmask restore");
> +
> +    mutex_lock(&lock);
> +}
> +
> +static void spawn_thread_bh_fn(void *opaque)
> +{
> +    mutex_lock(&lock);
> +    do_spawn_thread();
> +    mutex_unlock(&lock);
> +}
> +
> +static void spawn_thread(void)
> +{
> +    cur_threads++;
> +    new_threads++;
> +    /* If there are threads being created, they will spawn new workers, so
> +     * we don't spend time creating many threads in a loop holding a mutex or
> +     * starving the current vcpu.
> +     *
> +     * If there are no idle threads, ask the main thread to create one, so we
> +     * inherit the correct affinity instead of the vcpu affinity.
> +     */
> +    if (!pending_threads) {
> +        qemu_bh_schedule(new_thread_bh);
> +    }
>   }
>
>   static void qemu_paio_submit(struct qemu_paiocb *aiocb)
> @@ -665,6 +708,7 @@ int paio_init(void)
>           die2(ret, "pthread_attr_setdetachstate");
>
>       QTAILQ_INIT(&request_list);
> +    new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL);
>
>       posix_aio_state = s;
>       return 0;
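
(For readers not familiar with the affinity issue in the first bullet of the
commit message: on Linux a new thread starts with the CPU affinity mask of
the thread that called pthread_create(), so workers spawned from a pinned
vcpu thread all compete for that vcpu's CPU.  A minimal standalone sketch,
not part of the patch and using a hypothetical single-CPU pinning, shows the
inheritance.)

    /* Pin the calling thread to CPU 0, then show that a newly created
     * thread starts with the same single-CPU mask.  Linux-specific. */
    #define _GNU_SOURCE
    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>

    static void *child(void *arg)
    {
        cpu_set_t mask;

        (void)arg;
        CPU_ZERO(&mask);
        pthread_getaffinity_np(pthread_self(), sizeof(mask), &mask);
        printf("child thread allowed on %d cpu(s)\n", CPU_COUNT(&mask));
        return NULL;
    }

    int main(void)
    {
        cpu_set_t mask;
        pthread_t tid;

        /* Stand-in for vcpu pinning done by taskset or a management tool. */
        CPU_ZERO(&mask);
        CPU_SET(0, &mask);
        pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);

        /* The child inherits the single-CPU mask and reports 1 cpu. */
        pthread_create(&tid, NULL, child, NULL);
        pthread_join(tid, NULL);
        return 0;
    }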

