All of lore.kernel.org
 help / color / mirror / Atom feed
From: "bauerchen(陈蒙蒙)" <bauerchen@tencent.com>
To: qemu-devel <qemu-devel@nongnu.org>
Cc: "pbonzini >" <pbonzini@redhat.com>
Subject: Requesting review  about optimizing large guest start up time
Date: Tue, 11 Feb 2020 12:08:18 +0000	[thread overview]
Message-ID: <e9dfa1311de74824983e769ea197c2e6@tencent.com> (raw)

From c882b155466313fcd85ac330a45a573e608b0d74 Mon Sep 17 00:00:00 2001
From: bauerchen <bauerchen@tencent.com>
Date: Tue, 11 Feb 2020 17:10:35 +0800
Subject: [PATCH] Optimize: large guest start-up in mem-prealloc
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit

[desc]:
    Large-memory VMs start slowly when using -mem-prealloc, and
    there are some areas to optimize in the current method:

    1. mmap is used to allocate thread stacks while the page-clearing
    threads are being created, and it tries to take mm->mmap_sem for
    writing; but the clearing threads already hold it for reading, and
    this contention makes thread creation very slow;

    2. the method of calculating pages per thread is poor: if we use
    64 threads to split 160 hugepages, 63 threads clear 2 pages each
    and 1 thread clears 34 pages, so the overall speed is very slow;

    To solve the first problem, we add a mutex in the thread function
    and let all threads start only once every thread has been created;
    for the second problem, we spread the remainder across the threads:
    with 160 hugepages and 64 threads, 32 threads clear 3 pages and
    32 threads clear 2 pages.
[test]:
    320G 84c VM start time can be reduced to 10s
    680G 84c VM start time can be reduced to 18s

Signed-off-by: bauerchen <bauerchen@tencent.com>
Reviewed-by: Pan Rui <ruippan@tencent.com>
Reviewed-by: Ivan Ren <ivanren@tencent.com>
---
 util/oslib-posix.c | 44 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 5a291cc..e97369b 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -76,6 +76,10 @@ static MemsetThread *memset_thread;
 static int memset_num_threads;
 static bool memset_thread_failed;
 
+static QemuMutex page_mutex;
+static QemuCond page_cond;
+static volatile bool thread_create_flag;
+
 int qemu_get_thread_id(void)
 {
 #if defined(__linux__)
@@ -403,6 +407,14 @@ static void *do_touch_pages(void *arg)
     MemsetThread *memset_args = (MemsetThread *)arg;
     sigset_t set, oldset;
 
+    /*wait for all threads create finished */
+    qemu_mutex_lock(&page_mutex);
+    while(!thread_create_flag){
+        qemu_cond_wait(&page_cond, &page_mutex);
+    }
+    qemu_mutex_unlock(&page_mutex);
+
+
     /* unblock SIGBUS */
     sigemptyset(&set);
     sigaddset(&set, SIGBUS);
@@ -448,30 +460,46 @@ static inline int get_memset_num_threads(int smp_cpus)
     return ret;
 }
 
+static void calc_page_per_thread(size_t numpages, int memset_threads, size_t *pages_per_thread){
+    int avg = numpages / memset_threads + 1;
+    int i = 0;
+    int last = avg * memset_threads - numpages;
+    for (i = 0; i < memset_threads; i++)
+    {
+        if(memset_threads - i <= last){
+            pages_per_thread[i] = avg - 1;
+        }else
+            pages_per_thread[i] = avg;
+    }
+}
+
 static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
                             int smp_cpus)
 {
-    size_t numpages_per_thread;
-    size_t size_per_thread;
+    size_t *numpages_per_thread;
     char *addr = area;
     int i = 0;
 
     memset_thread_failed = false;
+    thread_create_flag = false;
     memset_num_threads = get_memset_num_threads(smp_cpus);
+    numpages_per_thread = g_new0(size_t, memset_num_threads);
     memset_thread = g_new0(MemsetThread, memset_num_threads);
-    numpages_per_thread = (numpages / memset_num_threads);
-    size_per_thread = (hpagesize * numpages_per_thread);
+    calc_page_per_thread(numpages, memset_num_threads, numpages_per_thread);
+
     for (i = 0; i < memset_num_threads; i++) {
         memset_thread[i].addr = addr;
-        memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
-                                    numpages : numpages_per_thread;
+        memset_thread[i].numpages = numpages_per_thread[i];
         memset_thread[i].hpagesize = hpagesize;
         qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
                            do_touch_pages, &memset_thread[i],
                            QEMU_THREAD_JOINABLE);
-        addr += size_per_thread;
-        numpages -= numpages_per_thread;
+        addr += numpages_per_thread[i] * hpagesize;
+        numpages -= numpages_per_thread[i];
     }
+    thread_create_flag = true;
+    qemu_cond_broadcast(&page_cond);
+
     for (i = 0; i < memset_num_threads; i++) {
         qemu_thread_join(&memset_thread[i].pgthread);
     }
-- 
1.8.3.1

             reply	other threads:[~2020-02-11 13:36 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-11 12:08 bauerchen(陈蒙蒙) [this message]
2020-02-20 11:11 ` Requesting review about optimizing large guest start up time Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e9dfa1311de74824983e769ea197c2e6@tencent.com \
    --to=bauerchen@tencent.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.