qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v3] XBZRLE: Fix qemu crash when resize the xbzrle cache
@ 2014-02-22  8:52 Gonglei (Arei)
  2014-02-25 15:24 ` Juan Quintela
  0 siblings, 1 reply; 3+ messages in thread
From: Gonglei (Arei) @ 2014-02-22  8:52 UTC (permalink / raw)
  To: qemu-devel@nongnu.org
  Cc: chenliang (T), owasserm@redhat.com, Huangweidong (C),
	Dr. David Alan Gilbert, Juan Quintela

Resizing the xbzrle cache during migration causes qemu-crash,
because the main-thread and migration-thread modify the xbzrle
cache size concurrently without lock-protection.

Signed-off-by: ChenLiang <chenliang88@huawei.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
Changes against the previous version:
 *Remove the temporary variable cache in the xbzrle_cache_resize.

 arch_init.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 80574a0..003640f 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -164,26 +164,69 @@ static struct {
     uint8_t *encoded_buf;
     /* buffer for storing page content */
     uint8_t *current_buf;
-    /* Cache for XBZRLE */
+    /* Cache for XBZRLE, Protected by lock. */
     PageCache *cache;
+    QemuMutex lock;
 } XBZRLE = {
     .encoded_buf = NULL,
     .current_buf = NULL,
     .cache = NULL,
+    /* use the lock carefully */
+    .lock = {PTHREAD_MUTEX_INITIALIZER},
 };
 /* buffer used for XBZRLE decoding */
 static uint8_t *xbzrle_decoded_buf;
 
+static void XBZRLE_cache_lock(void)
+{
+    qemu_mutex_lock(&XBZRLE.lock);
+}
+
+static void XBZRLE_cache_unlock(void)
+{
+    qemu_mutex_unlock(&XBZRLE.lock);
+}
+
 int64_t xbzrle_cache_resize(int64_t new_size)
 {
+    PageCache *new_cache, *old_cache;
+
     if (new_size < TARGET_PAGE_SIZE) {
         return -1;
     }
 
+    XBZRLE_cache_lock();
     if (XBZRLE.cache != NULL) {
-        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
-            TARGET_PAGE_SIZE;
+        /* check XBZRLE.cache again later */
+        XBZRLE_cache_unlock();
+        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
+            return pow2floor(new_size);
+        }
+        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
+                                        TARGET_PAGE_SIZE);
+        if (!new_cache) {
+            DPRINTF("Error creating cache\n");
+            return -1;
+        }
+
+        XBZRLE_cache_lock();
+        /* the XBZRLE.cache may have be destroyed, check it again */
+        if (XBZRLE.cache != NULL) {
+            old_cache = XBZRLE.cache;
+            XBZRLE.cache = new_cache;
+            new_cache = NULL;
+        }
+        XBZRLE_cache_unlock();
+
+        if (NULL == new_cache) {
+            cache_fini(old_cache);
+        } else {
+            cache_fini(new_cache);
+        }
+    } else {
+        XBZRLE_cache_unlock();
     }
+
     return pow2floor(new_size);
 }
 
@@ -522,6 +565,8 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_sent);
 
+            XBZRLE_cache_lock();
+
             if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                 if (ret != RAM_SAVE_CONTROL_DELAYED) {
                     if (bytes_sent > 0) {
@@ -553,6 +598,8 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                 acct_info.norm_pages++;
             }
 
+            XBZRLE_cache_unlock();
+
             /* if page is unmodified, continue to the next */
             if (bytes_sent > 0) {
                 last_sent_block = block;
@@ -620,6 +667,7 @@ static void migration_end(void)
         migration_bitmap = NULL;
     }
 
+    XBZRLE_cache_lock();
     if (XBZRLE.cache) {
         cache_fini(XBZRLE.cache);
         g_free(XBZRLE.cache);
@@ -629,6 +677,7 @@ static void migration_end(void)
         XBZRLE.encoded_buf = NULL;
         XBZRLE.current_buf = NULL;
     }
+    XBZRLE_cache_unlock();
 }
 
 static void ram_migration_cancel(void *opaque)
@@ -659,13 +708,16 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     dirty_rate_high_cnt = 0;
 
     if (migrate_use_xbzrle()) {
+        XBZRLE_cache_lock();
         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                   TARGET_PAGE_SIZE,
                                   TARGET_PAGE_SIZE);
         if (!XBZRLE.cache) {
+            XBZRLE_cache_unlock();
             DPRINTF("Error creating cache\n");
             return -1;
         }
+        XBZRLE_cache_unlock();
 
         /* We prefer not to abort if there is no memory */
         XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
-- 
1.7.12.4

Best regards,
-Gonglei

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [PATCH v3] XBZRLE: Fix qemu crash when resize the xbzrle cache
  2014-02-22  8:52 [Qemu-devel] [PATCH v3] XBZRLE: Fix qemu crash when resize the xbzrle cache Gonglei (Arei)
@ 2014-02-25 15:24 ` Juan Quintela
  2014-02-26  9:10   ` Gonglei (Arei)
  0 siblings, 1 reply; 3+ messages in thread
From: Juan Quintela @ 2014-02-25 15:24 UTC (permalink / raw)
  To: Gonglei (Arei)
  Cc: chenliang (T), owasserm@redhat.com, Huangweidong (C),
	qemu-devel@nongnu.org, Dr. David Alan Gilbert

"Gonglei (Arei)" <arei.gonglei@huawei.com> wrote:
> Resizing the xbzrle cache during migration causes qemu-crash,
> because the main-thread and migration-thread modify the xbzrle
> cache size concurrently without lock-protection.
>
> Signed-off-by: ChenLiang <chenliang88@huawei.com>
> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

Sorry for the late review.

> ---
> Changes against the previous version:
>  *Remove the temporary variable cache in the xbzrle_cache_resize.
>
>  arch_init.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 55 insertions(+), 3 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 80574a0..003640f 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -164,26 +164,69 @@ static struct {
>      uint8_t *encoded_buf;
>      /* buffer for storing page content */
>      uint8_t *current_buf;
> -    /* Cache for XBZRLE */
> +    /* Cache for XBZRLE, Protected by lock. */
>      PageCache *cache;
> +    QemuMutex lock;
>  } XBZRLE = {
>      .encoded_buf = NULL,
>      .current_buf = NULL,
>      .cache = NULL,
> +    /* use the lock carefully */
> +    .lock = {PTHREAD_MUTEX_INITIALIZER},
>  };

Have you tried to compile for windows?  I think this would make it
break.  We need to init it with qemu_mutex_init() in ram_save_setup()
after the call to cache_init()?



>  /* buffer used for XBZRLE decoding */
>  static uint8_t *xbzrle_decoded_buf;
>  
> +static void XBZRLE_cache_lock(void)
> +{
> +    qemu_mutex_lock(&XBZRLE.lock);
> +}
> +

if we change this to:

    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);

On both cache operations, we would remove the overhead in the no XBRLE
case, right?

> +static void XBZRLE_cache_unlock(void)
> +{
> +    qemu_mutex_unlock(&XBZRLE.lock);
> +}
> +
>  int64_t xbzrle_cache_resize(int64_t new_size)
>  {
> +    PageCache *new_cache, *old_cache;
> +
>      if (new_size < TARGET_PAGE_SIZE) {
>          return -1;
>      }
>  
> +    XBZRLE_cache_lock();
>      if (XBZRLE.cache != NULL) {
> -        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
> -            TARGET_PAGE_SIZE;
> +        /* check XBZRLE.cache again later */
> +        XBZRLE_cache_unlock();
> +        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
> +            return pow2floor(new_size);
> +        }
> +        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
> +                                        TARGET_PAGE_SIZE);
> +        if (!new_cache) {
> +            DPRINTF("Error creating cache\n");
> +            return -1;
> +        }
> +
> +        XBZRLE_cache_lock();
> +        /* the XBZRLE.cache may have be destroyed, check it again */
> +        if (XBZRLE.cache != NULL) {
> +            old_cache = XBZRLE.cache;
> +            XBZRLE.cache = new_cache;
> +            new_cache = NULL;
> +        }
> +        XBZRLE_cache_unlock();
> +
> +        if (NULL == new_cache) {
> +            cache_fini(old_cache);
> +        } else {
> +            cache_fini(new_cache);
> +        }
> +    } else {
> +        XBZRLE_cache_unlock();
>      }

Can we change this to:

        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            return pow2floor(new_size);
        }

        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                                        TARGET_PAGE_SIZE);
        if (!new_cache) {
            DPRINTF("Error creating cache\n");
            return -1;
        }

        XBZRLE_cache_lock();
        /* the XBZRLE.cache may have be destroyed, check it again */
        if (XBZRLE.cache != NULL) {
            cache_to_free = XBZRLE.cache;
            XBZRLE.cache = new_cache;
        } else {
            cache_to_free = new_cache;
        }
        XBZRLE_cache_unlock();

        cache_fini(cache_to_free);

I think that looking is clearer this way.  BTW, we are losing
_everything_ that is on the cache if we resize it.  I don't know if that
is what we want.  If we have a big cache, and make it bigger because we
are having too many misses, just dropping the whole cache looks like a
bad idea :-(


And my understanding is that we should do a:

migrate_get_current()->xbzrle_cache_size = new_size;

independently of what CACHE is equal or different from NULL?
(this was already wrong before your patch, just asking because you are
looking at the code).

Thanks, Juan.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Qemu-devel] [PATCH v3] XBZRLE: Fix qemu crash when resize the xbzrle cache
  2014-02-25 15:24 ` Juan Quintela
@ 2014-02-26  9:10   ` Gonglei (Arei)
  0 siblings, 0 replies; 3+ messages in thread
From: Gonglei (Arei) @ 2014-02-26  9:10 UTC (permalink / raw)
  To: quintela@redhat.com
  Cc: chenliang (T), owasserm@redhat.com, Huangweidong (C),
	qemu-devel@nongnu.org, Dr. David Alan Gilbert

Hi, Juan. Thanks for your review.

> -----Original Message-----
> From: Juan Quintela [mailto:quintela@redhat.com]
> Sent: Tuesday, February 25, 2014 11:25 PM
> To: Gonglei (Arei)
> Cc: qemu-devel@nongnu.org; Dr. David Alan Gilbert; owasserm@redhat.com;
> chenliang (T); Huangweidong (C)
> Subject: Re: [PATCH v3] XBZRLE: Fix qemu crash when resize the xbzrle cache
> 
> "Gonglei (Arei)" <arei.gonglei@huawei.com> wrote:
> > Resizing the xbzrle cache during migration causes qemu-crash,
> > because the main-thread and migration-thread modify the xbzrle
> > cache size concurrently without lock-protection.
> >
> > Signed-off-by: ChenLiang <chenliang88@huawei.com>
> > Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> > Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> 
> Sorry for the late review.
> 
> > ---
> > Changes against the previous version:
> >  *Remove the temporary variable cache in the xbzrle_cache_resize.
> >
> >  arch_init.c | 58
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 55 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch_init.c b/arch_init.c
> > index 80574a0..003640f 100644
> > --- a/arch_init.c
> > +++ b/arch_init.c
> > @@ -164,26 +164,69 @@ static struct {
> >      uint8_t *encoded_buf;
> >      /* buffer for storing page content */
> >      uint8_t *current_buf;
> > -    /* Cache for XBZRLE */
> > +    /* Cache for XBZRLE, Protected by lock. */
> >      PageCache *cache;
> > +    QemuMutex lock;
> >  } XBZRLE = {
> >      .encoded_buf = NULL,
> >      .current_buf = NULL,
> >      .cache = NULL,
> > +    /* use the lock carefully */
> > +    .lock = {PTHREAD_MUTEX_INITIALIZER},
> >  };
> 
> Have you tried to compile for windows?  I think this would make it
> break.  We need to init it with qemu_mutex_init() in ram_save_setup()
> after the call to cache_init()?

Yeah,it isn't compatible with windows. There is a problem that we must initialize the lock at qemu startup. 
Because the xbzrle_cache_resize can be called anytime. I don't know how to handle it in windows. 
Do you have any suggestion?
> 
> 
> 
> >  /* buffer used for XBZRLE decoding */
> >  static uint8_t *xbzrle_decoded_buf;
> >
> > +static void XBZRLE_cache_lock(void)
> > +{
> > +    qemu_mutex_lock(&XBZRLE.lock);
> > +}
> > +
> 
> if we change this to:
> 
>     if (migrate_use_xbzrle())
>         qemu_mutex_lock(&XBZRLE.lock);
> 
> On both cache operations, we would remove the overhead in the no XBRLE
> case, right?

Check.
> 
> > +static void XBZRLE_cache_unlock(void)
> > +{
> > +    qemu_mutex_unlock(&XBZRLE.lock);
> > +}
> > +
> >  int64_t xbzrle_cache_resize(int64_t new_size)
> >  {
> > +    PageCache *new_cache, *old_cache;
> > +
> >      if (new_size < TARGET_PAGE_SIZE) {
> >          return -1;
> >      }
> >
> > +    XBZRLE_cache_lock();
> >      if (XBZRLE.cache != NULL) {
> > -        return cache_resize(XBZRLE.cache, new_size /
> TARGET_PAGE_SIZE) *
> > -            TARGET_PAGE_SIZE;
> > +        /* check XBZRLE.cache again later */
> > +        XBZRLE_cache_unlock();
> > +        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
> > +            return pow2floor(new_size);
> > +        }
> > +        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
> > +                                        TARGET_PAGE_SIZE);
> > +        if (!new_cache) {
> > +            DPRINTF("Error creating cache\n");
> > +            return -1;
> > +        }
> > +
> > +        XBZRLE_cache_lock();
> > +        /* the XBZRLE.cache may have be destroyed, check it again */
> > +        if (XBZRLE.cache != NULL) {
> > +            old_cache = XBZRLE.cache;
> > +            XBZRLE.cache = new_cache;
> > +            new_cache = NULL;
> > +        }
> > +        XBZRLE_cache_unlock();
> > +
> > +        if (NULL == new_cache) {
> > +            cache_fini(old_cache);
> > +        } else {
> > +            cache_fini(new_cache);
> > +        }
> > +    } else {
> > +        XBZRLE_cache_unlock();
> >      }
> 
> Can we change this to:
> 
>         if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
>             return pow2floor(new_size);
>         }
> 
>         new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
>                                         TARGET_PAGE_SIZE);
>         if (!new_cache) {
>             DPRINTF("Error creating cache\n");
>             return -1;
>         }
> 
>         XBZRLE_cache_lock();
>         /* the XBZRLE.cache may have be destroyed, check it again */
>         if (XBZRLE.cache != NULL) {
>             cache_to_free = XBZRLE.cache;
>             XBZRLE.cache = new_cache;
>         } else {
>             cache_to_free = new_cache;
>         }
>         XBZRLE_cache_unlock();
> 
>         cache_fini(cache_to_free);
> 
> I think that looking is clearer this way.  BTW, we are losing
> _everything_ that is on the cache if we resize it.  I don't know if that
> is what we want.  If we have a big cache, and make it bigger because we
> are having too many misses, just dropping the whole cache looks like a
> bad idea :-(
> 

Dropping the whole cache is a pity. I have thought about reuse the cache_resize in the xbzrle_cache_resize. 
But the cache_resize may consume a little time, and it will block migration thread too long.
> 
> And my understanding is that we should do a:
> 
> migrate_get_current()->xbzrle_cache_size = new_size;
> 
> independently of what CACHE is equal or different from NULL?
> (this was already wrong before your patch, just asking because you are
> looking at the code).

We should do it definitely. The old code do it too. But the old code don't handle the return
value of the xbzrle_cache_resize. The return value may be -1. I will fix it .

In addition, I also do some work to fix another XBZRLE bug and optimize XBZRLE. 
The patches will be committed later.
> 
> Thanks, Juan.

Best regards,
-Gonglei

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-02-26  9:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-02-22  8:52 [Qemu-devel] [PATCH v3] XBZRLE: Fix qemu crash when resize the xbzrle cache Gonglei (Arei)
2014-02-25 15:24 ` Juan Quintela
2014-02-26  9:10   ` Gonglei (Arei)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).