From: Orit Wasserman <owasserm@redhat.com>
To: quintela@redhat.com
Cc: peter.maydell@linaro.org, aliguori@us.ibm.com,
stefanha@gmail.com, qemu-devel@nongnu.org,
Benoit Hudzia <benoit.hudzia@sap.com>,
mdroth@linux.vnet.ibm.com, blauwirbel@gmail.com,
Petter Svard <petters@cs.umu.se>,
chegu_vinod@hp.com, avi@redhat.com,
Aidan Shribman <aidan.shribman@sap.com>,
pbonzini@redhat.com, eblake@redhat.com
Subject: Re: [Qemu-devel] [PATCH v11 7/9] Add XBZRLE to ram_save_block and ram_save_live
Date: Wed, 06 Jun 2012 05:13:04 +0300 [thread overview]
Message-ID: <4FCEBCB0.5040402@redhat.com> (raw)
In-Reply-To: <877gvr8di8.fsf@elfo.elfo>
On 06/01/2012 02:42 PM, Juan Quintela wrote:
> Orit Wasserman <owasserm@redhat.com> wrote:
>> In the outgoing migration check to see if the page is cached and
>> changed than send compressed page by using save_xbrle_page function.
>> In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
>> and decompress the page (by using load_xbrle function).
>
>
> This patch can be split to easy review.
Sure.
>
>> --- a/arch_init.c
>> +++ b/arch_init.c
>> @@ -43,6 +43,15 @@
>> #include "hw/smbios.h"
>> #include "exec-memory.h"
>> #include "hw/pcspk.h"
>> +#include "qemu/cache.h"
>> +
>> +#ifdef DEBUG_ARCH_INIT
>> +#define DPRINTF(fmt, ...) \
>> + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
>> +#else
>> +#define DPRINTF(fmt, ...) \
>> + do { } while (0)
>> +#endif
>
> Independent of xbzrle.
>
>>
>> #ifdef TARGET_SPARC
>> int graphic_width = 1024;
>> @@ -94,6 +103,7 @@ const uint32_t arch_type = QEMU_ARCH;
>> #define RAM_SAVE_FLAG_PAGE 0x08
>> #define RAM_SAVE_FLAG_EOS 0x10
>> #define RAM_SAVE_FLAG_CONTINUE 0x20
>> +#define RAM_SAVE_FLAG_XBZRLE 0x40
>>
>> #ifdef __ALTIVEC__
>> #include <altivec.h>
>> @@ -157,6 +167,22 @@ static int is_dup_page(uint8_t *page)
>> return 1;
>> }
>>
>> +/* XBZRLE (Xor Based Zero Length Encoding */
>> +typedef struct XBZRLEHeader {
>> + uint32_t xh_cksum;
>
> We are still not using this value, and we are sending it anyway (with a
> value of zero). What happens when we start using if for a checksum, and
> we migration to a new version that "expects" it to be valid? I would
> preffer not to sent it, or sent the correct value.
I think I will remove it, checksum should be used for all migration not just XBZRLE.
I guess we can add it to the protocol in the future.
>
>> + uint16_t xh_len;
>> + uint8_t xh_flags;
>> +} XBZRLEHeader;
>> +
>> +/* struct contains XBZRLE cache and a static page
>> + used by the compression */
>> +static struct {
>> + /* buffer used for XBZRLE encoding */
>> + uint8_t *encoded_buf;
>> + /* Cache for XBZRLE */
>> + Cache *cache;
>> +} XBZRLE = {0};
>
> Use c99 initializers, please.
>
> { .encoded_buf = NULL,
> .cache = NULL,
> }
>
> More instances in other parts.
>
>> +
>> static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>> int cont, int flag)
> > {
>> @@ -169,19 +195,78 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>>
>> }
>>
>> +#define ENCODING_FLAG_XBZRLE 0x1
>> +
>> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
>> + ram_addr_t current_addr, RAMBlock *block,
>> + ram_addr_t offset, int cont)
>> +{
>> + int encoded_len = 0, bytes_sent = -1, ret = -1;
>> + XBZRLEHeader hdr = {0};
>> + uint8_t *prev_cached_page;
>> +
>> + /* check to see if page is cached , if not cache and return */
>> + if (!cache_is_cached(XBZRLE.cache, current_addr)) {
>> + cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data,
>> + TARGET_PAGE_SIZE));
>> + goto done;
>> + }
>> +
>> + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
>> +
>> + /* XBZRLE encoding (if there is no overflow) */
>> + encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data,
>> + TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
>> + TARGET_PAGE_SIZE);
>> + if (encoded_len == 0) {
>> + bytes_sent = 0;
>> + DPRINTF("Unmodifed page or overflow skipping\n");
>> + goto done;
>> + } else if (encoded_len == -1) {
>> + bytes_sent = -1;
>> + DPRINTF("Overflow\n");
>> + /* update data in the cache */
>> + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
>> + goto done;
>> + }
>> +
>> + /* we need to update the data in the cache, in order to get the same data
>> + we cached we decode the encoded page on the cached data */
>> + ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len,
>> + prev_cached_page, TARGET_PAGE_SIZE);
>> + g_assert(ret != -1);
>> +
>> + hdr.xh_len = encoded_len;
>> + hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
>> +
>> + /* Send XBZRLE based compressed page */
>> + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
>> + qemu_put_byte(f, hdr.xh_flags);
>> + qemu_put_be16(f, hdr.xh_len);
>> + qemu_put_be32(f, hdr.xh_cksum);
>> + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
>> + bytes_sent = encoded_len + sizeof(hdr);
>> +
>> +done:
>> + return bytes_sent;
>> +}
>> +
>> static RAMBlock *last_block;
>> static ram_addr_t last_offset;
>>
>> -static int ram_save_block(QEMUFile *f)
>> +static int ram_save_block(QEMUFile *f, int stage)
>> {
>> RAMBlock *block = last_block;
>> ram_addr_t offset = last_offset;
>> - int bytes_sent = 0;
>> + int bytes_sent = -1;
>> MemoryRegion *mr;
>> + ram_addr_t current_addr;
>>
>> if (!block)
>> block = QLIST_FIRST(&ram_list.blocks);
>>
>> + current_addr = block->offset + offset;
>> +
>> do {
>> mr = block->mr;
>> if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
>> @@ -198,7 +283,24 @@ static int ram_save_block(QEMUFile *f)
>> save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
>> qemu_put_byte(f, *p);
>> bytes_sent = 1;
>> - } else {
>> + } else if (migrate_use_xbzrle()) {
>> + /* in stage 1 none of the pages are cached so we just want to
>> + cache them for next stages, and send the cached copy */
>> + if (stage == 1) {
>> + cache_insert(XBZRLE.cache, current_addr,
>> + g_memdup(p, TARGET_PAGE_SIZE));
>> + } else {
>> + bytes_sent = save_xbzrle_page(f, p, current_addr, block,
>> + offset, cont);
>> + }
>> + /* send the cached page copy for stage 1 and 2*/
>> + if (stage != 3) {
>> + p = get_cached_data(XBZRLE.cache, current_addr);
>> + }
>> + }
>> +
>> + /* either we didn't send yet (we may got XBZRLE overflow) */
>> + if (bytes_sent == -1) {
>> save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>> qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>> bytes_sent = TARGET_PAGE_SIZE;
>
>
> I think that code is not right when save_xbzrle_page() returns 0. That
> means that page hasn't changed since last time we sent that page. We
> shouldn't break in that case. Just continue with next page, right?
>
You are right I missed that , will be fixed
> On the other hand ... Why are we doing the stage == 1 test? stage 1
> normally only sent part of the pages, so we could use the generic code
> there? It would just return -1 as bytes_sent, and do the same code that
> we have now?
we need to add the pages to the cache in stage 1 (for the next stage),
and there is no need for checking if the page is cached.
and send the pages from the cache for consistency
>
> The optimization for stage 3 is not done backwards? We are inserting
> the page in the cache even if we are on stage 3. In stage three we
> should:
> - look if page is on the cache: do usual xbrlze trick
> - if it is not, just sent the whole page without inserting it into the
> cache? We are never going to reuse it, so putting it into the cache
> would not help us at all. We are just making an extra copy?
right no need to insert the page into the cache in stage 3, I will remove it
>
>
>>
>> qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>
>> expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>>
>> + DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
>> + migrate_max_downtime());
>> +
>
> This belongs to debugging patch.
>
>> + /* load data and decode */
>> + xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE);
>
> can't we have a static buffer of that size, and avoid all the
> malloc/free business? If space is tight, we can allways put it on the
> xbrle structure and assign it only for migration.
good idea
>
>> @@ -481,16 +657,33 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>> void *host;
>>
>> host = host_from_stream_offset(f, addr, flags);
>> + if (!host) {
>> + return -EINVAL;
>> + }
>
> Why is this check only needed now?
I wish I knew, looks like it is missing in upstream.
Do you think I should fix it separately ?
Thanks,
Orit
>
> Later, Juan.
next prev parent reply other threads:[~2012-06-06 5:26 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-22 12:56 [Qemu-devel] [PATCH v11 0/9] XBZRLE delta for live migration of large memory app Orit Wasserman
2012-05-22 12:56 ` [Qemu-devel] [PATCH v11 1/9] Add MigrationParams structure Orit Wasserman
2012-06-01 10:51 ` Juan Quintela
2012-05-22 12:56 ` [Qemu-devel] [PATCH v11 2/9] Add migration capabilites Orit Wasserman
2012-05-22 13:08 ` Eric Blake
2012-06-01 10:57 ` Juan Quintela
2012-06-06 1:48 ` Orit Wasserman
2012-06-07 10:41 ` Juan Quintela
2012-05-22 12:56 ` [Qemu-devel] [PATCH v11 3/9] Add XBZRLE documentation Orit Wasserman
2012-05-22 13:13 ` Eric Blake
2012-06-01 10:58 ` Juan Quintela
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 4/9] Add cache handling functions Orit Wasserman
2012-06-01 11:01 ` Juan Quintela
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 5/9] Add uleb encoding/decoding functions Orit Wasserman
2012-06-01 11:04 ` Juan Quintela
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 6/9] Add save_block_hdr function Orit Wasserman
2012-06-01 11:04 ` Juan Quintela
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 7/9] Add XBZRLE to ram_save_block and ram_save_live Orit Wasserman
2012-06-01 11:42 ` Juan Quintela
2012-06-06 2:13 ` Orit Wasserman [this message]
2012-06-07 10:38 ` Juan Quintela
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 8/9] Add set_cachesize command Orit Wasserman
2012-06-01 11:19 ` Juan Quintela
2012-06-06 2:14 ` Orit Wasserman
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 9/9] Add XBZRLE statistics Orit Wasserman
2012-06-01 11:10 ` Juan Quintela
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4FCEBCB0.5040402@redhat.com \
--to=owasserm@redhat.com \
--cc=aidan.shribman@sap.com \
--cc=aliguori@us.ibm.com \
--cc=avi@redhat.com \
--cc=benoit.hudzia@sap.com \
--cc=blauwirbel@gmail.com \
--cc=chegu_vinod@hp.com \
--cc=eblake@redhat.com \
--cc=mdroth@linux.vnet.ibm.com \
--cc=pbonzini@redhat.com \
--cc=peter.maydell@linaro.org \
--cc=petters@cs.umu.se \
--cc=qemu-devel@nongnu.org \
--cc=quintela@redhat.com \
--cc=stefanha@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.