From: Orit Wasserman <owasserm@redhat.com>
To: quintela@redhat.com
Cc: peter.maydell@linaro.org, aliguori@us.ibm.com,
stefanha@gmail.com, qemu-devel@nongnu.org,
Benoit Hudzia <benoit.hudzia@sap.com>,
mdroth@linux.vnet.ibm.com, blauwirbel@gmail.com,
Petter Svard <petters@cs.umu.se>,
chegu_vinod@hp.com, avi@redhat.com,
Aidan Shribman <aidan.shribman@sap.com>,
pbonzini@redhat.com, eblake@redhat.com
Subject: Re: [Qemu-devel] [PATCH v11 7/9] Add XBZRLE to ram_save_block and ram_save_live
Date: Wed, 06 Jun 2012 05:13:04 +0300 [thread overview]
Message-ID: <4FCEBCB0.5040402@redhat.com> (raw)
In-Reply-To: <877gvr8di8.fsf@elfo.elfo>
On 06/01/2012 02:42 PM, Juan Quintela wrote:
> Orit Wasserman <owasserm@redhat.com> wrote:
>> In the outgoing migration check to see if the page is cached and
>> changed than send compressed page by using save_xbrle_page function.
>> In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
>> and decompress the page (by using load_xbrle function).
>
>
> This patch can be split to easy review.
Sure.
>
>> --- a/arch_init.c
>> +++ b/arch_init.c
>> @@ -43,6 +43,15 @@
>> #include "hw/smbios.h"
>> #include "exec-memory.h"
>> #include "hw/pcspk.h"
>> +#include "qemu/cache.h"
>> +
>> +#ifdef DEBUG_ARCH_INIT
>> +#define DPRINTF(fmt, ...) \
>> + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
>> +#else
>> +#define DPRINTF(fmt, ...) \
>> + do { } while (0)
>> +#endif
>
> Independent of xbzrle.
>
>>
>> #ifdef TARGET_SPARC
>> int graphic_width = 1024;
>> @@ -94,6 +103,7 @@ const uint32_t arch_type = QEMU_ARCH;
>> #define RAM_SAVE_FLAG_PAGE 0x08
>> #define RAM_SAVE_FLAG_EOS 0x10
>> #define RAM_SAVE_FLAG_CONTINUE 0x20
>> +#define RAM_SAVE_FLAG_XBZRLE 0x40
>>
>> #ifdef __ALTIVEC__
>> #include <altivec.h>
>> @@ -157,6 +167,22 @@ static int is_dup_page(uint8_t *page)
>> return 1;
>> }
>>
>> +/* XBZRLE (Xor Based Zero Length Encoding */
>> +typedef struct XBZRLEHeader {
>> + uint32_t xh_cksum;
>
> We are still not using this value, and we are sending it anyway (with a
> value of zero). What happens when we start using if for a checksum, and
> we migration to a new version that "expects" it to be valid? I would
> preffer not to sent it, or sent the correct value.
I think I will remove it, checksum should be used for all migration not just XBZRLE.
I guess we can add it to the protocol in the future.
>
>> + uint16_t xh_len;
>> + uint8_t xh_flags;
>> +} XBZRLEHeader;
>> +
>> +/* struct contains XBZRLE cache and a static page
>> + used by the compression */
>> +static struct {
>> + /* buffer used for XBZRLE encoding */
>> + uint8_t *encoded_buf;
>> + /* Cache for XBZRLE */
>> + Cache *cache;
>> +} XBZRLE = {0};
>
> Use c99 initializers, please.
>
> { .encoded_buf = NULL,
> .cache = NULL,
> }
>
> More instances in other parts.
>
>> +
>> static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>> int cont, int flag)
> > {
>> @@ -169,19 +195,78 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>>
>> }
>>
>> +#define ENCODING_FLAG_XBZRLE 0x1
>> +
>> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
>> + ram_addr_t current_addr, RAMBlock *block,
>> + ram_addr_t offset, int cont)
>> +{
>> + int encoded_len = 0, bytes_sent = -1, ret = -1;
>> + XBZRLEHeader hdr = {0};
>> + uint8_t *prev_cached_page;
>> +
>> + /* check to see if page is cached , if not cache and return */
>> + if (!cache_is_cached(XBZRLE.cache, current_addr)) {
>> + cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data,
>> + TARGET_PAGE_SIZE));
>> + goto done;
>> + }
>> +
>> + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
>> +
>> + /* XBZRLE encoding (if there is no overflow) */
>> + encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data,
>> + TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
>> + TARGET_PAGE_SIZE);
>> + if (encoded_len == 0) {
>> + bytes_sent = 0;
>> + DPRINTF("Unmodifed page or overflow skipping\n");
>> + goto done;
>> + } else if (encoded_len == -1) {
>> + bytes_sent = -1;
>> + DPRINTF("Overflow\n");
>> + /* update data in the cache */
>> + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
>> + goto done;
>> + }
>> +
>> + /* we need to update the data in the cache, in order to get the same data
>> + we cached we decode the encoded page on the cached data */
>> + ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len,
>> + prev_cached_page, TARGET_PAGE_SIZE);
>> + g_assert(ret != -1);
>> +
>> + hdr.xh_len = encoded_len;
>> + hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
>> +
>> + /* Send XBZRLE based compressed page */
>> + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
>> + qemu_put_byte(f, hdr.xh_flags);
>> + qemu_put_be16(f, hdr.xh_len);
>> + qemu_put_be32(f, hdr.xh_cksum);
>> + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
>> + bytes_sent = encoded_len + sizeof(hdr);
>> +
>> +done:
>> + return bytes_sent;
>> +}
>> +
>> static RAMBlock *last_block;
>> static ram_addr_t last_offset;
>>
>> -static int ram_save_block(QEMUFile *f)
>> +static int ram_save_block(QEMUFile *f, int stage)
>> {
>> RAMBlock *block = last_block;
>> ram_addr_t offset = last_offset;
>> - int bytes_sent = 0;
>> + int bytes_sent = -1;
>> MemoryRegion *mr;
>> + ram_addr_t current_addr;
>>
>> if (!block)
>> block = QLIST_FIRST(&ram_list.blocks);
>>
>> + current_addr = block->offset + offset;
>> +
>> do {
>> mr = block->mr;
>> if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
>> @@ -198,7 +283,24 @@ static int ram_save_block(QEMUFile *f)
>> save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
>> qemu_put_byte(f, *p);
>> bytes_sent = 1;
>> - } else {
>> + } else if (migrate_use_xbzrle()) {
>> + /* in stage 1 none of the pages are cached so we just want to
>> + cache them for next stages, and send the cached copy */
>> + if (stage == 1) {
>> + cache_insert(XBZRLE.cache, current_addr,
>> + g_memdup(p, TARGET_PAGE_SIZE));
>> + } else {
>> + bytes_sent = save_xbzrle_page(f, p, current_addr, block,
>> + offset, cont);
>> + }
>> + /* send the cached page copy for stage 1 and 2*/
>> + if (stage != 3) {
>> + p = get_cached_data(XBZRLE.cache, current_addr);
>> + }
>> + }
>> +
>> + /* either we didn't send yet (we may got XBZRLE overflow) */
>> + if (bytes_sent == -1) {
>> save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>> qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>> bytes_sent = TARGET_PAGE_SIZE;
>
>
> I think that code is not right when save_xbzrle_page() returns 0. That
> means that page hasn't changed since last time we sent that page. We
> shouldn't break in that case. Just continue with next page, right?
>
You are right I missed that , will be fixed
> On the other hand ... Why are we doing the stage == 1 test? stage 1
> normally only sent part of the pages, so we could use the generic code
> there? It would just return -1 as bytes_sent, and do the same code that
> we have now?
we need to add the pages to the cache in stage 1 (for the next stage),
and there is no need for checking if the page is cached.
and send the pages from the cache for consistency
>
> The optimization for stage 3 is not done backwards? We are inserting
> the page in the cache even if we are on stage 3. In stage three we
> should:
> - look if page is on the cache: do usual xbrlze trick
> - if it is not, just sent the whole page without inserting it into the
> cache? We are never going to reuse it, so putting it into the cache
> would not help us at all. We are just making an extra copy?
right no need to insert the page into the cache in stage 3, I will remove it
>
>
>>
>> qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>
>> expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>>
>> + DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
>> + migrate_max_downtime());
>> +
>
> This belongs to debugging patch.
>
>> + /* load data and decode */
>> + xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE);
>
> can't we have a static buffer of that size, and avoid all the
> malloc/free business? If space is tight, we can allways put it on the
> xbrle structure and assign it only for migration.
good idea
>
>> @@ -481,16 +657,33 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>> void *host;
>>
>> host = host_from_stream_offset(f, addr, flags);
>> + if (!host) {
>> + return -EINVAL;
>> + }
>
> Why is this check only needed now?
I wish I knew, looks like it is missing in upstream.
Do you think I should fix it separately ?
Thanks,
Orit
>
> Later, Juan.
next prev parent reply other threads:[~2012-06-06 5:26 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-22 12:56 [Qemu-devel] [PATCH v11 0/9] XBZRLE delta for live migration of large memory app Orit Wasserman
2012-05-22 12:56 ` [Qemu-devel] [PATCH v11 1/9] Add MigrationParams structure Orit Wasserman
2012-06-01 10:51 ` Juan Quintela
2012-05-22 12:56 ` [Qemu-devel] [PATCH v11 2/9] Add migration capabilites Orit Wasserman
2012-05-22 13:08 ` Eric Blake
2012-06-01 10:57 ` Juan Quintela
2012-06-06 1:48 ` Orit Wasserman
2012-06-07 10:41 ` Juan Quintela
2012-05-22 12:56 ` [Qemu-devel] [PATCH v11 3/9] Add XBZRLE documentation Orit Wasserman
2012-05-22 13:13 ` Eric Blake
2012-06-01 10:58 ` Juan Quintela
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 4/9] Add cache handling functions Orit Wasserman
2012-06-01 11:01 ` Juan Quintela
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 5/9] Add uleb encoding/decoding functions Orit Wasserman
2012-06-01 11:04 ` Juan Quintela
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 6/9] Add save_block_hdr function Orit Wasserman
2012-06-01 11:04 ` Juan Quintela
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 7/9] Add XBZRLE to ram_save_block and ram_save_live Orit Wasserman
2012-06-01 11:42 ` Juan Quintela
2012-06-06 2:13 ` Orit Wasserman [this message]
2012-06-07 10:38 ` Juan Quintela
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 8/9] Add set_cachesize command Orit Wasserman
2012-06-01 11:19 ` Juan Quintela
2012-06-06 2:14 ` Orit Wasserman
2012-05-22 12:57 ` [Qemu-devel] [PATCH v11 9/9] Add XBZRLE statistics Orit Wasserman
2012-06-01 11:10 ` Juan Quintela
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4FCEBCB0.5040402@redhat.com \
--to=owasserm@redhat.com \
--cc=aidan.shribman@sap.com \
--cc=aliguori@us.ibm.com \
--cc=avi@redhat.com \
--cc=benoit.hudzia@sap.com \
--cc=blauwirbel@gmail.com \
--cc=chegu_vinod@hp.com \
--cc=eblake@redhat.com \
--cc=mdroth@linux.vnet.ibm.com \
--cc=pbonzini@redhat.com \
--cc=peter.maydell@linaro.org \
--cc=petters@cs.umu.se \
--cc=qemu-devel@nongnu.org \
--cc=quintela@redhat.com \
--cc=stefanha@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).