* [Qemu-devel] [PATCH 1/3] qemu-file: improve qemu_put_compression_data
2015-08-25 11:59 [Qemu-devel] [PATCH 0/3] Optimize the performance for single thread (de)compression Liang Li
@ 2015-08-25 11:59 ` Liang Li
2015-08-25 11:59 ` [Qemu-devel] [PATCH 2/3] migration: optimization for one compression thread Liang Li
2015-08-25 11:59 ` [Qemu-devel] [PATCH 3/3] migration: optimization for one decompression thread Liang Li
2 siblings, 0 replies; 4+ messages in thread
From: Liang Li @ 2015-08-25 11:59 UTC (permalink / raw)
To: qemu-devel; +Cc: amit.shah, yang.z.zhang, Liang Li, dgilbert, quintela
There are some flaws in qemu_put_compression_data that prevent it from operating
on a normal QEMUFile; improve it so that it can be used later.
Signed-off-by: Liang Li <liang.z.li@intel.com>
---
migration/qemu-file.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 6bb3dc1..59967b6 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -567,7 +567,9 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
if (blen < compressBound(size)) {
- return 0;
+ if (f->ops->writev_buffer || f->ops->put_buffer) {
+ qemu_fflush(f);
+ }
}
if (compress2(f->buf + f->buf_index + sizeof(int32_t), (uLongf *)&blen,
(Bytef *)p, size, level) != Z_OK) {
@@ -575,7 +577,13 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size,
return 0;
}
qemu_put_be32(f, blen);
+ if (f->ops->writev_buffer) {
+ add_to_iovec(f, f->buf + f->buf_index, blen);
+ }
f->buf_index += blen;
+ if (f->buf_index == IO_BUF_SIZE) {
+ qemu_fflush(f);
+ }
return blen + sizeof(int32_t);
}
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH 2/3] migration: optimization for one compression thread
2015-08-25 11:59 [Qemu-devel] [PATCH 0/3] Optimize the performance for single thread (de)compression Liang Li
2015-08-25 11:59 ` [Qemu-devel] [PATCH 1/3] qemu-file: improve qemu_put_compression_data Liang Li
@ 2015-08-25 11:59 ` Liang Li
2015-08-25 11:59 ` [Qemu-devel] [PATCH 3/3] migration: optimization for one decompression thread Liang Li
2 siblings, 0 replies; 4+ messages in thread
From: Liang Li @ 2015-08-25 11:59 UTC (permalink / raw)
To: qemu-devel; +Cc: amit.shah, yang.z.zhang, Liang Li, dgilbert, quintela
When the compression thread count is set to 1, the current implementation
is inefficient because of the following reason:
1. Thread synchronization cost;
2. Data copy;
3. No benefit from the separate compression thread;
This patch optimizes performance for the case of a single compression thread.
In this case, the compression is done in the migration thread; for a fast
compression algorithm, this can help to improve performance.
Signed-off-by: Liang Li <liang.z.li@intel.com>
---
migration/ram.c | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)
diff --git a/migration/ram.c b/migration/ram.c
index 7f007e6..0cc4f81 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -318,8 +318,13 @@ void migrate_compress_threads_join(void)
if (!migrate_use_compression()) {
return;
}
- terminate_compression_threads();
+
thread_count = migrate_compress_threads();
+ if (thread_count == 1) {
+ return;
+ }
+
+ terminate_compression_threads();
for (i = 0; i < thread_count; i++) {
qemu_thread_join(compress_threads + i);
qemu_fclose(comp_param[i].file);
@@ -345,9 +350,12 @@ void migrate_compress_threads_create(void)
if (!migrate_use_compression()) {
return;
}
+ thread_count = migrate_compress_threads();
+ if (thread_count == 1) {
+ return;
+ }
quit_comp_thread = false;
compression_switch = true;
- thread_count = migrate_compress_threads();
compress_threads = g_new0(QemuThread, thread_count);
comp_param = g_new0(CompressParam, thread_count);
comp_done_cond = g_new0(QemuCond, 1);
@@ -782,6 +790,9 @@ static void flush_compressed_data(QEMUFile *f)
return;
}
thread_count = migrate_compress_threads();
+ if (thread_count == 1) {
+ return;
+ }
for (idx = 0; idx < thread_count; idx++) {
if (!comp_param[idx].done) {
qemu_mutex_lock(comp_done_lock);
@@ -883,18 +894,16 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
* out, keeping this order is important, because the 'cont' flag
* is used to avoid resending the block name.
*/
- if (block != last_sent_block) {
+ if (block != last_sent_block || migrate_compress_threads() == 1) {
flush_compressed_data(f);
pages = save_zero_page(f, block, offset, p, bytes_transferred);
if (pages == -1) {
- set_compress_params(&comp_param[0], block, offset);
- /* Use the qemu thread to compress the data to make sure the
- * first page is sent out before other pages
- */
- bytes_xmit = do_compress_ram_page(&comp_param[0]);
- acct_info.norm_pages++;
- qemu_put_qemu_file(f, comp_param[0].file);
+ bytes_xmit = save_page_header(f, block, offset |
+ RAM_SAVE_FLAG_COMPRESS_PAGE);
+ bytes_xmit += qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
+ migrate_compress_level());
*bytes_transferred += bytes_xmit;
+ acct_info.norm_pages++;
pages = 1;
}
} else {
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH 3/3] migration: optimization for one decompression thread
2015-08-25 11:59 [Qemu-devel] [PATCH 0/3] Optimize the performance for single thread (de)compression Liang Li
2015-08-25 11:59 ` [Qemu-devel] [PATCH 1/3] qemu-file: improve qemu_put_compression_data Liang Li
2015-08-25 11:59 ` [Qemu-devel] [PATCH 2/3] migration: optimization for one compression thread Liang Li
@ 2015-08-25 11:59 ` Liang Li
2 siblings, 0 replies; 4+ messages in thread
From: Liang Li @ 2015-08-25 11:59 UTC (permalink / raw)
To: qemu-devel; +Cc: amit.shah, yang.z.zhang, Liang Li, dgilbert, quintela
When decompression thread count is set to 1, the current implementation
is inefficient because of the following reason:
1. Thread synchronization cost;
2. Data copy;
This patch optimizes performance for the case of a single decompression
thread. In this case, the decompression is done in
process_incoming_migration_co; for a fast decompression algorithm, this can
help to improve performance.
Signed-off-by: Liang Li <liang.z.li@intel.com>
---
migration/ram.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/migration/ram.c b/migration/ram.c
index 0cc4f81..fc91997 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1414,6 +1414,9 @@ void migrate_decompress_threads_create(void)
int i, thread_count;
thread_count = migrate_decompress_threads();
+ if (thread_count == 1) {
+ return;
+ }
decompress_threads = g_new0(QemuThread, thread_count);
decomp_param = g_new0(DecompressParam, thread_count);
compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
@@ -1432,8 +1435,11 @@ void migrate_decompress_threads_join(void)
{
int i, thread_count;
- quit_decomp_thread = true;
thread_count = migrate_decompress_threads();
+ if (thread_count == 1) {
+ return;
+ }
+ quit_decomp_thread = true;
for (i = 0; i < thread_count; i++) {
qemu_mutex_lock(&decomp_param[i].mutex);
qemu_cond_signal(&decomp_param[i].cond);
@@ -1575,7 +1581,14 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
break;
}
qemu_get_buffer(f, compressed_data_buf, len);
- decompress_data_with_multi_threads(compressed_data_buf, host, len);
+ if (migrate_decompress_threads() == 1) {
+ unsigned long pagesize = TARGET_PAGE_SIZE;
+ uncompress((Bytef *)host, &pagesize,
+ (const Bytef *)compressed_data_buf, len);
+ } else {
+ decompress_data_with_multi_threads(compressed_data_buf,
+ host, len);
+ }
break;
case RAM_SAVE_FLAG_XBZRLE:
host = host_from_stream_offset(f, addr, flags);
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread