From: Paolo Bonzini <pbonzini@redhat.com>
To: Juan Quintela <quintela@redhat.com>
Cc: chegu_vinod@hp.com, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [RFC 00/28] bitmap handling optimization
Date: Wed, 09 Oct 2013 15:29:57 +0200 [thread overview]
Message-ID: <52555A55.2020109@redhat.com> (raw)
In-Reply-To: <1381318130-10620-1-git-send-email-quintela@redhat.com>
Il 09/10/2013 13:28, Juan Quintela ha scritto:
> Hi
>
> This series split the dirty bitmap (8 bits per page, only three used)
> into 3 individual bitmaps. Once the conversion is done, operations
> are handled by bitmap operations, not bit by bit.
>
> - *_DIRTY_FLAG flags are gone, now we use memory.h DIRTY_MEMORY_*
> everywhere.
>
> - We set/reset each flag individually
> (set_dirty_flags(0xff&~CODE_DIRTY_FLAG)) are gone.
>
> - Rename several functions to clarify/make consistent things.
>
> - I know it dont't pass checkpatch for long lines, propper submission
> should pass it. We have to have long lines, short variable names, or
> ugly line splitting :p
>
> - DIRTY_MEMORY_NUM: how can one include exec/memory.h into cpu-all.h?
> #include it don't work, as a workaround, I have copied its value, but
> any better idea? I can always create "exec/migration-flags.h", though.
I think both files are too "central" and you get some sort of circular dependency.
The solution could be to move RAM definitions from cpu-all.h to
memory-internal.h. For example:
diff --git a/arch_init.c b/arch_init.c
index 7545d96..8752e27 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -47,7 +47,7 @@
#include "qemu/config-file.h"
#include "qmp-commands.h"
#include "trace.h"
-#include "exec/cpu-all.h"
+#include "exec/memory-internal.h"
#include "hw/acpi/acpi.h"
#ifdef DEBUG_ARCH_INIT
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 019dc20..4cfde66 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -435,57 +435,11 @@ void cpu_watchpoint_remove_all(CPUArchState *env, int mask);
#if !defined(CONFIG_USER_ONLY)
-/* memory API */
-
extern ram_addr_t ram_size;
-
-/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
-#define RAM_PREALLOC_MASK (1 << 0)
-
-typedef struct RAMBlock {
- struct MemoryRegion *mr;
- uint8_t *host;
- ram_addr_t offset;
- ram_addr_t length;
- uint32_t flags;
- char idstr[256];
- /* Reads can take either the iothread or the ramlist lock.
- * Writes must take both locks.
- */
- QTAILQ_ENTRY(RAMBlock) next;
- int fd;
-} RAMBlock;
-
-#define DIRTY_MEMORY_NUM 3
-
-typedef struct RAMList {
- QemuMutex mutex;
- /* Protected by the iothread lock. */
- unsigned long *dirty_memory[DIRTY_MEMORY_NUM];
- RAMBlock *mru_block;
- /* Protected by the ramlist lock. */
- QTAILQ_HEAD(, RAMBlock) blocks;
- uint32_t version;
-} RAMList;
-extern RAMList ram_list;
-
extern const char *mem_path;
extern int mem_prealloc;
-/* Flags stored in the low bits of the TLB virtual address. These are
- defined so that fast path ram access is all zeros. */
-/* Zero if TLB entry is valid. */
-#define TLB_INVALID_MASK (1 << 3)
-/* Set if TLB entry references a clean RAM page. The iotlb entry will
- contain the page physical address. */
-#define TLB_NOTDIRTY (1 << 4)
-/* Set if TLB entry is an IO callback. */
-#define TLB_MMIO (1 << 5)
-
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
-ram_addr_t last_ram_offset(void);
-void qemu_mutex_lock_ramlist(void);
-void qemu_mutex_unlock_ramlist(void);
#endif /* !CONFIG_USER_ONLY */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h
index e21cb60..719fa27 100644
--- a/include/exec/cputlb.h
+++ b/include/exec/cputlb.h
@@ -20,6 +20,17 @@
#define CPUTLB_H
#if !defined(CONFIG_USER_ONLY)
+
+/* Flags stored in the low bits of the TLB virtual address. These are
+ defined so that fast path ram access is all zeros. */
+/* Zero if TLB entry is valid. */
+#define TLB_INVALID_MASK (1 << 3)
+/* Set if TLB entry references a clean RAM page. The iotlb entry will
+ contain the page physical address. */
+#define TLB_NOTDIRTY (1 << 4)
+/* Set if TLB entry is an IO callback. */
+#define TLB_MMIO (1 << 5)
+
/* cputlb.c */
void tlb_protect_code(ram_addr_t ram_addr);
void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h
index d46570e..aaf76c2 100644
--- a/include/exec/memory-internal.h
+++ b/include/exec/memory-internal.h
@@ -22,6 +22,35 @@
#ifndef CONFIG_USER_ONLY
#include "hw/xen/xen.h"
+/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
+#define RAM_PREALLOC_MASK (1 << 0)
+
+typedef struct RAMBlock {
+ struct MemoryRegion *mr;
+ uint8_t *host;
+ ram_addr_t offset;
+ ram_addr_t length;
+ uint32_t flags;
+ char idstr[256];
+ /* Reads can take either the iothread or the ramlist lock.
+ * Writes must take both locks.
+ */
+ QTAILQ_ENTRY(RAMBlock) next;
+ int fd;
+} RAMBlock;
+
+#define DIRTY_MEMORY_NUM 3
+
+typedef struct RAMList {
+ QemuMutex mutex;
+ /* Protected by the iothread lock. */
+ unsigned long *dirty_memory[DIRTY_MEMORY_NUM];
+ RAMBlock *mru_block;
+ /* Protected by the ramlist lock. */
+ QTAILQ_HEAD(, RAMBlock) blocks;
+ uint32_t version;
+} RAMList;
+extern RAMList ram_list;
typedef struct AddressSpaceDispatch AddressSpaceDispatch;
@@ -105,6 +134,10 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
unsigned client);
+ram_addr_t last_ram_offset(void);
+void qemu_mutex_lock_ramlist(void);
+void qemu_mutex_unlock_ramlist(void);
+
#endif
#endif
diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
index 5bbc56a..cc27058 100644
--- a/include/exec/softmmu_template.h
+++ b/include/exec/softmmu_template.h
@@ -23,6 +23,7 @@
*/
#include "qemu/timer.h"
#include "exec/memory.h"
+#include "exec/cputlb.h"
#define DATA_SIZE (1 << SHIFT)
Bonus points for splitting RAM save out of arch_init.c. :)
> - create a lock for the bitmaps and fold migration bitmap into this
> one. This would avoid a copy and make things easier?
I think this is less important and not that easy. For example,
how fine-grained should the locking be without bogging down TCG?
You can speed up migration_bitmap_sync by a factor of 64 or more
if you copy word-by-word instead of memory_region_test_and_clear_dirty
and migration_bitmap_set_dirty.
Once you do that and also merge KVM and vhost bitmaps one word at a
time, it's likely that dirty bitmaps get almost out of the
profile.
> - As this code uses/abuses bitmaps, we need to change the type of the
> index from int to long. With an int index, we can only access a
> maximum of 8TB guest (yes, this is not urgent, we have a couple of
> years to do it).
Yes.
> - merging KVM <-> QEMU bitmap as a bitmap and not bit-by-bit.
Right. All of vhost_dev_sync_region, kvm_get_dirty_pages_log_range,
xen_sync_dirty_bitmap are really working a word at a time.
So it should be easy to optimize the log_sync implementations to
work with a word-at-a-time API instead of memory_region_set_dirty.
> - spliting the KVM bitmap synchronization into chunks, i.e. not
> synchronize all memory, just enough to continue with migration.
That can also help. However, it's not easy to do it without
making ram_save_pending's computations too optimistic.
So I'd just focus on speeding up migration_bitmap_sync first. It's
easy and should "almost" do the entirety of the work.
Paolo
prev parent reply other threads:[~2013-10-09 13:30 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-10-09 11:28 [Qemu-devel] [RFC 00/28] bitmap handling optimization Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 01/28] Move prototypes to memory.h Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 02/28] memory: cpu_physical_memory_set_dirty_flags() result is never used Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 03/28] memory: cpu_physical_memory_set_dirty_range() return void Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 04/28] exec: use accessor function to know if memory is dirty Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 05/28] memory: create function to set a single dirty bit Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 06/28] exec: create function to get " Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 07/28] memory: make cpu_physical_memory_is_dirty return bool Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 08/28] exec: simplify notdirty_mem_write() Juan Quintela
2013-10-09 19:10 ` Eric Blake
2013-10-09 19:18 ` Eric Blake
2013-10-09 11:28 ` [Qemu-devel] [PATCH 09/28] memory: all users of cpu_physical_memory_get_dirty used only one flag Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 10/28] memory: set single dirty flags when possible Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 11/28] memory: cpu_physical_memory_set_dirty_range() allways dirty all flags Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 12/28] memory: cpu_physical_memory_mask_dirty_range() allways clear a single flag Juan Quintela
2013-10-09 19:17 ` Eric Blake
2013-10-09 11:28 ` [Qemu-devel] [PATCH 13/28] memory: use DIRTY_MEMORY_* instead of *_DIRTY_FLAG Juan Quintela
2013-10-09 19:23 ` Eric Blake
2013-10-09 11:28 ` [Qemu-devel] [PATCH 14/28] memory: use bit 2 for migration Juan Quintela
2013-10-09 19:24 ` Eric Blake
2013-10-09 11:28 ` [Qemu-devel] [PATCH 15/28] memory: make sure that client is always inside range Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 16/28] memory: only resize dirty bitmap when memory size increases Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 17/28] memory: cpu_physical_memory_clear_dirty_flag() result is never used Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 18/28] bitmap: Add bitmap_zero_extend operation Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 19/28] memory: split dirty bitmap into three Juan Quintela
2013-10-09 19:42 ` Eric Blake
2013-10-09 11:28 ` [Qemu-devel] [PATCH 20/28] memory: unfold cpu_physical_memory_clear_dirty_flag() in its only user Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 21/28] memory: unfold cpu_physical_memory_set_dirty() " Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 22/28] memory: unfold cpu_physical_memory_set_dirty_flag() Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 23/28] memory: make cpu_physical_memory_get_dirty() the main function Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 24/28] memory: cpu_physical_memory_get_dirty() is used as returning a bool Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 25/28] memory: s/mask/clear/ cpu_physical_memory_mask_dirty_range Juan Quintela
2013-10-09 11:28 ` [Qemu-devel] [PATCH 26/28] memory: use find_next_bit() to find dirty bits Juan Quintela
2013-10-09 19:57 ` Eric Blake
2013-10-09 11:28 ` [Qemu-devel] [PATCH 27/28] memory: cpu_physical_memory_set_dirty_range() now uses bitmap operations Juan Quintela
2013-10-09 19:57 ` Eric Blake
2013-10-09 11:28 ` [Qemu-devel] [PATCH 28/28] memory: cpu_physical_memory_clear_dirty_range() " Juan Quintela
2013-10-09 18:16 ` Richard Henderson
2013-10-09 13:29 ` Paolo Bonzini [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=52555A55.2020109@redhat.com \
--to=pbonzini@redhat.com \
--cc=chegu_vinod@hp.com \
--cc=qemu-devel@nongnu.org \
--cc=quintela@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.