* [Qemu-devel] [PATCH 00/20] Reduce storage overhead of memory core
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
Currently the memory core requires 16 bytes per guest page (0.4% overhead).
This patchset, building on the patches posted last week, reduces the overhead
to practically nothing. Assuming a 4k page size, contiguous aligned 4k, 4M,
and 4G regions each occupy just 2 bytes. Since most guest memory is physically
contiguous, we can describe a guest with just a few dozen kilobytes.
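To sanity-check those figures (a sketch, assuming 4k target pages and a
64-bit host, where a PhysPageDesc is two 8-byte ram_addr_t fields):

    16 bytes / 4096 bytes per page  ~= 0.4% of guest RAM
    1 GB guest -> 262144 pages * 16 ~= 4 MB of map overhead today

After the series, a mostly-contiguous guest needs only a handful of 2-byte
leaves plus a few intermediate nodes, hence the few-dozen-kilobyte figure.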
Please review carefully, especially patch 13, and test carefully, especially
things like rom/device.
Also available in
git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git memory/core
Avi Kivity (20):
memory: allow MemoryListeners to observe a specific address space
xen: ignore I/O memory regions
memory: split memory listener for the two address spaces
memory: support stateless memory listeners
memory: change memory registration to rebuild the memory map on each
change
memory: remove first level of l1_phys_map
memory: unify phys_map last level with intermediate levels
memory: store MemoryRegionSection pointers in phys_map
memory: compress phys_map node pointers to 16 bits
memory: fix RAM subpages in newly initialized pages
memory: unify the two branches of cpu_register_physical_memory_log()
memory: move tlb flush to MemoryListener commit callback
memory: make phys_page_find() return a MemoryRegionSection
memory: give phys_page_find() its own tree search loop
memory: simplify multipage/subpage registration
memory: replace phys_page_find_alloc() with phys_page_set()
memory: switch phys_page_set() to a recursive implementation
memory: change phys_page_set() to set multiple pages
memory: unify PhysPageEntry::node and ::leaf
memory: allow phys_map tree paths to terminate early
exec.c | 828 +++++++++++++++++++++++++++++++++++++----------------------
hw/vhost.c | 18 ++-
kvm-all.c | 18 ++-
memory.c | 50 ++++-
memory.h | 7 +-
xen-all.c | 18 ++-
6 files changed, 620 insertions(+), 319 deletions(-)
--
1.7.9
* [Qemu-devel] [PATCH 01/20] memory: allow MemoryListeners to observe a specific address space
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
Ignore any regions not belonging to a specified address space.
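As a usage sketch (my_listener and its callbacks are hypothetical; note that
every callback must be filled in, since the core invokes them
unconditionally):

    static MemoryListener my_listener = {
        .region_add = my_region_add,
        .region_del = my_region_del,
        /* ... all remaining callbacks, even if they are empty stubs ... */
        .priority = 10,
    };

    /* observe all address spaces, as before */
    memory_listener_register(&my_listener, NULL);

    /* or: observe only the system memory address space */
    memory_listener_register(&my_listener, get_system_memory());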
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 2 +-
hw/vhost.c | 2 +-
kvm-all.c | 2 +-
memory.c | 45 +++++++++++++++++++++++++++++++++++++++------
memory.h | 4 +++-
xen-all.c | 2 +-
6 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/exec.c b/exec.c
index 7fb5d4e..aa54eb1 100644
--- a/exec.c
+++ b/exec.c
@@ -3571,7 +3571,7 @@ static void memory_map_init(void)
memory_region_init(system_io, "io", 65536);
set_system_io_map(system_io);
- memory_listener_register(&core_memory_listener);
+ memory_listener_register(&core_memory_listener, NULL);
}
MemoryRegion *get_system_memory(void)
diff --git a/hw/vhost.c b/hw/vhost.c
index e1e7e01..01f676a 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -774,7 +774,7 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
hdev->log_size = 0;
hdev->log_enabled = false;
hdev->started = false;
- memory_listener_register(&hdev->memory_listener);
+ memory_listener_register(&hdev->memory_listener, NULL);
hdev->force = force;
return 0;
fail:
diff --git a/kvm-all.c b/kvm-all.c
index a05e591..15bc42f 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1049,7 +1049,7 @@ int kvm_init(void)
}
kvm_state = s;
- memory_listener_register(&kvm_memory_listener);
+ memory_listener_register(&kvm_memory_listener, NULL);
s->many_ioeventfds = kvm_check_many_ioeventfds();
diff --git a/memory.c b/memory.c
index 4f854d4..e66e39a 100644
--- a/memory.c
+++ b/memory.c
@@ -84,7 +84,14 @@ static AddrRange addrrange_intersection(AddrRange r1, AddrRange r2)
enum ListenerDirection { Forward, Reverse };
-#define MEMORY_LISTENER_CALL(_callback, _direction, _args...) \
+static bool memory_listener_match(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ return !listener->address_space_filter
+ || listener->address_space_filter == section->address_space;
+}
+
+#define MEMORY_LISTENER_CALL_GLOBAL(_callback, _direction, _args...) \
do { \
MemoryListener *_listener; \
\
@@ -105,15 +112,40 @@ static AddrRange addrrange_intersection(AddrRange r1, AddrRange r2)
} \
} while (0)
+#define MEMORY_LISTENER_CALL(_callback, _direction, _section, _args...) \
+ do { \
+ MemoryListener *_listener; \
+ \
+ switch (_direction) { \
+ case Forward: \
+ QTAILQ_FOREACH(_listener, &memory_listeners, link) { \
+ if (memory_listener_match(_listener, _section)) { \
+ _listener->_callback(_listener, _section, ##_args); \
+ } \
+ } \
+ break; \
+ case Reverse: \
+ QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners, \
+ memory_listeners, link) { \
+ if (memory_listener_match(_listener, _section)) { \
+ _listener->_callback(_listener, _section, ##_args); \
+ } \
+ } \
+ break; \
+ default: \
+ abort(); \
+ } \
+ } while (0)
+
#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback) \
- MEMORY_LISTENER_CALL(callback, dir, &(MemoryRegionSection) { \
+ MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) { \
.mr = (fr)->mr, \
.address_space = (as)->root, \
.offset_within_region = (fr)->offset_in_region, \
.size = int128_get64((fr)->addr.size), \
.offset_within_address_space = int128_get64((fr)->addr.start), \
.readonly = (fr)->readonly, \
- })
+ }))
struct CoalescedMemoryRange {
AddrRange addr;
@@ -1382,13 +1414,13 @@ void memory_global_sync_dirty_bitmap(MemoryRegion *address_space)
void memory_global_dirty_log_start(void)
{
global_dirty_log = true;
- MEMORY_LISTENER_CALL(log_global_start, Forward);
+ MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
}
void memory_global_dirty_log_stop(void)
{
global_dirty_log = false;
- MEMORY_LISTENER_CALL(log_global_stop, Reverse);
+ MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse);
}
static void listener_add_address_space(MemoryListener *listener,
@@ -1412,10 +1444,11 @@ static void listener_add_address_space(MemoryListener *listener,
}
}
-void memory_listener_register(MemoryListener *listener)
+void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)
{
MemoryListener *other = NULL;
+ listener->address_space_filter = filter;
if (QTAILQ_EMPTY(&memory_listeners)
|| listener->priority >= QTAILQ_LAST(&memory_listeners,
memory_listeners)->priority) {
diff --git a/memory.h b/memory.h
index 1d99cee..bc9600b 100644
--- a/memory.h
+++ b/memory.h
@@ -193,6 +193,7 @@ struct MemoryListener {
bool match_data, uint64_t data, int fd);
/* Lower = earlier (during add), later (during del) */
unsigned priority;
+ MemoryRegion *address_space_filter;
QTAILQ_ENTRY(MemoryListener) link;
};
@@ -685,8 +686,9 @@ void memory_region_transaction_commit(void);
* space
*
* @listener: an object containing the callbacks to be called
+ * @filter: if non-%NULL, only regions in this address space will be observed
*/
-void memory_listener_register(MemoryListener *listener);
+void memory_listener_register(MemoryListener *listener, MemoryRegion *filter);
/**
* memory_listener_unregister: undo the effect of memory_listener_register()
diff --git a/xen-all.c b/xen-all.c
index e005b63..dd52f02 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -989,7 +989,7 @@ int xen_hvm_init(void)
state->memory_listener = xen_memory_listener;
QLIST_INIT(&state->physmap);
- memory_listener_register(&state->memory_listener);
+ memory_listener_register(&state->memory_listener, NULL);
state->log_for_dirtybit = NULL;
/* Initialize backend core & drivers */
--
1.7.9
* [Qemu-devel] [PATCH 02/20] xen: ignore I/O memory regions
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
Signed-off-by: Avi Kivity <avi@redhat.com>
---
xen-all.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/xen-all.c b/xen-all.c
index dd52f02..a58a397 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -989,7 +989,7 @@ int xen_hvm_init(void)
state->memory_listener = xen_memory_listener;
QLIST_INIT(&state->physmap);
- memory_listener_register(&state->memory_listener, NULL);
+ memory_listener_register(&state->memory_listener, get_system_memory());
state->log_for_dirtybit = NULL;
/* Initialize backend core & drivers */
--
1.7.9
* [Qemu-devel] [PATCH 03/20] memory: split memory listener for the two address spaces
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
The memory and I/O address spaces do different things, so split them into
two memory listeners.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
1 files changed, 66 insertions(+), 14 deletions(-)
diff --git a/exec.c b/exec.c
index aa54eb1..16973a6 100644
--- a/exec.c
+++ b/exec.c
@@ -3491,24 +3491,13 @@ static void io_mem_init(void)
static void core_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
- if (section->address_space == get_system_memory()) {
- cpu_register_physical_memory_log(section, section->readonly);
- } else {
- iorange_init(&section->mr->iorange, &memory_region_iorange_ops,
- section->offset_within_address_space, section->size);
- ioport_register(&section->mr->iorange);
- }
+ cpu_register_physical_memory_log(section, section->readonly);
}
static void core_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
- if (section->address_space == get_system_memory()) {
- cpu_register_physical_memory_log(section, false);
- } else {
- isa_unassign_ioport(section->offset_within_address_space,
- section->size);
- }
+ cpu_register_physical_memory_log(section, false);
}
static void core_log_start(MemoryListener *listener,
@@ -3548,6 +3537,55 @@ static void core_eventfd_del(MemoryListener *listener,
{
}
+static void io_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ iorange_init(&section->mr->iorange, &memory_region_iorange_ops,
+ section->offset_within_address_space, section->size);
+ ioport_register(&section->mr->iorange);
+}
+
+static void io_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ isa_unassign_ioport(section->offset_within_address_space, section->size);
+}
+
+static void io_log_start(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
+static void io_log_stop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
+static void io_log_sync(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
+static void io_log_global_start(MemoryListener *listener)
+{
+}
+
+static void io_log_global_stop(MemoryListener *listener)
+{
+}
+
+static void io_eventfd_add(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data, int fd)
+{
+}
+
+static void io_eventfd_del(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data, int fd)
+{
+}
+
static MemoryListener core_memory_listener = {
.region_add = core_region_add,
.region_del = core_region_del,
@@ -3561,6 +3599,19 @@ static void core_eventfd_del(MemoryListener *listener,
.priority = 0,
};
+static MemoryListener io_memory_listener = {
+ .region_add = io_region_add,
+ .region_del = io_region_del,
+ .log_start = io_log_start,
+ .log_stop = io_log_stop,
+ .log_sync = io_log_sync,
+ .log_global_start = io_log_global_start,
+ .log_global_stop = io_log_global_stop,
+ .eventfd_add = io_eventfd_add,
+ .eventfd_del = io_eventfd_del,
+ .priority = 0,
+};
+
static void memory_map_init(void)
{
system_memory = g_malloc(sizeof(*system_memory));
@@ -3571,7 +3622,8 @@ static void memory_map_init(void)
memory_region_init(system_io, "io", 65536);
set_system_io_map(system_io);
- memory_listener_register(&core_memory_listener, NULL);
+ memory_listener_register(&core_memory_listener, system_memory);
+ memory_listener_register(&io_memory_listener, system_io);
}
MemoryRegion *get_system_memory(void)
--
1.7.9
* [Qemu-devel] [PATCH 04/20] memory: support stateless memory listeners
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
Current memory listeners are incremental; that is, they are expected to
maintain their own state, and receive callbacks for changes to that state.
This patch adds support for stateless listeners; these work by receiving
a ->begin() callback (which tells them that new state is coming), a
sequence of ->region_add() and ->region_nop() callbacks, and then a
->commit() callback which signifies the end of the new state. They should
ignore ->region_del() callbacks.
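A minimal skeleton of such a listener (hypothetical names; the per-user
stubs added below have exactly this shape until they grow real bodies):

    static void my_begin(MemoryListener *listener)
    {
        /* new state is coming: discard the previously built table */
    }

    static void my_region_add(MemoryListener *listener,
                              MemoryRegionSection *section)
    {
        /* called once per new section; ->region_nop() fires for
           unchanged sections, so the whole map can be rebuilt */
    }

    static void my_commit(MemoryListener *listener)
    {
        /* the new state is complete: publish the rebuilt table */
    }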
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 32 ++++++++++++++++++++++++++++++++
hw/vhost.c | 16 ++++++++++++++++
kvm-all.c | 16 ++++++++++++++++
memory.c | 5 +++++
memory.h | 3 +++
xen-all.c | 16 ++++++++++++++++
6 files changed, 88 insertions(+), 0 deletions(-)
diff --git a/exec.c b/exec.c
index 16973a6..6726afd 100644
--- a/exec.c
+++ b/exec.c
@@ -3488,6 +3488,14 @@ static void io_mem_init(void)
"watch", UINT64_MAX);
}
+static void core_begin(MemoryListener *listener)
+{
+}
+
+static void core_commit(MemoryListener *listener)
+{
+}
+
static void core_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -3500,6 +3508,11 @@ static void core_region_del(MemoryListener *listener,
cpu_register_physical_memory_log(section, false);
}
+static void core_region_nop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
static void core_log_start(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -3537,6 +3550,14 @@ static void core_eventfd_del(MemoryListener *listener,
{
}
+static void io_begin(MemoryListener *listener)
+{
+}
+
+static void io_commit(MemoryListener *listener)
+{
+}
+
static void io_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -3551,6 +3572,11 @@ static void io_region_del(MemoryListener *listener,
isa_unassign_ioport(section->offset_within_address_space, section->size);
}
+static void io_region_nop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
static void io_log_start(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -3587,8 +3613,11 @@ static void io_eventfd_del(MemoryListener *listener,
}
static MemoryListener core_memory_listener = {
+ .begin = core_begin,
+ .commit = core_commit,
.region_add = core_region_add,
.region_del = core_region_del,
+ .region_nop = core_region_nop,
.log_start = core_log_start,
.log_stop = core_log_stop,
.log_sync = core_log_sync,
@@ -3600,8 +3629,11 @@ static void io_eventfd_del(MemoryListener *listener,
};
static MemoryListener io_memory_listener = {
+ .begin = io_begin,
+ .commit = io_commit,
.region_add = io_region_add,
.region_del = io_region_del,
+ .region_nop = io_region_nop,
.log_start = io_log_start,
.log_stop = io_log_stop,
.log_sync = io_log_sync,
diff --git a/hw/vhost.c b/hw/vhost.c
index 01f676a..8d3ba5b 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -436,6 +436,14 @@ static bool vhost_section(MemoryRegionSection *section)
&& memory_region_is_ram(section->mr);
}
+static void vhost_begin(MemoryListener *listener)
+{
+}
+
+static void vhost_commit(MemoryListener *listener)
+{
+}
+
static void vhost_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -476,6 +484,11 @@ static void vhost_region_del(MemoryListener *listener,
}
}
+static void vhost_region_nop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
struct vhost_virtqueue *vq,
unsigned idx, bool enable_log)
@@ -756,8 +769,11 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
hdev->features = features;
hdev->memory_listener = (MemoryListener) {
+ .begin = vhost_begin,
+ .commit = vhost_commit,
.region_add = vhost_region_add,
.region_del = vhost_region_del,
+ .region_nop = vhost_region_nop,
.log_start = vhost_log_start,
.log_stop = vhost_log_stop,
.log_sync = vhost_log_sync,
diff --git a/kvm-all.c b/kvm-all.c
index 15bc42f..c07823d 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -680,6 +680,14 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
}
}
+static void kvm_begin(MemoryListener *listener)
+{
+}
+
+static void kvm_commit(MemoryListener *listener)
+{
+}
+
static void kvm_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -692,6 +700,11 @@ static void kvm_region_del(MemoryListener *listener,
kvm_set_phys_mem(section, false);
}
+static void kvm_region_nop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
static void kvm_log_sync(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -795,8 +808,11 @@ static void kvm_eventfd_del(MemoryListener *listener,
}
static MemoryListener kvm_memory_listener = {
+ .begin = kvm_begin,
+ .commit = kvm_commit,
.region_add = kvm_region_add,
.region_del = kvm_region_del,
+ .region_nop = kvm_region_nop,
.log_start = kvm_log_start,
.log_stop = kvm_log_stop,
.log_sync = kvm_log_sync,
diff --git a/memory.c b/memory.c
index e66e39a..414268e 100644
--- a/memory.c
+++ b/memory.c
@@ -676,6 +676,7 @@ static void address_space_update_topology_pass(AddressSpace *as,
/* In both (logging may have changed) */
if (adding) {
+ MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop);
if (frold->dirty_log_mask && !frnew->dirty_log_mask) {
MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop);
} else if (frnew->dirty_log_mask && !frold->dirty_log_mask) {
@@ -722,6 +723,8 @@ static void memory_region_update_topology(MemoryRegion *mr)
return;
}
+ MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
+
if (address_space_memory.root) {
address_space_update_topology(&address_space_memory);
}
@@ -729,6 +732,8 @@ static void memory_region_update_topology(MemoryRegion *mr)
address_space_update_topology(&address_space_io);
}
+ MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
+
memory_region_update_pending = false;
}
diff --git a/memory.h b/memory.h
index bc9600b..b7bccd1 100644
--- a/memory.h
+++ b/memory.h
@@ -180,8 +180,11 @@ typedef struct MemoryListener MemoryListener;
* Use with memory_listener_register() and memory_listener_unregister().
*/
struct MemoryListener {
+ void (*begin)(MemoryListener *listener);
+ void (*commit)(MemoryListener *listener);
void (*region_add)(MemoryListener *listener, MemoryRegionSection *section);
void (*region_del)(MemoryListener *listener, MemoryRegionSection *section);
+ void (*region_nop)(MemoryListener *listener, MemoryRegionSection *section);
void (*log_start)(MemoryListener *listener, MemoryRegionSection *section);
void (*log_stop)(MemoryListener *listener, MemoryRegionSection *section);
void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section);
diff --git a/xen-all.c b/xen-all.c
index a58a397..6a11342 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -394,6 +394,14 @@ static void xen_set_memory(struct MemoryListener *listener,
}
}
+static void xen_begin(MemoryListener *listener)
+{
+}
+
+static void xen_commit(MemoryListener *listener)
+{
+}
+
static void xen_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -406,6 +414,11 @@ static void xen_region_del(MemoryListener *listener,
xen_set_memory(listener, section, false);
}
+static void xen_region_nop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
static void xen_sync_dirty_bitmap(XenIOState *state,
target_phys_addr_t start_addr,
ram_addr_t size)
@@ -500,8 +513,11 @@ static void xen_eventfd_del(MemoryListener *listener,
}
static MemoryListener xen_memory_listener = {
+ .begin = xen_begin,
+ .commit = xen_commit,
.region_add = xen_region_add,
.region_del = xen_region_del,
+ .region_nop = xen_region_nop,
.log_start = xen_log_start,
.log_stop = xen_log_stop,
.log_sync = xen_log_sync,
--
1.7.9
* [Qemu-devel] [PATCH 05/20] memory: change memory registration to rebuild the memory map on each change
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
Instead of incrementally building the memory map, rebuild it every time.
This allows later simplification, since the code need not consider overlaying
a previous mapping. It is also RCU friendly.
With large-memory guests this can get expensive, since the operation is
O(memory size), but this will be optimized later.
As a side effect, subpage and L2 leaks are fixed here.
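In terms of the callbacks from the previous patch, a topology update now
drives the core listener roughly like this (a sketch; the actual wiring
is in the hunks below):

    begin      -> destroy_all_mappings()               /* drop old map */
    region_nop -> cpu_register_physical_memory_log()   /* re-register  */
    region_add -> cpu_register_physical_memory_log()   /* register new */
    region_del -> (nothing: the old map is already gone)
    commit     -> (nothing yet; the TLB flush moves here in patch 12)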
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 49 insertions(+), 1 deletions(-)
diff --git a/exec.c b/exec.c
index 6726afd..b36c301 100644
--- a/exec.c
+++ b/exec.c
@@ -2520,6 +2520,53 @@ static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
} \
} while (0)
+static void destroy_page_desc(PhysPageDesc pd)
+{
+ unsigned io_index = pd.phys_offset & ~TARGET_PAGE_MASK;
+ MemoryRegion *mr = io_mem_region[io_index];
+
+ if (mr->subpage) {
+ subpage_t *subpage = container_of(mr, subpage_t, iomem);
+ memory_region_destroy(&subpage->iomem);
+ g_free(subpage);
+ }
+}
+
+static void destroy_l2_mapping(void **lp, unsigned level)
+{
+ unsigned i;
+ void **p;
+ PhysPageDesc *pd;
+
+ if (!*lp) {
+ return;
+ }
+
+ if (level > 0) {
+ p = *lp;
+ for (i = 0; i < L2_SIZE; ++i) {
+ destroy_l2_mapping(&p[i], level - 1);
+ }
+ g_free(p);
+ } else {
+ pd = *lp;
+ for (i = 0; i < L2_SIZE; ++i) {
+ destroy_page_desc(pd[i]);
+ }
+ g_free(pd);
+ }
+ *lp = NULL;
+}
+
+static void destroy_all_mappings(void)
+{
+ unsigned i;
+
+ for (i = 0; i < P_L1_SIZE; ++i) {
+ destroy_l2_mapping(&l1_phys_map[i], P_L1_SHIFT / L2_BITS - 1);
+ }
+}
+
/* register physical memory.
For RAM, 'size' must be a multiple of the target page size.
If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
@@ -3490,6 +3537,7 @@ static void io_mem_init(void)
static void core_begin(MemoryListener *listener)
{
+ destroy_all_mappings();
}
static void core_commit(MemoryListener *listener)
@@ -3505,12 +3553,12 @@ static void core_region_add(MemoryListener *listener,
static void core_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
- cpu_register_physical_memory_log(section, false);
}
static void core_region_nop(MemoryListener *listener,
MemoryRegionSection *section)
{
+ cpu_register_physical_memory_log(section, section->readonly);
}
static void core_log_start(MemoryListener *listener,
--
1.7.9
* [Qemu-devel] [PATCH 06/20] memory: remove first level of l1_phys_map
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
L1 and the lower levels in l1_phys_map are equivalent, except that L1 has
a different size and is always allocated. Simplify the code by removing
L1. This leaves us with a tree composed solely of L2 tables, but that
problem can be renamed away later.
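For concreteness, the resulting depth (a worked example assuming
TARGET_PHYS_ADDR_SPACE_BITS = 64, TARGET_PAGE_BITS = 12, L2_BITS = 10):

    P_L2_LEVELS = ((64 - 12 - 1) / 10) + 1 = 6

i.e. six levels of L2 tables, each consuming 10 bits of the page index.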
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 29 ++++++++---------------------
1 files changed, 8 insertions(+), 21 deletions(-)
diff --git a/exec.c b/exec.c
index b36c301..c541ee7 100644
--- a/exec.c
+++ b/exec.c
@@ -160,29 +160,21 @@
#define L2_BITS 10
#define L2_SIZE (1 << L2_BITS)
+#define P_L2_LEVELS \
+ (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
+
/* The bits remaining after N lower levels of page tables. */
-#define P_L1_BITS_REM \
- ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
#define V_L1_BITS_REM \
((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
-/* Size of the L1 page table. Avoid silly small sizes. */
-#if P_L1_BITS_REM < 4
-#define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
-#else
-#define P_L1_BITS P_L1_BITS_REM
-#endif
-
#if V_L1_BITS_REM < 4
#define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
#else
#define V_L1_BITS V_L1_BITS_REM
#endif
-#define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
#define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
-#define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
unsigned long qemu_real_host_page_size;
@@ -202,7 +194,7 @@
/* This is a multi-level map on the physical address space.
The bottom level has pointers to PhysPageDesc. */
-static void *l1_phys_map[P_L1_SIZE];
+static void *phys_map;
static void io_mem_init(void);
static void memory_map_init(void);
@@ -404,11 +396,10 @@ static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
void **lp;
int i;
- /* Level 1. Always allocated. */
- lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
+ lp = &phys_map;
- /* Level 2..N-1. */
- for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+ /* Level 1..N-1. */
+ for (i = P_L2_LEVELS - 1; i > 0; i--) {
void **p = *lp;
if (p == NULL) {
if (!alloc) {
@@ -2560,11 +2551,7 @@ static void destroy_l2_mapping(void **lp, unsigned level)
static void destroy_all_mappings(void)
{
- unsigned i;
-
- for (i = 0; i < P_L1_SIZE; ++i) {
- destroy_l2_mapping(&l1_phys_map[i], P_L1_SHIFT / L2_BITS - 1);
- }
+ destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
}
/* register physical memory.
--
1.7.9
* [Qemu-devel] [PATCH 07/20] memory: unify phys_map last level with intermediate levels
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
This lays the groundwork for storing leaf data in intermediate levels,
saving space.
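The payoff comes once a leaf may sit at any level: a single entry can then
describe an entire aligned power-of-two range (a sketch, assuming 4k pages
and L2_BITS = 10):

    leaf at level 0 -> 1 page     =  4 KB
    leaf at level 1 -> 2^10 pages =  4 MB
    leaf at level 2 -> 2^20 pages =  4 GB

which is where the cover letter's 2-bytes-per-aligned-region numbers come
from; patch 20 is what actually lets tree paths terminate early.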
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 78 ++++++++++++++++++++++++++++-----------------------------------
1 files changed, 35 insertions(+), 43 deletions(-)
diff --git a/exec.c b/exec.c
index c541ee7..536e70f 100644
--- a/exec.c
+++ b/exec.c
@@ -192,9 +192,18 @@
ram_addr_t region_offset;
} PhysPageDesc;
+typedef struct PhysPageEntry PhysPageEntry;
+
+struct PhysPageEntry {
+ union {
+ PhysPageDesc leaf;
+ PhysPageEntry *node;
+ } u;
+};
+
/* This is a multi-level map on the physical address space.
The bottom level has pointers to PhysPageDesc. */
-static void *phys_map;
+static PhysPageEntry phys_map;
static void io_mem_init(void);
static void memory_map_init(void);
@@ -392,42 +401,31 @@ static inline PageDesc *page_find(tb_page_addr_t index)
#if !defined(CONFIG_USER_ONLY)
static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
{
- PhysPageDesc *pd;
- void **lp;
- int i;
+ PhysPageEntry *lp, *p;
+ int i, j;
lp = &phys_map;
- /* Level 1..N-1. */
- for (i = P_L2_LEVELS - 1; i > 0; i--) {
- void **p = *lp;
- if (p == NULL) {
+ /* Level 1..N. */
+ for (i = P_L2_LEVELS - 1; i >= 0; i--) {
+ if (lp->u.node == NULL) {
if (!alloc) {
return NULL;
}
- *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
- }
- lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
- }
-
- pd = *lp;
- if (pd == NULL) {
- int i;
- int first_index = index & ~(L2_SIZE - 1);
-
- if (!alloc) {
- return NULL;
- }
-
- *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
-
- for (i = 0; i < L2_SIZE; i++) {
- pd[i].phys_offset = io_mem_unassigned.ram_addr;
- pd[i].region_offset = (first_index + i) << TARGET_PAGE_BITS;
+ lp->u.node = p = g_malloc0(sizeof(PhysPageEntry) * L2_SIZE);
+ if (i == 0) {
+ int first_index = index & ~(L2_SIZE - 1);
+ for (j = 0; j < L2_SIZE; j++) {
+ p[j].u.leaf.phys_offset = io_mem_unassigned.ram_addr;
+ p[j].u.leaf.region_offset
+ = (first_index + j) << TARGET_PAGE_BITS;
+ }
+ }
}
+ lp = &lp->u.node[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
}
- return pd + (index & (L2_SIZE - 1));
+ return &lp->u.leaf;
}
static inline PhysPageDesc phys_page_find(target_phys_addr_t index)
@@ -2523,30 +2521,24 @@ static void destroy_page_desc(PhysPageDesc pd)
}
}
-static void destroy_l2_mapping(void **lp, unsigned level)
+static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
{
unsigned i;
- void **p;
- PhysPageDesc *pd;
+ PhysPageEntry *p = lp->u.node;
- if (!*lp) {
+ if (!p) {
return;
}
- if (level > 0) {
- p = *lp;
- for (i = 0; i < L2_SIZE; ++i) {
+ for (i = 0; i < L2_SIZE; ++i) {
+ if (level > 0) {
destroy_l2_mapping(&p[i], level - 1);
+ } else {
+ destroy_page_desc(p[i].u.leaf);
}
- g_free(p);
- } else {
- pd = *lp;
- for (i = 0; i < L2_SIZE; ++i) {
- destroy_page_desc(pd[i]);
- }
- g_free(pd);
}
- *lp = NULL;
+ g_free(p);
+ lp->u.node = NULL;
}
static void destroy_all_mappings(void)
--
1.7.9
* [Qemu-devel] [PATCH 08/20] memory: store MemoryRegionSection pointers in phys_map
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
Instead of storing PhysPageDesc, store pointers to MemoryRegionSections.
The various offsets (phys_offset & ~TARGET_PAGE_MASK,
PHYS_OFFSET & TARGET_PAGE_MASK, region_offset) can all be synthesized
from the information in a MemoryRegionSection. Adjust phys_page_find()
to synthesize a PhysPageDesc.
The upshot is that phys_map now contains uniform values, so it's easier
to generate and compress.
The end result is somewhat clumsy, but this will be improved as we
propagate MemoryRegionSections throughout the code instead of transforming
them to PhysPageDesc.
The MemoryRegionSection pointers are stored as uint16_t offsets in an
array. This saves space (when we also compress node pointers) and is
more cache friendly.
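The space win, roughly (assuming a 64-bit host):

    before: sizeof(PhysPageDesc) = 2 * sizeof(ram_addr_t) = 16 bytes/page
    after:  sizeof(uint16_t)                              =  2 bytes/page

and 65536 section indices are plenty, since sections are created per
registered region (plus subpages), not per page.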
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 187 ++++++++++++++++++++++++++++++++++++---------------------------
1 files changed, 107 insertions(+), 80 deletions(-)
diff --git a/exec.c b/exec.c
index 536e70f..957bc6d 100644
--- a/exec.c
+++ b/exec.c
@@ -194,9 +194,13 @@
typedef struct PhysPageEntry PhysPageEntry;
+static MemoryRegionSection *phys_sections;
+static unsigned phys_sections_nb, phys_sections_nb_alloc;
+static uint16_t phys_section_unassigned;
+
struct PhysPageEntry {
union {
- PhysPageDesc leaf;
+ uint16_t leaf; /* index into phys_sections */
PhysPageEntry *node;
} u;
};
@@ -399,7 +403,7 @@ static inline PageDesc *page_find(tb_page_addr_t index)
}
#if !defined(CONFIG_USER_ONLY)
-static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
+static uint16_t *phys_page_find_alloc(target_phys_addr_t index, int alloc)
{
PhysPageEntry *lp, *p;
int i, j;
@@ -414,11 +418,8 @@ static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
}
lp->u.node = p = g_malloc0(sizeof(PhysPageEntry) * L2_SIZE);
if (i == 0) {
- int first_index = index & ~(L2_SIZE - 1);
for (j = 0; j < L2_SIZE; j++) {
- p[j].u.leaf.phys_offset = io_mem_unassigned.ram_addr;
- p[j].u.leaf.region_offset
- = (first_index + j) << TARGET_PAGE_BITS;
+ p[j].u.leaf = phys_section_unassigned;
}
}
}
@@ -430,16 +431,31 @@ static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
static inline PhysPageDesc phys_page_find(target_phys_addr_t index)
{
- PhysPageDesc *p = phys_page_find_alloc(index, 0);
+ uint16_t *p = phys_page_find_alloc(index, 0);
+ uint16_t s_index = phys_section_unassigned;
+ MemoryRegionSection *section;
+ PhysPageDesc pd;
if (p) {
- return *p;
- } else {
- return (PhysPageDesc) {
- .phys_offset = io_mem_unassigned.ram_addr,
- .region_offset = index << TARGET_PAGE_BITS,
- };
+ s_index = *p;
+ }
+ section = &phys_sections[s_index];
+ index <<= TARGET_PAGE_BITS;
+ assert(section->offset_within_address_space <= index
+ && index <= section->offset_within_address_space + section->size-1);
+ pd.phys_offset = section->mr->ram_addr;
+ pd.region_offset = (index - section->offset_within_address_space)
+ + section->offset_within_region;
+ if (memory_region_is_ram(section->mr)) {
+ pd.phys_offset += pd.region_offset;
+ pd.region_offset = 0;
+ } else if (section->mr->rom_device) {
+ pd.phys_offset += pd.region_offset;
}
+ if (section->readonly) {
+ pd.phys_offset |= io_mem_rom.ram_addr;
+ }
+ return pd;
}
static void tlb_protect_code(ram_addr_t ram_addr);
@@ -2480,15 +2496,13 @@ static inline void tlb_set_dirty(CPUState *env,
typedef struct subpage_t {
MemoryRegion iomem;
target_phys_addr_t base;
- ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
- ram_addr_t region_offset[TARGET_PAGE_SIZE];
+ uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
- ram_addr_t memory, ram_addr_t region_offset);
-static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
- ram_addr_t orig_memory,
- ram_addr_t region_offset);
+ uint16_t section);
+static subpage_t *subpage_init (target_phys_addr_t base, uint16_t *section,
+ uint16_t orig_section);
#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
need_subpage) \
do { \
@@ -2509,10 +2523,10 @@ static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
} \
} while (0)
-static void destroy_page_desc(PhysPageDesc pd)
+static void destroy_page_desc(uint16_t section_index)
{
- unsigned io_index = pd.phys_offset & ~TARGET_PAGE_MASK;
- MemoryRegion *mr = io_mem_region[io_index];
+ MemoryRegionSection *section = &phys_sections[section_index];
+ MemoryRegion *mr = section->mr;
if (mr->subpage) {
subpage_t *subpage = container_of(mr, subpage_t, iomem);
@@ -2546,6 +2560,22 @@ static void destroy_all_mappings(void)
destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
}
+static uint16_t phys_section_add(MemoryRegionSection *section)
+{
+ if (phys_sections_nb == phys_sections_nb_alloc) {
+ phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
+ phys_sections = g_renew(MemoryRegionSection, phys_sections,
+ phys_sections_nb_alloc);
+ }
+ phys_sections[phys_sections_nb] = *section;
+ return phys_sections_nb++;
+}
+
+static void phys_sections_clear(void)
+{
+ phys_sections_nb = 0;
+}
+
/* register physical memory.
For RAM, 'size' must be a multiple of the target page size.
If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
@@ -2559,67 +2589,46 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
{
target_phys_addr_t start_addr = section->offset_within_address_space;
ram_addr_t size = section->size;
- ram_addr_t phys_offset = section->mr->ram_addr;
- ram_addr_t region_offset = section->offset_within_region;
target_phys_addr_t addr, end_addr;
- PhysPageDesc *p;
+ uint16_t *p;
CPUState *env;
ram_addr_t orig_size = size;
subpage_t *subpage;
-
- if (memory_region_is_ram(section->mr)) {
- phys_offset += region_offset;
- region_offset = 0;
- }
-
- if (readonly) {
- phys_offset |= io_mem_rom.ram_addr;
- }
+ uint16_t section_index = phys_section_add(section);
assert(size);
- if (phys_offset == io_mem_unassigned.ram_addr) {
- region_offset = start_addr;
- }
- region_offset &= TARGET_PAGE_MASK;
size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
end_addr = start_addr + (target_phys_addr_t)size;
addr = start_addr;
do {
p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 0);
- if (p && p->phys_offset != io_mem_unassigned.ram_addr) {
- ram_addr_t orig_memory = p->phys_offset;
+ if (p && *p != phys_section_unassigned) {
+ uint16_t orig_memory= *p;
target_phys_addr_t start_addr2, end_addr2;
int need_subpage = 0;
- MemoryRegion *mr = io_mem_region[orig_memory & ~TARGET_PAGE_MASK];
+ MemoryRegion *mr = phys_sections[orig_memory].mr;
CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
need_subpage);
if (need_subpage) {
if (!(mr->subpage)) {
subpage = subpage_init((addr & TARGET_PAGE_MASK),
- &p->phys_offset, orig_memory,
- p->region_offset);
+ p, orig_memory);
} else {
subpage = container_of(mr, subpage_t, iomem);
}
- subpage_register(subpage, start_addr2, end_addr2, phys_offset,
- region_offset);
- p->region_offset = 0;
+ subpage_register(subpage, start_addr2, end_addr2,
+ section_index);
} else {
- p->phys_offset = phys_offset;
- p->region_offset = region_offset;
- if (is_ram_rom_romd(phys_offset))
- phys_offset += TARGET_PAGE_SIZE;
+ *p = section_index;
}
} else {
+ MemoryRegion *mr = section->mr;
p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
- p->phys_offset = phys_offset;
- p->region_offset = region_offset;
- if (is_ram_rom_romd(phys_offset)) {
- phys_offset += TARGET_PAGE_SIZE;
- } else {
+ *p = section_index;
+ if (!(memory_region_is_ram(mr) || mr->rom_device)) {
target_phys_addr_t start_addr2, end_addr2;
int need_subpage = 0;
@@ -2628,16 +2637,12 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
if (need_subpage) {
subpage = subpage_init((addr & TARGET_PAGE_MASK),
- &p->phys_offset,
- io_mem_unassigned.ram_addr,
- addr & TARGET_PAGE_MASK);
+ p, phys_section_unassigned);
subpage_register(subpage, start_addr2, end_addr2,
- phys_offset, region_offset);
- p->region_offset = 0;
+ section_index);
}
}
}
- region_offset += TARGET_PAGE_SIZE;
addr += TARGET_PAGE_SIZE;
} while (addr != end_addr);
@@ -3333,14 +3338,17 @@ static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
{
subpage_t *mmio = opaque;
unsigned int idx = SUBPAGE_IDX(addr);
+ MemoryRegionSection *section;
#if defined(DEBUG_SUBPAGE)
printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
mmio, len, addr, idx);
#endif
- addr += mmio->region_offset[idx];
- idx = mmio->sub_io_index[idx];
- return io_mem_read(idx, addr, len);
+ section = &phys_sections[mmio->sub_section[idx]];
+ addr += mmio->base;
+ addr -= section->offset_within_address_space;
+ addr += section->offset_within_region;
+ return io_mem_read(section->mr->ram_addr, addr, len);
}
static void subpage_write(void *opaque, target_phys_addr_t addr,
@@ -3348,15 +3356,18 @@ static void subpage_write(void *opaque, target_phys_addr_t addr,
{
subpage_t *mmio = opaque;
unsigned int idx = SUBPAGE_IDX(addr);
+ MemoryRegionSection *section;
#if defined(DEBUG_SUBPAGE)
printf("%s: subpage %p len %d addr " TARGET_FMT_plx
" idx %d value %"PRIx64"\n",
__func__, mmio, len, addr, idx, value);
#endif
- addr += mmio->region_offset[idx];
- idx = mmio->sub_io_index[idx];
- io_mem_write(idx, addr, value, len);
+ section = &phys_sections[mmio->sub_section[idx]];
+ addr += mmio->base;
+ addr -= section->offset_within_address_space;
+ addr += section->offset_within_region;
+ io_mem_write(section->mr->ram_addr, addr, value, len);
}
static const MemoryRegionOps subpage_ops = {
@@ -3398,7 +3409,7 @@ static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
};
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
- ram_addr_t memory, ram_addr_t region_offset)
+ uint16_t section)
{
int idx, eidx;
@@ -3410,24 +3421,26 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
mmio, start, end, idx, eidx, memory);
#endif
- if ((memory & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
- memory = io_mem_subpage_ram.ram_addr;
+ if (memory_region_is_ram(phys_sections[section].mr)) {
+ MemoryRegionSection new_section = phys_sections[section];
+ new_section.mr = &io_mem_subpage_ram;
+ section = phys_section_add(&new_section);
}
- memory &= IO_MEM_NB_ENTRIES - 1;
for (; idx <= eidx; idx++) {
- mmio->sub_io_index[idx] = memory;
- mmio->region_offset[idx] = region_offset;
+ mmio->sub_section[idx] = section;
}
return 0;
}
-static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
- ram_addr_t orig_memory,
- ram_addr_t region_offset)
+static subpage_t *subpage_init (target_phys_addr_t base, uint16_t *section_ind,
+ uint16_t orig_section)
{
subpage_t *mmio;
- int subpage_memory;
+ MemoryRegionSection section = {
+ .offset_within_address_space = base,
+ .size = TARGET_PAGE_SIZE,
+ };
mmio = g_malloc0(sizeof(subpage_t));
@@ -3435,13 +3448,13 @@ static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
"subpage", TARGET_PAGE_SIZE);
mmio->iomem.subpage = true;
- subpage_memory = mmio->iomem.ram_addr;
+ section.mr = &mmio->iomem;
#if defined(DEBUG_SUBPAGE)
printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
mmio, base, TARGET_PAGE_SIZE, subpage_memory);
#endif
- *phys = subpage_memory;
- subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
+ *section_ind = phys_section_add(&section);
+ subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_section);
return mmio;
}
@@ -3493,6 +3506,18 @@ void cpu_unregister_io_memory(int io_index)
io_mem_used[io_index] = 0;
}
+static uint16_t dummy_section(MemoryRegion *mr)
+{
+ MemoryRegionSection section = {
+ .mr = mr,
+ .offset_within_address_space = 0,
+ .offset_within_region = 0,
+ .size = UINT64_MAX,
+ };
+
+ return phys_section_add(&section);
+}
+
static void io_mem_init(void)
{
int i;
@@ -3517,6 +3542,8 @@ static void io_mem_init(void)
static void core_begin(MemoryListener *listener)
{
destroy_all_mappings();
+ phys_sections_clear();
+ phys_section_unassigned = dummy_section(&io_mem_unassigned);
}
static void core_commit(MemoryListener *listener)
--
1.7.9
* [Qemu-devel] [PATCH 09/20] memory: compress phys_map node pointers to 16 bits
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
Use an expanding vector to store nodes. Allocation is baroque due to
g_renew() potentially invalidating pointers; this will be addressed later.
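The subtlety, spelled out (this mirrors the comment in
phys_map_node_alloc() below):

    /* 'ptr' typically points at a parent entry inside phys_map_nodes[]
     * itself; g_renew() may move the whole array, so the new index must
     * be stored through 'ptr' before any reallocation can happen. */

With L2_SIZE = 1024 and 2-byte entries, each node is 2KB, and the 16-bit
index caps the pool at 65535 nodes (PHYS_MAP_NODE_NIL is reserved).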
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 54 +++++++++++++++++++++++++++++++++++++++++++++---------
1 files changed, 45 insertions(+), 9 deletions(-)
diff --git a/exec.c b/exec.c
index 957bc6d..0756919 100644
--- a/exec.c
+++ b/exec.c
@@ -201,13 +201,19 @@
struct PhysPageEntry {
union {
uint16_t leaf; /* index into phys_sections */
- PhysPageEntry *node;
+ uint16_t node; /* index into phys_map_nodes */
} u;
};
+/* Simple allocator for PhysPageEntry nodes */
+static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
+static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
+
+#define PHYS_MAP_NODE_NIL ((uint16_t)~0)
+
/* This is a multi-level map on the physical address space.
The bottom level has pointers to PhysPageDesc. */
-static PhysPageEntry phys_map;
+static PhysPageEntry phys_map = { .u.node = PHYS_MAP_NODE_NIL };
static void io_mem_init(void);
static void memory_map_init(void);
@@ -403,6 +409,32 @@ static inline PageDesc *page_find(tb_page_addr_t index)
}
#if !defined(CONFIG_USER_ONLY)
+
+static PhysPageEntry *phys_map_node_alloc(uint16_t *ptr)
+{
+ unsigned i;
+ uint16_t ret;
+
+ /* Assign early to avoid the pointer being invalidated by g_renew() */
+ *ptr = ret = phys_map_nodes_nb++;
+ assert(ret != PHYS_MAP_NODE_NIL);
+ if (ret == phys_map_nodes_nb_alloc) {
+ typedef PhysPageEntry Node[L2_SIZE];
+ phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
+ phys_map_nodes = g_renew(Node, phys_map_nodes,
+ phys_map_nodes_nb_alloc);
+ }
+ for (i = 0; i < L2_SIZE; ++i) {
+ phys_map_nodes[ret][i].u.node = PHYS_MAP_NODE_NIL;
+ }
+ return phys_map_nodes[ret];
+}
+
+static void phys_map_nodes_reset(void)
+{
+ phys_map_nodes_nb = 0;
+}
+
static uint16_t *phys_page_find_alloc(target_phys_addr_t index, int alloc)
{
PhysPageEntry *lp, *p;
@@ -412,18 +444,20 @@ static uint16_t *phys_page_find_alloc(target_phys_addr_t index, int alloc)
/* Level 1..N. */
for (i = P_L2_LEVELS - 1; i >= 0; i--) {
- if (lp->u.node == NULL) {
+ if (lp->u.node == PHYS_MAP_NODE_NIL) {
if (!alloc) {
return NULL;
}
- lp->u.node = p = g_malloc0(sizeof(PhysPageEntry) * L2_SIZE);
+ p = phys_map_node_alloc(&lp->u.node);
if (i == 0) {
for (j = 0; j < L2_SIZE; j++) {
p[j].u.leaf = phys_section_unassigned;
}
}
+ } else {
+ p = phys_map_nodes[lp->u.node];
}
- lp = &lp->u.node[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
+ lp = &p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
}
return &lp->u.leaf;
@@ -2538,12 +2572,13 @@ static void destroy_page_desc(uint16_t section_index)
static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
{
unsigned i;
- PhysPageEntry *p = lp->u.node;
+ PhysPageEntry *p;
- if (!p) {
+ if (lp->u.node == PHYS_MAP_NODE_NIL) {
return;
}
+ p = phys_map_nodes[lp->u.node];
for (i = 0; i < L2_SIZE; ++i) {
if (level > 0) {
destroy_l2_mapping(&p[i], level - 1);
@@ -2551,13 +2586,13 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
destroy_page_desc(p[i].u.leaf);
}
}
- g_free(p);
- lp->u.node = NULL;
+ lp->u.node = PHYS_MAP_NODE_NIL;
}
static void destroy_all_mappings(void)
{
destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
+ phys_map_nodes_reset();
}
static uint16_t phys_section_add(MemoryRegionSection *section)
@@ -3543,6 +3578,7 @@ static void core_begin(MemoryListener *listener)
{
destroy_all_mappings();
phys_sections_clear();
+ phys_map.u.node = PHYS_MAP_NODE_NIL;
phys_section_unassigned = dummy_section(&io_mem_unassigned);
}
--
1.7.9
* [Qemu-devel] [PATCH 10/20] memory: fix RAM subpages in newly initialized pages
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
If the first subpage installed in a page is RAM, then we install it as
a full page instead of as a subpage; for example, a small RAM region placed
partway into an otherwise unassigned page ends up mapped as if it covered
the entire page. Fix by not special-casing RAM.
The issue dates to commit db7b5426a4b4242, which introduced subpages.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 22 ++++++++++------------
1 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/exec.c b/exec.c
index 0756919..5d0afdb 100644
--- a/exec.c
+++ b/exec.c
@@ -2660,22 +2660,20 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
*p = section_index;
}
} else {
- MemoryRegion *mr = section->mr;
+ target_phys_addr_t start_addr2, end_addr2;
+ int need_subpage = 0;
+
p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
*p = section_index;
- if (!(memory_region_is_ram(mr) || mr->rom_device)) {
- target_phys_addr_t start_addr2, end_addr2;
- int need_subpage = 0;
- CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
- end_addr2, need_subpage);
+ CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
+ end_addr2, need_subpage);
- if (need_subpage) {
- subpage = subpage_init((addr & TARGET_PAGE_MASK),
- p, phys_section_unassigned);
- subpage_register(subpage, start_addr2, end_addr2,
- section_index);
- }
+ if (need_subpage) {
+ subpage = subpage_init((addr & TARGET_PAGE_MASK),
+ p, phys_section_unassigned);
+ subpage_register(subpage, start_addr2, end_addr2,
+ section_index);
}
}
addr += TARGET_PAGE_SIZE;
--
1.7.9
* [Qemu-devel] [PATCH 11/20] memory: unify the two branches of cpu_register_physical_memory_log()
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
The two branches are identical, except that the second one knows it is not
modifying an existing subpage.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 49 +++++++++++++++----------------------------------
1 files changed, 15 insertions(+), 34 deletions(-)
diff --git a/exec.c b/exec.c
index 5d0afdb..6232a39 100644
--- a/exec.c
+++ b/exec.c
@@ -2625,7 +2625,6 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
target_phys_addr_t start_addr = section->offset_within_address_space;
ram_addr_t size = section->size;
target_phys_addr_t addr, end_addr;
- uint16_t *p;
CPUState *env;
ram_addr_t orig_size = size;
subpage_t *subpage;
@@ -2638,43 +2637,25 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
addr = start_addr;
do {
- p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 0);
- if (p && *p != phys_section_unassigned) {
- uint16_t orig_memory= *p;
- target_phys_addr_t start_addr2, end_addr2;
- int need_subpage = 0;
- MemoryRegion *mr = phys_sections[orig_memory].mr;
-
- CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
- need_subpage);
- if (need_subpage) {
- if (!(mr->subpage)) {
- subpage = subpage_init((addr & TARGET_PAGE_MASK),
- p, orig_memory);
- } else {
- subpage = container_of(mr, subpage_t, iomem);
- }
- subpage_register(subpage, start_addr2, end_addr2,
- section_index);
+ uint16_t *p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
+ uint16_t orig_memory = *p;
+ target_phys_addr_t start_addr2, end_addr2;
+ int need_subpage = 0;
+ MemoryRegion *mr = phys_sections[orig_memory].mr;
+
+ CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
+ need_subpage);
+ if (need_subpage) {
+ if (!(mr->subpage)) {
+ subpage = subpage_init((addr & TARGET_PAGE_MASK),
+ p, orig_memory);
} else {
- *p = section_index;
+ subpage = container_of(mr, subpage_t, iomem);
}
+ subpage_register(subpage, start_addr2, end_addr2,
+ section_index);
} else {
- target_phys_addr_t start_addr2, end_addr2;
- int need_subpage = 0;
-
- p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
*p = section_index;
-
- CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
- end_addr2, need_subpage);
-
- if (need_subpage) {
- subpage = subpage_init((addr & TARGET_PAGE_MASK),
- p, phys_section_unassigned);
- subpage_register(subpage, start_addr2, end_addr2,
- section_index);
- }
}
addr += TARGET_PAGE_SIZE;
} while (addr != end_addr);
--
1.7.9
* [Qemu-devel] [PATCH 12/20] memory: move tlb flush to MemoryListener commit callback
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
This way, if we have several changes in a single transaction, we flush just
once.
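A sketch of the effect, using the existing transaction API from memory.h
(sysmem and the subregions are hypothetical):

    memory_region_transaction_begin();
    memory_region_add_subregion(sysmem, 0x100000, region_a);
    memory_region_add_subregion(sysmem, 0x200000, region_b);
    memory_region_del_subregion(sysmem, region_c);
    memory_region_transaction_commit(); /* one tlb_flush per CPU, not three */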
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 16 ++++++++--------
1 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/exec.c b/exec.c
index 6232a39..2171eba 100644
--- a/exec.c
+++ b/exec.c
@@ -2625,7 +2625,6 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
target_phys_addr_t start_addr = section->offset_within_address_space;
ram_addr_t size = section->size;
target_phys_addr_t addr, end_addr;
- CPUState *env;
ram_addr_t orig_size = size;
subpage_t *subpage;
uint16_t section_index = phys_section_add(section);
@@ -2659,13 +2658,6 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
}
addr += TARGET_PAGE_SIZE;
} while (addr != end_addr);
-
- /* since each CPU stores ram addresses in its TLB cache, we must
- reset the modified entries */
- /* XXX: slow ! */
- for(env = first_cpu; env != NULL; env = env->next_cpu) {
- tlb_flush(env, 1);
- }
}
void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
@@ -3563,6 +3555,14 @@ static void core_begin(MemoryListener *listener)
static void core_commit(MemoryListener *listener)
{
+ CPUState *env;
+
+ /* since each CPU stores ram addresses in its TLB cache, we must
+ reset the modified entries */
+ /* XXX: slow ! */
+ for(env = first_cpu; env != NULL; env = env->next_cpu) {
+ tlb_flush(env, 1);
+ }
}
static void core_region_add(MemoryListener *listener,
--
1.7.9
* [Qemu-devel] [PATCH 13/20] memory: make phys_page_find() return a MemoryRegionSection
From: Avi Kivity @ 2012-02-14 9:27 UTC
To: qemu-devel
We no longer describe memory in terms of individual pages; use sections
throughout instead.
PhysPageDesc is no longer used; remove it.
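Caller-side, the pattern changes from decoding phys_offset bits to
querying the section directly (a condensed sketch of what the converted
breakpoint_invalidate() below does):

    MemoryRegionSection s = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (memory_region_is_ram(s.mr)) {
        ram_addr_t ra = memory_region_get_ram_addr(s.mr)
                        + s.offset_within_region;
        tb_invalidate_phys_page_range(ra, ra + 1, 0);
    }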
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 299 ++++++++++++++++++++++++++++++++++------------------------------
1 files changed, 160 insertions(+), 139 deletions(-)
diff --git a/exec.c b/exec.c
index 2171eba..bf34dc9 100644
--- a/exec.c
+++ b/exec.c
@@ -186,12 +186,6 @@
static void *l1_map[V_L1_SIZE];
#if !defined(CONFIG_USER_ONLY)
-typedef struct PhysPageDesc {
- /* offset in host memory of the page + io_index in the low bits */
- ram_addr_t phys_offset;
- ram_addr_t region_offset;
-} PhysPageDesc;
-
typedef struct PhysPageEntry PhysPageEntry;
static MemoryRegionSection *phys_sections;
@@ -212,7 +206,7 @@ struct PhysPageEntry {
#define PHYS_MAP_NODE_NIL ((uint16_t)~0)
/* This is a multi-level map on the physical address space.
- The bottom level has pointers to PhysPageDesc. */
+ The bottom level has pointers to MemoryRegionSections. */
static PhysPageEntry phys_map = { .u.node = PHYS_MAP_NODE_NIL };
static void io_mem_init(void);
@@ -463,33 +457,25 @@ static uint16_t *phys_page_find_alloc(target_phys_addr_t index, int alloc)
return &lp->u.leaf;
}
-static inline PhysPageDesc phys_page_find(target_phys_addr_t index)
+static MemoryRegionSection phys_page_find(target_phys_addr_t index)
{
uint16_t *p = phys_page_find_alloc(index, 0);
uint16_t s_index = phys_section_unassigned;
- MemoryRegionSection *section;
- PhysPageDesc pd;
+ MemoryRegionSection section;
+ target_phys_addr_t delta;
if (p) {
s_index = *p;
}
- section = &phys_sections[s_index];
+ section = phys_sections[s_index];
index <<= TARGET_PAGE_BITS;
- assert(section->offset_within_address_space <= index
- && index <= section->offset_within_address_space + section->size-1);
- pd.phys_offset = section->mr->ram_addr;
- pd.region_offset = (index - section->offset_within_address_space)
- + section->offset_within_region;
- if (memory_region_is_ram(section->mr)) {
- pd.phys_offset += pd.region_offset;
- pd.region_offset = 0;
- } else if (section->mr->rom_device) {
- pd.phys_offset += pd.region_offset;
- }
- if (section->readonly) {
- pd.phys_offset |= io_mem_rom.ram_addr;
- }
- return pd;
+ assert(section.offset_within_address_space <= index
+ && index <= section.offset_within_address_space + section.size-1);
+ delta = index - section.offset_within_address_space;
+ section.offset_within_address_space += delta;
+ section.offset_within_region += delta;
+ section.size -= delta;
+ return section;
}
static void tlb_protect_code(ram_addr_t ram_addr);
@@ -1449,14 +1435,18 @@ static void breakpoint_invalidate(CPUState *env, target_ulong pc)
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
target_phys_addr_t addr;
- target_ulong pd;
ram_addr_t ram_addr;
- PhysPageDesc p;
+ MemoryRegionSection section;
addr = cpu_get_phys_page_debug(env, pc);
- p = phys_page_find(addr >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
- ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
+ section = phys_page_find(addr >> TARGET_PAGE_BITS);
+ if (!(memory_region_is_ram(section.mr)
+ || (section.mr->rom_device && section.mr->readable))) {
+ return;
+ }
+ ram_addr = (memory_region_get_ram_addr(section.mr)
+ + section.offset_within_region) & TARGET_PAGE_MASK;
+ ram_addr |= (pc & ~TARGET_PAGE_MASK);
tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}
#endif
@@ -2134,24 +2124,21 @@ static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
env->tlb_flush_mask = mask;
}
-static bool is_ram_rom(ram_addr_t pd)
+static bool is_ram_rom(MemoryRegionSection *s)
{
- pd &= ~TARGET_PAGE_MASK;
- return pd == io_mem_ram.ram_addr || pd == io_mem_rom.ram_addr;
+ return memory_region_is_ram(s->mr);
}
-static bool is_romd(ram_addr_t pd)
+static bool is_romd(MemoryRegionSection *s)
{
- MemoryRegion *mr;
+ MemoryRegion *mr = s->mr;
- pd &= ~TARGET_PAGE_MASK;
- mr = io_mem_region[pd];
return mr->rom_device && mr->readable;
}
-static bool is_ram_rom_romd(ram_addr_t pd)
+static bool is_ram_rom_romd(MemoryRegionSection *s)
{
- return is_ram_rom(pd) || is_romd(pd);
+ return is_ram_rom(s) || is_romd(s);
}
/* Add a new TLB entry. At most one entry for a given virtual address
@@ -2161,8 +2148,7 @@ void tlb_set_page(CPUState *env, target_ulong vaddr,
target_phys_addr_t paddr, int prot,
int mmu_idx, target_ulong size)
{
- PhysPageDesc p;
- unsigned long pd;
+ MemoryRegionSection section;
unsigned int index;
target_ulong address;
target_ulong code_address;
@@ -2175,8 +2161,7 @@ void tlb_set_page(CPUState *env, target_ulong vaddr,
if (size != TARGET_PAGE_SIZE) {
tlb_add_large_page(env, vaddr, size);
}
- p = phys_page_find(paddr >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(paddr >> TARGET_PAGE_BITS);
#if defined(DEBUG_TLB)
printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
" prot=%x idx=%d pd=0x%08lx\n",
@@ -2184,15 +2169,21 @@ void tlb_set_page(CPUState *env, target_ulong vaddr,
#endif
address = vaddr;
- if (!is_ram_rom_romd(pd)) {
- if (!is_ram_rom_romd(&section)) {
/* IO memory case (romd handled later) */
address |= TLB_MMIO;
}
- addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
- if (is_ram_rom(pd)) {
+ if (is_ram_rom_romd(&section)) {
+ addend = (unsigned long)(memory_region_get_ram_ptr(section.mr)
+ + section.offset_within_region);
+ } else {
+ addend = 0;
+ }
+ if (is_ram_rom(&section)) {
/* Normal RAM. */
- iotlb = pd & TARGET_PAGE_MASK;
- if ((pd & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr)
+ iotlb = (memory_region_get_ram_addr(section.mr)
+ + section.offset_within_region) & TARGET_PAGE_MASK;
+ if (!section.readonly)
iotlb |= io_mem_notdirty.ram_addr;
else
iotlb |= io_mem_rom.ram_addr;
@@ -2203,8 +2194,8 @@ void tlb_set_page(CPUState *env, target_ulong vaddr,
and avoid full address decoding in every device.
We can't use the high bits of pd for this because
IO_MEM_ROMD uses these as a ram address. */
- iotlb = (pd & ~TARGET_PAGE_MASK);
- iotlb += p.region_offset;
+ iotlb = memory_region_get_ram_addr(section.mr) & ~TARGET_PAGE_MASK;
+ iotlb += section.offset_within_region;
}
code_address = address;
@@ -2237,11 +2228,14 @@ void tlb_set_page(CPUState *env, target_ulong vaddr,
te->addr_code = -1;
}
if (prot & PAGE_WRITE) {
- if ((pd & ~TARGET_PAGE_MASK) == io_mem_rom.ram_addr || is_romd(pd)) {
+ if ((memory_region_is_ram(section.mr) && section.readonly)
+ || is_romd(&section)) {
/* Write access calls the I/O callback. */
te->addr_write = address | TLB_MMIO;
- } else if ((pd & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr &&
- !cpu_physical_memory_is_dirty(pd)) {
+ } else if (memory_region_is_ram(section.mr)
+ && !cpu_physical_memory_is_dirty(
+ section.mr->ram_addr
+ + section.offset_within_region)) {
te->addr_write = address | TLB_NOTDIRTY;
} else {
te->addr_write = address;
@@ -3788,22 +3782,22 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
uint8_t *ptr;
uint32_t val;
target_phys_addr_t page;
- ram_addr_t pd;
- PhysPageDesc p;
+ MemoryRegionSection section;
while (len > 0) {
page = addr & TARGET_PAGE_MASK;
l = (page + TARGET_PAGE_SIZE) - addr;
if (l > len)
l = len;
- p = phys_page_find(page >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(page >> TARGET_PAGE_BITS);
if (is_write) {
- if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
+ if (!memory_region_is_ram(section.mr)) {
target_phys_addr_t addr1;
- io_index = pd & (IO_MEM_NB_ENTRIES - 1);
- addr1 = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
+ io_index = memory_region_get_ram_addr(section.mr)
+ & (IO_MEM_NB_ENTRIES - 1);
+ addr1 = (addr & ~TARGET_PAGE_MASK)
+ + section.offset_within_region;
/* XXX: could force cpu_single_env to NULL to avoid
potential bugs */
if (l >= 4 && ((addr1 & 3) == 0)) {
@@ -3822,9 +3816,11 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
io_mem_write(io_index, addr1, val, 1);
l = 1;
}
- } else {
+ } else if (!section.readonly) {
ram_addr_t addr1;
- addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ addr1 = (memory_region_get_ram_addr(section.mr)
+ + section.offset_within_region)
+ | (addr & ~TARGET_PAGE_MASK);
/* RAM case */
ptr = qemu_get_ram_ptr(addr1);
memcpy(ptr, buf, l);
@@ -3838,11 +3834,13 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
qemu_put_ram_ptr(ptr);
}
} else {
- if (!is_ram_rom_romd(pd)) {
+ if (!is_ram_rom_romd(&section)) {
target_phys_addr_t addr1;
/* I/O case */
- io_index = pd & (IO_MEM_NB_ENTRIES - 1);
- addr1 = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
+ io_index = memory_region_get_ram_addr(section.mr)
+ & (IO_MEM_NB_ENTRIES - 1);
+ addr1 = (addr & ~TARGET_PAGE_MASK)
+ + section.offset_within_region;
if (l >= 4 && ((addr1 & 3) == 0)) {
/* 32 bit read access */
val = io_mem_read(io_index, addr1, 4);
@@ -3861,7 +3859,8 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
}
} else {
/* RAM case */
- ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
+ ptr = qemu_get_ram_ptr(section.mr->ram_addr
+ + section.offset_within_region);
memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
qemu_put_ram_ptr(ptr);
}
@@ -3879,22 +3878,22 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr,
int l;
uint8_t *ptr;
target_phys_addr_t page;
- unsigned long pd;
- PhysPageDesc p;
+ MemoryRegionSection section;
while (len > 0) {
page = addr & TARGET_PAGE_MASK;
l = (page + TARGET_PAGE_SIZE) - addr;
if (l > len)
l = len;
- p = phys_page_find(page >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(page >> TARGET_PAGE_BITS);
- if (!is_ram_rom_romd(pd)) {
+ if (!is_ram_rom_romd(&section)) {
/* do nothing */
} else {
unsigned long addr1;
- addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ addr1 = (memory_region_get_ram_addr(section.mr)
+ + section.offset_within_region)
+ + (addr & ~TARGET_PAGE_MASK);
/* ROM/RAM case */
ptr = qemu_get_ram_ptr(addr1);
memcpy(ptr, buf, l);
@@ -3967,8 +3966,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr,
target_phys_addr_t todo = 0;
int l;
target_phys_addr_t page;
- unsigned long pd;
- PhysPageDesc p;
+ MemoryRegionSection section;
ram_addr_t raddr = RAM_ADDR_MAX;
ram_addr_t rlen;
void *ret;
@@ -3978,10 +3976,9 @@ void *cpu_physical_memory_map(target_phys_addr_t addr,
l = (page + TARGET_PAGE_SIZE) - addr;
if (l > len)
l = len;
- p = phys_page_find(page >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(page >> TARGET_PAGE_BITS);
- if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
+ if (!(memory_region_is_ram(section.mr) && !section.readonly)) {
if (todo || bounce.buffer) {
break;
}
@@ -3996,7 +3993,9 @@ void *cpu_physical_memory_map(target_phys_addr_t addr,
return bounce.buffer;
}
if (!todo) {
- raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ raddr = memory_region_get_ram_addr(section.mr)
+ + section.offset_within_region
+ + (addr & ~TARGET_PAGE_MASK);
}
len -= l;
@@ -4055,16 +4054,15 @@ static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
int io_index;
uint8_t *ptr;
uint32_t val;
- unsigned long pd;
- PhysPageDesc p;
+ MemoryRegionSection section;
- p = phys_page_find(addr >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(addr >> TARGET_PAGE_BITS);
- if (!is_ram_rom_romd(pd)) {
+ if (!is_ram_rom_romd(&section)) {
/* I/O case */
- io_index = pd & (IO_MEM_NB_ENTRIES - 1);
- addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
+ io_index = memory_region_get_ram_addr(section.mr)
+ & (IO_MEM_NB_ENTRIES - 1);
+ addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
val = io_mem_read(io_index, addr, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -4077,7 +4075,9 @@ static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
#endif
} else {
/* RAM case */
- ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
+ ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
+ & TARGET_PAGE_MASK)
+ + section.offset_within_region) +
(addr & ~TARGET_PAGE_MASK);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
@@ -4116,16 +4116,15 @@ static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
int io_index;
uint8_t *ptr;
uint64_t val;
- unsigned long pd;
- PhysPageDesc p;
+ MemoryRegionSection section;
- p = phys_page_find(addr >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(addr >> TARGET_PAGE_BITS);
- if (!is_ram_rom_romd(pd)) {
+ if (!is_ram_rom_romd(&section)) {
/* I/O case */
- io_index = pd & (IO_MEM_NB_ENTRIES - 1);
- addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
+ io_index = memory_region_get_ram_addr(section.mr)
+ & (IO_MEM_NB_ENTRIES - 1);
+ addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
/* XXX This is broken when device endian != cpu endian.
Fix and add "endian" variable check */
@@ -4138,8 +4137,10 @@ static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
#endif
} else {
/* RAM case */
- ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
- (addr & ~TARGET_PAGE_MASK);
+ ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
+ & TARGET_PAGE_MASK)
+ + section.offset_within_region)
+ + (addr & ~TARGET_PAGE_MASK);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
val = ldq_le_p(ptr);
@@ -4185,16 +4186,15 @@ static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
int io_index;
uint8_t *ptr;
uint64_t val;
- unsigned long pd;
- PhysPageDesc p;
+ MemoryRegionSection section;
- p = phys_page_find(addr >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(addr >> TARGET_PAGE_BITS);
- if (!is_ram_rom_romd(pd)) {
+ if (!is_ram_rom_romd(&section)) {
/* I/O case */
- io_index = pd & (IO_MEM_NB_ENTRIES - 1);
- addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
+ io_index = memory_region_get_ram_addr(section.mr)
+ & (IO_MEM_NB_ENTRIES - 1);
+ addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
val = io_mem_read(io_index, addr, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -4207,8 +4207,10 @@ static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
#endif
} else {
/* RAM case */
- ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
- (addr & ~TARGET_PAGE_MASK);
+ ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
+ & TARGET_PAGE_MASK)
+ + section.offset_within_region)
+ + (addr & ~TARGET_PAGE_MASK);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
val = lduw_le_p(ptr);
@@ -4246,18 +4248,23 @@ void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
int io_index;
uint8_t *ptr;
- unsigned long pd;
- PhysPageDesc p;
+ MemoryRegionSection section;
- p = phys_page_find(addr >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(addr >> TARGET_PAGE_BITS);
- if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
- io_index = pd & (IO_MEM_NB_ENTRIES - 1);
- addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
+ if (!memory_region_is_ram(section.mr) || section.readonly) {
+ if (memory_region_is_ram(section.mr)) {
+ io_index = io_mem_rom.ram_addr;
+ } else {
+ io_index = memory_region_get_ram_addr(section.mr);
+ }
+ addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
io_mem_write(io_index, addr, val, 4);
} else {
- unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ unsigned long addr1 = (memory_region_get_ram_addr(section.mr)
+ & TARGET_PAGE_MASK)
+ + section.offset_within_region
+ + (addr & ~TARGET_PAGE_MASK);
ptr = qemu_get_ram_ptr(addr1);
stl_p(ptr, val);
@@ -4277,15 +4284,18 @@ void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
int io_index;
uint8_t *ptr;
- unsigned long pd;
- PhysPageDesc p;
+ MemoryRegionSection section;
- p = phys_page_find(addr >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(addr >> TARGET_PAGE_BITS);
- if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
- io_index = pd & (IO_MEM_NB_ENTRIES - 1);
- addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
+ if (!memory_region_is_ram(section.mr) || section.readonly) {
+ if (memory_region_is_ram(section.mr)) {
+ io_index = io_mem_rom.ram_addr;
+ } else {
+ io_index = memory_region_get_ram_addr(section.mr)
+ & (IO_MEM_NB_ENTRIES - 1);
+ }
+ addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
#ifdef TARGET_WORDS_BIGENDIAN
io_mem_write(io_index, addr, val >> 32, 4);
io_mem_write(io_index, addr + 4, (uint32_t)val, 4);
@@ -4294,8 +4304,10 @@ void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
io_mem_write(io_index, addr + 4, val >> 32, 4);
#endif
} else {
- ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
- (addr & ~TARGET_PAGE_MASK);
+ ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
+ & TARGET_PAGE_MASK)
+ + section.offset_within_region)
+ + (addr & ~TARGET_PAGE_MASK);
stq_p(ptr, val);
}
}
@@ -4306,15 +4318,18 @@ static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
{
int io_index;
uint8_t *ptr;
- unsigned long pd;
- PhysPageDesc p;
+ MemoryRegionSection section;
- p = phys_page_find(addr >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(addr >> TARGET_PAGE_BITS);
- if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
- io_index = pd & (IO_MEM_NB_ENTRIES - 1);
- addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
+ if (!memory_region_is_ram(section.mr) || section.readonly) {
+ if (memory_region_is_ram(section.mr)) {
+ io_index = io_mem_rom.ram_addr;
+ } else {
+ io_index = memory_region_get_ram_addr(section.mr)
+ & (IO_MEM_NB_ENTRIES - 1);
+ }
+ addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
val = bswap32(val);
@@ -4327,7 +4342,9 @@ static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
io_mem_write(io_index, addr, val, 4);
} else {
unsigned long addr1;
- addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ addr1 = (memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK)
+ + section.offset_within_region
+ + (addr & ~TARGET_PAGE_MASK);
/* RAM case */
ptr = qemu_get_ram_ptr(addr1);
switch (endian) {
@@ -4379,15 +4396,18 @@ static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
{
int io_index;
uint8_t *ptr;
- unsigned long pd;
- PhysPageDesc p;
+ MemoryRegionSection section;
- p = phys_page_find(addr >> TARGET_PAGE_BITS);
- pd = p.phys_offset;
+ section = phys_page_find(addr >> TARGET_PAGE_BITS);
- if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
- io_index = pd & (IO_MEM_NB_ENTRIES - 1);
- addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
+ if (!memory_region_is_ram(section.mr) || section.readonly) {
+ if (memory_region_is_ram(section.mr)) {
+ io_index = io_mem_rom.ram_addr;
+ } else {
+ io_index = memory_region_get_ram_addr(section.mr)
+ & (IO_MEM_NB_ENTRIES - 1);
+ }
+ addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
val = bswap16(val);
@@ -4400,7 +4420,8 @@ static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
io_mem_write(io_index, addr, val, 2);
} else {
unsigned long addr1;
- addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
+ addr1 = (memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK)
+ + section.offset_within_region + (addr & ~TARGET_PAGE_MASK);
/* RAM case */
ptr = qemu_get_ram_ptr(addr1);
switch (endian) {
@@ -4617,7 +4638,7 @@ tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
}
pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
- && !is_romd(pd)) {
+ && !io_mem_region[pd]->rom_device) {
#if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
#else
--
1.7.9
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [Qemu-devel] [PATCH 14/20] memory: give phys_page_find() its own tree search loop
2012-02-14 9:27 [Qemu-devel] [PATCH 00/20] Reduce storage overhead of memory core Avi Kivity
` (12 preceding siblings ...)
2012-02-14 9:27 ` [Qemu-devel] [PATCH 13/20] memory: make phys_page_find() return a MemoryRegionSection Avi Kivity
@ 2012-02-14 9:27 ` Avi Kivity
2012-02-14 9:27 ` [Qemu-devel] [PATCH 15/20] memory: simplify multipage/subpage registration Avi Kivity
` (5 subsequent siblings)
19 siblings, 0 replies; 27+ messages in thread
From: Avi Kivity @ 2012-02-14 9:27 UTC (permalink / raw)
To: qemu-devel
We'll change phys_page_find_alloc() soon, but phys_page_find()
doesn't need to bear the consequences.
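For reference, how the new loop decomposes a page index (L2_BITS = 10
and three levels are assumptions for the example, not fixed by this
patch):

    /* page index 0x12345 selects one slot per level:
     *   level 2: (0x12345 >> 20) & 0x3ff = 0x000
     *   level 1: (0x12345 >> 10) & 0x3ff = 0x048
     *   level 0:  0x12345        & 0x3ff = 0x345
     * a PHYS_MAP_NODE_NIL link at any level means the page is
     * unassigned. */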
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 17 +++++++++++++----
1 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/exec.c b/exec.c
index bf34dc9..24423d5 100644
--- a/exec.c
+++ b/exec.c
@@ -459,14 +459,23 @@ static uint16_t *phys_page_find_alloc(target_phys_addr_t index, int alloc)
static MemoryRegionSection phys_page_find(target_phys_addr_t index)
{
- uint16_t *p = phys_page_find_alloc(index, 0);
- uint16_t s_index = phys_section_unassigned;
+ PhysPageEntry lp = phys_map;
+ PhysPageEntry *p;
+ int i;
MemoryRegionSection section;
target_phys_addr_t delta;
+ uint16_t s_index = phys_section_unassigned;
- if (p) {
- s_index = *p;
+ for (i = P_L2_LEVELS - 1; i >= 0; i--) {
+ if (lp.u.node == PHYS_MAP_NODE_NIL) {
+ goto not_found;
+ }
+ p = phys_map_nodes[lp.u.node];
+ lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
}
+
+ s_index = lp.u.leaf;
+not_found:
section = phys_sections[s_index];
index <<= TARGET_PAGE_BITS;
assert(section.offset_within_address_space <= index
--
1.7.9
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [Qemu-devel] [PATCH 15/20] memory: simplify multipage/subpage registration
2012-02-14 9:27 [Qemu-devel] [PATCH 00/20] Reduce storage overhead of memory core Avi Kivity
` (13 preceding siblings ...)
2012-02-14 9:27 ` [Qemu-devel] [PATCH 14/20] memory: give phys_page_find() its own tree search loop Avi Kivity
@ 2012-02-14 9:27 ` Avi Kivity
2012-02-14 9:27 ` [Qemu-devel] [PATCH 16/20] memory: replace phys_page_find_alloc() with phys_page_set() Avi Kivity
` (4 subsequent siblings)
19 siblings, 0 replies; 27+ messages in thread
From: Avi Kivity @ 2012-02-14 9:27 UTC (permalink / raw)
To: qemu-devel
Instead of handling subpages on a per-page basis, split each section
into a subpage head, multipage body, and subpage tail, and register
each separately. This simplifies the registration functions.
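A concrete example of the split (addresses made up, 4K pages assumed):

    /* a section covering [0x1080, 0x3040) is registered as
     *   head: [0x1080, 0x2000) -> register_subpage()
     *   body: [0x2000, 0x3000) -> register_multipage()
     *   tail: [0x3000, 0x3040) -> register_subpage()
     * page-aligned sections have an empty head and tail and take only
     * the multipage path */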
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 120 ++++++++++++++++++++++++++++++++++-----------------------------
1 files changed, 65 insertions(+), 55 deletions(-)
diff --git a/exec.c b/exec.c
index 24423d5..e382858 100644
--- a/exec.c
+++ b/exec.c
@@ -2538,28 +2538,7 @@ static inline void tlb_set_dirty(CPUState *env,
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
uint16_t section);
-static subpage_t *subpage_init (target_phys_addr_t base, uint16_t *section,
- uint16_t orig_section);
-#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
- need_subpage) \
- do { \
- if (addr > start_addr) \
- start_addr2 = 0; \
- else { \
- start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
- if (start_addr2 > 0) \
- need_subpage = 1; \
- } \
- \
- if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
- end_addr2 = TARGET_PAGE_SIZE - 1; \
- else { \
- end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
- if (end_addr2 < TARGET_PAGE_SIZE - 1) \
- need_subpage = 1; \
- } \
- } while (0)
-
+static subpage_t *subpage_init(target_phys_addr_t base);
static void destroy_page_desc(uint16_t section_index)
{
MemoryRegionSection *section = &phys_sections[section_index];
@@ -2622,47 +2601,85 @@ static void phys_sections_clear(void)
start_addr and region_offset are rounded down to a page boundary
before calculating this offset. This should not be a problem unless
the low bits of start_addr and region_offset differ. */
-void cpu_register_physical_memory_log(MemoryRegionSection *section,
- bool readonly)
+static void register_subpage(MemoryRegionSection *section)
+{
+ subpage_t *subpage;
+ target_phys_addr_t base = section->offset_within_address_space
+ & TARGET_PAGE_MASK;
+ MemoryRegionSection existing = phys_page_find(base >> TARGET_PAGE_BITS);
+ MemoryRegionSection subsection = {
+ .offset_within_address_space = base,
+ .size = TARGET_PAGE_SIZE,
+ };
+ uint16_t *ptr;
+ target_phys_addr_t start, end;
+
+ assert(existing.mr->subpage || existing.mr == &io_mem_unassigned);
+
+ if (!(existing.mr->subpage)) {
+ subpage = subpage_init(base);
+ subsection.mr = &subpage->iomem;
+ ptr = phys_page_find_alloc(base >> TARGET_PAGE_BITS, 1);
+ *ptr = phys_section_add(&subsection);
+ } else {
+ subpage = container_of(existing.mr, subpage_t, iomem);
+ }
+ start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
+ end = start + section->size;
+ subpage_register(subpage, start, end, phys_section_add(section));
+}
+
+
+static void register_multipage(MemoryRegionSection *section)
{
target_phys_addr_t start_addr = section->offset_within_address_space;
ram_addr_t size = section->size;
target_phys_addr_t addr, end_addr;
- ram_addr_t orig_size = size;
- subpage_t *subpage;
uint16_t section_index = phys_section_add(section);
assert(size);
- size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
end_addr = start_addr + (target_phys_addr_t)size;
addr = start_addr;
do {
uint16_t *p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
- uint16_t orig_memory = *p;
- target_phys_addr_t start_addr2, end_addr2;
- int need_subpage = 0;
- MemoryRegion *mr = phys_sections[orig_memory].mr;
-
- CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
- need_subpage);
- if (need_subpage) {
- if (!(mr->subpage)) {
- subpage = subpage_init((addr & TARGET_PAGE_MASK),
- p, orig_memory);
- } else {
- subpage = container_of(mr, subpage_t, iomem);
- }
- subpage_register(subpage, start_addr2, end_addr2,
- section_index);
- } else {
- *p = section_index;
- }
+ assert(*p == phys_section_unassigned);
+ *p = section_index;
addr += TARGET_PAGE_SIZE;
} while (addr != end_addr);
}
+void cpu_register_physical_memory_log(MemoryRegionSection *section,
+ bool readonly)
+{
+ MemoryRegionSection now = *section, remain = *section;
+
+ if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
+ || (now.size < TARGET_PAGE_SIZE)) {
+ now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
+ - now.offset_within_address_space,
+ now.size);
+ register_subpage(&now);
+ remain.size -= now.size;
+ remain.offset_within_address_space += now.size;
+ remain.offset_within_region += now.size;
+ }
+ now = remain;
+ now.size &= TARGET_PAGE_MASK;
+ if (now.size) {
+ register_multipage(&now);
+ remain.size -= now.size;
+ remain.offset_within_address_space += now.size;
+ remain.offset_within_region += now.size;
+ }
+ now = remain;
+ if (now.size) {
+ register_subpage(&now);
+ }
+}
+
+
void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
{
if (kvm_enabled())
@@ -3442,14 +3459,9 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
return 0;
}
-static subpage_t *subpage_init (target_phys_addr_t base, uint16_t *section_ind,
- uint16_t orig_section)
+static subpage_t *subpage_init(target_phys_addr_t base)
{
subpage_t *mmio;
- MemoryRegionSection section = {
- .offset_within_address_space = base,
- .size = TARGET_PAGE_SIZE,
- };
mmio = g_malloc0(sizeof(subpage_t));
@@ -3457,13 +3469,11 @@ static subpage_t *subpage_init (target_phys_addr_t base, uint16_t *section_ind,
memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
"subpage", TARGET_PAGE_SIZE);
mmio->iomem.subpage = true;
- section.mr = &mmio->iomem;
#if defined(DEBUG_SUBPAGE)
printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
mmio, base, TARGET_PAGE_SIZE, subpage_memory);
#endif
- *section_ind = phys_section_add(&section);
- subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_section);
+ subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
return mmio;
}
--
1.7.9
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [Qemu-devel] [PATCH 16/20] memory: replace phys_page_find_alloc() with phys_page_set()
2012-02-14 9:27 [Qemu-devel] [PATCH 00/20] Reduce storage overhead of memory core Avi Kivity
` (14 preceding siblings ...)
2012-02-14 9:27 ` [Qemu-devel] [PATCH 15/20] memory: simplify multipage/subpage registration Avi Kivity
@ 2012-02-14 9:27 ` Avi Kivity
2012-02-14 9:27 ` [Qemu-devel] [PATCH 17/20] memory: switch phys_page_set() to a recursive implementation Avi Kivity
` (3 subsequent siblings)
19 siblings, 0 replies; 27+ messages in thread
From: Avi Kivity @ 2012-02-14 9:27 UTC (permalink / raw)
To: qemu-devel
By giving the function the value we want to set, we make it
more flexible for the next patch.
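Condensed from the diff below, the change in caller shape:

    /* before: callers dereference the returned slot themselves */
    uint16_t *p = phys_page_find_alloc(base >> TARGET_PAGE_BITS, 1);
    *p = phys_section_add(&subsection);

    /* after: the value travels into the helper */
    phys_page_set(base >> TARGET_PAGE_BITS, phys_section_add(&subsection));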
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 15 ++++-----------
1 files changed, 4 insertions(+), 11 deletions(-)
diff --git a/exec.c b/exec.c
index e382858..26e70c3 100644
--- a/exec.c
+++ b/exec.c
@@ -429,7 +429,7 @@ static void phys_map_nodes_reset(void)
phys_map_nodes_nb = 0;
}
-static uint16_t *phys_page_find_alloc(target_phys_addr_t index, int alloc)
+static void phys_page_set(target_phys_addr_t index, uint16_t leaf)
{
PhysPageEntry *lp, *p;
int i, j;
@@ -439,9 +439,6 @@ static uint16_t *phys_page_find_alloc(target_phys_addr_t index, int alloc)
/* Level 1..N. */
for (i = P_L2_LEVELS - 1; i >= 0; i--) {
if (lp->u.node == PHYS_MAP_NODE_NIL) {
- if (!alloc) {
- return NULL;
- }
p = phys_map_node_alloc(&lp->u.node);
if (i == 0) {
for (j = 0; j < L2_SIZE; j++) {
@@ -454,7 +451,7 @@ static uint16_t *phys_page_find_alloc(target_phys_addr_t index, int alloc)
lp = &p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
}
- return &lp->u.leaf;
+ lp->u.leaf = leaf;
}
static MemoryRegionSection phys_page_find(target_phys_addr_t index)
@@ -2611,7 +2608,6 @@ static void register_subpage(MemoryRegionSection *section)
.offset_within_address_space = base,
.size = TARGET_PAGE_SIZE,
};
- uint16_t *ptr;
target_phys_addr_t start, end;
assert(existing.mr->subpage || existing.mr == &io_mem_unassigned);
@@ -2619,8 +2615,7 @@ static void register_subpage(MemoryRegionSection *section)
if (!(existing.mr->subpage)) {
subpage = subpage_init(base);
subsection.mr = &subpage->iomem;
- ptr = phys_page_find_alloc(base >> TARGET_PAGE_BITS, 1);
- *ptr = phys_section_add(&subsection);
+ phys_page_set(base >> TARGET_PAGE_BITS, phys_section_add(&subsection));
} else {
subpage = container_of(existing.mr, subpage_t, iomem);
}
@@ -2643,9 +2638,7 @@ static void register_multipage(MemoryRegionSection *section)
addr = start_addr;
do {
- uint16_t *p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
- assert(*p == phys_section_unassigned);
- *p = section_index;
+ phys_page_set(addr >> TARGET_PAGE_BITS, section_index);
addr += TARGET_PAGE_SIZE;
} while (addr != end_addr);
}
--
1.7.9
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [Qemu-devel] [PATCH 17/20] memory: switch phys_page_set() to a recursive implementation
2012-02-14 9:27 [Qemu-devel] [PATCH 00/20] Reduce storage overhead of memory core Avi Kivity
` (15 preceding siblings ...)
2012-02-14 9:27 ` [Qemu-devel] [PATCH 16/20] memory: replace phys_page_find_alloc() with phys_page_set() Avi Kivity
@ 2012-02-14 9:27 ` Avi Kivity
2012-02-14 9:27 ` [Qemu-devel] [PATCH 18/20] memory: change phys_page_set() to set multiple pages Avi Kivity
` (2 subsequent siblings)
19 siblings, 0 replies; 27+ messages in thread
From: Avi Kivity @ 2012-02-14 9:27 UTC (permalink / raw)
To: qemu-devel
Setting multiple pages at once requires backtracking to previous
nodes; easiest to achieve via recursion.
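To see where the backtracking comes from (L2_BITS = 10 assumed for the
example):

    /* setting pages 0x3fe..0x402 spans two level-0 nodes: after
     * filling slots 0x3fe and 0x3ff of the first node, the walk must
     * climb back to the parent to descend into the sibling that holds
     * 0x400..0x402; with recursion the parent's frame is still on the
     * stack, while an iterative version would have to keep that path
     * explicitly */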
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 67 +++++++++++++++++++++++++++++++++++++++------------------------
1 files changed, 41 insertions(+), 26 deletions(-)
diff --git a/exec.c b/exec.c
index 26e70c3..f4cd867 100644
--- a/exec.c
+++ b/exec.c
@@ -404,24 +404,30 @@ static inline PageDesc *page_find(tb_page_addr_t index)
#if !defined(CONFIG_USER_ONLY)
-static PhysPageEntry *phys_map_node_alloc(uint16_t *ptr)
+static void phys_map_node_reserve(unsigned nodes)
{
- unsigned i;
- uint16_t ret;
-
- /* Assign early to avoid the pointer being invalidated by g_renew() */
- *ptr = ret = phys_map_nodes_nb++;
- assert(ret != PHYS_MAP_NODE_NIL);
- if (ret == phys_map_nodes_nb_alloc) {
+ if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
typedef PhysPageEntry Node[L2_SIZE];
phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
+ phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
+ phys_map_nodes_nb + nodes);
phys_map_nodes = g_renew(Node, phys_map_nodes,
phys_map_nodes_nb_alloc);
}
+}
+
+static uint16_t phys_map_node_alloc(void)
+{
+ unsigned i;
+ uint16_t ret;
+
+ ret = phys_map_nodes_nb++;
+ assert(ret != PHYS_MAP_NODE_NIL);
+ assert(ret != phys_map_nodes_nb_alloc);
for (i = 0; i < L2_SIZE; ++i) {
phys_map_nodes[ret][i].u.node = PHYS_MAP_NODE_NIL;
}
- return phys_map_nodes[ret];
+ return ret;
}
static void phys_map_nodes_reset(void)
@@ -429,29 +435,38 @@ static void phys_map_nodes_reset(void)
phys_map_nodes_nb = 0;
}
-static void phys_page_set(target_phys_addr_t index, uint16_t leaf)
-{
- PhysPageEntry *lp, *p;
- int i, j;
- lp = &phys_map;
+static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t index,
+ uint16_t leaf, int level)
+{
+ PhysPageEntry *p;
+ int i;
- /* Level 1..N. */
- for (i = P_L2_LEVELS - 1; i >= 0; i--) {
- if (lp->u.node == PHYS_MAP_NODE_NIL) {
- p = phys_map_node_alloc(&lp->u.node);
- if (i == 0) {
- for (j = 0; j < L2_SIZE; j++) {
- p[j].u.leaf = phys_section_unassigned;
- }
+ if (lp->u.node == PHYS_MAP_NODE_NIL) {
+ lp->u.node = phys_map_node_alloc();
+ p = phys_map_nodes[lp->u.node];
+ if (level == 0) {
+ for (i = 0; i < L2_SIZE; i++) {
+ p[i].u.leaf = phys_section_unassigned;
}
- } else {
- p = phys_map_nodes[lp->u.node];
}
- lp = &p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
+ } else {
+ p = phys_map_nodes[lp->u.node];
}
+ lp = &p[(index >> (level * L2_BITS)) & (L2_SIZE - 1)];
+
+ if (level == 0) {
+ lp->u.leaf = leaf;
+ } else {
+ phys_page_set_level(lp, index, leaf, level - 1);
+ }
+}
+
+static void phys_page_set(target_phys_addr_t index, uint16_t leaf)
+{
+ phys_map_node_reserve(P_L2_LEVELS);
- lp->u.leaf = leaf;
+ phys_page_set_level(&phys_map, index, leaf, P_L2_LEVELS - 1);
}
static MemoryRegionSection phys_page_find(target_phys_addr_t index)
--
1.7.9
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [Qemu-devel] [PATCH 18/20] memory: change phys_page_set() to set multiple pages
2012-02-14 9:27 [Qemu-devel] [PATCH 00/20] Reduce storage overhead of memory core Avi Kivity
` (16 preceding siblings ...)
2012-02-14 9:27 ` [Qemu-devel] [PATCH 17/20] memory: switch phys_page_set() to a recursive implementation Avi Kivity
@ 2012-02-14 9:27 ` Avi Kivity
2012-02-14 9:27 ` [Qemu-devel] [PATCH 19/20] memory: unify PhysPageEntry::node and ::leaf Avi Kivity
2012-02-14 9:27 ` [Qemu-devel] [PATCH 20/20] memory: allow phys_map tree paths to terminate early Avi Kivity
19 siblings, 0 replies; 27+ messages in thread
From: Avi Kivity @ 2012-02-14 9:27 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 41 +++++++++++++++++++++++------------------
1 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/exec.c b/exec.c
index f4cd867..98c0056 100644
--- a/exec.c
+++ b/exec.c
@@ -436,8 +436,9 @@ static void phys_map_nodes_reset(void)
}
-static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t index,
- uint16_t leaf, int level)
+static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
+ target_phys_addr_t *nb, uint16_t leaf,
+ int level)
{
PhysPageEntry *p;
int i;
@@ -453,20 +454,27 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t index,
} else {
p = phys_map_nodes[lp->u.node];
}
- lp = &p[(index >> (level * L2_BITS)) & (L2_SIZE - 1)];
+ lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
- if (level == 0) {
- lp->u.leaf = leaf;
- } else {
- phys_page_set_level(lp, index, leaf, level - 1);
+ while (*nb && lp < &p[L2_SIZE]) {
+ if (level == 0) {
+ lp->u.leaf = leaf;
+ ++*index;
+ --*nb;
+ } else {
+ phys_page_set_level(lp, index, nb, leaf, level - 1);
+ }
+ ++lp;
}
}
-static void phys_page_set(target_phys_addr_t index, uint16_t leaf)
+static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
+ uint16_t leaf)
{
- phys_map_node_reserve(P_L2_LEVELS);
+ /* Wildly overreserve - it doesn't matter much. */
+ phys_map_node_reserve((nb + L2_SIZE - 1) / L2_SIZE * P_L2_LEVELS);
- phys_page_set_level(&phys_map, index, leaf, P_L2_LEVELS - 1);
+ phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
static MemoryRegionSection phys_page_find(target_phys_addr_t index)
@@ -2630,7 +2638,8 @@ static void register_subpage(MemoryRegionSection *section)
if (!(existing.mr->subpage)) {
subpage = subpage_init(base);
subsection.mr = &subpage->iomem;
- phys_page_set(base >> TARGET_PAGE_BITS, phys_section_add(&subsection));
+ phys_page_set(base >> TARGET_PAGE_BITS, 1,
+ phys_section_add(&subsection));
} else {
subpage = container_of(existing.mr, subpage_t, iomem);
}
@@ -2644,18 +2653,14 @@ static void register_multipage(MemoryRegionSection *section)
{
target_phys_addr_t start_addr = section->offset_within_address_space;
ram_addr_t size = section->size;
- target_phys_addr_t addr, end_addr;
+ target_phys_addr_t addr;
uint16_t section_index = phys_section_add(section);
assert(size);
- end_addr = start_addr + (target_phys_addr_t)size;
-
addr = start_addr;
- do {
- phys_page_set(addr >> TARGET_PAGE_BITS, section_index);
- addr += TARGET_PAGE_SIZE;
- } while (addr != end_addr);
+ phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
+ section_index);
}
void cpu_register_physical_memory_log(MemoryRegionSection *section,
--
1.7.9
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [Qemu-devel] [PATCH 19/20] memory: unify PhysPageEntry::node and ::leaf
2012-02-14 9:27 [Qemu-devel] [PATCH 00/20] Reduce storage overhead of memory core Avi Kivity
` (17 preceding siblings ...)
2012-02-14 9:27 ` [Qemu-devel] [PATCH 18/20] memory: change phys_page_set() to set multiple pages Avi Kivity
@ 2012-02-14 9:27 ` Avi Kivity
2012-02-14 9:27 ` [Qemu-devel] [PATCH 20/20] memory: allow phys_map tree paths to terminate early Avi Kivity
19 siblings, 0 replies; 27+ messages in thread
From: Avi Kivity @ 2012-02-14 9:27 UTC (permalink / raw)
To: qemu-devel
They have the same type, unify them.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 38 ++++++++++++++++++--------------------
1 files changed, 18 insertions(+), 20 deletions(-)
diff --git a/exec.c b/exec.c
index 98c0056..a2015f7 100644
--- a/exec.c
+++ b/exec.c
@@ -193,10 +193,8 @@
static uint16_t phys_section_unassigned;
struct PhysPageEntry {
- union {
- uint16_t leaf; /* index into phys_sections */
- uint16_t node; /* index into phys_map_nodes */
- } u;
+ /* index into phys_sections (last level) or phys_map_nodes (others) */
+ uint16_t ptr;
};
/* Simple allocator for PhysPageEntry nodes */
@@ -207,7 +205,7 @@ struct PhysPageEntry {
/* This is a multi-level map on the physical address space.
The bottom level has pointers to MemoryRegionSections. */
-static PhysPageEntry phys_map = { .u.node = PHYS_MAP_NODE_NIL };
+static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL };
static void io_mem_init(void);
static void memory_map_init(void);
@@ -425,7 +423,7 @@ static uint16_t phys_map_node_alloc(void)
assert(ret != PHYS_MAP_NODE_NIL);
assert(ret != phys_map_nodes_nb_alloc);
for (i = 0; i < L2_SIZE; ++i) {
- phys_map_nodes[ret][i].u.node = PHYS_MAP_NODE_NIL;
+ phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
}
return ret;
}
@@ -443,22 +441,22 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
PhysPageEntry *p;
int i;
- if (lp->u.node == PHYS_MAP_NODE_NIL) {
- lp->u.node = phys_map_node_alloc();
- p = phys_map_nodes[lp->u.node];
+ if (lp->ptr == PHYS_MAP_NODE_NIL) {
+ lp->ptr = phys_map_node_alloc();
+ p = phys_map_nodes[lp->ptr];
if (level == 0) {
for (i = 0; i < L2_SIZE; i++) {
- p[i].u.leaf = phys_section_unassigned;
+ p[i].ptr = phys_section_unassigned;
}
}
} else {
- p = phys_map_nodes[lp->u.node];
+ p = phys_map_nodes[lp->ptr];
}
lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
while (*nb && lp < &p[L2_SIZE]) {
if (level == 0) {
- lp->u.leaf = leaf;
+ lp->ptr = leaf;
++*index;
--*nb;
} else {
@@ -487,14 +485,14 @@ static MemoryRegionSection phys_page_find(target_phys_addr_t index)
uint16_t s_index = phys_section_unassigned;
for (i = P_L2_LEVELS - 1; i >= 0; i--) {
- if (lp.u.node == PHYS_MAP_NODE_NIL) {
+ if (lp.ptr == PHYS_MAP_NODE_NIL) {
goto not_found;
}
- p = phys_map_nodes[lp.u.node];
+ p = phys_map_nodes[lp.ptr];
lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
}
- s_index = lp.u.leaf;
+ s_index = lp.ptr;
not_found:
section = phys_sections[s_index];
index <<= TARGET_PAGE_BITS;
@@ -2576,19 +2574,19 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
unsigned i;
PhysPageEntry *p;
- if (lp->u.node == PHYS_MAP_NODE_NIL) {
+ if (lp->ptr == PHYS_MAP_NODE_NIL) {
return;
}
- p = phys_map_nodes[lp->u.node];
+ p = phys_map_nodes[lp->ptr];
for (i = 0; i < L2_SIZE; ++i) {
if (level > 0) {
destroy_l2_mapping(&p[i], level - 1);
} else {
- destroy_page_desc(p[i].u.leaf);
+ destroy_page_desc(p[i].ptr);
}
}
- lp->u.node = PHYS_MAP_NODE_NIL;
+ lp->ptr = PHYS_MAP_NODE_NIL;
}
static void destroy_all_mappings(void)
@@ -3575,7 +3573,7 @@ static void core_begin(MemoryListener *listener)
{
destroy_all_mappings();
phys_sections_clear();
- phys_map.u.node = PHYS_MAP_NODE_NIL;
+ phys_map.ptr = PHYS_MAP_NODE_NIL;
phys_section_unassigned = dummy_section(&io_mem_unassigned);
}
--
1.7.9
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [Qemu-devel] [PATCH 20/20] memory: allow phys_map tree paths to terminate early
2012-02-14 9:27 [Qemu-devel] [PATCH 00/20] Reduce storage overhead of memory core Avi Kivity
` (18 preceding siblings ...)
2012-02-14 9:27 ` [Qemu-devel] [PATCH 19/20] memory: unify PhysPageEntry::node and ::leaf Avi Kivity
@ 2012-02-14 9:27 ` Avi Kivity
19 siblings, 0 replies; 27+ messages in thread
From: Avi Kivity @ 2012-02-14 9:27 UTC (permalink / raw)
To: qemu-devel
When storing large contiguous ranges in phys_map, all values tend to
be the same pointers to a single MemoryRegionSection. Collapse them
by marking nodes with level > 0 as leaves. This reduces tree memory
usage dramatically.
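Back-of-the-envelope savings, assuming L2_BITS = 10 and 4K pages:

    /* a leaf at level 0 covers 1 page      = 4KB
     * a leaf at level 1 covers 2^10 pages  = 4MB
     * a leaf at level 2 covers 2^20 pages  = 4GB
     * so a contiguous, aligned 4GB RAM block needs a single level-2
     * entry instead of ~1M level-0 entries */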
Signed-off-by: Avi Kivity <avi@redhat.com>
---
exec.c | 28 +++++++++++++++++-----------
1 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/exec.c b/exec.c
index a2015f7..0c93b26 100644
--- a/exec.c
+++ b/exec.c
@@ -193,19 +193,20 @@
static uint16_t phys_section_unassigned;
struct PhysPageEntry {
- /* index into phys_sections (last level) or phys_map_nodes (others) */
- uint16_t ptr;
+ uint16_t is_leaf : 1;
+ /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
+ uint16_t ptr : 15;
};
/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
-#define PHYS_MAP_NODE_NIL ((uint16_t)~0)
+#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
/* This is a multi-level map on the physical address space.
The bottom level has pointers to MemoryRegionSections. */
-static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL };
+static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
static void io_mem_init(void);
static void memory_map_init(void);
@@ -423,6 +424,7 @@ static uint16_t phys_map_node_alloc(void)
assert(ret != PHYS_MAP_NODE_NIL);
assert(ret != phys_map_nodes_nb_alloc);
for (i = 0; i < L2_SIZE; ++i) {
+ phys_map_nodes[ret][i].is_leaf = 0;
phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
}
return ret;
@@ -440,12 +442,14 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
{
PhysPageEntry *p;
int i;
+ target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
- if (lp->ptr == PHYS_MAP_NODE_NIL) {
+ if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
lp->ptr = phys_map_node_alloc();
p = phys_map_nodes[lp->ptr];
if (level == 0) {
for (i = 0; i < L2_SIZE; i++) {
+ p[i].is_leaf = 1;
p[i].ptr = phys_section_unassigned;
}
}
@@ -455,10 +459,11 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
while (*nb && lp < &p[L2_SIZE]) {
- if (level == 0) {
+ if ((*index & (step - 1)) == 0 && *nb >= step) {
+ lp->is_leaf = true;
lp->ptr = leaf;
- ++*index;
- --*nb;
+ *index += step;
+ *nb -= step;
} else {
phys_page_set_level(lp, index, nb, leaf, level - 1);
}
@@ -470,7 +475,7 @@ static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
uint16_t leaf)
{
/* Wildly overreserve - it doesn't matter much. */
- phys_map_node_reserve((nb + L2_SIZE - 1) / L2_SIZE * P_L2_LEVELS);
+ phys_map_node_reserve(3 * P_L2_LEVELS);
phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
@@ -484,7 +489,7 @@ static MemoryRegionSection phys_page_find(target_phys_addr_t index)
target_phys_addr_t delta;
uint16_t s_index = phys_section_unassigned;
- for (i = P_L2_LEVELS - 1; i >= 0; i--) {
+ for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
if (lp.ptr == PHYS_MAP_NODE_NIL) {
goto not_found;
}
@@ -2580,12 +2585,13 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
p = phys_map_nodes[lp->ptr];
for (i = 0; i < L2_SIZE; ++i) {
- if (level > 0) {
+ if (!p[i].is_leaf) {
destroy_l2_mapping(&p[i], level - 1);
} else {
destroy_page_desc(p[i].ptr);
}
}
+ lp->is_leaf = 0;
lp->ptr = PHYS_MAP_NODE_NIL;
}
--
1.7.9
^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [Qemu-devel] [PATCH 08/20] memory: store MemoryRegionSection pointers in phys_map
2012-02-14 9:27 ` [Qemu-devel] [PATCH 08/20] memory: store MemoryRegionSection pointers in phys_map Avi Kivity
@ 2012-03-07 17:49 ` Peter Maydell
2012-03-07 19:32 ` Peter Maydell
0 siblings, 1 reply; 27+ messages in thread
From: Peter Maydell @ 2012-03-07 17:49 UTC (permalink / raw)
To: Avi Kivity; +Cc: qemu-devel
On 14 February 2012 09:27, Avi Kivity <avi@redhat.com> wrote:
> Instead of storing PhysPageDesc, store pointers to MemoryRegionSections.
> The various offsets (phys_offset & ~TARGET_PAGE_MASK,
> PHYS_OFFSET & TARGET_PAGE_MASK, region_offset) can all be synthesized
> from the information in a MemoryRegionSection. Adjust phys_page_find()
> to synthesize a PhysPageDesc.
git bisect blames this commit (5312bd8b3) for causing a Linux kernel
on spitz to produce a bunch of pxa2xx_i2c warnings that weren't
being emitted before:
$ ./arm-softmmu/qemu-system-arm -M spitz --kernel ~/linaro/zaurus/zImage
spitz_out_switch: Charging off.
pxa2xx_i2c_read: Bad register 0xffffff90
pxa2xx_i2c_write: Bad register 0xffffff90
pxa2xx_i2c_write: Bad register 0xffffff98
pxa2xx_i2c_read: Bad register 0xffffff90
pxa2xx_i2c_write: Bad register 0xffffff90
pxa2xx_i2c_write: Bad register 0xffffffa0
pxa2xx_i2c_write: Bad register 0xffffff90
pxa2xx_i2c_read: Bad register 0xffffff90
pxa2xx_i2c_write: Bad register 0xffffff90
spitz_lcdtg_transfer: LCD in VGA mode
spitz_bl_update: LCD Backlight now at 47/63
spitz_out_switch: Charging on.
The 'spitz' messages are just chatter, but the pxa_i2c 'Bad register'
warnings are new with this commit.
You can get the kernel image from here:
https://bugs.launchpad.net/qemu-linaro/+bug/885239/+attachment/2583099/+files/zImage
NB that the kernel will panic because it can't find init because
we haven't given it a rootfs, but that's not necessary for the
purposes of this regression.
I haven't looked any closer at what's going on here yet, I've just
run the git bisect on it...
-- PMM
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [Qemu-devel] [PATCH 08/20] memory: store MemoryRegionSection pointers in phys_map
2012-03-07 17:49 ` Peter Maydell
@ 2012-03-07 19:32 ` Peter Maydell
2012-03-08 9:50 ` Avi Kivity
0 siblings, 1 reply; 27+ messages in thread
From: Peter Maydell @ 2012-03-07 19:32 UTC (permalink / raw)
To: Avi Kivity; +Cc: qemu-devel
On 7 March 2012 17:49, Peter Maydell <peter.maydell@linaro.org> wrote:
> git bisect blames this commit (5312bd8b3) for causing a Linux kernel
> on spitz to produce a bunch of pxa2xx_i2c warnings that weren't
> being emitted before:
What seems to happen here is that we register a memory region
(this is for the second i2c device in hw/pxa2xx.c):
memory_region_init_io(&s->iomem, &pxa2xx_i2c_ops, s,
"pxa2xx-i2x", s->region_size);
where region_size is 0x100. We then map it at 0x40f00100
(via sysbus_mmio_map). This used to result in our read and write
functions being called with offsets from the start of the page,
so in this case for the register at 0x90 into the device the
passed in addr would be 0x190. There is some hackery in pxa2xx_i2c_init
to work out what the offset is from the start of the region
when we map the device; we pass it in as a qdev 'offset'
property, and then read/write can fix things up to get the
actual register offset.
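A rough sketch of that fixup (field and register names assumed, not
the exact pxa2xx code):

    static uint64_t pxa2xx_i2c_read(void *opaque, target_phys_addr_t addr,
                                    unsigned size)
    {
        PXA2xxI2CState *s = opaque;

        addr -= s->offset;  /* e.g. 0x190 - 0x100 = 0x90 with the old
                               page-relative behaviour */
        /* ... register decode on the adjusted addr ... */
        return 0;
    }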
With this commit read and write functions are now passed the actual
offset from the start of the device region, ie 0x90. So the hackery
ends up doing fixing up it doesn't need to do, and generates negative
offsets which cause the diagnostic messages.
So it seems like the new behaviour is more like the right thing,
but was it an intentional change? Should we just drop the offset
hackery as a workaround for a now-fixed bug?
Are we running into the "mapping devices at non-page-offsets isn't
supported" issue here? <optimism>Is that now supported after this
patch series?</>
(I think the other devices I know of which include workarounds
for being passed relative-to-page-base addresses handle it by
masking out the high bits of the address, eg arm11mpcore.c,
so they weren't broken by this commit.)
-- PMM
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [Qemu-devel] [PATCH 08/20] memory: store MemoryRegionSection pointers in phys_map
2012-03-07 19:32 ` Peter Maydell
@ 2012-03-08 9:50 ` Avi Kivity
2012-03-08 10:09 ` Peter Maydell
0 siblings, 1 reply; 27+ messages in thread
From: Avi Kivity @ 2012-03-08 9:50 UTC (permalink / raw)
To: Peter Maydell; +Cc: qemu-devel
On 03/07/2012 09:32 PM, Peter Maydell wrote:
> On 7 March 2012 17:49, Peter Maydell <peter.maydell@linaro.org> wrote:
> > git bisect blames this commit (5312bd8b3) for causing a Linux kernel
> > on spitz to produce a bunch of pxa2xx_i2c warnings that weren't
> > being emitted before:
>
> What seems to happen here is that we register a memory region
> (this is for the second i2c device in hw/pxa2xx.c):
>
> memory_region_init_io(&s->iomem, &pxa2xx_i2c_ops, s,
> "pxa2xx-i2x", s->region_size);
>
> where region_size is 0x100. We then map it at 0x40f00100
> (via sysbus_mmio_map). This used to result in our read and write
> functions being called with offsets from the start of the page,
> so in this case for the register at 0x90 into the device the
> passed in addr would be 0x190. There is some hackery in pxa2xx_i2c_init
> to work out what the offset is from the start of the region
> when we map the device, we pass it in as a qdev 'offset'
> property, and then read/write can fix things up to get the
> actual register offset.
>
> With this commit read and write functions are now passed the actual
> offset from the start of the device region, ie 0x90. So the hackery
> ends up doing fixing up it doesn't need to do, and generates negative
> offsets which cause the diagnostic messages.
>
> So it seems like the new behaviour is more like the right thing,
> but was it an intentional change?
I don't recall whether it was intentional or not (i.e., whether I was
aware I was changing behaviour or not), but it's certainly the desired
behaviour.
> Should we just drop the offset
> hackery as a workaround for a now-fixed bug?
Yes. I'll leave the patch to you.
>
> Are we running into the "mapping devices at non-page-offsets isn't
> supported" issue here?
It wasn't supported?
> <optimism>Is that now supported after this
> patch series?</>
I'm sure that there are some rough edges but I made quite an effort to
cover corner cases. For example, a region that starts and ends at
non-aligned offsets, but spans more than a page, ought to work.
> (I think the other devices I know of which include workarounds
> for being passed relative-to-page-base addresses handle it by
> masking out the high bits of the address, eg arm11mpcore.c,
> so they weren't broken by this commit.)
I assumed this was due to the days where absolute addresses were passed,
but yes.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [Qemu-devel] [PATCH 08/20] memory: store MemoryRegionSection pointers in phys_map
2012-03-08 9:50 ` Avi Kivity
@ 2012-03-08 10:09 ` Peter Maydell
2012-03-08 11:11 ` Avi Kivity
0 siblings, 1 reply; 27+ messages in thread
From: Peter Maydell @ 2012-03-08 10:09 UTC (permalink / raw)
To: Avi Kivity; +Cc: qemu-devel
On 8 March 2012 09:50, Avi Kivity <avi@redhat.com> wrote:
> On 03/07/2012 09:32 PM, Peter Maydell wrote:
>> Are we running into the "mapping devices at non-page-offsets isn't
>> supported" issue here?
>
> It wasn't supported?
Well, you used to run into the issue noted in the comment above
exec.c:register_subpage():
The address used when calling the IO function is
the offset from the start of the region, plus region_offset. Both
start_addr and region_offset are rounded down to a page boundary
before calculating this offset. This should not be a problem unless
the low bits of start_addr and region_offset differ.
and for non-page-aligned IO regions we were getting bitten by
this rounding, which is why the offset passed into the read/write
function was strange.
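Spelling the rounding out with the numbers from earlier in the thread
(4K pages assumed):

    /* device mapped at start_addr = 0x40f00100, register at +0x90:
     * guest access = 0x40f00190
     * old offset   = 0x40f00190 - (0x40f00100 & TARGET_PAGE_MASK)
     *              = 0x40f00190 - 0x40f00000 = 0x190
     * the region-relative offset would be 0x90 */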
Is that exec.c comment now out of date?
(You might recall we had a conversation about this a little while
back: http://patchwork.ozlabs.org/patch/129267/ )
-- PMM
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [Qemu-devel] [PATCH 08/20] memory: store MemoryRegionSection pointers in phys_map
2012-03-08 10:09 ` Peter Maydell
@ 2012-03-08 11:11 ` Avi Kivity
2012-03-08 11:25 ` Peter Maydell
0 siblings, 1 reply; 27+ messages in thread
From: Avi Kivity @ 2012-03-08 11:11 UTC (permalink / raw)
To: Peter Maydell; +Cc: qemu-devel
On 03/08/2012 12:09 PM, Peter Maydell wrote:
> On 8 March 2012 09:50, Avi Kivity <avi@redhat.com> wrote:
> > On 03/07/2012 09:32 PM, Peter Maydell wrote:
> >> Are we running into the "mapping devices at non-page-offsets isn't
> >> supported" issue here?
> >
> > It wasn't supported?
>
> Well, you used to run into the issue noted in the comment above
> exec.c:register_subpage():
> The address used when calling the IO function is
> the offset from the start of the region, plus region_offset. Both
> start_addr and region_offset are rounded down to a page boundary
> before calculating this offset. This should not be a problem unless
> the low bits of start_addr and region_offset differ.
>
> and for non-page-aligned IO regions we were getting bitten by
> this rounding, which is why the offset passed into the read/write
> function was strange.
>
> Is that exec.c comment now out of date?
For subpage, I think so. It's still broken for regions larger than a
page that have low bits set in their offset:
memory_region_init_io(foo, ..., 4097)
memory_region_add_subregion(sysmem, 4095, foo);
the 4095-4096 region will work, but the 4096-8191 region will not. I'll
address it later when I regress the memory core in other ways.
> (You might recall we had a conversation about this a little while
> back: http://patchwork.ozlabs.org/patch/129267/ )
I had a full memory wipe done during my vacation, so no.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [Qemu-devel] [PATCH 08/20] memory: store MemoryRegionSection pointers in phys_map
2012-03-08 11:11 ` Avi Kivity
@ 2012-03-08 11:25 ` Peter Maydell
0 siblings, 0 replies; 27+ messages in thread
From: Peter Maydell @ 2012-03-08 11:25 UTC (permalink / raw)
To: Avi Kivity; +Cc: qemu-devel
On 8 March 2012 11:11, Avi Kivity <avi@redhat.com> wrote:
> On 03/08/2012 12:09 PM, Peter Maydell wrote:
>> Is that exec.c comment now out of date?
>
> For subpage, I think so. It's still broken for page aligned regions
> that have low bits set in their offset:
>
> memory_region_init_io(foo, ..., 4097)
> memory_region_add_subregion(sysmem, 4095, foo);
>
> the 4095-4096 region will work, but the 4096-8191 region will not. I'll
> address it later when I'll regress the memory core in other ways.
Right, so as long as your region is less than a page in size it's
OK. That should be enough for me to do a cleanup I've been wanting
to do for ages: the v7M NVIC should really be modelled as a region
from 0xE000E100 .. 0xE000ECFF overlaying the general "system registers"
region from 0xE000E000 .. 0xE000EFFF, but at the moment there's
a nasty hack in the interrupt controller read/write routines that
says "if offset in this range then call a different function in
another device"...
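In memory API terms that cleanup might look something like this
(region names assumed, not actual code):

    /* generic system registers cover the whole 0xE000E000 page... */
    memory_region_add_subregion(armv7m_mem, 0xE000E000, &sysreg_mr);
    /* ...with the NVIC block overlaid on top at higher priority */
    memory_region_add_subregion_overlap(armv7m_mem, 0xE000E100,
                                        &nvic_mr, 1);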
-- PMM
^ permalink raw reply [flat|nested] 27+ messages in thread