From: Michael Roth <mdroth@linux.vnet.ibm.com>
To: qemu-devel@nongnu.org
Cc: lersek@redhat.com, qemu-stable@nongnu.org, Petar.Jovanovic@imgtec.com
Subject: [Qemu-devel] [PATCH 25/51] exec: separate sections and nodes per address space
Date: Fri, 21 Feb 2014 02:17:01 -0600 [thread overview]
Message-ID: <1392970647-21528-26-git-send-email-mdroth@linux.vnet.ibm.com> (raw)
In-Reply-To: <1392970647-21528-1-git-send-email-mdroth@linux.vnet.ibm.com>
From: Marcel Apfelbaum <marcel.a@redhat.com>
Every address space has its own nodes and sections, but
it uses the same global arrays of nodes/section.
This limits the number of devices that can be attached
to the guest to 20-30 devices. It happens because:
- The sections array is limited to 2^12 entries.
- The main memory has at least 100 sections.
- Each device address space is actually an alias to
main memory, multiplying its number of nodes/sections.
Remove the limitation by using separate arrays of
nodes and sections for each address space.
Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 53cb28cbfea038f8ad50132dc8a684e638c7d48b)
Conflicts:
exec.c
*removed dependency on b35ba30
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
exec.c | 151 +++++++++++++++++++++++++++-------------------------------------
1 file changed, 64 insertions(+), 87 deletions(-)
diff --git a/exec.c b/exec.c
index 283b196..df94429 100644
--- a/exec.c
+++ b/exec.c
@@ -99,13 +99,21 @@ struct PhysPageEntry {
typedef PhysPageEntry Node[P_L2_SIZE];
+typedef struct PhysPageMap {
+ unsigned sections_nb;
+ unsigned sections_nb_alloc;
+ unsigned nodes_nb;
+ unsigned nodes_nb_alloc;
+ Node *nodes;
+ MemoryRegionSection *sections;
+} PhysPageMap;
+
struct AddressSpaceDispatch {
/* This is a multi-level map on the physical address space.
* The bottom level has pointers to MemoryRegionSections.
*/
PhysPageEntry phys_map;
- Node *nodes;
- MemoryRegionSection *sections;
+ PhysPageMap map;
AddressSpace *as;
};
@@ -122,18 +130,6 @@ typedef struct subpage_t {
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3
-typedef struct PhysPageMap {
- unsigned sections_nb;
- unsigned sections_nb_alloc;
- unsigned nodes_nb;
- unsigned nodes_nb_alloc;
- Node *nodes;
- MemoryRegionSection *sections;
-} PhysPageMap;
-
-static PhysPageMap *prev_map;
-static PhysPageMap next_map;
-
#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
static void io_mem_init(void);
@@ -144,35 +140,32 @@ static MemoryRegion io_mem_watch;
#if !defined(CONFIG_USER_ONLY)
-static void phys_map_node_reserve(unsigned nodes)
+static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
- if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
- next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
- 16);
- next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
- next_map.nodes_nb + nodes);
- next_map.nodes = g_renew(Node, next_map.nodes,
- next_map.nodes_nb_alloc);
+ if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
+ map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
+ map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
+ map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
}
}
-static uint16_t phys_map_node_alloc(void)
+static uint16_t phys_map_node_alloc(PhysPageMap *map)
{
unsigned i;
uint16_t ret;
- ret = next_map.nodes_nb++;
+ ret = map->nodes_nb++;
assert(ret != PHYS_MAP_NODE_NIL);
- assert(ret != next_map.nodes_nb_alloc);
+ assert(ret != map->nodes_nb_alloc);
for (i = 0; i < P_L2_SIZE; ++i) {
- next_map.nodes[ret][i].skip = 1;
- next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
+ map->nodes[ret][i].skip = 1;
+ map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
}
return ret;
}
-static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
- hwaddr *nb, uint16_t leaf,
+static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
+ hwaddr *index, hwaddr *nb, uint16_t leaf,
int level)
{
PhysPageEntry *p;
@@ -180,8 +173,8 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
- lp->ptr = phys_map_node_alloc();
- p = next_map.nodes[lp->ptr];
+ lp->ptr = phys_map_node_alloc(map);
+ p = map->nodes[lp->ptr];
if (level == 0) {
for (i = 0; i < P_L2_SIZE; i++) {
p[i].skip = 0;
@@ -189,7 +182,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
}
}
} else {
- p = next_map.nodes[lp->ptr];
+ p = map->nodes[lp->ptr];
}
lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
@@ -200,7 +193,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
*index += step;
*nb -= step;
} else {
- phys_page_set_level(lp, index, nb, leaf, level - 1);
+ phys_page_set_level(map, lp, index, nb, leaf, level - 1);
}
++lp;
}
@@ -211,9 +204,9 @@ static void phys_page_set(AddressSpaceDispatch *d,
uint16_t leaf)
{
/* Wildly overreserve - it doesn't matter much. */
- phys_map_node_reserve(3 * P_L2_LEVELS);
+ phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
- phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+ phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
@@ -246,10 +239,10 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
MemoryRegionSection *section;
subpage_t *subpage;
- section = phys_page_find(d->phys_map, addr, d->nodes, d->sections);
+ section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
if (resolve_subpage && section->mr->subpage) {
subpage = container_of(section->mr, subpage_t, iomem);
- section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
+ section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
}
return section;
}
@@ -717,7 +710,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
iotlb |= PHYS_SECTION_ROM;
}
} else {
- iotlb = section - address_space_memory.dispatch->sections;
+ iotlb = section - address_space_memory.dispatch->map.sections;
iotlb += xlat;
}
@@ -756,23 +749,23 @@ void phys_mem_set_alloc(void *(*alloc)(size_t))
phys_mem_alloc = alloc;
}
-static uint16_t phys_section_add(MemoryRegionSection *section)
+static uint16_t phys_section_add(PhysPageMap *map,
+ MemoryRegionSection *section)
{
/* The physical section number is ORed with a page-aligned
* pointer to produce the iotlb entries. Thus it should
* never overflow into the page-aligned value.
*/
- assert(next_map.sections_nb < TARGET_PAGE_SIZE);
+ assert(map->sections_nb < TARGET_PAGE_SIZE);
- if (next_map.sections_nb == next_map.sections_nb_alloc) {
- next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
- 16);
- next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
- next_map.sections_nb_alloc);
+ if (map->sections_nb == map->sections_nb_alloc) {
+ map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
+ map->sections = g_renew(MemoryRegionSection, map->sections,
+ map->sections_nb_alloc);
}
- next_map.sections[next_map.sections_nb] = *section;
+ map->sections[map->sections_nb] = *section;
memory_region_ref(section->mr);
- return next_map.sections_nb++;
+ return map->sections_nb++;
}
static void phys_section_destroy(MemoryRegion *mr)
@@ -794,7 +787,6 @@ static void phys_sections_free(PhysPageMap *map)
}
g_free(map->sections);
g_free(map->nodes);
- g_free(map);
}
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
@@ -803,7 +795,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
hwaddr base = section->offset_within_address_space
& TARGET_PAGE_MASK;
MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
- next_map.nodes, next_map.sections);
+ d->map.nodes, d->map.sections);
MemoryRegionSection subsection = {
.offset_within_address_space = base,
.size = int128_make64(TARGET_PAGE_SIZE),
@@ -816,13 +808,14 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
subpage = subpage_init(d->as, base);
subsection.mr = &subpage->iomem;
phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
- phys_section_add(&subsection));
+ phys_section_add(&d->map, &subsection));
} else {
subpage = container_of(existing->mr, subpage_t, iomem);
}
start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
end = start + int128_get64(section->size) - 1;
- subpage_register(subpage, start, end, phys_section_add(section));
+ subpage_register(subpage, start, end,
+ phys_section_add(&d->map, section));
}
@@ -830,7 +823,7 @@ static void register_multipage(AddressSpaceDispatch *d,
MemoryRegionSection *section)
{
hwaddr start_addr = section->offset_within_address_space;
- uint16_t section_index = phys_section_add(section);
+ uint16_t section_index = phys_section_add(&d->map, section);
uint64_t num_pages = int128_get64(int128_rshift(section->size,
TARGET_PAGE_BITS));
@@ -1614,7 +1607,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
return mmio;
}
-static uint16_t dummy_section(MemoryRegion *mr)
+static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
{
MemoryRegionSection section = {
.mr = mr,
@@ -1623,12 +1616,13 @@ static uint16_t dummy_section(MemoryRegion *mr)
.size = int128_2_64(),
};
- return phys_section_add(§ion);
+ return phys_section_add(map, §ion);
}
MemoryRegion *iotlb_to_region(hwaddr index)
{
- return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
+ return address_space_memory.dispatch->map.sections[
+ index & ~TARGET_PAGE_MASK].mr;
}
static void io_mem_init(void)
@@ -1645,7 +1639,17 @@ static void io_mem_init(void)
static void mem_begin(MemoryListener *listener)
{
AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
- AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
+ AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
+ uint16_t n;
+
+ n = dummy_section(&d->map, &io_mem_unassigned);
+ assert(n == PHYS_SECTION_UNASSIGNED);
+ n = dummy_section(&d->map, &io_mem_notdirty);
+ assert(n == PHYS_SECTION_NOTDIRTY);
+ n = dummy_section(&d->map, &io_mem_rom);
+ assert(n == PHYS_SECTION_ROM);
+ n = dummy_section(&d->map, &io_mem_watch);
+ assert(n == PHYS_SECTION_WATCH);
d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
d->as = as;
@@ -1658,37 +1662,12 @@ static void mem_commit(MemoryListener *listener)
AddressSpaceDispatch *cur = as->dispatch;
AddressSpaceDispatch *next = as->next_dispatch;
- next->nodes = next_map.nodes;
- next->sections = next_map.sections;
-
as->dispatch = next;
- g_free(cur);
-}
-
-static void core_begin(MemoryListener *listener)
-{
- uint16_t n;
- prev_map = g_new(PhysPageMap, 1);
- *prev_map = next_map;
-
- memset(&next_map, 0, sizeof(next_map));
- n = dummy_section(&io_mem_unassigned);
- assert(n == PHYS_SECTION_UNASSIGNED);
- n = dummy_section(&io_mem_notdirty);
- assert(n == PHYS_SECTION_NOTDIRTY);
- n = dummy_section(&io_mem_rom);
- assert(n == PHYS_SECTION_ROM);
- n = dummy_section(&io_mem_watch);
- assert(n == PHYS_SECTION_WATCH);
-}
-
-/* This listener's commit run after the other AddressSpaceDispatch listeners'.
- * All AddressSpaceDispatch instances have switched to the next map.
- */
-static void core_commit(MemoryListener *listener)
-{
- phys_sections_free(prev_map);
+ if (cur) {
+ phys_sections_free(&cur->map);
+ g_free(cur);
+ }
}
static void tcg_commit(MemoryListener *listener)
@@ -1716,8 +1695,6 @@ static void core_log_global_stop(MemoryListener *listener)
}
static MemoryListener core_memory_listener = {
- .begin = core_begin,
- .commit = core_commit,
.log_global_start = core_log_global_start,
.log_global_stop = core_log_global_stop,
.priority = 1,
--
1.7.9.5
next prev parent reply other threads:[~2014-02-21 8:18 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-02-21 8:16 [Qemu-devel] Patch Round-up for stable 1.7.1, freeze on 2013-02-27 Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 01/51] virtio-ccw: move virtio_ccw_stop_ioeventfd to virtio_ccw_busdev_unplug Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 02/51] virtio-bus: remove vdev field Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 03/51] virtio-ccw: " Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 04/51] virtio-pci: " Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 05/51] virtio-bus: cleanup plug/unplug interface Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 06/51] virtio-blk: switch exit callback to VirtioDeviceClass Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 07/51] virtio-serial: " Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 08/51] virtio-net: " Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 09/51] virtio-scsi: " Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 10/51] virtio-balloon: " Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 11/51] virtio-rng: " Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 12/51] virtio-pci: add device_unplugged callback Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 13/51] scsi-bus: fix transfer length and direction for VERIFY command Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 14/51] scsi-disk: fix VERIFY emulation Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 15/51] intel-hda: fix position buffer Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 16/51] memory.c: bugfix - ref counting mismatch in memory_region_find Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 17/51] qom: Split out object and class caches Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 18/51] migration: Fix rate limit Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 19/51] vl: add missing transition debug->finish_migrate Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 20/51] x86: only allow real mode to access 32bit without LMA Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 21/51] qdev-monitor: Avoid device_add crashing on non-device driver name Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 22/51] split definitions for exec.c and translate-all.c radix trees Michael Roth
2014-02-21 8:16 ` [Qemu-devel] [PATCH 23/51] exec: replace leaf with skip Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 24/51] exec: pass hw address to phys_page_find Michael Roth
2014-02-21 8:17 ` Michael Roth [this message]
2014-02-21 8:17 ` [Qemu-devel] [PATCH 26/51] pc: map PCI address space as catchall region for not mapped addresses Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 27/51] piix: fix 32bit pci hole Michael Roth
2014-02-21 14:15 ` Laszlo Ersek
2014-02-21 8:17 ` [Qemu-devel] [PATCH 28/51] target-mips: fix 64-bit FPU config for user-mode emulation Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 29/51] linux-user: pass correct parameter to do_shmctl() Michael Roth
2014-02-21 9:58 ` Laurent Vivier
2014-02-21 20:14 ` Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 30/51] linux-user: create target_structs header to place ipc_perm and shmid_ds Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 31/51] Fix QEMU build on OpenBSD on x86 archs Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 32/51] tcg/optimize: fix known-zero bits for right shift ops Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 33/51] hpet: fix build with CONFIG_HPET off Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 34/51] block/iscsi: use a bh to schedule co reentrance Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 35/51] qemu_opts_parse(): always check return value Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 36/51] s390x/kvm: Fix diagnose handling Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 37/51] seccomp: exit if seccomp_init() fails Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 38/51] mainstone: Fix duplicate array values for key 'space' Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 39/51] migration: qmp_migrate(): keep working after syntax error Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 40/51] vfio-pci: Release all MSI-X vectors when disabled Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 41/51] block/curl: Implement the libcurl timer callback interface Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 42/51] scsi: Support TEST UNIT READY in the dummy LUN0 Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 43/51] scsi: Assign cancel_io vector for scsi_disk_emulate_ops Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 44/51] virtio-scsi: Cleanup of I/Os that never started Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 45/51] virtio-scsi: Prevent assertion on missed events Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 46/51] KVM: Retry KVM_CREATE_VM on EINTR Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 47/51] i386: Add missing include file for QEMU_PACKED Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 48/51] linux-user: Fix trampoline code for CRIS Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 50/51] memory: fix limiting of translation at a page boundary Michael Roth
2014-02-21 8:17 ` [Qemu-devel] [PATCH 51/51] tcg-arm: The shift count of op_rotl_i32 is in args[2] not args[1] Michael Roth
2014-02-21 10:23 ` [Qemu-devel] Patch Round-up for stable 1.7.1, freeze on 2013-02-27 Paolo Bonzini
2014-03-04 17:04 ` Laszlo Ersek
2014-03-04 17:46 ` Petar Jovanovic
2014-03-04 17:54 ` Michael Roth
2014-03-12 10:53 ` [Qemu-devel] [Qemu-stable] " Michael Tokarev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1392970647-21528-26-git-send-email-mdroth@linux.vnet.ibm.com \
--to=mdroth@linux.vnet.ibm.com \
--cc=Petar.Jovanovic@imgtec.com \
--cc=lersek@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=qemu-stable@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).