* [Qemu-devel] [PATCH] exec: separate sections and nodes per address space
@ 2013-12-01 12:02 Marcel Apfelbaum
  2013-12-02 13:31 ` Michael S. Tsirkin
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Marcel Apfelbaum @ 2013-12-01 12:02 UTC
  To: qemu-devel; +Cc: peter.maydell, mst, jan.kiszka, pbonzini, afaerber, rth

Every address space has its own nodes and sections, but
they are all stored in the same global arrays.

This limits the number of devices that can be attached
to the guest to 20-30. It happens because:
 - The sections array is limited to 2^12 entries.
 - Main memory alone has at least 100 sections.
 - Each device address space is actually an alias of
   main memory, multiplying its number of nodes/sections.

Remove the limitation by using separate arrays of
nodes and sections for each address space.
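
A back-of-envelope sketch of the arithmetic (illustrative only,
with approximate figures taken from the points above):

    /* sections array capacity: 1 << 12 = 4096 entries
     *   (phys_section_add() asserts sections_nb < TARGET_PAGE_SIZE,
     *    which is 4096 on targets with 4 KiB pages)
     * sections per main-memory alias: ~100
     * => roughly 4096 / 100 = 40 address spaces fit; every device
     *    brings its own alias of main memory, so the limit is hit
     *    at about 20-30 devices once the remaining sections are
     *    accounted for.
     */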

Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
---
 exec.c | 151 ++++++++++++++++++++++++++++-------------------------------------
 1 file changed, 64 insertions(+), 87 deletions(-)

diff --git a/exec.c b/exec.c
index 95c4356..db2844c 100644
--- a/exec.c
+++ b/exec.c
@@ -90,13 +90,21 @@ struct PhysPageEntry {
 
 typedef PhysPageEntry Node[L2_SIZE];
 
+typedef struct PhysPageMap {
+    unsigned sections_nb;
+    unsigned sections_nb_alloc;
+    unsigned nodes_nb;
+    unsigned nodes_nb_alloc;
+    Node *nodes;
+    MemoryRegionSection *sections;
+} PhysPageMap;
+
 struct AddressSpaceDispatch {
     /* This is a multi-level map on the physical address space.
      * The bottom level has pointers to MemoryRegionSections.
      */
     PhysPageEntry phys_map;
-    Node *nodes;
-    MemoryRegionSection *sections;
+    PhysPageMap map;
     AddressSpace *as;
 };
 
@@ -113,18 +121,6 @@ typedef struct subpage_t {
 #define PHYS_SECTION_ROM 2
 #define PHYS_SECTION_WATCH 3
 
-typedef struct PhysPageMap {
-    unsigned sections_nb;
-    unsigned sections_nb_alloc;
-    unsigned nodes_nb;
-    unsigned nodes_nb_alloc;
-    Node *nodes;
-    MemoryRegionSection *sections;
-} PhysPageMap;
-
-static PhysPageMap *prev_map;
-static PhysPageMap next_map;
-
 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
 
 static void io_mem_init(void);
@@ -135,35 +131,32 @@ static MemoryRegion io_mem_watch;
 
 #if !defined(CONFIG_USER_ONLY)
 
-static void phys_map_node_reserve(unsigned nodes)
+static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
 {
-    if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
-        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
-                                            16);
-        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
-                                      next_map.nodes_nb + nodes);
-        next_map.nodes = g_renew(Node, next_map.nodes,
-                                 next_map.nodes_nb_alloc);
+    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
+        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
+        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
+        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
     }
 }
 
-static uint16_t phys_map_node_alloc(void)
+static uint16_t phys_map_node_alloc(PhysPageMap *map)
 {
     unsigned i;
     uint16_t ret;
 
-    ret = next_map.nodes_nb++;
+    ret = map->nodes_nb++;
     assert(ret != PHYS_MAP_NODE_NIL);
-    assert(ret != next_map.nodes_nb_alloc);
+    assert(ret != map->nodes_nb_alloc);
     for (i = 0; i < L2_SIZE; ++i) {
-        next_map.nodes[ret][i].is_leaf = 0;
-        next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
+        map->nodes[ret][i].is_leaf = 0;
+        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
     }
     return ret;
 }
 
-static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
-                                hwaddr *nb, uint16_t leaf,
+static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
+                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                 int level)
 {
     PhysPageEntry *p;
@@ -171,8 +164,8 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
     hwaddr step = (hwaddr)1 << (level * L2_BITS);
 
     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
-        lp->ptr = phys_map_node_alloc();
-        p = next_map.nodes[lp->ptr];
+        lp->ptr = phys_map_node_alloc(map);
+        p = map->nodes[lp->ptr];
         if (level == 0) {
             for (i = 0; i < L2_SIZE; i++) {
                 p[i].is_leaf = 1;
@@ -180,7 +173,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
             }
         }
     } else {
-        p = next_map.nodes[lp->ptr];
+        p = map->nodes[lp->ptr];
     }
     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
 
@@ -191,7 +184,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
             *index += step;
             *nb -= step;
         } else {
-            phys_page_set_level(lp, index, nb, leaf, level - 1);
+            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
         }
         ++lp;
     }
@@ -202,9 +195,9 @@ static void phys_page_set(AddressSpaceDispatch *d,
                           uint16_t leaf)
 {
     /* Wildly overreserve - it doesn't matter much. */
-    phys_map_node_reserve(3 * P_L2_LEVELS);
+    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
 
-    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 }
 
 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
@@ -237,10 +230,10 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
     subpage_t *subpage;
 
     section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
-                             d->nodes, d->sections);
+                             d->map.nodes, d->map.sections);
     if (resolve_subpage && section->mr->subpage) {
         subpage = container_of(section->mr, subpage_t, iomem);
-        section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
+        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
     }
     return section;
 }
@@ -708,7 +701,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
             iotlb |= PHYS_SECTION_ROM;
         }
     } else {
-        iotlb = section - address_space_memory.dispatch->sections;
+        iotlb = section - address_space_memory.dispatch->map.sections;
         iotlb += xlat;
     }
 
@@ -747,23 +740,23 @@ void phys_mem_set_alloc(void *(*alloc)(size_t))
     phys_mem_alloc = alloc;
 }
 
-static uint16_t phys_section_add(MemoryRegionSection *section)
+static uint16_t phys_section_add(PhysPageMap *map,
+                                 MemoryRegionSection *section)
 {
     /* The physical section number is ORed with a page-aligned
      * pointer to produce the iotlb entries.  Thus it should
      * never overflow into the page-aligned value.
      */
-    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
+    assert(map->sections_nb < TARGET_PAGE_SIZE);
 
-    if (next_map.sections_nb == next_map.sections_nb_alloc) {
-        next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
-                                         16);
-        next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
-                                    next_map.sections_nb_alloc);
+    if (map->sections_nb == map->sections_nb_alloc) {
+        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
+        map->sections = g_renew(MemoryRegionSection, map->sections,
+                                map->sections_nb_alloc);
     }
-    next_map.sections[next_map.sections_nb] = *section;
+    map->sections[map->sections_nb] = *section;
     memory_region_ref(section->mr);
-    return next_map.sections_nb++;
+    return map->sections_nb++;
 }
 
 static void phys_section_destroy(MemoryRegion *mr)
@@ -785,7 +778,6 @@ static void phys_sections_free(PhysPageMap *map)
     }
     g_free(map->sections);
     g_free(map->nodes);
-    g_free(map);
 }
 
 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
@@ -794,7 +786,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
     hwaddr base = section->offset_within_address_space
         & TARGET_PAGE_MASK;
     MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
-                                                   next_map.nodes, next_map.sections);
+                                                   d->map.nodes, d->map.sections);
     MemoryRegionSection subsection = {
         .offset_within_address_space = base,
         .size = int128_make64(TARGET_PAGE_SIZE),
@@ -807,13 +799,14 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
         subpage = subpage_init(d->as, base);
         subsection.mr = &subpage->iomem;
         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
-                      phys_section_add(&subsection));
+                      phys_section_add(&d->map, &subsection));
     } else {
         subpage = container_of(existing->mr, subpage_t, iomem);
     }
     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
     end = start + int128_get64(section->size) - 1;
-    subpage_register(subpage, start, end, phys_section_add(section));
+    subpage_register(subpage, start, end,
+                     phys_section_add(&d->map, section));
 }
 
 
@@ -821,7 +814,7 @@ static void register_multipage(AddressSpaceDispatch *d,
                                MemoryRegionSection *section)
 {
     hwaddr start_addr = section->offset_within_address_space;
-    uint16_t section_index = phys_section_add(section);
+    uint16_t section_index = phys_section_add(&d->map, section);
     uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                     TARGET_PAGE_BITS));
 
@@ -1605,7 +1598,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
     return mmio;
 }
 
-static uint16_t dummy_section(MemoryRegion *mr)
+static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
 {
     MemoryRegionSection section = {
         .mr = mr,
@@ -1614,12 +1607,13 @@ static uint16_t dummy_section(MemoryRegion *mr)
         .size = int128_2_64(),
     };
 
-    return phys_section_add(&section);
+    return phys_section_add(map, &section);
 }
 
 MemoryRegion *iotlb_to_region(hwaddr index)
 {
-    return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
+    return address_space_memory.dispatch->map.sections[
+           index & ~TARGET_PAGE_MASK].mr;
 }
 
 static void io_mem_init(void)
@@ -1636,7 +1630,17 @@ static void io_mem_init(void)
 static void mem_begin(MemoryListener *listener)
 {
     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
-    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
+    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
+    uint16_t n;
+
+    n = dummy_section(&d->map, &io_mem_unassigned);
+    assert(n == PHYS_SECTION_UNASSIGNED);
+    n = dummy_section(&d->map, &io_mem_notdirty);
+    assert(n == PHYS_SECTION_NOTDIRTY);
+    n = dummy_section(&d->map, &io_mem_rom);
+    assert(n == PHYS_SECTION_ROM);
+    n = dummy_section(&d->map, &io_mem_watch);
+    assert(n == PHYS_SECTION_WATCH);
 
     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
     d->as = as;
@@ -1649,37 +1653,12 @@ static void mem_commit(MemoryListener *listener)
     AddressSpaceDispatch *cur = as->dispatch;
     AddressSpaceDispatch *next = as->next_dispatch;
 
-    next->nodes = next_map.nodes;
-    next->sections = next_map.sections;
-
     as->dispatch = next;
-    g_free(cur);
-}
-
-static void core_begin(MemoryListener *listener)
-{
-    uint16_t n;
 
-    prev_map = g_new(PhysPageMap, 1);
-    *prev_map = next_map;
-
-    memset(&next_map, 0, sizeof(next_map));
-    n = dummy_section(&io_mem_unassigned);
-    assert(n == PHYS_SECTION_UNASSIGNED);
-    n = dummy_section(&io_mem_notdirty);
-    assert(n == PHYS_SECTION_NOTDIRTY);
-    n = dummy_section(&io_mem_rom);
-    assert(n == PHYS_SECTION_ROM);
-    n = dummy_section(&io_mem_watch);
-    assert(n == PHYS_SECTION_WATCH);
-}
-
-/* This listener's commit run after the other AddressSpaceDispatch listeners'.
- * All AddressSpaceDispatch instances have switched to the next map.
- */
-static void core_commit(MemoryListener *listener)
-{
-    phys_sections_free(prev_map);
+    if (cur) {
+        phys_sections_free(&cur->map);
+        g_free(cur);
+    }
 }
 
 static void tcg_commit(MemoryListener *listener)
@@ -1707,8 +1686,6 @@ static void core_log_global_stop(MemoryListener *listener)
 }
 
 static MemoryListener core_memory_listener = {
-    .begin = core_begin,
-    .commit = core_commit,
     .log_global_start = core_log_global_start,
     .log_global_stop = core_log_global_stop,
     .priority = 1,
-- 
1.8.3.1


* Re: [Qemu-devel] [PATCH] exec: separate sections and nodes per address space
  2013-12-01 12:02 [Qemu-devel] [PATCH] exec: separate sections and nodes per address space Marcel Apfelbaum
@ 2013-12-02 13:31 ` Michael S. Tsirkin
  2013-12-09 18:02 ` Paolo Bonzini
  2013-12-10 11:49 ` Michael S. Tsirkin
  2 siblings, 0 replies; 9+ messages in thread
From: Michael S. Tsirkin @ 2013-12-02 13:31 UTC
  To: Marcel Apfelbaum
  Cc: peter.maydell, jan.kiszka, qemu-devel, qemu-stable, pbonzini,
	afaerber, rth

On Sun, Dec 01, 2013 at 02:02:23PM +0200, Marcel Apfelbaum wrote:
> Every address space has its own nodes and sections, but
> they are all stored in the same global arrays.
> 
> This limits the number of devices that can be attached
> to the guest to 20-30. It happens because:
>  - The sections array is limited to 2^12 entries.
>  - Main memory alone has at least 100 sections.
>  - Each device address space is actually an alias of
>    main memory, multiplying its number of nodes/sections.
> 
> Remove the limitation by using separate arrays of
> nodes and sections for each address space.
> 
> Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>

Reviewed-by: Michael S. Tsirkin <mst@redhat.com>

I think this is actually needed for the stable branches:
this failure to support a large number of devices is
a regression since 1.3.0.

Cc: qemu-stable@nongnu.org

> [... full patch quoted verbatim; snipped ...]


* Re: [Qemu-devel] [PATCH] exec: separate sections and nodes per address space
  2013-12-01 12:02 [Qemu-devel] [PATCH] exec: separate sections and nodes per address space Marcel Apfelbaum
  2013-12-02 13:31 ` Michael S. Tsirkin
@ 2013-12-09 18:02 ` Paolo Bonzini
  2013-12-10 11:49 ` Michael S. Tsirkin
  2 siblings, 0 replies; 9+ messages in thread
From: Paolo Bonzini @ 2013-12-09 18:02 UTC
  To: Marcel Apfelbaum
  Cc: peter.maydell, mst, jan.kiszka, qemu-devel, afaerber, rth

On 01/12/2013 13:02, Marcel Apfelbaum wrote:
> Every address space has its own nodes and sections, but
> they are all stored in the same global arrays.
> 
> This limits the number of devices that can be attached
> to the guest to 20-30. It happens because:
>  - The sections array is limited to 2^12 entries.
>  - Main memory alone has at least 100 sections.
>  - Each device address space is actually an alias of
>    main memory, multiplying its number of nodes/sections.
> 
> Remove the limitation by using separate arrays of
> nodes and sections for each address space.
> 
> Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> [... full patch quoted verbatim; snipped ...]

Simpler than before.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>


* Re: [Qemu-devel] [PATCH] exec: separate sections and nodes per address space
  2013-12-01 12:02 [Qemu-devel] [PATCH] exec: separate sections and nodes per address space Marcel Apfelbaum
  2013-12-02 13:31 ` Michael S. Tsirkin
  2013-12-09 18:02 ` Paolo Bonzini
@ 2013-12-10 11:49 ` Michael S. Tsirkin
  2013-12-10 12:37   ` Marcel Apfelbaum
  2 siblings, 1 reply; 9+ messages in thread
From: Michael S. Tsirkin @ 2013-12-10 11:49 UTC
  To: Marcel Apfelbaum
  Cc: peter.maydell, jan.kiszka, qemu-devel, pbonzini, afaerber, rth

On Sun, Dec 01, 2013 at 02:02:23PM +0200, Marcel Apfelbaum wrote:
> Every address space has its own nodes and sections, but
> they are all stored in the same global arrays.
> 
> This limits the number of devices that can be attached
> to the guest to 20-30. It happens because:
>  - The sections array is limited to 2^12 entries.
>  - Main memory alone has at least 100 sections.
>  - Each device address space is actually an alias of
>    main memory, multiplying its number of nodes/sections.
> 
> Remove the limitation by using separate arrays of
> nodes and sections for each address space.
> 
> Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>

I applied this on the pci tree, pls verify it's applied correctly.

> [... full patch quoted verbatim; snipped ...]


* Re: [Qemu-devel] [PATCH] exec: separate sections and nodes per address space
  2013-12-10 11:49 ` Michael S. Tsirkin
@ 2013-12-10 12:37   ` Marcel Apfelbaum
  2013-12-10 12:38     ` Paolo Bonzini
  0 siblings, 1 reply; 9+ messages in thread
From: Marcel Apfelbaum @ 2013-12-10 12:37 UTC
  To: Michael S. Tsirkin
  Cc: peter.maydell, jan.kiszka, qemu-devel, pbonzini, afaerber, rth

On Tue, 2013-12-10 at 13:49 +0200, Michael S. Tsirkin wrote:
> On Sun, Dec 01, 2013 at 02:02:23PM +0200, Marcel Apfelbaum wrote:
> > Every address space has its own nodes and sections, but
> > they are all stored in the same global arrays.
> > 
> > This limits the number of devices that can be attached
> > to the guest to 20-30. It happens because:
> >  - The sections array is limited to 2^12 entries.
> >  - Main memory alone has at least 100 sections.
> >  - Each device address space is actually an alias of
> >    main memory, multiplying its number of nodes/sections.
> > 
> > Remove the limitation by using separate arrays of
> > nodes and sections for each address space.
> > 
> > Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
> 
> I applied this on the pci tree, pls verify it's applied correctly.

I saw one issue that can be solved by:

diff --git a/exec.c b/exec.c
index 07992e9..00526d1 100644
--- a/exec.c
+++ b/exec.c
@@ -269,7 +269,7 @@ static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
     DECLARE_BITMAP(compacted, nodes_nb);
 
     if (d->phys_map.skip) {
-        phys_page_compact(&d->phys_map, d->nodes, compacted);
+        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
     }
 }

Besides this it looks OK, but the branch does not compile, so I couldn't look into it further...
Can you please also merge my other patch
"memory.c: bugfix - ref counting mismatch in memory_region_find"?

It is already reviewed,
Thanks!
Marcel

> 
> > ---
> >  exec.c | 151 ++++++++++++++++++++++++++++-------------------------------------
> >  1 file changed, 64 insertions(+), 87 deletions(-)
> > 
> > diff --git a/exec.c b/exec.c
> > index 95c4356..db2844c 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -90,13 +90,21 @@ struct PhysPageEntry {
> >  
> >  typedef PhysPageEntry Node[L2_SIZE];
> >  
> > +typedef struct PhysPageMap {
> > +    unsigned sections_nb;
> > +    unsigned sections_nb_alloc;
> > +    unsigned nodes_nb;
> > +    unsigned nodes_nb_alloc;
> > +    Node *nodes;
> > +    MemoryRegionSection *sections;
> > +} PhysPageMap;
> > +
> >  struct AddressSpaceDispatch {
> >      /* This is a multi-level map on the physical address space.
> >       * The bottom level has pointers to MemoryRegionSections.
> >       */
> >      PhysPageEntry phys_map;
> > -    Node *nodes;
> > -    MemoryRegionSection *sections;
> > +    PhysPageMap map;
> >      AddressSpace *as;
> >  };
> >  
> > @@ -113,18 +121,6 @@ typedef struct subpage_t {
> >  #define PHYS_SECTION_ROM 2
> >  #define PHYS_SECTION_WATCH 3
> >  
> > -typedef struct PhysPageMap {
> > -    unsigned sections_nb;
> > -    unsigned sections_nb_alloc;
> > -    unsigned nodes_nb;
> > -    unsigned nodes_nb_alloc;
> > -    Node *nodes;
> > -    MemoryRegionSection *sections;
> > -} PhysPageMap;
> > -
> > -static PhysPageMap *prev_map;
> > -static PhysPageMap next_map;
> > -
> >  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
> >  
> >  static void io_mem_init(void);
> > @@ -135,35 +131,32 @@ static MemoryRegion io_mem_watch;
> >  
> >  #if !defined(CONFIG_USER_ONLY)
> >  
> > -static void phys_map_node_reserve(unsigned nodes)
> > +static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
> >  {
> > -    if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
> > -        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
> > -                                            16);
> > -        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
> > -                                      next_map.nodes_nb + nodes);
> > -        next_map.nodes = g_renew(Node, next_map.nodes,
> > -                                 next_map.nodes_nb_alloc);
> > +    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
> > +        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
> > +        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
> > +        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
> >      }
> >  }
> >  
> > -static uint16_t phys_map_node_alloc(void)
> > +static uint16_t phys_map_node_alloc(PhysPageMap *map)
> >  {
> >      unsigned i;
> >      uint16_t ret;
> >  
> > -    ret = next_map.nodes_nb++;
> > +    ret = map->nodes_nb++;
> >      assert(ret != PHYS_MAP_NODE_NIL);
> > -    assert(ret != next_map.nodes_nb_alloc);
> > +    assert(ret != map->nodes_nb_alloc);
> >      for (i = 0; i < L2_SIZE; ++i) {
> > -        next_map.nodes[ret][i].is_leaf = 0;
> > -        next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> > +        map->nodes[ret][i].is_leaf = 0;
> > +        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> >      }
> >      return ret;
> >  }
> >  
> > -static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
> > -                                hwaddr *nb, uint16_t leaf,
> > +static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
> > +                                hwaddr *index, hwaddr *nb, uint16_t leaf,
> >                                  int level)
> >  {
> >      PhysPageEntry *p;
> > @@ -171,8 +164,8 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
> >      hwaddr step = (hwaddr)1 << (level * L2_BITS);
> >  
> >      if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
> > -        lp->ptr = phys_map_node_alloc();
> > -        p = next_map.nodes[lp->ptr];
> > +        lp->ptr = phys_map_node_alloc(map);
> > +        p = map->nodes[lp->ptr];
> >          if (level == 0) {
> >              for (i = 0; i < L2_SIZE; i++) {
> >                  p[i].is_leaf = 1;
> > @@ -180,7 +173,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
> >              }
> >          }
> >      } else {
> > -        p = next_map.nodes[lp->ptr];
> > +        p = map->nodes[lp->ptr];
> >      }
> >      lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
> >  
> > @@ -191,7 +184,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
> >              *index += step;
> >              *nb -= step;
> >          } else {
> > -            phys_page_set_level(lp, index, nb, leaf, level - 1);
> > +            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
> >          }
> >          ++lp;
> >      }
> > @@ -202,9 +195,9 @@ static void phys_page_set(AddressSpaceDispatch *d,
> >                            uint16_t leaf)
> >  {
> >      /* Wildly overreserve - it doesn't matter much. */
> > -    phys_map_node_reserve(3 * P_L2_LEVELS);
> > +    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
> >  
> > -    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
> > +    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
> >  }
> >  
> >  static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
> > @@ -237,10 +230,10 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
> >      subpage_t *subpage;
> >  
> >      section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
> > -                             d->nodes, d->sections);
> > +                             d->map.nodes, d->map.sections);
> >      if (resolve_subpage && section->mr->subpage) {
> >          subpage = container_of(section->mr, subpage_t, iomem);
> > -        section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
> > +        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
> >      }
> >      return section;
> >  }
> > @@ -708,7 +701,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
> >              iotlb |= PHYS_SECTION_ROM;
> >          }
> >      } else {
> > -        iotlb = section - address_space_memory.dispatch->sections;
> > +        iotlb = section - address_space_memory.dispatch->map.sections;
> >          iotlb += xlat;
> >      }
> >  
> > @@ -747,23 +740,23 @@ void phys_mem_set_alloc(void *(*alloc)(size_t))
> >      phys_mem_alloc = alloc;
> >  }
> >  
> > -static uint16_t phys_section_add(MemoryRegionSection *section)
> > +static uint16_t phys_section_add(PhysPageMap *map,
> > +                                 MemoryRegionSection *section)
> >  {
> >      /* The physical section number is ORed with a page-aligned
> >       * pointer to produce the iotlb entries.  Thus it should
> >       * never overflow into the page-aligned value.
> >       */
> > -    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> > +    assert(map->sections_nb < TARGET_PAGE_SIZE);
> >  
> > -    if (next_map.sections_nb == next_map.sections_nb_alloc) {
> > -        next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
> > -                                         16);
> > -        next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
> > -                                    next_map.sections_nb_alloc);
> > +    if (map->sections_nb == map->sections_nb_alloc) {
> > +        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
> > +        map->sections = g_renew(MemoryRegionSection, map->sections,
> > +                                map->sections_nb_alloc);
> >      }
> > -    next_map.sections[next_map.sections_nb] = *section;
> > +    map->sections[map->sections_nb] = *section;
> >      memory_region_ref(section->mr);
> > -    return next_map.sections_nb++;
> > +    return map->sections_nb++;
> >  }
> >  
> >  static void phys_section_destroy(MemoryRegion *mr)
> > @@ -785,7 +778,6 @@ static void phys_sections_free(PhysPageMap *map)
> >      }
> >      g_free(map->sections);
> >      g_free(map->nodes);
> > -    g_free(map);
> >  }
> >  
> >  static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
> > @@ -794,7 +786,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
> >      hwaddr base = section->offset_within_address_space
> >          & TARGET_PAGE_MASK;
> >      MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
> > -                                                   next_map.nodes, next_map.sections);
> > +                                                   d->map.nodes, d->map.sections);
> >      MemoryRegionSection subsection = {
> >          .offset_within_address_space = base,
> >          .size = int128_make64(TARGET_PAGE_SIZE),
> > @@ -807,13 +799,14 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
> >          subpage = subpage_init(d->as, base);
> >          subsection.mr = &subpage->iomem;
> >          phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
> > -                      phys_section_add(&subsection));
> > +                      phys_section_add(&d->map, &subsection));
> >      } else {
> >          subpage = container_of(existing->mr, subpage_t, iomem);
> >      }
> >      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
> >      end = start + int128_get64(section->size) - 1;
> > -    subpage_register(subpage, start, end, phys_section_add(section));
> > +    subpage_register(subpage, start, end,
> > +                     phys_section_add(&d->map, section));
> >  }
> >  
> >  
> > @@ -821,7 +814,7 @@ static void register_multipage(AddressSpaceDispatch *d,
> >                                 MemoryRegionSection *section)
> >  {
> >      hwaddr start_addr = section->offset_within_address_space;
> > -    uint16_t section_index = phys_section_add(section);
> > +    uint16_t section_index = phys_section_add(&d->map, section);
> >      uint64_t num_pages = int128_get64(int128_rshift(section->size,
> >                                                      TARGET_PAGE_BITS));
> >  
> > @@ -1605,7 +1598,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
> >      return mmio;
> >  }
> >  
> > -static uint16_t dummy_section(MemoryRegion *mr)
> > +static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
> >  {
> >      MemoryRegionSection section = {
> >          .mr = mr,
> > @@ -1614,12 +1607,13 @@ static uint16_t dummy_section(MemoryRegion *mr)
> >          .size = int128_2_64(),
> >      };
> >  
> > -    return phys_section_add(&section);
> > +    return phys_section_add(map, &section);
> >  }
> >  
> >  MemoryRegion *iotlb_to_region(hwaddr index)
> >  {
> > -    return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
> > +    return address_space_memory.dispatch->map.sections[
> > +           index & ~TARGET_PAGE_MASK].mr;
> >  }
> >  
> >  static void io_mem_init(void)
> > @@ -1636,7 +1630,17 @@ static void io_mem_init(void)
> >  static void mem_begin(MemoryListener *listener)
> >  {
> >      AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
> > -    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
> > +    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
> > +    uint16_t n;
> > +
> > +    n = dummy_section(&d->map, &io_mem_unassigned);
> > +    assert(n == PHYS_SECTION_UNASSIGNED);
> > +    n = dummy_section(&d->map, &io_mem_notdirty);
> > +    assert(n == PHYS_SECTION_NOTDIRTY);
> > +    n = dummy_section(&d->map, &io_mem_rom);
> > +    assert(n == PHYS_SECTION_ROM);
> > +    n = dummy_section(&d->map, &io_mem_watch);
> > +    assert(n == PHYS_SECTION_WATCH);
> >  
> >      d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
> >      d->as = as;
> > @@ -1649,37 +1653,12 @@ static void mem_commit(MemoryListener *listener)
> >      AddressSpaceDispatch *cur = as->dispatch;
> >      AddressSpaceDispatch *next = as->next_dispatch;
> >  
> > -    next->nodes = next_map.nodes;
> > -    next->sections = next_map.sections;
> > -
> >      as->dispatch = next;
> > -    g_free(cur);
> > -}
> > -
> > -static void core_begin(MemoryListener *listener)
> > -{
> > -    uint16_t n;
> >  
> > -    prev_map = g_new(PhysPageMap, 1);
> > -    *prev_map = next_map;
> > -
> > -    memset(&next_map, 0, sizeof(next_map));
> > -    n = dummy_section(&io_mem_unassigned);
> > -    assert(n == PHYS_SECTION_UNASSIGNED);
> > -    n = dummy_section(&io_mem_notdirty);
> > -    assert(n == PHYS_SECTION_NOTDIRTY);
> > -    n = dummy_section(&io_mem_rom);
> > -    assert(n == PHYS_SECTION_ROM);
> > -    n = dummy_section(&io_mem_watch);
> > -    assert(n == PHYS_SECTION_WATCH);
> > -}
> > -
> > -/* This listener's commit run after the other AddressSpaceDispatch listeners'.
> > - * All AddressSpaceDispatch instances have switched to the next map.
> > - */
> > -static void core_commit(MemoryListener *listener)
> > -{
> > -    phys_sections_free(prev_map);
> > +    if (cur) {
> > +        phys_sections_free(&cur->map);
> > +        g_free(cur);
> > +    }
> >  }
> >  
> >  static void tcg_commit(MemoryListener *listener)
> > @@ -1707,8 +1686,6 @@ static void core_log_global_stop(MemoryListener *listener)
> >  }
> >  
> >  static MemoryListener core_memory_listener = {
> > -    .begin = core_begin,
> > -    .commit = core_commit,
> >      .log_global_start = core_log_global_start,
> >      .log_global_stop = core_log_global_stop,
> >      .priority = 1,
> > -- 
> > 1.8.3.1
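
A note on why nodes and sections travel as a pair throughout the patch: the
radix-tree entries are indices into those arrays, so a lookup is only
meaningful against the map the tree was built from. A simplified sketch of
phys_page_find() from exec.c of this era (not part of the diff; the
compression series this is being rebased onto reworks the walk, replacing
is_leaf with a skip field, as the fixup earlier in this message shows):

    static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
                                               Node *nodes,
                                               MemoryRegionSection *sections)
    {
        PhysPageEntry *p;
        int i;

        /* Descend one radix level per iteration until a leaf is reached. */
        for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
            if (lp.ptr == PHYS_MAP_NODE_NIL) {
                /* Unmapped range: fall back to the unassigned section. */
                return &sections[PHYS_SECTION_UNASSIGNED];
            }
            p = nodes[lp.ptr];
            lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
        }
        /* A leaf's ptr is an index into the sections array. */
        return &sections[lp.ptr];
    }

Mixing one dispatch's nodes with another's sections would turn valid-looking
indices into wild pointers, which is why phys_page_set_level(),
phys_section_add(), register_subpage() and the lookup paths now all take the
owning PhysPageMap (or reach it through d->map) instead of the old next_map
global.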


* Re: [Qemu-devel] [PATCH] exec: separate sections and nodes per address space
  2013-12-10 12:37   ` Marcel Apfelbaum
@ 2013-12-10 12:38     ` Paolo Bonzini
  2013-12-10 12:50       ` Marcel Apfelbaum
  0 siblings, 1 reply; 9+ messages in thread
From: Paolo Bonzini @ 2013-12-10 12:38 UTC (permalink / raw)
  To: Marcel Apfelbaum
  Cc: peter.maydell, Michael S. Tsirkin, jan.kiszka, qemu-devel,
	afaerber, rth

On 10/12/2013 13:37, Marcel Apfelbaum wrote:
> 
> Besides this it looks OK, but the branch does not compile, so I couldn't look into it further...
> Can you please also merge my other patch
> "memory.c: bugfix - ref counting mismatch in memory_region_find"?

Regarding the compilation issue, I suggest rebasing on top of Stefan's
pull request (commit ac9524d, qemu-iotests: filter QEMU monitor \r\n,
2013-11-14).
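
Concretely, that rebase amounts to something like the following (the branch
name here is hypothetical; ac9524d is assumed to have been fetched already):

    git checkout sections-per-as     # the branch carrying this patch
    git rebase ac9524d               # replay the series on top of Stefan's pull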

Paolo


* Re: [Qemu-devel] [PATCH] exec: separate sections and nodes per address space
  2013-12-10 12:38     ` Paolo Bonzini
@ 2013-12-10 12:50       ` Marcel Apfelbaum
  2013-12-10 16:48         ` Michael S. Tsirkin
  0 siblings, 1 reply; 9+ messages in thread
From: Marcel Apfelbaum @ 2013-12-10 12:50 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: peter.maydell, Michael S. Tsirkin, jan.kiszka, qemu-devel,
	afaerber, rth

On Tue, 2013-12-10 at 13:38 +0100, Paolo Bonzini wrote:
> On 10/12/2013 13:37, Marcel Apfelbaum wrote:
> > 
> > Besides this it looks OK, but the branch does not compile, so I couldn't look into it further...
> > Can you please also merge my other patch
> > "memory.c: bugfix - ref counting mismatch in memory_region_find"?
> 
> Regarding the compilation issue, I suggest rebasing on top of Stefan's
> pull request (commit ac9524d, qemu-iotests: filter QEMU monitor \r\n,
> 2013-11-14).
Sure, thanks, but Michael specifically asked for his pci branch,
probably because it conflicts with his patch
"exec: memory radix tree page level compression",
which lands long (~50 patches) after "qemu-iotests: filter QEMU monitor \r\n".

Rebasing on top of the latter resulted in 0 conflicts; the patch was unchanged...

Thanks,
Marcel


* Re: [Qemu-devel] [PATCH] exec: separate sections and nodes per address space
  2013-12-10 12:50       ` Marcel Apfelbaum
@ 2013-12-10 16:48         ` Michael S. Tsirkin
  2013-12-11  9:19           ` Marcel Apfelbaum
  0 siblings, 1 reply; 9+ messages in thread
From: Michael S. Tsirkin @ 2013-12-10 16:48 UTC (permalink / raw)
  To: Marcel Apfelbaum
  Cc: peter.maydell, jan.kiszka, qemu-devel, Paolo Bonzini, afaerber,
	rth

On Tue, Dec 10, 2013 at 02:50:32PM +0200, Marcel Apfelbaum wrote:
> On Tue, 2013-12-10 at 13:38 +0100, Paolo Bonzini wrote:
> > On 10/12/2013 13:37, Marcel Apfelbaum wrote:
> > > 
> > > Besides this it looks OK, but the branch does not compile, so I couldn't look into it further...
> > > Can you please also merge my other patch
> > > "memory.c: bugfix - ref counting mismatch in memory_region_find"?
> > 
> > Regarding the compilation issue, I suggest rebasing on top of Stefan's
> > pull request (commit ac9524d, qemu-iotests: filter QEMU monitor \r\n,
> > 2013-11-14).
> Sure, thanks, but Michael specifically asked for his pci branch,
> probably because it conflicts with his patch
> "exec: memory radix tree page level compression",
> which lands long (~50 patches) after "qemu-iotests: filter QEMU monitor \r\n".

Exactly.
Pls take a look, I think I fixed the issues with the rebase.


* Re: [Qemu-devel] [PATCH] exec: separate sections and nodes per address space
  2013-12-10 16:48         ` Michael S. Tsirkin
@ 2013-12-11  9:19           ` Marcel Apfelbaum
  0 siblings, 0 replies; 9+ messages in thread
From: Marcel Apfelbaum @ 2013-12-11  9:19 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: peter.maydell, jan.kiszka, qemu-devel, Paolo Bonzini, afaerber,
	rth

On Tue, 2013-12-10 at 18:48 +0200, Michael S. Tsirkin wrote:
> On Tue, Dec 10, 2013 at 02:50:32PM +0200, Marcel Apfelbaum wrote:
> > On Tue, 2013-12-10 at 13:38 +0100, Paolo Bonzini wrote:
> > > On 10/12/2013 13:37, Marcel Apfelbaum wrote:
> > > > 
> > > > Besides this it looks OK, but the branch does not compile, so I couldn't look into it further...
> > > > Can you please also merge my other patch
> > > > "memory.c: bugfix - ref counting mismatch in memory_region_find"?
> > > 
> > > Regarding the compilation issue, I suggest rebasing on top of Stefan's
> > > pull request (commit ac9524d, qemu-iotests: filter QEMU monitor \r\n,
> > > 2013-11-14).
> > Sure, thanks, but Michael specifically asked for his pci branch,
> > probably because it conflicts with his patch
> > "exec: memory radix tree page level compression",
> > which lands long (~50 patches) after "qemu-iotests: filter QEMU monitor \r\n".
> 
> Exactly.
> Pls take a look, I think I fixed the issues with the rebase.
I checked the patch and the rebase is OK, thanks!
Marcel


end of thread (newest message: 2013-12-11 9:19 UTC)

Thread overview: 9+ messages
2013-12-01 12:02 [Qemu-devel] [PATCH] exec: separate sections and nodes per address space Marcel Apfelbaum
2013-12-02 13:31 ` Michael S. Tsirkin
2013-12-09 18:02 ` Paolo Bonzini
2013-12-10 11:49 ` Michael S. Tsirkin
2013-12-10 12:37   ` Marcel Apfelbaum
2013-12-10 12:38     ` Paolo Bonzini
2013-12-10 12:50       ` Marcel Apfelbaum
2013-12-10 16:48         ` Michael S. Tsirkin
2013-12-11  9:19           ` Marcel Apfelbaum
