All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Marcel Apfelbaum <marcel.a@redhat.com>
Cc: peter.maydell@linaro.org, jan.kiszka@siemens.com,
	qemu-devel@nongnu.org, pbonzini@redhat.com, afaerber@suse.de,
	rth@twiddle.net
Subject: Re: [Qemu-devel] [PATCH] exec: separate sections and nodes per address space
Date: Tue, 10 Dec 2013 13:49:24 +0200	[thread overview]
Message-ID: <20131210114924.GA25293@redhat.com> (raw)
In-Reply-To: <1385899343-24169-1-git-send-email-marcel.a@redhat.com>

On Sun, Dec 01, 2013 at 02:02:23PM +0200, Marcel Apfelbaum wrote:
> Every address space has its own nodes and sections, but
> it uses the same global arrays of nodes/section.
> 
> This limits the number of devices that can be attached
> to the guest to 20-30 devices. It happens because:
>  - The sections array is limited to 2^12 entries.
>  - The main memory has at least 100 sections.
>  - Each device address space is actually an alias to
>    main memory, multiplying its number of nodes/sections.
> 
> Remove the limitation by using separate arrays of
> nodes and sections for each address space.
> 
> Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>

I applied this on the pci tree, pls verify it's applied correctly.

> ---
>  exec.c | 151 ++++++++++++++++++++++++++++-------------------------------------
>  1 file changed, 64 insertions(+), 87 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 95c4356..db2844c 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -90,13 +90,21 @@ struct PhysPageEntry {
>  
>  typedef PhysPageEntry Node[L2_SIZE];
>  
> +typedef struct PhysPageMap {
> +    unsigned sections_nb;
> +    unsigned sections_nb_alloc;
> +    unsigned nodes_nb;
> +    unsigned nodes_nb_alloc;
> +    Node *nodes;
> +    MemoryRegionSection *sections;
> +} PhysPageMap;
> +
>  struct AddressSpaceDispatch {
>      /* This is a multi-level map on the physical address space.
>       * The bottom level has pointers to MemoryRegionSections.
>       */
>      PhysPageEntry phys_map;
> -    Node *nodes;
> -    MemoryRegionSection *sections;
> +    PhysPageMap map;
>      AddressSpace *as;
>  };
>  
> @@ -113,18 +121,6 @@ typedef struct subpage_t {
>  #define PHYS_SECTION_ROM 2
>  #define PHYS_SECTION_WATCH 3
>  
> -typedef struct PhysPageMap {
> -    unsigned sections_nb;
> -    unsigned sections_nb_alloc;
> -    unsigned nodes_nb;
> -    unsigned nodes_nb_alloc;
> -    Node *nodes;
> -    MemoryRegionSection *sections;
> -} PhysPageMap;
> -
> -static PhysPageMap *prev_map;
> -static PhysPageMap next_map;
> -
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>  
>  static void io_mem_init(void);
> @@ -135,35 +131,32 @@ static MemoryRegion io_mem_watch;
>  
>  #if !defined(CONFIG_USER_ONLY)
>  
> -static void phys_map_node_reserve(unsigned nodes)
> +static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
>  {
> -    if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
> -        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
> -                                            16);
> -        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
> -                                      next_map.nodes_nb + nodes);
> -        next_map.nodes = g_renew(Node, next_map.nodes,
> -                                 next_map.nodes_nb_alloc);
> +    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
> +        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
> +        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
> +        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
>      }
>  }
>  
> -static uint16_t phys_map_node_alloc(void)
> +static uint16_t phys_map_node_alloc(PhysPageMap *map)
>  {
>      unsigned i;
>      uint16_t ret;
>  
> -    ret = next_map.nodes_nb++;
> +    ret = map->nodes_nb++;
>      assert(ret != PHYS_MAP_NODE_NIL);
> -    assert(ret != next_map.nodes_nb_alloc);
> +    assert(ret != map->nodes_nb_alloc);
>      for (i = 0; i < L2_SIZE; ++i) {
> -        next_map.nodes[ret][i].is_leaf = 0;
> -        next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> +        map->nodes[ret][i].is_leaf = 0;
> +        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>      }
>      return ret;
>  }
>  
> -static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
> -                                hwaddr *nb, uint16_t leaf,
> +static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
> +                                hwaddr *index, hwaddr *nb, uint16_t leaf,
>                                  int level)
>  {
>      PhysPageEntry *p;
> @@ -171,8 +164,8 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
>      hwaddr step = (hwaddr)1 << (level * L2_BITS);
>  
>      if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
> -        lp->ptr = phys_map_node_alloc();
> -        p = next_map.nodes[lp->ptr];
> +        lp->ptr = phys_map_node_alloc(map);
> +        p = map->nodes[lp->ptr];
>          if (level == 0) {
>              for (i = 0; i < L2_SIZE; i++) {
>                  p[i].is_leaf = 1;
> @@ -180,7 +173,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
>              }
>          }
>      } else {
> -        p = next_map.nodes[lp->ptr];
> +        p = map->nodes[lp->ptr];
>      }
>      lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
>  
> @@ -191,7 +184,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
>              *index += step;
>              *nb -= step;
>          } else {
> -            phys_page_set_level(lp, index, nb, leaf, level - 1);
> +            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
>          }
>          ++lp;
>      }
> @@ -202,9 +195,9 @@ static void phys_page_set(AddressSpaceDispatch *d,
>                            uint16_t leaf)
>  {
>      /* Wildly overreserve - it doesn't matter much. */
> -    phys_map_node_reserve(3 * P_L2_LEVELS);
> +    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
>  
> -    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
> +    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
>  }
>  
>  static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
> @@ -237,10 +230,10 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
>      subpage_t *subpage;
>  
>      section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
> -                             d->nodes, d->sections);
> +                             d->map.nodes, d->map.sections);
>      if (resolve_subpage && section->mr->subpage) {
>          subpage = container_of(section->mr, subpage_t, iomem);
> -        section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
> +        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
>      }
>      return section;
>  }
> @@ -708,7 +701,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
>              iotlb |= PHYS_SECTION_ROM;
>          }
>      } else {
> -        iotlb = section - address_space_memory.dispatch->sections;
> +        iotlb = section - address_space_memory.dispatch->map.sections;
>          iotlb += xlat;
>      }
>  
> @@ -747,23 +740,23 @@ void phys_mem_set_alloc(void *(*alloc)(size_t))
>      phys_mem_alloc = alloc;
>  }
>  
> -static uint16_t phys_section_add(MemoryRegionSection *section)
> +static uint16_t phys_section_add(PhysPageMap *map,
> +                                 MemoryRegionSection *section)
>  {
>      /* The physical section number is ORed with a page-aligned
>       * pointer to produce the iotlb entries.  Thus it should
>       * never overflow into the page-aligned value.
>       */
> -    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> +    assert(map->sections_nb < TARGET_PAGE_SIZE);
>  
> -    if (next_map.sections_nb == next_map.sections_nb_alloc) {
> -        next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
> -                                         16);
> -        next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
> -                                    next_map.sections_nb_alloc);
> +    if (map->sections_nb == map->sections_nb_alloc) {
> +        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
> +        map->sections = g_renew(MemoryRegionSection, map->sections,
> +                                map->sections_nb_alloc);
>      }
> -    next_map.sections[next_map.sections_nb] = *section;
> +    map->sections[map->sections_nb] = *section;
>      memory_region_ref(section->mr);
> -    return next_map.sections_nb++;
> +    return map->sections_nb++;
>  }
>  
>  static void phys_section_destroy(MemoryRegion *mr)
> @@ -785,7 +778,6 @@ static void phys_sections_free(PhysPageMap *map)
>      }
>      g_free(map->sections);
>      g_free(map->nodes);
> -    g_free(map);
>  }
>  
>  static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
> @@ -794,7 +786,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
>      hwaddr base = section->offset_within_address_space
>          & TARGET_PAGE_MASK;
>      MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
> -                                                   next_map.nodes, next_map.sections);
> +                                                   d->map.nodes, d->map.sections);
>      MemoryRegionSection subsection = {
>          .offset_within_address_space = base,
>          .size = int128_make64(TARGET_PAGE_SIZE),
> @@ -807,13 +799,14 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
>          subpage = subpage_init(d->as, base);
>          subsection.mr = &subpage->iomem;
>          phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
> -                      phys_section_add(&subsection));
> +                      phys_section_add(&d->map, &subsection));
>      } else {
>          subpage = container_of(existing->mr, subpage_t, iomem);
>      }
>      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
>      end = start + int128_get64(section->size) - 1;
> -    subpage_register(subpage, start, end, phys_section_add(section));
> +    subpage_register(subpage, start, end,
> +                     phys_section_add(&d->map, section));
>  }
>  
>  
> @@ -821,7 +814,7 @@ static void register_multipage(AddressSpaceDispatch *d,
>                                 MemoryRegionSection *section)
>  {
>      hwaddr start_addr = section->offset_within_address_space;
> -    uint16_t section_index = phys_section_add(section);
> +    uint16_t section_index = phys_section_add(&d->map, section);
>      uint64_t num_pages = int128_get64(int128_rshift(section->size,
>                                                      TARGET_PAGE_BITS));
>  
> @@ -1605,7 +1598,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
>      return mmio;
>  }
>  
> -static uint16_t dummy_section(MemoryRegion *mr)
> +static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
>  {
>      MemoryRegionSection section = {
>          .mr = mr,
> @@ -1614,12 +1607,13 @@ static uint16_t dummy_section(MemoryRegion *mr)
>          .size = int128_2_64(),
>      };
>  
> -    return phys_section_add(&section);
> +    return phys_section_add(map, &section);
>  }
>  
>  MemoryRegion *iotlb_to_region(hwaddr index)
>  {
> -    return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
> +    return address_space_memory.dispatch->map.sections[
> +           index & ~TARGET_PAGE_MASK].mr;
>  }
>  
>  static void io_mem_init(void)
> @@ -1636,7 +1630,17 @@ static void io_mem_init(void)
>  static void mem_begin(MemoryListener *listener)
>  {
>      AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
> -    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
> +    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
> +    uint16_t n;
> +
> +    n = dummy_section(&d->map, &io_mem_unassigned);
> +    assert(n == PHYS_SECTION_UNASSIGNED);
> +    n = dummy_section(&d->map, &io_mem_notdirty);
> +    assert(n == PHYS_SECTION_NOTDIRTY);
> +    n = dummy_section(&d->map, &io_mem_rom);
> +    assert(n == PHYS_SECTION_ROM);
> +    n = dummy_section(&d->map, &io_mem_watch);
> +    assert(n == PHYS_SECTION_WATCH);
>  
>      d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>      d->as = as;
> @@ -1649,37 +1653,12 @@ static void mem_commit(MemoryListener *listener)
>      AddressSpaceDispatch *cur = as->dispatch;
>      AddressSpaceDispatch *next = as->next_dispatch;
>  
> -    next->nodes = next_map.nodes;
> -    next->sections = next_map.sections;
> -
>      as->dispatch = next;
> -    g_free(cur);
> -}
> -
> -static void core_begin(MemoryListener *listener)
> -{
> -    uint16_t n;
>  
> -    prev_map = g_new(PhysPageMap, 1);
> -    *prev_map = next_map;
> -
> -    memset(&next_map, 0, sizeof(next_map));
> -    n = dummy_section(&io_mem_unassigned);
> -    assert(n == PHYS_SECTION_UNASSIGNED);
> -    n = dummy_section(&io_mem_notdirty);
> -    assert(n == PHYS_SECTION_NOTDIRTY);
> -    n = dummy_section(&io_mem_rom);
> -    assert(n == PHYS_SECTION_ROM);
> -    n = dummy_section(&io_mem_watch);
> -    assert(n == PHYS_SECTION_WATCH);
> -}
> -
> -/* This listener's commit run after the other AddressSpaceDispatch listeners'.
> - * All AddressSpaceDispatch instances have switched to the next map.
> - */
> -static void core_commit(MemoryListener *listener)
> -{
> -    phys_sections_free(prev_map);
> +    if (cur) {
> +        phys_sections_free(&cur->map);
> +        g_free(cur);
> +    }
>  }
>  
>  static void tcg_commit(MemoryListener *listener)
> @@ -1707,8 +1686,6 @@ static void core_log_global_stop(MemoryListener *listener)
>  }
>  
>  static MemoryListener core_memory_listener = {
> -    .begin = core_begin,
> -    .commit = core_commit,
>      .log_global_start = core_log_global_start,
>      .log_global_stop = core_log_global_stop,
>      .priority = 1,
> -- 
> 1.8.3.1

  parent reply	other threads:[~2013-12-10 11:46 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-01 12:02 [Qemu-devel] [PATCH] exec: separate sections and nodes per address space Marcel Apfelbaum
2013-12-02 13:31 ` Michael S. Tsirkin
2013-12-09 18:02 ` Paolo Bonzini
2013-12-10 11:49 ` Michael S. Tsirkin [this message]
2013-12-10 12:37   ` Marcel Apfelbaum
2013-12-10 12:38     ` Paolo Bonzini
2013-12-10 12:50       ` Marcel Apfelbaum
2013-12-10 16:48         ` Michael S. Tsirkin
2013-12-11  9:19           ` Marcel Apfelbaum

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131210114924.GA25293@redhat.com \
    --to=mst@redhat.com \
    --cc=afaerber@suse.de \
    --cc=jan.kiszka@siemens.com \
    --cc=marcel.a@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.