qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Liu Ping Fan <qemulist@gmail.com>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>,
	Anthony Liguori <anthony@codemonkey.ws>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Jan Kiszka <jan.kiszka@siemens.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>
Subject: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU
Date: Mon, 13 May 2013 11:21:03 +0800	[thread overview]
Message-ID: <1368415264-10800-2-git-send-email-qemulist@gmail.com> (raw)
In-Reply-To: <1368415264-10800-1-git-send-email-qemulist@gmail.com>

From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

Now, each AddressSpaceDispatch has its own radix-tree, and all of them
lie on phys_section[] and phys_map_nodes[]. When we want lockless
mmio dispatch, we need something like RCU.

Achieve this with PhysPageTable which contains all of the info for all
radix trees. After all address space listeners update (ie. excluding the
readers) we switch from PhysPageTable *cur_pgtbl to *next_pgtbl.
(The real RCU style is adopted by the listener, see next patch)

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 exec.c                |  197 +++++++++++++++++++++++++------------------------
 include/exec/memory.h |    2 +
 memory.c              |    2 +
 3 files changed, 106 insertions(+), 95 deletions(-)

diff --git a/exec.c b/exec.c
index c5f8082..bb4e540 100644
--- a/exec.c
+++ b/exec.c
@@ -80,23 +80,34 @@ int use_icount;
 #if !defined(CONFIG_USER_ONLY)
 
 #define SUBSECTION_IDX(addr)      ((addr) & ~TARGET_PAGE_MASK)
-#define PHYS_SECTION_ID(psection) ((psection) - phys_sections)
+#define PHYS_SECTION_ID(psection, base) ((psection) - base->phys_sections)
 
 typedef struct PhysSection {
     MemoryRegionSection section;
     uint16_t *sub_section;
 } PhysSection;
 
-static PhysSection *phys_sections;
-static unsigned phys_sections_nb, phys_sections_nb_alloc;
-static uint16_t phys_section_unassigned;
-static uint16_t phys_section_notdirty;
-static uint16_t phys_section_rom;
-static uint16_t phys_section_watch;
+typedef PhysPageEntry Node[L2_SIZE];
 
-/* Simple allocator for PhysPageEntry nodes */
-static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
-static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
+typedef struct PhysPageTable PhysPageTable;
+
+struct PhysPageTable {
+    int ref;
+    PhysSection *phys_sections;
+    unsigned phys_sections_nb;
+    unsigned phys_sections_nb_alloc;
+    uint16_t phys_section_unassigned;
+    uint16_t phys_section_notdirty;
+    uint16_t phys_section_rom;
+    uint16_t phys_section_watch;
+
+    Node *phys_map_nodes;
+    unsigned phys_map_nodes_nb;
+    unsigned phys_map_nodes_nb_alloc;
+};
+
+static PhysPageTable *cur_pgtbl;
+static PhysPageTable *next_pgtbl;
 
 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
 
@@ -111,13 +122,13 @@ static MemoryRegion io_mem_watch;
 
 static void phys_map_node_reserve(unsigned nodes)
 {
-    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
+    if (next_pgtbl->phys_map_nodes_nb + nodes > next_pgtbl->phys_map_nodes_nb_alloc) {
         typedef PhysPageEntry Node[L2_SIZE];
-        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
-        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
-                                      phys_map_nodes_nb + nodes);
-        phys_map_nodes = g_renew(Node, phys_map_nodes,
-                                 phys_map_nodes_nb_alloc);
+        next_pgtbl->phys_map_nodes_nb_alloc = MAX(next_pgtbl->phys_map_nodes_nb_alloc * 2, 16);
+        next_pgtbl->phys_map_nodes_nb_alloc = MAX(next_pgtbl->phys_map_nodes_nb_alloc,
+                                      next_pgtbl->phys_map_nodes_nb + nodes);
+        next_pgtbl->phys_map_nodes = g_renew(Node, next_pgtbl->phys_map_nodes,
+                                 next_pgtbl->phys_map_nodes_nb_alloc);
     }
 }
 
@@ -126,22 +137,16 @@ static uint16_t phys_map_node_alloc(void)
     unsigned i;
     uint16_t ret;
 
-    ret = phys_map_nodes_nb++;
+    ret = next_pgtbl->phys_map_nodes_nb++;
     assert(ret != PHYS_MAP_NODE_NIL);
-    assert(ret != phys_map_nodes_nb_alloc);
+    assert(ret != next_pgtbl->phys_map_nodes_nb_alloc);
     for (i = 0; i < L2_SIZE; ++i) {
-        phys_map_nodes[ret][i].is_leaf = 0;
-        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
+        next_pgtbl->phys_map_nodes[ret][i].is_leaf = 0;
+        next_pgtbl->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
     }
     return ret;
 }
 
-static void phys_map_nodes_reset(void)
-{
-    phys_map_nodes_nb = 0;
-}
-
-
 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
                                 hwaddr *nb, uint16_t leaf,
                                 int level)
@@ -152,15 +157,15 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
 
     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
         lp->ptr = phys_map_node_alloc();
-        p = phys_map_nodes[lp->ptr];
+        p = next_pgtbl->phys_map_nodes[lp->ptr];
         if (level == 0) {
             for (i = 0; i < L2_SIZE; i++) {
                 p[i].is_leaf = 1;
-                p[i].ptr = phys_section_unassigned;
+                p[i].ptr = next_pgtbl->phys_section_unassigned;
             }
         }
     } else {
-        p = phys_map_nodes[lp->ptr];
+        p = next_pgtbl->phys_map_nodes[lp->ptr];
     }
     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
 
@@ -192,11 +197,13 @@ static PhysSection *phys_section_find(AddressSpaceDispatch *d,
 {
     PhysPageEntry lp = d->phys_map;
     PhysPageEntry *p;
+    PhysSection *phys_sections = cur_pgtbl->phys_sections;
+    Node *phys_map_nodes = cur_pgtbl->phys_map_nodes;
     int i;
 
     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
         if (lp.ptr == PHYS_MAP_NODE_NIL) {
-            return &phys_sections[phys_section_unassigned];
+            return &phys_sections[cur_pgtbl->phys_section_unassigned];
         }
         p = phys_map_nodes[lp.ptr];
         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
@@ -209,6 +216,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
 {
     PhysSection *psection;
     uint16_t idx;
+    PhysSection *phys_sections = cur_pgtbl->phys_sections;
 
     psection = phys_section_find(as->dispatch, addr >> TARGET_PAGE_BITS);
     if (psection->sub_section) {
@@ -246,7 +254,7 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
                 | (addr & iotlb.addr_mask));
         len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
         if (!iotlb.perm[is_write]) {
-            section = &phys_sections[phys_section_unassigned].section;
+            section = &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_unassigned].section;
             break;
         }
 
@@ -690,12 +698,12 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
             + xlat;
         if (!section->readonly) {
-            iotlb |= phys_section_notdirty;
+            iotlb |= cur_pgtbl->phys_section_notdirty;
         } else {
-            iotlb |= phys_section_rom;
+            iotlb |= cur_pgtbl->phys_section_rom;
         }
     } else {
-        iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section));
+        iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section), cur_pgtbl);
         iotlb += xlat;
     }
 
@@ -705,7 +713,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
             /* Avoid trapping reads of pages with a write breakpoint. */
             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
-                iotlb = phys_section_watch + paddr;
+                iotlb = cur_pgtbl->phys_section_watch + paddr;
                 *address |= TLB_MMIO;
                 break;
             }
@@ -721,59 +729,40 @@ static int subsection_register(PhysSection *psection, uint32_t start,
                                uint32_t end, uint16_t section);
 static void subsections_init(PhysSection *psection);
 
-static void destroy_page_desc(uint16_t section_index)
+/* Call after all listeners have been committed.
+  * We do not walk over the tree, just simply drop it.
+  */
+static void destroy_pagetable(PhysPageTable *pgtbl)
 {
-    g_free(phys_sections[section_index].sub_section);
-}
-
-static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
-{
-    unsigned i;
-    PhysPageEntry *p;
+    int i;
 
-    if (lp->ptr == PHYS_MAP_NODE_NIL) {
-        return;
-    }
+    g_free(pgtbl->phys_map_nodes);
 
-    p = phys_map_nodes[lp->ptr];
-    for (i = 0; i < L2_SIZE; ++i) {
-        if (!p[i].is_leaf) {
-            destroy_l2_mapping(&p[i], level - 1);
+    for (i = 0; i < pgtbl->phys_sections_nb_alloc; i++) {
+        if (pgtbl->phys_sections[i].sub_section) {
+            g_free(pgtbl->phys_sections[i].sub_section);
         } else {
-            destroy_page_desc(p[i].ptr);
+            memory_region_unref(pgtbl->phys_sections[i].section.mr);
         }
     }
-    lp->is_leaf = 0;
-    lp->ptr = PHYS_MAP_NODE_NIL;
-}
+    g_free(pgtbl->phys_sections);
 
-static void destroy_all_mappings(AddressSpaceDispatch *d)
-{
-    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
-    phys_map_nodes_reset();
+    g_free(pgtbl);
 }
 
-static uint16_t phys_section_add(MemoryRegionSection *section)
+static uint16_t phys_section_add(MemoryRegionSection *section, PhysPageTable *pgtbl)
 {
-    assert(phys_sections_nb < TARGET_PAGE_SIZE);
+    assert(pgtbl->phys_sections_nb < TARGET_PAGE_SIZE);
 
-    if (phys_sections_nb == phys_sections_nb_alloc) {
-        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
-        phys_sections = g_renew(PhysSection, phys_sections,
-                                phys_sections_nb_alloc);
+    if (pgtbl->phys_sections_nb == pgtbl->phys_sections_nb_alloc) {
+        pgtbl->phys_sections_nb_alloc = MAX(pgtbl->phys_sections_nb_alloc * 2, 16);
+        pgtbl->phys_sections = g_renew(PhysSection, pgtbl->phys_sections,
+                                pgtbl->phys_sections_nb_alloc);
     }
-    phys_sections[phys_sections_nb].section = *section;
-    phys_sections[phys_sections_nb].sub_section = NULL;
+    pgtbl->phys_sections[pgtbl->phys_sections_nb].section = *section;
+    pgtbl->phys_sections[pgtbl->phys_sections_nb].sub_section = NULL;
     memory_region_ref(section->mr);
-    return phys_sections_nb++;
-}
-
-static void phys_sections_clear(void)
-{
-    while (phys_sections_nb > 0) {
-        PhysSection *phys_section = &phys_sections[--phys_sections_nb];
-        memory_region_unref(phys_section->section.mr);
-    }
+    return pgtbl->phys_sections_nb++;
 }
 
 static void register_subsection(AddressSpaceDispatch *d,
@@ -793,18 +782,18 @@ static void register_subsection(AddressSpaceDispatch *d,
            psection->section.mr == &io_mem_unassigned);
 
     if (!psection->sub_section) {
-        new_section = phys_section_add(&subsection);
-        psection = &phys_sections[new_section];
+        new_section = phys_section_add(&subsection, next_pgtbl);
+        psection = &next_pgtbl->phys_sections[new_section];
         subsections_init(psection);
         phys_page_set(d, base >> TARGET_PAGE_BITS, 1, new_section);
     } else {
-        new_section = PHYS_SECTION_ID(psection);
+        new_section = PHYS_SECTION_ID(psection, next_pgtbl);
     }
 
-    new_subsection = phys_section_add(section);
+    new_subsection = phys_section_add(section, next_pgtbl);
 
     /* phys_section_add invalidates psection, reload it  */
-    psection = &phys_sections[new_section];
+    psection = &next_pgtbl->phys_sections[new_section];
     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
     end = start + section->size - 1;
     subsection_register(psection, start, end, new_subsection);
@@ -816,7 +805,7 @@ static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *sec
     hwaddr start_addr = section->offset_within_address_space;
     ram_addr_t size = section->size;
     hwaddr addr;
-    uint16_t section_index = phys_section_add(section);
+    uint16_t section_index = phys_section_add(section, next_pgtbl);
 
     assert(size);
 
@@ -1653,7 +1642,7 @@ static void subsections_init(PhysSection *psection)
 {
     psection->sub_section = g_malloc0(sizeof(uint16_t) * TARGET_PAGE_SIZE);
     subsection_register(psection, 0, TARGET_PAGE_SIZE-1,
-                        phys_section_unassigned);
+                        next_pgtbl->phys_section_unassigned);
 }
 
 static uint16_t dummy_section(MemoryRegion *mr)
@@ -1665,12 +1654,12 @@ static uint16_t dummy_section(MemoryRegion *mr)
         .size = UINT64_MAX,
     };
 
-    return phys_section_add(&section);
+    return phys_section_add(&section, next_pgtbl);
 }
 
 MemoryRegion *iotlb_to_region(hwaddr index)
 {
-    return phys_sections[index & ~TARGET_PAGE_MASK].section.mr;
+    return cur_pgtbl->phys_sections[index & ~TARGET_PAGE_MASK].section.mr;
 }
 
 static void io_mem_init(void)
@@ -1685,21 +1674,40 @@ static void io_mem_init(void)
                           "watch", UINT64_MAX);
 }
 
+void global_begin(void)
+{
+    next_pgtbl = g_new0(PhysPageTable, 1);
+    next_pgtbl->ref = 1;
+    next_pgtbl->phys_section_unassigned = dummy_section(&io_mem_unassigned);
+    next_pgtbl->phys_section_notdirty = dummy_section(&io_mem_notdirty);
+    next_pgtbl->phys_section_rom = dummy_section(&io_mem_rom);
+    next_pgtbl->phys_section_watch = dummy_section(&io_mem_watch);
+}
+
+/* other listeners finished */
+void global_commit(void)
+{
+    PhysPageTable *t = cur_pgtbl;
+
+    cur_pgtbl = next_pgtbl;
+    /* Fix me,  currently, we rely on each address space listener against its
+      * reader. So when we come here, no readers will touch the old phys_map_node.
+      * After RCU, this should be changed to call_rcu()
+      */
+    if (__sync_sub_and_fetch(&t->ref, 1) == 0) {
+        destroy_pagetable(t);
+    }
+}
+
 static void mem_begin(MemoryListener *listener)
 {
     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
 
-    destroy_all_mappings(d);
     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
 }
 
 static void core_begin(MemoryListener *listener)
 {
-    phys_sections_clear();
-    phys_section_unassigned = dummy_section(&io_mem_unassigned);
-    phys_section_notdirty = dummy_section(&io_mem_notdirty);
-    phys_section_rom = dummy_section(&io_mem_rom);
-    phys_section_watch = dummy_section(&io_mem_watch);
 }
 
 static void tcg_commit(MemoryListener *listener)
@@ -1779,7 +1787,6 @@ void address_space_destroy_dispatch(AddressSpace *as)
     AddressSpaceDispatch *d = as->dispatch;
 
     memory_listener_unregister(&d->listener);
-    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
     g_free(d);
     as->dispatch = NULL;
 }
@@ -2386,7 +2393,7 @@ void stl_phys_notdirty(hwaddr addr, uint32_t val)
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         if (memory_region_is_ram(section->mr)) {
-            section = &phys_sections[phys_section_rom].section;
+            section = &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
         }
         io_mem_write(section->mr, addr, val, 4);
     } else {
@@ -2422,7 +2429,7 @@ void stq_phys_notdirty(hwaddr addr, uint64_t val)
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         if (memory_region_is_ram(section->mr)) {
-            section = &phys_sections[phys_section_rom].section;
+            section = &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
         }
 #ifdef TARGET_WORDS_BIGENDIAN
         io_mem_write(section->mr, addr, val >> 32, 4);
@@ -2455,7 +2462,7 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         if (memory_region_is_ram(section->mr)) {
-            section = &phys_sections[phys_section_rom].section;
+            section = &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
         }
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2526,7 +2533,7 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         if (memory_region_is_ram(section->mr)) {
-            section = &phys_sections[phys_section_rom].section;
+            section = &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
         }
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index b97ace7..cc654fa 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -992,6 +992,8 @@ void *address_space_map(AddressSpace *as, hwaddr addr,
 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                          int is_write, hwaddr access_len);
 
+void global_begin(void);
+void global_commit(void);
 
 #endif
 
diff --git a/memory.c b/memory.c
index 1a86607..da06dfd 100644
--- a/memory.c
+++ b/memory.c
@@ -805,6 +805,7 @@ void memory_region_transaction_commit(void)
     --memory_region_transaction_depth;
     if (!memory_region_transaction_depth && memory_region_update_pending) {
         memory_region_update_pending = false;
+        global_begin();
         MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
 
         QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
@@ -812,6 +813,7 @@ void memory_region_transaction_commit(void)
         }
 
         MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
+        global_commit();
     }
 }
 
-- 
1.7.4.4

  reply	other threads:[~2013-05-13  3:21 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-05-13  3:21 [Qemu-devel] [RFC PATCH 0/2] make memory listener prepared for rcu style Liu Ping Fan
2013-05-13  3:21 ` Liu Ping Fan [this message]
2013-05-13  9:20   ` [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU Paolo Bonzini
2013-05-14  3:38     ` liu ping fan
2013-05-14  9:27       ` Paolo Bonzini
2013-05-15  7:04         ` liu ping fan
2013-05-26 13:02         ` liu ping fan
2013-05-27 11:54           ` Paolo Bonzini
2013-05-29  1:52             ` liu ping fan
2013-05-13  3:21 ` [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style Liu Ping Fan
2013-05-13  9:31   ` Paolo Bonzini
2013-05-14  5:47     ` liu ping fan
2013-05-14  9:34       ` Paolo Bonzini
2013-05-15  1:29         ` liu ping fan
2013-05-15  8:22           ` Paolo Bonzini
2013-05-15  9:11             ` liu ping fan
2013-05-15  9:19               ` Paolo Bonzini
2013-05-16  9:09     ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1368415264-10800-2-git-send-email-qemulist@gmail.com \
    --to=qemulist@gmail.com \
    --cc=anthony@codemonkey.ws \
    --cc=jan.kiszka@siemens.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).