Subject: [PATCH] x86/NUMA: make init_node_heap() respect Xen heap limit
From: Jan Beulich @ 2015-08-27  8:37 UTC
  To: xen-devel
  Cc: Keir Fraser, Andrew Cooper, Ian Jackson, Tim Deegan, Ian Campbell,
	Wei Liu

On NUMA systems, where we try to use node-local memory for the basic
control structures of the buddy allocator, this special case needs to
take into account a possible address-width limit placed on the Xen
heap. In turn this (but also other, more abstract considerations)
requires that xenheap_max_mfn() not be called more than once (at most
we might permit a second call with a larger value than was passed the
first time), and that it be called only before end_boot_allocator().
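
(Illustration only, not part of the patch: the constraint can be
modelled as a small predicate. With a limit of xenheap_bits in force,
a frame range [mfn, mfn + nr) may hold allocator metadata only if its
last frame still lies below 1 << (xenheap_bits - PAGE_SHIFT). The
helper name below is made up; the actual checks are open-coded in
init_node_heap() in the patch.)

    static bool_t mfn_range_within_xenheap(unsigned long mfn,
                                           unsigned long nr,
                                           unsigned int xenheap_bits)
    {
        /* xenheap_bits == 0 means no limit is in force. */
        return !xenheap_bits ||
               !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT));
    }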

Inspecting all the involved code turned up a few off-by-one issues
(corrected here at once; a worked example of the xenheap_max_mfn()
arithmetic follows the list):
- arch_init_memory() cleared one too many page table slots
- the highmem_start-based invocation of xenheap_max_mfn() passed too
  big a value
- xenheap_max_mfn() calculated the wrong bit count in edge cases
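
To make the last two points concrete, consider a hypothetical,
page-aligned highmem_start of 4GiB (PAGE_SHIFT = 12; the PADDR_BITS
clamp added by the patch is omitted here):

    old: xenheap_max_mfn(PFN_DOWN(highmem_start))      -> mfn = 0x100000
         xenheap_bits = flsl(0x100000) + 12
                      = 21 + 12 = 33                   (8GiB, 2x too big)

    new: xenheap_max_mfn(PFN_DOWN(highmem_start - 1))  -> mfn = 0xfffff
         xenheap_bits = flsl(0xfffff + 1) - 1 + 12
                      = 21 - 1 + 12 = 32               (4GiB, as intended)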

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -369,7 +369,7 @@ void __init arch_init_memory(void)
 
                     for ( i = 0; i < l3_table_offset(split_va); ++i )
                         l3tab[i] = l3idle[i];
-                    for ( ; i <= L3_PAGETABLE_ENTRIES; ++i )
+                    for ( ; i < L3_PAGETABLE_ENTRIES; ++i )
                         l3tab[i] = l3e_empty();
                     split_l4e = l4e_from_pfn(virt_to_mfn(l3tab),
                                              __PAGE_HYPERVISOR);
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1002,7 +1002,7 @@ void __init noreturn __start_xen(unsigne
 
     setup_max_pdx(raw_max_page);
     if ( highmem_start )
-        xenheap_max_mfn(PFN_DOWN(highmem_start));
+        xenheap_max_mfn(PFN_DOWN(highmem_start - 1));
 
     /*
      * Walk every RAM region and map it in its entirety (on x86/64, at least)
@@ -1183,9 +1183,6 @@ void __init noreturn __start_xen(unsigne
 
     numa_initmem_init(0, raw_max_page);
 
-    end_boot_allocator();
-    system_state = SYS_STATE_boot;
-
     if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) )
     {
         unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
@@ -1194,6 +1191,8 @@ void __init noreturn __start_xen(unsigne
         if ( !highmem_start )
             xenheap_max_mfn(limit);
 
+        end_boot_allocator();
+
         /* Pass the remaining memory to the allocator. */
         for ( i = 0; i < boot_e820.nr_map; i++ )
         {
@@ -1217,6 +1216,10 @@ void __init noreturn __start_xen(unsigne
            opt_tmem = 0;
         }
     }
+    else
+        end_boot_allocator();
+
+    system_state = SYS_STATE_boot;
 
     vm_init();
     console_init_ring();
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -404,13 +404,19 @@ void get_outstanding_claims(uint64_t *fr
     spin_unlock(&heap_lock);
 }
 
+static bool_t __read_mostly first_node_initialised;
+#ifndef CONFIG_SEPARATE_XENHEAP
+static unsigned int __read_mostly xenheap_bits;
+#else
+#define xenheap_bits 0
+#endif
+
 static unsigned long init_node_heap(int node, unsigned long mfn,
                                     unsigned long nr, bool_t *use_tail)
 {
     /* First node to be discovered has its heap metadata statically alloced. */
     static heap_by_zone_and_order_t _heap_static;
     static unsigned long avail_static[NR_ZONES];
-    static int first_node_initialised;
     unsigned long needed = (sizeof(**_heap) +
                             sizeof(**avail) * NR_ZONES +
                             PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -428,14 +434,18 @@ static unsigned long init_node_heap(int 
     }
 #ifdef DIRECTMAP_VIRT_END
     else if ( *use_tail && nr >= needed &&
-              (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) )
+              (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) &&
+              (!xenheap_bits ||
+               !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT))) )
     {
         _heap[node] = mfn_to_virt(mfn + nr - needed);
         avail[node] = mfn_to_virt(mfn + nr - 1) +
                       PAGE_SIZE - sizeof(**avail) * NR_ZONES;
     }
     else if ( nr >= needed &&
-              (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) )
+              (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) &&
+              (!xenheap_bits ||
+               !((mfn + needed - 1) >> (xenheap_bits - PAGE_SHIFT))) )
     {
         _heap[node] = mfn_to_virt(mfn);
         avail[node] = mfn_to_virt(mfn + needed - 1) +
@@ -1543,11 +1553,13 @@ void free_xenheap_pages(void *v, unsigne
 
 #else
 
-static unsigned int __read_mostly xenheap_bits;
-
 void __init xenheap_max_mfn(unsigned long mfn)
 {
-    xenheap_bits = flsl(mfn) + PAGE_SHIFT;
+    ASSERT(!first_node_initialised);
+    ASSERT(!xenheap_bits);
+    BUILD_BUG_ON(PADDR_BITS >= BITS_PER_LONG);
+    xenheap_bits = min(flsl(mfn + 1) - 1 + PAGE_SHIFT, PADDR_BITS);
+    printk(XENLOG_INFO "Xen heap: %u bits\n", xenheap_bits);
 }
 
 void init_xenheap_pages(paddr_t ps, paddr_t pe)
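
For a quick sanity check of the combined change, here is a small
self-contained toy program (hypothetical and standalone - it merely
models the new logic, uses none of the Xen headers, and omits the
PADDR_BITS clamp):

    #include <stdio.h>
    #include <stdbool.h>

    #define PAGE_SHIFT 12

    /* Model of the fixed xenheap_max_mfn(): mfn is the last usable frame. */
    static unsigned int max_mfn_to_bits(unsigned long mfn)
    {
        unsigned int fls = 0;          /* flsl(): 1-based highest set bit */
        unsigned long v = mfn + 1;

        while ( v )
        {
            ++fls;
            v >>= 1;
        }
        return fls - 1 + PAGE_SHIFT;
    }

    /* Model of the guard added to init_node_heap(). */
    static bool range_ok(unsigned long mfn, unsigned long nr,
                         unsigned int bits)
    {
        return !bits || !((mfn + nr - 1) >> (bits - PAGE_SHIFT));
    }

    int main(void)
    {
        unsigned int bits = max_mfn_to_bits(0xfffff); /* last frame < 4GiB */

        printf("bits=%u\n", bits);                       /* 32 */
        printf("%d\n", range_ok(0xff000, 0x1000, bits)); /* 1: ends at 0xfffff */
        printf("%d\n", range_ok(0xff000, 0x1001, bits)); /* 0: ends at 0x100000 */
        return 0;
    }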


