xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Fix cpu online/offline bug: mce memory leak.
@ 2011-03-01  9:09 Liu, Jinsong
  2011-03-01  9:22 ` Keir Fraser
  2011-03-01  9:30 ` Keir Fraser
  0 siblings, 2 replies; 11+ messages in thread
From: Liu, Jinsong @ 2011-03-01  9:09 UTC (permalink / raw)
  To: keir@xen.org, xen-devel@lists.xensource.com; +Cc: keir.xen@gmail.com

[-- Attachment #1: Type: text/plain, Size: 3786 bytes --]

Fix cpu online/offline bug: mce memory leak.

The current Xen MCE logic does not free mcabanks, so memory is leaked every time a CPU goes offline.
When CPU online/offline is done repeatedly, this leak makes xenpool shrink, and at some point
Xen will call alloc_heap_pages --> flush_area_mask, which does ASSERT(local_irq_is_enabled()).
However, CPU online runs with IRQs disabled, so this finally results in a Xen crash.

This patch fixes the memory leak, and has been tested OK over 110,000 rounds of CPU online/offline.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Mon Feb 28 19:19:20 2011 +0800
@@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
     mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct mca_error_handler);
 }
 
-static int intel_init_mca_banks(void)
+static void cpu_mcabank_free(unsigned int cpu)
 {
-    struct mca_banks *mb1, *mb2, * mb3;
+    struct mca_banks *mb1, *mb2, *mb3, *mb4;
+
+    mb1 = per_cpu(mce_clear_banks, cpu);
+    mb2 = per_cpu(no_cmci_banks, cpu);
+    mb3 = per_cpu(mce_banks_owned, cpu);
+    mb4 = per_cpu(poll_bankmask, cpu);
+
+    mcabanks_free(mb1);
+    mcabanks_free(mb2);
+    mcabanks_free(mb3);
+    mcabanks_free(mb4);
+}
+
+static void cpu_mcabank_alloc(unsigned int cpu)
+{
+    struct mca_banks *mb1, *mb2, *mb3;
 
     mb1 = mcabanks_alloc();
     mb2 = mcabanks_alloc();
@@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
     if (!mb1 || !mb2 || !mb3)
         goto out;
 
-    __get_cpu_var(mce_clear_banks) = mb1;
-    __get_cpu_var(no_cmci_banks) = mb2;
-    __get_cpu_var(mce_banks_owned) = mb3;
+    per_cpu(mce_clear_banks, cpu) = mb1;
+    per_cpu(no_cmci_banks, cpu) = mb2;
+    per_cpu(mce_banks_owned, cpu) = mb3;
+    return;
 
-    return 0;
 out:
     mcabanks_free(mb1);
     mcabanks_free(mb2);
     mcabanks_free(mb3);
-    return -ENOMEM;
 }
 
 /* p4/p6 family have similar MCA initialization process */
 enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
 {
-    if (intel_init_mca_banks())
+    if ( !this_cpu(mce_clear_banks) ||
+         !this_cpu(no_cmci_banks)   ||
+         !this_cpu(mce_banks_owned) )
          return mcheck_none;
 
     intel_init_mca(c);
@@ -1301,13 +1317,19 @@ static int cpu_callback(
 static int cpu_callback(
     struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
+    unsigned int cpu = (unsigned long)hcpu;
+
     switch ( action )
     {
+    case CPU_UP_PREPARE:
+        cpu_mcabank_alloc(cpu);
+        break;
     case CPU_DYING:
         cpu_mcheck_disable();
         break;
     case CPU_DEAD:
         cpu_mcheck_distribute_cmci();
+        cpu_mcabank_free(cpu);
         break;
     default:
         break;
@@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
 
 static int __init intel_mce_initcall(void)
 {
+    void *hcpu = (void *)(long)smp_processor_id();
+    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
     register_cpu_notifier(&cpu_nfb);
     return 0;
 }
diff -r 1a364b17d66a xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c	Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/setup.c	Mon Feb 28 19:19:20 2011 +0800
@@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
 
     arch_init_memory();
 
+    do_presmp_initcalls();
+
     identify_cpu(&boot_cpu_data);
     if ( cpu_has_fxsr )
         set_in_cr4(X86_CR4_OSFXSR);
@@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
     initialize_keytable();
 
     console_init_postirq();
-
-    do_presmp_initcalls();
 
     for_each_present_cpu ( i )
     {

[-- Attachment #2: mce_fix_memory_leak.patch --]
[-- Type: application/octet-stream, Size: 3667 bytes --]

Fix cpu online/offline bug: mce memory leak.

The current Xen MCE logic does not free mcabanks, so memory is leaked every time a CPU goes offline.
When CPU online/offline is done repeatedly, this leak makes xenpool shrink, and at some point
Xen will call alloc_heap_pages --> flush_area_mask, which does ASSERT(local_irq_is_enabled()).
However, CPU online runs with IRQs disabled, so this finally results in a Xen crash.

This patch fixes the memory leak, and has been tested OK over 110,000 rounds of CPU online/offline.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Mon Feb 28 19:19:20 2011 +0800
@@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
     mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct mca_error_handler);
 }
 
-static int intel_init_mca_banks(void)
+static void cpu_mcabank_free(unsigned int cpu)
 {
-    struct mca_banks *mb1, *mb2, * mb3;
+    struct mca_banks *mb1, *mb2, *mb3, *mb4;
+
+    mb1 = per_cpu(mce_clear_banks, cpu);
+    mb2 = per_cpu(no_cmci_banks, cpu);
+    mb3 = per_cpu(mce_banks_owned, cpu);
+    mb4 = per_cpu(poll_bankmask, cpu);
+
+    mcabanks_free(mb1);
+    mcabanks_free(mb2);
+    mcabanks_free(mb3);
+    mcabanks_free(mb4);
+}
+
+static void cpu_mcabank_alloc(unsigned int cpu)
+{
+    struct mca_banks *mb1, *mb2, *mb3;
 
     mb1 = mcabanks_alloc();
     mb2 = mcabanks_alloc();
@@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
     if (!mb1 || !mb2 || !mb3)
         goto out;
 
-    __get_cpu_var(mce_clear_banks) = mb1;
-    __get_cpu_var(no_cmci_banks) = mb2;
-    __get_cpu_var(mce_banks_owned) = mb3;
+    per_cpu(mce_clear_banks, cpu) = mb1;
+    per_cpu(no_cmci_banks, cpu) = mb2;
+    per_cpu(mce_banks_owned, cpu) = mb3;
+    return;
 
-    return 0;
 out:
     mcabanks_free(mb1);
     mcabanks_free(mb2);
     mcabanks_free(mb3);
-    return -ENOMEM;
 }
 
 /* p4/p6 family have similar MCA initialization process */
 enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
 {
-    if (intel_init_mca_banks())
+    if ( !this_cpu(mce_clear_banks) ||
+         !this_cpu(no_cmci_banks)   ||
+         !this_cpu(mce_banks_owned) )
          return mcheck_none;
 
     intel_init_mca(c);
@@ -1301,13 +1317,19 @@ static int cpu_callback(
 static int cpu_callback(
     struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
+    unsigned int cpu = (unsigned long)hcpu;
+
     switch ( action )
     {
+    case CPU_UP_PREPARE:
+        cpu_mcabank_alloc(cpu);
+        break;
     case CPU_DYING:
         cpu_mcheck_disable();
         break;
     case CPU_DEAD:
         cpu_mcheck_distribute_cmci();
+        cpu_mcabank_free(cpu);
         break;
     default:
         break;
@@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
 
 static int __init intel_mce_initcall(void)
 {
+    void *hcpu = (void *)(long)smp_processor_id();
+    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
     register_cpu_notifier(&cpu_nfb);
     return 0;
 }
diff -r 1a364b17d66a xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c	Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/setup.c	Mon Feb 28 19:19:20 2011 +0800
@@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
 
     arch_init_memory();
 
+    do_presmp_initcalls();
+
     identify_cpu(&boot_cpu_data);
     if ( cpu_has_fxsr )
         set_in_cr4(X86_CR4_OSFXSR);
@@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
     initialize_keytable();
 
     console_init_postirq();
-
-    do_presmp_initcalls();
 
     for_each_present_cpu ( i )
     {

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2011-03-02  6:54 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-01  9:09 [PATCH] Fix cpu online/offline bug: mce memory leak Liu, Jinsong
2011-03-01  9:22 ` Keir Fraser
2011-03-01  9:27   ` Liu, Jinsong
2011-03-01  9:30 ` Keir Fraser
2011-03-01 10:12   ` Haitao Shan
2011-03-01 10:19     ` Keir Fraser
2011-03-01 10:21       ` Haitao Shan
2011-03-01 10:25         ` Keir Fraser
2011-03-01 10:26         ` Liu, Jinsong
2011-03-01 10:47   ` Keir Fraser
2011-03-02  6:54     ` Liu, Jinsong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).