[PATCH] Fix cpu online/offline bug: mce memory leak.

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] Fix cpu online/offline bug: mce memory leak.
@ 2011-03-01  9:09 Liu, Jinsong
  2011-03-01  9:22 ` Keir Fraser
  2011-03-01  9:30 ` Keir Fraser
  0 siblings, 2 replies; 11+ messages in thread
From: Liu, Jinsong @ 2011-03-01  9:09 UTC (permalink / raw)
  To: keir@xen.org, xen-devel@lists.xensource.com; +Cc: keir.xen@gmail.com

[-- Attachment #1: Type: text/plain, Size: 3786 bytes --]

Fix cpu online/offline bug: mce memory leak.

Current Xen mce logic didn't free mcabanks. This would be a memory leak when cpu offline.
When repeatly do cpu online/offline, this memory leak would make xenpool shrink, and at a time point, 
will call alloc_heap_pages --> flush_area_mask, which ASSERT(local_irq_is_enabled()).
However, cpu online is irq disable, so it finally result in Xen crash.

This patch fix the memory leak bug, and tested OK over 110,000 round cpu online/offline.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Mon Feb 28 19:19:20 2011 +0800
@@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
     mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct mca_error_handler);
 }
 
-static int intel_init_mca_banks(void)
+static void cpu_mcabank_free(unsigned int cpu)
 {
-    struct mca_banks *mb1, *mb2, * mb3;
+    struct mca_banks *mb1, *mb2, *mb3, *mb4;
+
+    mb1 = per_cpu(mce_clear_banks, cpu);
+    mb2 = per_cpu(no_cmci_banks, cpu);
+    mb3 = per_cpu(mce_banks_owned, cpu);
+    mb4 = per_cpu(poll_bankmask, cpu);
+
+    mcabanks_free(mb1);
+    mcabanks_free(mb2);
+    mcabanks_free(mb3);
+    mcabanks_free(mb4);
+}
+
+static void cpu_mcabank_alloc(unsigned int cpu)
+{
+    struct mca_banks *mb1, *mb2, *mb3;
 
     mb1 = mcabanks_alloc();
     mb2 = mcabanks_alloc();
@@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
     if (!mb1 || !mb2 || !mb3)
         goto out;
 
-    __get_cpu_var(mce_clear_banks) = mb1;
-    __get_cpu_var(no_cmci_banks) = mb2;
-    __get_cpu_var(mce_banks_owned) = mb3;
+    per_cpu(mce_clear_banks, cpu) = mb1;
+    per_cpu(no_cmci_banks, cpu) = mb2;
+    per_cpu(mce_banks_owned, cpu) = mb3;
+    return;
 
-    return 0;
 out:
     mcabanks_free(mb1);
     mcabanks_free(mb2);
     mcabanks_free(mb3);
-    return -ENOMEM;
 }
 
 /* p4/p6 family have similar MCA initialization process */
 enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
 {
-    if (intel_init_mca_banks())
+    if ( !this_cpu(mce_clear_banks) ||
+         !this_cpu(no_cmci_banks)   ||
+         !this_cpu(mce_banks_owned) )
          return mcheck_none;
 
     intel_init_mca(c);
@@ -1301,13 +1317,19 @@ static int cpu_callback(
 static int cpu_callback(
     struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
+    unsigned int cpu = (unsigned long)hcpu;
+
     switch ( action )
     {
+    case CPU_UP_PREPARE:
+        cpu_mcabank_alloc(cpu);
+        break;
     case CPU_DYING:
         cpu_mcheck_disable();
         break;
     case CPU_DEAD:
         cpu_mcheck_distribute_cmci();
+        cpu_mcabank_free(cpu);
         break;
     default:
         break;
@@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
 
 static int __init intel_mce_initcall(void)
 {
+    void *hcpu = (void *)(long)smp_processor_id();
+    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
     register_cpu_notifier(&cpu_nfb);
     return 0;
 }
diff -r 1a364b17d66a xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c	Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/setup.c	Mon Feb 28 19:19:20 2011 +0800
@@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
 
     arch_init_memory();
 
+    do_presmp_initcalls();
+
     identify_cpu(&boot_cpu_data);
     if ( cpu_has_fxsr )
         set_in_cr4(X86_CR4_OSFXSR);
@@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
     initialize_keytable();
 
     console_init_postirq();
-
-    do_presmp_initcalls();
 
     for_each_present_cpu ( i )
     {

[-- Attachment #2: mce_fix_memory_leak.patch --]
[-- Type: application/octet-stream, Size: 3667 bytes --]

Fix cpu online/offline bug: mce memory leak.

Current Xen mce logic didn't free mcabanks. This would be a memory leak when cpu offline.
When repeatly do cpu online/offline, this memory leak would make xenpool shrink, and at a time point, 
will call alloc_heap_pages --> flush_area_mask, which ASSERT(local_irq_is_enabled()).
However, cpu online is irq disable, so it finally result in Xen crash.

This patch fix the memory leak bug, and tested OK over 110,000 round cpu online/offline.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Mon Feb 28 19:19:20 2011 +0800
@@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
     mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct mca_error_handler);
 }
 
-static int intel_init_mca_banks(void)
+static void cpu_mcabank_free(unsigned int cpu)
 {
-    struct mca_banks *mb1, *mb2, * mb3;
+    struct mca_banks *mb1, *mb2, *mb3, *mb4;
+
+    mb1 = per_cpu(mce_clear_banks, cpu);
+    mb2 = per_cpu(no_cmci_banks, cpu);
+    mb3 = per_cpu(mce_banks_owned, cpu);
+    mb4 = per_cpu(poll_bankmask, cpu);
+
+    mcabanks_free(mb1);
+    mcabanks_free(mb2);
+    mcabanks_free(mb3);
+    mcabanks_free(mb4);
+}
+
+static void cpu_mcabank_alloc(unsigned int cpu)
+{
+    struct mca_banks *mb1, *mb2, *mb3;
 
     mb1 = mcabanks_alloc();
     mb2 = mcabanks_alloc();
@@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
     if (!mb1 || !mb2 || !mb3)
         goto out;
 
-    __get_cpu_var(mce_clear_banks) = mb1;
-    __get_cpu_var(no_cmci_banks) = mb2;
-    __get_cpu_var(mce_banks_owned) = mb3;
+    per_cpu(mce_clear_banks, cpu) = mb1;
+    per_cpu(no_cmci_banks, cpu) = mb2;
+    per_cpu(mce_banks_owned, cpu) = mb3;
+    return;
 
-    return 0;
 out:
     mcabanks_free(mb1);
     mcabanks_free(mb2);
     mcabanks_free(mb3);
-    return -ENOMEM;
 }
 
 /* p4/p6 family have similar MCA initialization process */
 enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
 {
-    if (intel_init_mca_banks())
+    if ( !this_cpu(mce_clear_banks) ||
+         !this_cpu(no_cmci_banks)   ||
+         !this_cpu(mce_banks_owned) )
          return mcheck_none;
 
     intel_init_mca(c);
@@ -1301,13 +1317,19 @@ static int cpu_callback(
 static int cpu_callback(
     struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
+    unsigned int cpu = (unsigned long)hcpu;
+
     switch ( action )
     {
+    case CPU_UP_PREPARE:
+        cpu_mcabank_alloc(cpu);
+        break;
     case CPU_DYING:
         cpu_mcheck_disable();
         break;
     case CPU_DEAD:
         cpu_mcheck_distribute_cmci();
+        cpu_mcabank_free(cpu);
         break;
     default:
         break;
@@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
 
 static int __init intel_mce_initcall(void)
 {
+    void *hcpu = (void *)(long)smp_processor_id();
+    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
     register_cpu_notifier(&cpu_nfb);
     return 0;
 }
diff -r 1a364b17d66a xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c	Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/setup.c	Mon Feb 28 19:19:20 2011 +0800
@@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
 
     arch_init_memory();
 
+    do_presmp_initcalls();
+
     identify_cpu(&boot_cpu_data);
     if ( cpu_has_fxsr )
         set_in_cr4(X86_CR4_OSFXSR);
@@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
     initialize_keytable();
 
     console_init_postirq();
-
-    do_presmp_initcalls();
 
     for_each_present_cpu ( i )
     {

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix cpu online/offline bug: mce memory leak.
  2011-03-01  9:09 [PATCH] Fix cpu online/offline bug: mce memory leak Liu, Jinsong
@ 2011-03-01  9:22 ` Keir Fraser
  2011-03-01  9:27   ` Liu, Jinsong
  2011-03-01  9:30 ` Keir Fraser
  1 sibling, 1 reply; 11+ messages in thread
From: Keir Fraser @ 2011-03-01  9:22 UTC (permalink / raw)
  To: Liu, Jinsong, xen-devel@lists.xensource.com; +Cc: Haitao Shan

Brilliant! This appears to move allocations of the CPU bringup path and into
CPU_UP_PREPARE? If so, this should not only fix a memory leak but also
Haitao Shan's crash during CPU bringup. Cc'ing Haitao.

 -- Keir

On 01/03/2011 09:09, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:

> Fix cpu online/offline bug: mce memory leak.
> 
> Current Xen mce logic didn't free mcabanks. This would be a memory leak when
> cpu offline.
> When repeatly do cpu online/offline, this memory leak would make xenpool
> shrink, and at a time point,
> will call alloc_heap_pages --> flush_area_mask, which
> ASSERT(local_irq_is_enabled()).
> However, cpu online is irq disable, so it finally result in Xen crash.
> 
> This patch fix the memory leak bug, and tested OK over 110,000 round cpu
> online/offline.
> 
> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
> 
> diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011 +0800
> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20 2011 +0800
> @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
>      mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
> mca_error_handler);
>  }
>  
> -static int intel_init_mca_banks(void)
> +static void cpu_mcabank_free(unsigned int cpu)
>  {
> -    struct mca_banks *mb1, *mb2, * mb3;
> +    struct mca_banks *mb1, *mb2, *mb3, *mb4;
> +
> +    mb1 = per_cpu(mce_clear_banks, cpu);
> +    mb2 = per_cpu(no_cmci_banks, cpu);
> +    mb3 = per_cpu(mce_banks_owned, cpu);
> +    mb4 = per_cpu(poll_bankmask, cpu);
> +
> +    mcabanks_free(mb1);
> +    mcabanks_free(mb2);
> +    mcabanks_free(mb3);
> +    mcabanks_free(mb4);
> +}
> +
> +static void cpu_mcabank_alloc(unsigned int cpu)
> +{
> +    struct mca_banks *mb1, *mb2, *mb3;
>  
>      mb1 = mcabanks_alloc();
>      mb2 = mcabanks_alloc();
> @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
>      if (!mb1 || !mb2 || !mb3)
>          goto out;
>  
> -    __get_cpu_var(mce_clear_banks) = mb1;
> -    __get_cpu_var(no_cmci_banks) = mb2;
> -    __get_cpu_var(mce_banks_owned) = mb3;
> +    per_cpu(mce_clear_banks, cpu) = mb1;
> +    per_cpu(no_cmci_banks, cpu) = mb2;
> +    per_cpu(mce_banks_owned, cpu) = mb3;
> +    return;
>  
> -    return 0;
>  out:
>      mcabanks_free(mb1);
>      mcabanks_free(mb2);
>      mcabanks_free(mb3);
> -    return -ENOMEM;
>  }
>  
>  /* p4/p6 family have similar MCA initialization process */
>  enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
>  {
> -    if (intel_init_mca_banks())
> +    if ( !this_cpu(mce_clear_banks) ||
> +         !this_cpu(no_cmci_banks)   ||
> +         !this_cpu(mce_banks_owned) )
>           return mcheck_none;
>  
>      intel_init_mca(c);
> @@ -1301,13 +1317,19 @@ static int cpu_callback(
>  static int cpu_callback(
>      struct notifier_block *nfb, unsigned long action, void *hcpu)
>  {
> +    unsigned int cpu = (unsigned long)hcpu;
> +
>      switch ( action )
>      {
> +    case CPU_UP_PREPARE:
> +        cpu_mcabank_alloc(cpu);
> +        break;
>      case CPU_DYING:
>          cpu_mcheck_disable();
>          break;
>      case CPU_DEAD:
>          cpu_mcheck_distribute_cmci();
> +        cpu_mcabank_free(cpu);
>          break;
>      default:
>          break;
> @@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
>  
>  static int __init intel_mce_initcall(void)
>  {
> +    void *hcpu = (void *)(long)smp_processor_id();
> +    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
>      register_cpu_notifier(&cpu_nfb);
>      return 0;
>  }
> diff -r 1a364b17d66a xen/arch/x86/setup.c
> --- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
> +++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
> @@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
>  
>      arch_init_memory();
>  
> +    do_presmp_initcalls();
> +
>      identify_cpu(&boot_cpu_data);
>      if ( cpu_has_fxsr )
>          set_in_cr4(X86_CR4_OSFXSR);
> @@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
>      initialize_keytable();
>  
>      console_init_postirq();
> -
> -    do_presmp_initcalls();
>  
>      for_each_present_cpu ( i )
>      {

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] Fix cpu online/offline bug: mce memory leak.
  2011-03-01  9:22 ` Keir Fraser
@ 2011-03-01  9:27   ` Liu, Jinsong
  0 siblings, 0 replies; 11+ messages in thread
From: Liu, Jinsong @ 2011-03-01  9:27 UTC (permalink / raw)
  To: Keir Fraser, xen-devel@lists.xensource.com; +Cc: Haitao Shan

Yes :)

Keir Fraser wrote:
> Brilliant! This appears to move allocations of the CPU bringup path
> and into CPU_UP_PREPARE? If so, this should not only fix a memory
> leak but also Haitao Shan's crash during CPU bringup. Cc'ing Haitao.
> 
>  -- Keir
> 
> On 01/03/2011 09:09, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> 
>> Fix cpu online/offline bug: mce memory leak.
>> 
>> Current Xen mce logic didn't free mcabanks. This would be a memory
>> leak when cpu offline. When repeatly do cpu online/offline, this
>> memory leak would make xenpool shrink, and at a time point, will
>> call alloc_heap_pages --> flush_area_mask, which
>> ASSERT(local_irq_is_enabled()). 
>> However, cpu online is irq disable, so it finally result in Xen
>> crash. 
>> 
>> This patch fix the memory leak bug, and tested OK over 110,000 round
>> cpu online/offline. 
>> 
>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
>> 
>> diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011
>> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20
>> 2011 +0800 @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
>>      mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
>>  mca_error_handler); }
>> 
>> -static int intel_init_mca_banks(void)
>> +static void cpu_mcabank_free(unsigned int cpu)
>>  {
>> -    struct mca_banks *mb1, *mb2, * mb3;
>> +    struct mca_banks *mb1, *mb2, *mb3, *mb4;
>> +
>> +    mb1 = per_cpu(mce_clear_banks, cpu);
>> +    mb2 = per_cpu(no_cmci_banks, cpu);
>> +    mb3 = per_cpu(mce_banks_owned, cpu);
>> +    mb4 = per_cpu(poll_bankmask, cpu);
>> +
>> +    mcabanks_free(mb1);
>> +    mcabanks_free(mb2);
>> +    mcabanks_free(mb3);
>> +    mcabanks_free(mb4);
>> +}
>> +
>> +static void cpu_mcabank_alloc(unsigned int cpu)
>> +{
>> +    struct mca_banks *mb1, *mb2, *mb3;
>> 
>>      mb1 = mcabanks_alloc();
>>      mb2 = mcabanks_alloc();
>> @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
>>      if (!mb1 || !mb2 || !mb3)
>>          goto out;
>> 
>> -    __get_cpu_var(mce_clear_banks) = mb1;
>> -    __get_cpu_var(no_cmci_banks) = mb2;
>> -    __get_cpu_var(mce_banks_owned) = mb3;
>> +    per_cpu(mce_clear_banks, cpu) = mb1;
>> +    per_cpu(no_cmci_banks, cpu) = mb2;
>> +    per_cpu(mce_banks_owned, cpu) = mb3;
>> +    return;
>> 
>> -    return 0;
>>  out:
>>      mcabanks_free(mb1);
>>      mcabanks_free(mb2);
>>      mcabanks_free(mb3);
>> -    return -ENOMEM;
>>  }
>> 
>>  /* p4/p6 family have similar MCA initialization process */
>>  enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)  {
>> -    if (intel_init_mca_banks())
>> +    if ( !this_cpu(mce_clear_banks) ||
>> +         !this_cpu(no_cmci_banks)   ||
>> +         !this_cpu(mce_banks_owned) )
>>           return mcheck_none;
>> 
>>      intel_init_mca(c);
>> @@ -1301,13 +1317,19 @@ static int cpu_callback(
>>  static int cpu_callback(
>>      struct notifier_block *nfb, unsigned long action, void *hcpu)  {
>> +    unsigned int cpu = (unsigned long)hcpu;
>> +
>>      switch ( action )
>>      {
>> +    case CPU_UP_PREPARE:
>> +        cpu_mcabank_alloc(cpu);
>> +        break;
>>      case CPU_DYING:
>>          cpu_mcheck_disable();
>>          break;
>>      case CPU_DEAD:
>>          cpu_mcheck_distribute_cmci();
>> +        cpu_mcabank_free(cpu);
>>          break;
>>      default:
>>          break;
>> @@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
>> 
>>  static int __init intel_mce_initcall(void)
>>  {
>> +    void *hcpu = (void *)(long)smp_processor_id();
>> +    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
>>      register_cpu_notifier(&cpu_nfb);
>>      return 0;
>>  }
>> diff -r 1a364b17d66a xen/arch/x86/setup.c
>> --- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
>> +++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
>> @@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
>> 
>>      arch_init_memory();
>> 
>> +    do_presmp_initcalls();
>> +
>>      identify_cpu(&boot_cpu_data);
>>      if ( cpu_has_fxsr )
>>          set_in_cr4(X86_CR4_OSFXSR);
>> @@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb     
>> initialize_keytable(); 
>> 
>>      console_init_postirq();
>> -
>> -    do_presmp_initcalls();
>> 
>>      for_each_present_cpu ( i )
>>      {

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix cpu online/offline bug: mce memory leak.
  2011-03-01  9:09 [PATCH] Fix cpu online/offline bug: mce memory leak Liu, Jinsong
  2011-03-01  9:22 ` Keir Fraser
@ 2011-03-01  9:30 ` Keir Fraser
  2011-03-01 10:12   ` Haitao Shan
  2011-03-01 10:47   ` Keir Fraser
  1 sibling, 2 replies; 11+ messages in thread
From: Keir Fraser @ 2011-03-01  9:30 UTC (permalink / raw)
  To: Liu, Jinsong, xen-devel@lists.xensource.com; +Cc: Haitao Shan

Some comments inline below. Overall a good patch to have, but needs some
minor adjustments!

 -- Keir

On 01/03/2011 09:09, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:

> Fix cpu online/offline bug: mce memory leak.
> 
> Current Xen mce logic didn't free mcabanks. This would be a memory leak when
> cpu offline.
> When repeatly do cpu online/offline, this memory leak would make xenpool
> shrink, and at a time point,
> will call alloc_heap_pages --> flush_area_mask, which
> ASSERT(local_irq_is_enabled()).
> However, cpu online is irq disable, so it finally result in Xen crash.
> 
> This patch fix the memory leak bug, and tested OK over 110,000 round cpu
> online/offline.
> 
> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
> 
> diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011 +0800
> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20 2011 +0800
> @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
>      mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
> mca_error_handler);
>  }
>  
> -static int intel_init_mca_banks(void)
> +static void cpu_mcabank_free(unsigned int cpu)
>  {
> -    struct mca_banks *mb1, *mb2, * mb3;
> +    struct mca_banks *mb1, *mb2, *mb3, *mb4;
> +
> +    mb1 = per_cpu(mce_clear_banks, cpu);
> +    mb2 = per_cpu(no_cmci_banks, cpu);
> +    mb3 = per_cpu(mce_banks_owned, cpu);
> +    mb4 = per_cpu(poll_bankmask, cpu);
> +
> +    mcabanks_free(mb1);
> +    mcabanks_free(mb2);
> +    mcabanks_free(mb3);
> +    mcabanks_free(mb4);
> +}
> +
> +static void cpu_mcabank_alloc(unsigned int cpu)
> +{
> +    struct mca_banks *mb1, *mb2, *mb3;
>  
>      mb1 = mcabanks_alloc();
>      mb2 = mcabanks_alloc();
> @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
>      if (!mb1 || !mb2 || !mb3)
>          goto out;
>  
> -    __get_cpu_var(mce_clear_banks) = mb1;
> -    __get_cpu_var(no_cmci_banks) = mb2;
> -    __get_cpu_var(mce_banks_owned) = mb3;
> +    per_cpu(mce_clear_banks, cpu) = mb1;
> +    per_cpu(no_cmci_banks, cpu) = mb2;
> +    per_cpu(mce_banks_owned, cpu) = mb3;
> +    return;
>  
> -    return 0;
>  out:
>      mcabanks_free(mb1);
>      mcabanks_free(mb2);
>      mcabanks_free(mb3);
> -    return -ENOMEM;
>  }
>  
>  /* p4/p6 family have similar MCA initialization process */
>  enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
>  {
> -    if (intel_init_mca_banks())
> +    if ( !this_cpu(mce_clear_banks) ||
> +         !this_cpu(no_cmci_banks)   ||
> +         !this_cpu(mce_banks_owned) )
>           return mcheck_none;
>  
>      intel_init_mca(c);
> @@ -1301,13 +1317,19 @@ static int cpu_callback(
>  static int cpu_callback(
>      struct notifier_block *nfb, unsigned long action, void *hcpu)
>  {
> +    unsigned int cpu = (unsigned long)hcpu;
> +
>      switch ( action )
>      {
> +    case CPU_UP_PREPARE:
> +        cpu_mcabank_alloc(cpu);
> +        break;
>      case CPU_DYING:
>          cpu_mcheck_disable();
>          break;
>      case CPU_DEAD:
>          cpu_mcheck_distribute_cmci();
> +        cpu_mcabank_free(cpu);
>          break;

You also need to cpu_mcabank_free(cpu) on CPU_UP_CANCELED. Else there's a
memory leak on failed CPU bringup.

>      default:
>          break;
> @@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
>  
>  static int __init intel_mce_initcall(void)
>  {
> +    void *hcpu = (void *)(long)smp_processor_id();
> +    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
>      register_cpu_notifier(&cpu_nfb);
>      return 0;
>  }
> diff -r 1a364b17d66a xen/arch/x86/setup.c
> --- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
> +++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
> @@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
>  
>      arch_init_memory();
>  
> +    do_presmp_initcalls();
> +
>      identify_cpu(&boot_cpu_data);
>      if ( cpu_has_fxsr )
>          set_in_cr4(X86_CR4_OSFXSR);
> @@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
>      initialize_keytable();
>  
>      console_init_postirq();
> -
> -    do_presmp_initcalls();

This looks risky, especially so close to 4.1 release. Instead of moving
do_presmp_initcalls(), please special-case cpu_mcabank_alloc() for CPU0 in
intel_mcheck_init(), and remove your direct call to
cpu_callback(...CPU_UP_PREPARE...) in intel_mce_initcall().

>      for_each_present_cpu ( i )
>      {

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix cpu online/offline bug: mce memory leak.
  2011-03-01  9:30 ` Keir Fraser
@ 2011-03-01 10:12   ` Haitao Shan
  2011-03-01 10:19     ` Keir Fraser
  2011-03-01 10:47   ` Keir Fraser
  1 sibling, 1 reply; 11+ messages in thread
From: Haitao Shan @ 2011-03-01 10:12 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Liu, Jinsong, xen-devel@lists.xensource.com


[-- Attachment #1.1: Type: text/plain, Size: 5900 bytes --]

Hi, Keir,

Fixing memory leak would be always good. But I don't agree this could fix
the problem I observed.
Yes, indeed, I observed the issue during stress test of cpu offline/online
by offlining all cpus except CPU0 one by one and then bringing them back. In
this case, with memory leak, after some time, xmalloc at onlining could hit
a heap page that is formerly owned by domains, since usable Xen heap memory
will be slowly used up. Without memory leak, xmalloc at onlining will be
likely use the memory that is freed just by offlining. So it won't trigger
this assertion.

But let us think a typical usage model, you will offline all LPs on a socket
so that it can be later removed physically. Some other time, you will bring
them back. Between offlining and onlining, there could be a time interval as
long as one week and activities such as creating and killing many guests.
How can we ensure that we won't meet this assertion at that time?

It is my understanding that this memory leak triggered the issue I raised
but the issue itself can be triggered in many other ways. Please do correct
me if I am wrong. Thanks!

Shan Haitao

2011/3/1 Keir Fraser <keir.xen@gmail.com>

> Some comments inline below. Overall a good patch to have, but needs some
> minor adjustments!
>
>  -- Keir
>
> On 01/03/2011 09:09, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>
> > Fix cpu online/offline bug: mce memory leak.
> >
> > Current Xen mce logic didn't free mcabanks. This would be a memory leak
> when
> > cpu offline.
> > When repeatly do cpu online/offline, this memory leak would make xenpool
> > shrink, and at a time point,
> > will call alloc_heap_pages --> flush_area_mask, which
> > ASSERT(local_irq_is_enabled()).
> > However, cpu online is irq disable, so it finally result in Xen crash.
> >
> > This patch fix the memory leak bug, and tested OK over 110,000 round cpu
> > online/offline.
> >
> > Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
> >
> > diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
> > --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011 +0800
> > +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20 2011 +0800
> > @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
> >      mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
> > mca_error_handler);
> >  }
> >
> > -static int intel_init_mca_banks(void)
> > +static void cpu_mcabank_free(unsigned int cpu)
> >  {
> > -    struct mca_banks *mb1, *mb2, * mb3;
> > +    struct mca_banks *mb1, *mb2, *mb3, *mb4;
> > +
> > +    mb1 = per_cpu(mce_clear_banks, cpu);
> > +    mb2 = per_cpu(no_cmci_banks, cpu);
> > +    mb3 = per_cpu(mce_banks_owned, cpu);
> > +    mb4 = per_cpu(poll_bankmask, cpu);
> > +
> > +    mcabanks_free(mb1);
> > +    mcabanks_free(mb2);
> > +    mcabanks_free(mb3);
> > +    mcabanks_free(mb4);
> > +}
> > +
> > +static void cpu_mcabank_alloc(unsigned int cpu)
> > +{
> > +    struct mca_banks *mb1, *mb2, *mb3;
> >
> >      mb1 = mcabanks_alloc();
> >      mb2 = mcabanks_alloc();
> > @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
> >      if (!mb1 || !mb2 || !mb3)
> >          goto out;
> >
> > -    __get_cpu_var(mce_clear_banks) = mb1;
> > -    __get_cpu_var(no_cmci_banks) = mb2;
> > -    __get_cpu_var(mce_banks_owned) = mb3;
> > +    per_cpu(mce_clear_banks, cpu) = mb1;
> > +    per_cpu(no_cmci_banks, cpu) = mb2;
> > +    per_cpu(mce_banks_owned, cpu) = mb3;
> > +    return;
> >
> > -    return 0;
> >  out:
> >      mcabanks_free(mb1);
> >      mcabanks_free(mb2);
> >      mcabanks_free(mb3);
> > -    return -ENOMEM;
> >  }
> >
> >  /* p4/p6 family have similar MCA initialization process */
> >  enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
> >  {
> > -    if (intel_init_mca_banks())
> > +    if ( !this_cpu(mce_clear_banks) ||
> > +         !this_cpu(no_cmci_banks)   ||
> > +         !this_cpu(mce_banks_owned) )
> >           return mcheck_none;
> >
> >      intel_init_mca(c);
> > @@ -1301,13 +1317,19 @@ static int cpu_callback(
> >  static int cpu_callback(
> >      struct notifier_block *nfb, unsigned long action, void *hcpu)
> >  {
> > +    unsigned int cpu = (unsigned long)hcpu;
> > +
> >      switch ( action )
> >      {
> > +    case CPU_UP_PREPARE:
> > +        cpu_mcabank_alloc(cpu);
> > +        break;
> >      case CPU_DYING:
> >          cpu_mcheck_disable();
> >          break;
> >      case CPU_DEAD:
> >          cpu_mcheck_distribute_cmci();
> > +        cpu_mcabank_free(cpu);
> >          break;
>
> You also need to cpu_mcabank_free(cpu) on CPU_UP_CANCELED. Else there's a
> memory leak on failed CPU bringup.
>
> >      default:
> >          break;
> > @@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
> >
> >  static int __init intel_mce_initcall(void)
> >  {
> > +    void *hcpu = (void *)(long)smp_processor_id();
> > +    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
> >      register_cpu_notifier(&cpu_nfb);
> >      return 0;
> >  }
> > diff -r 1a364b17d66a xen/arch/x86/setup.c
> > --- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
> > +++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
> > @@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
> >
> >      arch_init_memory();
> >
> > +    do_presmp_initcalls();
> > +
> >      identify_cpu(&boot_cpu_data);
> >      if ( cpu_has_fxsr )
> >          set_in_cr4(X86_CR4_OSFXSR);
> > @@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
> >      initialize_keytable();
> >
> >      console_init_postirq();
> > -
> > -    do_presmp_initcalls();
>
> This looks risky, especially so close to 4.1 release. Instead of moving
> do_presmp_initcalls(), please special-case cpu_mcabank_alloc() for CPU0 in
> intel_mcheck_init(), and remove your direct call to
> cpu_callback(...CPU_UP_PREPARE...) in intel_mce_initcall().
>
> >      for_each_present_cpu ( i )
> >      {
>
>
>

[-- Attachment #1.2: Type: text/html, Size: 7310 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix cpu online/offline bug: mce memory leak.
  2011-03-01 10:12   ` Haitao Shan
@ 2011-03-01 10:19     ` Keir Fraser
  2011-03-01 10:21       ` Haitao Shan
  0 siblings, 1 reply; 11+ messages in thread
From: Keir Fraser @ 2011-03-01 10:19 UTC (permalink / raw)
  To: Haitao Shan; +Cc: Liu, Jinsong, xen-devel@lists.xensource.com

On 01/03/2011 10:12, "Haitao Shan" <maillists.shan@gmail.com> wrote:

> Hi, Keir,
> 
> Fixing memory leak would be always good. But I don't agree this could fix the
> problem I observed. 
> Yes, indeed, I observed the issue during stress test of cpu offline/online by
> offlining all cpus except CPU0 one by one and then bringing them back. In this
> case, with memory leak, after some time, xmalloc at onlining could hit a heap
> page that is formerly owned by domains, since usable Xen heap memory will be
> slowly used up. Without memory leak, xmalloc at onlining will be likely use
> the memory that is freed just by offlining. So it won't trigger this
> assertion.
> 
> But let us think a typical usage model, you will offline all LPs on a socket
> so that it can be later removed physically. Some other time, you will bring
> them back. Between offlining and onlining, there could be a time interval as
> long as one week and activities such as creating and killing many guests. How
> can we ensure that we won't meet this assertion at that time?
> 
> It is my understanding that this memory leak triggered the issue I raised but
> the issue itself can be triggered in many other ways. Please do correct me if
> I am wrong. Thanks!

The point is that the patch also moves the xmalloc() calls out of the new
CPU's bringup path, and into CPU_UP_PREPARE context. This means the
allocations will occur on a different CPU, before the new CPU begins
execution, and with irqs enabled. That's why it should fix your crash.

 -- Keir

> Shan Haitao
> 
> 2011/3/1 Keir Fraser <keir.xen@gmail.com>
>> Some comments inline below. Overall a good patch to have, but needs some
>> minor adjustments!
>> 
>>  -- Keir
>> 
>> On 01/03/2011 09:09, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> 
>>> Fix cpu online/offline bug: mce memory leak.
>>> 
>>> Current Xen mce logic didn't free mcabanks. This would be a memory leak when
>>> cpu offline.
>>> When repeatly do cpu online/offline, this memory leak would make xenpool
>>> shrink, and at a time point,
>>> will call alloc_heap_pages --> flush_area_mask, which
>>> ASSERT(local_irq_is_enabled()).
>>> However, cpu online is irq disable, so it finally result in Xen crash.
>>> 
>>> This patch fix the memory leak bug, and tested OK over 110,000 round cpu
>>> online/offline.
>>> 
>>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
>>> 
>>> diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
>>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011 +0800
>>> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20 2011 +0800
>>> @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
>>>      mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
>>> mca_error_handler);
>>>  }
>>> 
>>> -static int intel_init_mca_banks(void)
>>> +static void cpu_mcabank_free(unsigned int cpu)
>>>  {
>>> -    struct mca_banks *mb1, *mb2, * mb3;
>>> +    struct mca_banks *mb1, *mb2, *mb3, *mb4;
>>> +
>>> +    mb1 = per_cpu(mce_clear_banks, cpu);
>>> +    mb2 = per_cpu(no_cmci_banks, cpu);
>>> +    mb3 = per_cpu(mce_banks_owned, cpu);
>>> +    mb4 = per_cpu(poll_bankmask, cpu);
>>> +
>>> +    mcabanks_free(mb1);
>>> +    mcabanks_free(mb2);
>>> +    mcabanks_free(mb3);
>>> +    mcabanks_free(mb4);
>>> +}
>>> +
>>> +static void cpu_mcabank_alloc(unsigned int cpu)
>>> +{
>>> +    struct mca_banks *mb1, *mb2, *mb3;
>>> 
>>>      mb1 = mcabanks_alloc();
>>>      mb2 = mcabanks_alloc();
>>> @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
>>>      if (!mb1 || !mb2 || !mb3)
>>>          goto out;
>>> 
>>> -    __get_cpu_var(mce_clear_banks) = mb1;
>>> -    __get_cpu_var(no_cmci_banks) = mb2;
>>> -    __get_cpu_var(mce_banks_owned) = mb3;
>>> +    per_cpu(mce_clear_banks, cpu) = mb1;
>>> +    per_cpu(no_cmci_banks, cpu) = mb2;
>>> +    per_cpu(mce_banks_owned, cpu) = mb3;
>>> +    return;
>>> 
>>> -    return 0;
>>>  out:
>>>      mcabanks_free(mb1);
>>>      mcabanks_free(mb2);
>>>      mcabanks_free(mb3);
>>> -    return -ENOMEM;
>>>  }
>>> 
>>>  /* p4/p6 family have similar MCA initialization process */
>>>  enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
>>>  {
>>> -    if (intel_init_mca_banks())
>>> +    if ( !this_cpu(mce_clear_banks) ||
>>> +         !this_cpu(no_cmci_banks)   ||
>>> +         !this_cpu(mce_banks_owned) )
>>>           return mcheck_none;
>>> 
>>>      intel_init_mca(c);
>>> @@ -1301,13 +1317,19 @@ static int cpu_callback(
>>>  static int cpu_callback(
>>>      struct notifier_block *nfb, unsigned long action, void *hcpu)
>>>  {
>>> +    unsigned int cpu = (unsigned long)hcpu;
>>> +
>>>      switch ( action )
>>>      {
>>> +    case CPU_UP_PREPARE:
>>> +        cpu_mcabank_alloc(cpu);
>>> +        break;
>>>      case CPU_DYING:
>>>          cpu_mcheck_disable();
>>>          break;
>>>      case CPU_DEAD:
>>>          cpu_mcheck_distribute_cmci();
>>> +        cpu_mcabank_free(cpu);
>>>          break;
>> 
>> You also need to cpu_mcabank_free(cpu) on CPU_UP_CANCELED. Else there's a
>> memory leak on failed CPU bringup.
>> 
>>>      default:
>>>          break;
>>> @@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
>>> 
>>>  static int __init intel_mce_initcall(void)
>>>  {
>>> +    void *hcpu = (void *)(long)smp_processor_id();
>>> +    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
>>>      register_cpu_notifier(&cpu_nfb);
>>>      return 0;
>>>  }
>>> diff -r 1a364b17d66a xen/arch/x86/setup.c
>>> --- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
>>> +++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
>>> @@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
>>> 
>>>      arch_init_memory();
>>> 
>>> +    do_presmp_initcalls();
>>> +
>>>      identify_cpu(&boot_cpu_data);
>>>      if ( cpu_has_fxsr )
>>>          set_in_cr4(X86_CR4_OSFXSR);
>>> @@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
>>>      initialize_keytable();
>>> 
>>>      console_init_postirq();
>>> -
>>> -    do_presmp_initcalls();
>> 
>> This looks risky, especially so close to 4.1 release. Instead of moving
>> do_presmp_initcalls(), please special-case cpu_mcabank_alloc() for CPU0 in
>> intel_mcheck_init(), and remove your direct call to
>> cpu_callback(...CPU_UP_PREPARE...) in intel_mce_initcall().
>> 
>>>      for_each_present_cpu ( i )
>>>      {
>> 
>> 
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix cpu online/offline bug: mce memory leak.
  2011-03-01 10:19     ` Keir Fraser
@ 2011-03-01 10:21       ` Haitao Shan
  2011-03-01 10:25         ` Keir Fraser
  2011-03-01 10:26         ` Liu, Jinsong
  0 siblings, 2 replies; 11+ messages in thread
From: Haitao Shan @ 2011-03-01 10:21 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Liu, Jinsong, xen-devel@lists.xensource.com


[-- Attachment #1.1: Type: text/plain, Size: 6900 bytes --]

Ah, yes. I was misled by the patch name itself. :)
Smart fix, isn't it! I guess the issue is closed. Thanks, Jinsong.

Shan Haitao

2011/3/1 Keir Fraser <keir.xen@gmail.com>

> On 01/03/2011 10:12, "Haitao Shan" <maillists.shan@gmail.com> wrote:
>
> > Hi, Keir,
> >
> > Fixing memory leak would be always good. But I don't agree this could fix
> the
> > problem I observed.
> > Yes, indeed, I observed the issue during stress test of cpu
> offline/online by
> > offlining all cpus except CPU0 one by one and then bringing them back. In
> this
> > case, with memory leak, after some time, xmalloc at onlining could hit a
> heap
> > page that is formerly owned by domains, since usable Xen heap memory will
> be
> > slowly used up. Without memory leak, xmalloc at onlining will be likely
> use
> > the memory that is freed just by offlining. So it won't trigger this
> > assertion.
> >
> > But let us think a typical usage model, you will offline all LPs on a
> socket
> > so that it can be later removed physically. Some other time, you will
> bring
> > them back. Between offlining and onlining, there could be a time interval
> as
> > long as one week and activities such as creating and killing many guests.
> How
> > can we ensure that we won't meet this assertion at that time?
> >
> > It is my understanding that this memory leak triggered the issue I raised
> but
> > the issue itself can be triggered in many other ways. Please do correct
> me if
> > I am wrong. Thanks!
>
> The point is that the patch also moves the xmalloc() calls out of the new
> CPU's bringup path, and into CPU_UP_PREPARE context. This means the
> allocations will occur on a different CPU, before the new CPU begins
> execution, and with irqs enabled. That's why it should fix your crash.
>
>  -- Keir
>
> > Shan Haitao
> >
> > 2011/3/1 Keir Fraser <keir.xen@gmail.com>
> >> Some comments inline below. Overall a good patch to have, but needs some
> >> minor adjustments!
> >>
> >>  -- Keir
> >>
> >> On 01/03/2011 09:09, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> >>
> >>> Fix cpu online/offline bug: mce memory leak.
> >>>
> >>> Current Xen mce logic didn't free mcabanks. This would be a memory leak
> when
> >>> cpu offline.
> >>> When repeatly do cpu online/offline, this memory leak would make
> xenpool
> >>> shrink, and at a time point,
> >>> will call alloc_heap_pages --> flush_area_mask, which
> >>> ASSERT(local_irq_is_enabled()).
> >>> However, cpu online is irq disable, so it finally result in Xen crash.
> >>>
> >>> This patch fix the memory leak bug, and tested OK over 110,000 round
> cpu
> >>> online/offline.
> >>>
> >>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
> >>>
> >>> diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
> >>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011
> +0800
> >>> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20 2011
> +0800
> >>> @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
> >>>      mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
> >>> mca_error_handler);
> >>>  }
> >>>
> >>> -static int intel_init_mca_banks(void)
> >>> +static void cpu_mcabank_free(unsigned int cpu)
> >>>  {
> >>> -    struct mca_banks *mb1, *mb2, * mb3;
> >>> +    struct mca_banks *mb1, *mb2, *mb3, *mb4;
> >>> +
> >>> +    mb1 = per_cpu(mce_clear_banks, cpu);
> >>> +    mb2 = per_cpu(no_cmci_banks, cpu);
> >>> +    mb3 = per_cpu(mce_banks_owned, cpu);
> >>> +    mb4 = per_cpu(poll_bankmask, cpu);
> >>> +
> >>> +    mcabanks_free(mb1);
> >>> +    mcabanks_free(mb2);
> >>> +    mcabanks_free(mb3);
> >>> +    mcabanks_free(mb4);
> >>> +}
> >>> +
> >>> +static void cpu_mcabank_alloc(unsigned int cpu)
> >>> +{
> >>> +    struct mca_banks *mb1, *mb2, *mb3;
> >>>
> >>>      mb1 = mcabanks_alloc();
> >>>      mb2 = mcabanks_alloc();
> >>> @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
> >>>      if (!mb1 || !mb2 || !mb3)
> >>>          goto out;
> >>>
> >>> -    __get_cpu_var(mce_clear_banks) = mb1;
> >>> -    __get_cpu_var(no_cmci_banks) = mb2;
> >>> -    __get_cpu_var(mce_banks_owned) = mb3;
> >>> +    per_cpu(mce_clear_banks, cpu) = mb1;
> >>> +    per_cpu(no_cmci_banks, cpu) = mb2;
> >>> +    per_cpu(mce_banks_owned, cpu) = mb3;
> >>> +    return;
> >>>
> >>> -    return 0;
> >>>  out:
> >>>      mcabanks_free(mb1);
> >>>      mcabanks_free(mb2);
> >>>      mcabanks_free(mb3);
> >>> -    return -ENOMEM;
> >>>  }
> >>>
> >>>  /* p4/p6 family have similar MCA initialization process */
> >>>  enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
> >>>  {
> >>> -    if (intel_init_mca_banks())
> >>> +    if ( !this_cpu(mce_clear_banks) ||
> >>> +         !this_cpu(no_cmci_banks)   ||
> >>> +         !this_cpu(mce_banks_owned) )
> >>>           return mcheck_none;
> >>>
> >>>      intel_init_mca(c);
> >>> @@ -1301,13 +1317,19 @@ static int cpu_callback(
> >>>  static int cpu_callback(
> >>>      struct notifier_block *nfb, unsigned long action, void *hcpu)
> >>>  {
> >>> +    unsigned int cpu = (unsigned long)hcpu;
> >>> +
> >>>      switch ( action )
> >>>      {
> >>> +    case CPU_UP_PREPARE:
> >>> +        cpu_mcabank_alloc(cpu);
> >>> +        break;
> >>>      case CPU_DYING:
> >>>          cpu_mcheck_disable();
> >>>          break;
> >>>      case CPU_DEAD:
> >>>          cpu_mcheck_distribute_cmci();
> >>> +        cpu_mcabank_free(cpu);
> >>>          break;
> >>
> >> You also need to cpu_mcabank_free(cpu) on CPU_UP_CANCELED. Else there's
> a
> >> memory leak on failed CPU bringup.
> >>
> >>>      default:
> >>>          break;
> >>> @@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
> >>>
> >>>  static int __init intel_mce_initcall(void)
> >>>  {
> >>> +    void *hcpu = (void *)(long)smp_processor_id();
> >>> +    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
> >>>      register_cpu_notifier(&cpu_nfb);
> >>>      return 0;
> >>>  }
> >>> diff -r 1a364b17d66a xen/arch/x86/setup.c
> >>> --- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
> >>> +++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
> >>> @@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
> >>>
> >>>      arch_init_memory();
> >>>
> >>> +    do_presmp_initcalls();
> >>> +
> >>>      identify_cpu(&boot_cpu_data);
> >>>      if ( cpu_has_fxsr )
> >>>          set_in_cr4(X86_CR4_OSFXSR);
> >>> @@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
> >>>      initialize_keytable();
> >>>
> >>>      console_init_postirq();
> >>> -
> >>> -    do_presmp_initcalls();
> >>
> >> This looks risky, especially so close to 4.1 release. Instead of moving
> >> do_presmp_initcalls(), please special-case cpu_mcabank_alloc() for CPU0
> in
> >> intel_mcheck_init(), and remove your direct call to
> >> cpu_callback(...CPU_UP_PREPARE...) in intel_mce_initcall().
> >>
> >>>      for_each_present_cpu ( i )
> >>>      {
> >>
> >>
> >
> >
>
>
>

[-- Attachment #1.2: Type: text/html, Size: 9367 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix cpu online/offline bug: mce memory leak.
  2011-03-01 10:21       ` Haitao Shan
@ 2011-03-01 10:25         ` Keir Fraser
  2011-03-01 10:26         ` Liu, Jinsong
  1 sibling, 0 replies; 11+ messages in thread
From: Keir Fraser @ 2011-03-01 10:25 UTC (permalink / raw)
  To: Haitao Shan; +Cc: Liu, Jinsong, xen-devel@lists.xensource.com

Well, it is the correct fix, certainly. The new notifier mechanism we
borrowed from Linux should enable us to remove all dynamic allocations from
a CPU's early bringup path. It's just a case of finding them all as some are
tucked away in cpu/platform-specific areas (like this one).

There are a couple of alterations for Jinsong to make, and then smoke test
the patch again. When he resubmits with the alterations I will check the
patch straight in for the next release candidate (RC7).

 K.

On 01/03/2011 10:21, "Haitao Shan" <maillists.shan@gmail.com> wrote:

> Ah, yes. I was misled by the patch name itself. :)
> Smart fix, isn't it! I guess the issue is closed. Thanks, Jinsong.
> 
> Shan Haitao
> 
> 2011/3/1 Keir Fraser <keir.xen@gmail.com>
>> On 01/03/2011 10:12, "Haitao Shan" <maillists.shan@gmail.com> wrote:
>> 
>>> Hi, Keir,
>>> 
>>> Fixing memory leak would be always good. But I don't agree this could fix
>>> the
>>> problem I observed. 
>>> Yes, indeed, I observed the issue during stress test of cpu offline/online
>>> by
>>> offlining all cpus except CPU0 one by one and then bringing them back. In
>>> this
>>> case, with memory leak, after some time, xmalloc at onlining could hit a
>>> heap
>>> page that is formerly owned by domains, since usable Xen heap memory will be
>>> slowly used up. Without memory leak, xmalloc at onlining will be likely use
>>> the memory that is freed just by offlining. So it won't trigger this
>>> assertion.
>>> 
>>> But let us think a typical usage model, you will offline all LPs on a socket
>>> so that it can be later removed physically. Some other time, you will bring
>>> them back. Between offlining and onlining, there could be a time interval as
>>> long as one week and activities such as creating and killing many guests.
>>> How
>>> can we ensure that we won't meet this assertion at that time?
>>> 
>>> It is my understanding that this memory leak triggered the issue I raised
>>> but
>>> the issue itself can be triggered in many other ways. Please do correct me
>>> if
>>> I am wrong. Thanks!
>> 
>> The point is that the patch also moves the xmalloc() calls out of the new
>> CPU's bringup path, and into CPU_UP_PREPARE context. This means the
>> allocations will occur on a different CPU, before the new CPU begins
>> execution, and with irqs enabled. That's why it should fix your crash.
>> 
>>  -- Keir
>> 
>>> Shan Haitao
>>> 
>>> 2011/3/1 Keir Fraser <keir.xen@gmail.com>
>>>> Some comments inline below. Overall a good patch to have, but needs some
>>>> minor adjustments!
>>>> 
>>>>  -- Keir
>>>> 
>>>> On 01/03/2011 09:09, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>>>> 
>>>>> Fix cpu online/offline bug: mce memory leak.
>>>>> 
>>>>> Current Xen mce logic didn't free mcabanks. This would be a memory leak
>>>>> when
>>>>> cpu offline.
>>>>> When repeatly do cpu online/offline, this memory leak would make xenpool
>>>>> shrink, and at a time point,
>>>>> will call alloc_heap_pages --> flush_area_mask, which
>>>>> ASSERT(local_irq_is_enabled()).
>>>>> However, cpu online is irq disable, so it finally result in Xen crash.
>>>>> 
>>>>> This patch fix the memory leak bug, and tested OK over 110,000 round cpu
>>>>> online/offline.
>>>>> 
>>>>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
>>>>> 
>>>>> diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
>>>>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011 +0800
>>>>> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20 2011 +0800
>>>>> @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
>>>>>      mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
>>>>> mca_error_handler);
>>>>>  }
>>>>> 
>>>>> -static int intel_init_mca_banks(void)
>>>>> +static void cpu_mcabank_free(unsigned int cpu)
>>>>>  {
>>>>> -    struct mca_banks *mb1, *mb2, * mb3;
>>>>> +    struct mca_banks *mb1, *mb2, *mb3, *mb4;
>>>>> +
>>>>> +    mb1 = per_cpu(mce_clear_banks, cpu);
>>>>> +    mb2 = per_cpu(no_cmci_banks, cpu);
>>>>> +    mb3 = per_cpu(mce_banks_owned, cpu);
>>>>> +    mb4 = per_cpu(poll_bankmask, cpu);
>>>>> +
>>>>> +    mcabanks_free(mb1);
>>>>> +    mcabanks_free(mb2);
>>>>> +    mcabanks_free(mb3);
>>>>> +    mcabanks_free(mb4);
>>>>> +}
>>>>> +
>>>>> +static void cpu_mcabank_alloc(unsigned int cpu)
>>>>> +{
>>>>> +    struct mca_banks *mb1, *mb2, *mb3;
>>>>> 
>>>>>      mb1 = mcabanks_alloc();
>>>>>      mb2 = mcabanks_alloc();
>>>>> @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
>>>>>      if (!mb1 || !mb2 || !mb3)
>>>>>          goto out;
>>>>> 
>>>>> -    __get_cpu_var(mce_clear_banks) = mb1;
>>>>> -    __get_cpu_var(no_cmci_banks) = mb2;
>>>>> -    __get_cpu_var(mce_banks_owned) = mb3;
>>>>> +    per_cpu(mce_clear_banks, cpu) = mb1;
>>>>> +    per_cpu(no_cmci_banks, cpu) = mb2;
>>>>> +    per_cpu(mce_banks_owned, cpu) = mb3;
>>>>> +    return;
>>>>> 
>>>>> -    return 0;
>>>>>  out:
>>>>>      mcabanks_free(mb1);
>>>>>      mcabanks_free(mb2);
>>>>>      mcabanks_free(mb3);
>>>>> -    return -ENOMEM;
>>>>>  }
>>>>> 
>>>>>  /* p4/p6 family have similar MCA initialization process */
>>>>>  enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
>>>>>  {
>>>>> -    if (intel_init_mca_banks())
>>>>> +    if ( !this_cpu(mce_clear_banks) ||
>>>>> +         !this_cpu(no_cmci_banks)   ||
>>>>> +         !this_cpu(mce_banks_owned) )
>>>>>           return mcheck_none;
>>>>> 
>>>>>      intel_init_mca(c);
>>>>> @@ -1301,13 +1317,19 @@ static int cpu_callback(
>>>>>  static int cpu_callback(
>>>>>      struct notifier_block *nfb, unsigned long action, void *hcpu)
>>>>>  {
>>>>> +    unsigned int cpu = (unsigned long)hcpu;
>>>>> +
>>>>>      switch ( action )
>>>>>      {
>>>>> +    case CPU_UP_PREPARE:
>>>>> +        cpu_mcabank_alloc(cpu);
>>>>> +        break;
>>>>>      case CPU_DYING:
>>>>>          cpu_mcheck_disable();
>>>>>          break;
>>>>>      case CPU_DEAD:
>>>>>          cpu_mcheck_distribute_cmci();
>>>>> +        cpu_mcabank_free(cpu);
>>>>>          break;
>>>> 
>>>> You also need to cpu_mcabank_free(cpu) on CPU_UP_CANCELED. Else there's a
>>>> memory leak on failed CPU bringup.
>>>> 
>>>>>      default:
>>>>>          break;
>>>>> @@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
>>>>> 
>>>>>  static int __init intel_mce_initcall(void)
>>>>>  {
>>>>> +    void *hcpu = (void *)(long)smp_processor_id();
>>>>> +    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
>>>>>      register_cpu_notifier(&cpu_nfb);
>>>>>      return 0;
>>>>>  }
>>>>> diff -r 1a364b17d66a xen/arch/x86/setup.c
>>>>> --- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
>>>>> +++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
>>>>> @@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
>>>>> 
>>>>>      arch_init_memory();
>>>>> 
>>>>> +    do_presmp_initcalls();
>>>>> +
>>>>>      identify_cpu(&boot_cpu_data);
>>>>>      if ( cpu_has_fxsr )
>>>>>          set_in_cr4(X86_CR4_OSFXSR);
>>>>> @@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
>>>>>      initialize_keytable();
>>>>> 
>>>>>      console_init_postirq();
>>>>> -
>>>>> -    do_presmp_initcalls();
>>>> 
>>>> This looks risky, especially so close to 4.1 release. Instead of moving
>>>> do_presmp_initcalls(), please special-case cpu_mcabank_alloc() for CPU0 in
>>>> intel_mcheck_init(), and remove your direct call to
>>>> cpu_callback(...CPU_UP_PREPARE...) in intel_mce_initcall().
>>>> 
>>>>>      for_each_present_cpu ( i )
>>>>>      {
>>>> 
>>>> 
>>> 
>>> 
>> 
>> 
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] Fix cpu online/offline bug: mce memory leak.
  2011-03-01 10:21       ` Haitao Shan
  2011-03-01 10:25         ` Keir Fraser
@ 2011-03-01 10:26         ` Liu, Jinsong
  1 sibling, 0 replies; 11+ messages in thread
From: Liu, Jinsong @ 2011-03-01 10:26 UTC (permalink / raw)
  To: Haitao Shan, Keir Fraser; +Cc: xen-devel@lists.xensource.com


[-- Attachment #1.1: Type: text/plain, Size: 7214 bytes --]

In fact, your finding is the key point and base that make me continue trace.
So I need thank you :)

Jinsong

________________________________
From: Haitao Shan [mailto:maillists.shan@gmail.com]
Sent: Tuesday, March 01, 2011 6:21 PM
To: Keir Fraser
Cc: Liu, Jinsong; xen-devel@lists.xensource.com
Subject: Re: [PATCH] Fix cpu online/offline bug: mce memory leak.

Ah, yes. I was misled by the patch name itself. :)
Smart fix, isn't it! I guess the issue is closed. Thanks, Jinsong.

Shan Haitao

2011/3/1 Keir Fraser <keir.xen@gmail.com<mailto:keir.xen@gmail.com>>
On 01/03/2011 10:12, "Haitao Shan" <maillists.shan@gmail.com<mailto:maillists.shan@gmail.com>> wrote:

> Hi, Keir,
>
> Fixing memory leak would be always good. But I don't agree this could fix the
> problem I observed.
> Yes, indeed, I observed the issue during stress test of cpu offline/online by
> offlining all cpus except CPU0 one by one and then bringing them back. In this
> case, with memory leak, after some time, xmalloc at onlining could hit a heap
> page that is formerly owned by domains, since usable Xen heap memory will be
> slowly used up. Without memory leak, xmalloc at onlining will be likely use
> the memory that is freed just by offlining. So it won't trigger this
> assertion.
>
> But let us think a typical usage model, you will offline all LPs on a socket
> so that it can be later removed physically. Some other time, you will bring
> them back. Between offlining and onlining, there could be a time interval as
> long as one week and activities such as creating and killing many guests. How
> can we ensure that we won't meet this assertion at that time?
>
> It is my understanding that this memory leak triggered the issue I raised but
> the issue itself can be triggered in many other ways. Please do correct me if
> I am wrong. Thanks!

The point is that the patch also moves the xmalloc() calls out of the new
CPU's bringup path, and into CPU_UP_PREPARE context. This means the
allocations will occur on a different CPU, before the new CPU begins
execution, and with irqs enabled. That's why it should fix your crash.

 -- Keir

> Shan Haitao
>
> 2011/3/1 Keir Fraser <keir.xen@gmail.com<mailto:keir.xen@gmail.com>>
>> Some comments inline below. Overall a good patch to have, but needs some
>> minor adjustments!
>>
>>  -- Keir
>>
>> On 01/03/2011 09:09, "Liu, Jinsong" <jinsong.liu@intel.com<mailto:jinsong.liu@intel.com>> wrote:
>>
>>> Fix cpu online/offline bug: mce memory leak.
>>>
>>> Current Xen mce logic didn't free mcabanks. This would be a memory leak when
>>> cpu offline.
>>> When repeatly do cpu online/offline, this memory leak would make xenpool
>>> shrink, and at a time point,
>>> will call alloc_heap_pages --> flush_area_mask, which
>>> ASSERT(local_irq_is_enabled()).
>>> However, cpu online is irq disable, so it finally result in Xen crash.
>>>
>>> This patch fix the memory leak bug, and tested OK over 110,000 round cpu
>>> online/offline.
>>>
>>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com<mailto:jinsong.liu@intel.com>>
>>>
>>> diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
>>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011 +0800
>>> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20 2011 +0800
>>> @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
>>>      mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
>>> mca_error_handler);
>>>  }
>>>
>>> -static int intel_init_mca_banks(void)
>>> +static void cpu_mcabank_free(unsigned int cpu)
>>>  {
>>> -    struct mca_banks *mb1, *mb2, * mb3;
>>> +    struct mca_banks *mb1, *mb2, *mb3, *mb4;
>>> +
>>> +    mb1 = per_cpu(mce_clear_banks, cpu);
>>> +    mb2 = per_cpu(no_cmci_banks, cpu);
>>> +    mb3 = per_cpu(mce_banks_owned, cpu);
>>> +    mb4 = per_cpu(poll_bankmask, cpu);
>>> +
>>> +    mcabanks_free(mb1);
>>> +    mcabanks_free(mb2);
>>> +    mcabanks_free(mb3);
>>> +    mcabanks_free(mb4);
>>> +}
>>> +
>>> +static void cpu_mcabank_alloc(unsigned int cpu)
>>> +{
>>> +    struct mca_banks *mb1, *mb2, *mb3;
>>>
>>>      mb1 = mcabanks_alloc();
>>>      mb2 = mcabanks_alloc();
>>> @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
>>>      if (!mb1 || !mb2 || !mb3)
>>>          goto out;
>>>
>>> -    __get_cpu_var(mce_clear_banks) = mb1;
>>> -    __get_cpu_var(no_cmci_banks) = mb2;
>>> -    __get_cpu_var(mce_banks_owned) = mb3;
>>> +    per_cpu(mce_clear_banks, cpu) = mb1;
>>> +    per_cpu(no_cmci_banks, cpu) = mb2;
>>> +    per_cpu(mce_banks_owned, cpu) = mb3;
>>> +    return;
>>>
>>> -    return 0;
>>>  out:
>>>      mcabanks_free(mb1);
>>>      mcabanks_free(mb2);
>>>      mcabanks_free(mb3);
>>> -    return -ENOMEM;
>>>  }
>>>
>>>  /* p4/p6 family have similar MCA initialization process */
>>>  enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
>>>  {
>>> -    if (intel_init_mca_banks())
>>> +    if ( !this_cpu(mce_clear_banks) ||
>>> +         !this_cpu(no_cmci_banks)   ||
>>> +         !this_cpu(mce_banks_owned) )
>>>           return mcheck_none;
>>>
>>>      intel_init_mca(c);
>>> @@ -1301,13 +1317,19 @@ static int cpu_callback(
>>>  static int cpu_callback(
>>>      struct notifier_block *nfb, unsigned long action, void *hcpu)
>>>  {
>>> +    unsigned int cpu = (unsigned long)hcpu;
>>> +
>>>      switch ( action )
>>>      {
>>> +    case CPU_UP_PREPARE:
>>> +        cpu_mcabank_alloc(cpu);
>>> +        break;
>>>      case CPU_DYING:
>>>          cpu_mcheck_disable();
>>>          break;
>>>      case CPU_DEAD:
>>>          cpu_mcheck_distribute_cmci();
>>> +        cpu_mcabank_free(cpu);
>>>          break;
>>
>> You also need to cpu_mcabank_free(cpu) on CPU_UP_CANCELED. Else there's a
>> memory leak on failed CPU bringup.
>>
>>>      default:
>>>          break;
>>> @@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
>>>
>>>  static int __init intel_mce_initcall(void)
>>>  {
>>> +    void *hcpu = (void *)(long)smp_processor_id();
>>> +    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
>>>      register_cpu_notifier(&cpu_nfb);
>>>      return 0;
>>>  }
>>> diff -r 1a364b17d66a xen/arch/x86/setup.c
>>> --- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
>>> +++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
>>> @@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
>>>
>>>      arch_init_memory();
>>>
>>> +    do_presmp_initcalls();
>>> +
>>>      identify_cpu(&boot_cpu_data);
>>>      if ( cpu_has_fxsr )
>>>          set_in_cr4(X86_CR4_OSFXSR);
>>> @@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
>>>      initialize_keytable();
>>>
>>>      console_init_postirq();
>>> -
>>> -    do_presmp_initcalls();
>>
>> This looks risky, especially so close to 4.1 release. Instead of moving
>> do_presmp_initcalls(), please special-case cpu_mcabank_alloc() for CPU0 in
>> intel_mcheck_init(), and remove your direct call to
>> cpu_callback(...CPU_UP_PREPARE...) in intel_mce_initcall().
>>
>>>      for_each_present_cpu ( i )
>>>      {
>>
>>
>
>




[-- Attachment #1.2: Type: text/html, Size: 11815 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] Fix cpu online/offline bug: mce memory leak.
  2011-03-01  9:30 ` Keir Fraser
  2011-03-01 10:12   ` Haitao Shan
@ 2011-03-01 10:47   ` Keir Fraser
  2011-03-02  6:54     ` Liu, Jinsong
  1 sibling, 1 reply; 11+ messages in thread
From: Keir Fraser @ 2011-03-01 10:47 UTC (permalink / raw)
  To: Liu, Jinsong, xen-devel@lists.xensource.com; +Cc: Haitao Shan

A further comment: Please remove the presmp_initcall(intel_mce_initcall)
altogether. It will be causing unnecessary memory allocations even on
non-Intel CPUs. Instead, I suggest to register the cpu_notifier from
intel_mcheck_init(), at the same time you allocate the mcabanks for CPU0.
This will ensure that the CMCI/allocation work in the notifier is only
attempted for suitably MCA-capable Intel processors.

Um, also, you should fail CPU_UP_PREPARE if cpu_mcabank_alloc() fails. CPU
bringup will probably fail anyway if you are that low on memory. I don't
think it makes sense to struggle on with hobbled MCA support for the new
processor in this case. If cpu_mcabank_alloc() fails for CPU0, called from
intel_mcheck_init() you can quite reasonably BUG() on that.

 -- Keir

On 01/03/2011 09:30, "Keir Fraser" <keir.xen@gmail.com> wrote:

> Some comments inline below. Overall a good patch to have, but needs some
> minor adjustments!
> 
>  -- Keir
> 
> On 01/03/2011 09:09, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> 
>> Fix cpu online/offline bug: mce memory leak.
>> 
>> Current Xen mce logic didn't free mcabanks. This would be a memory leak when
>> cpu offline.
>> When repeatly do cpu online/offline, this memory leak would make xenpool
>> shrink, and at a time point,
>> will call alloc_heap_pages --> flush_area_mask, which
>> ASSERT(local_irq_is_enabled()).
>> However, cpu online is irq disable, so it finally result in Xen crash.
>> 
>> This patch fix the memory leak bug, and tested OK over 110,000 round cpu
>> online/offline.
>> 
>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
>> 
>> diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011 +0800
>> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20 2011 +0800
>> @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
>>      mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
>> mca_error_handler);
>>  }
>>  
>> -static int intel_init_mca_banks(void)
>> +static void cpu_mcabank_free(unsigned int cpu)
>>  {
>> -    struct mca_banks *mb1, *mb2, * mb3;
>> +    struct mca_banks *mb1, *mb2, *mb3, *mb4;
>> +
>> +    mb1 = per_cpu(mce_clear_banks, cpu);
>> +    mb2 = per_cpu(no_cmci_banks, cpu);
>> +    mb3 = per_cpu(mce_banks_owned, cpu);
>> +    mb4 = per_cpu(poll_bankmask, cpu);
>> +
>> +    mcabanks_free(mb1);
>> +    mcabanks_free(mb2);
>> +    mcabanks_free(mb3);
>> +    mcabanks_free(mb4);
>> +}
>> +
>> +static void cpu_mcabank_alloc(unsigned int cpu)
>> +{
>> +    struct mca_banks *mb1, *mb2, *mb3;
>>  
>>      mb1 = mcabanks_alloc();
>>      mb2 = mcabanks_alloc();
>> @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
>>      if (!mb1 || !mb2 || !mb3)
>>          goto out;
>>  
>> -    __get_cpu_var(mce_clear_banks) = mb1;
>> -    __get_cpu_var(no_cmci_banks) = mb2;
>> -    __get_cpu_var(mce_banks_owned) = mb3;
>> +    per_cpu(mce_clear_banks, cpu) = mb1;
>> +    per_cpu(no_cmci_banks, cpu) = mb2;
>> +    per_cpu(mce_banks_owned, cpu) = mb3;
>> +    return;
>>  
>> -    return 0;
>>  out:
>>      mcabanks_free(mb1);
>>      mcabanks_free(mb2);
>>      mcabanks_free(mb3);
>> -    return -ENOMEM;
>>  }
>>  
>>  /* p4/p6 family have similar MCA initialization process */
>>  enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
>>  {
>> -    if (intel_init_mca_banks())
>> +    if ( !this_cpu(mce_clear_banks) ||
>> +         !this_cpu(no_cmci_banks)   ||
>> +         !this_cpu(mce_banks_owned) )
>>           return mcheck_none;
>>  
>>      intel_init_mca(c);
>> @@ -1301,13 +1317,19 @@ static int cpu_callback(
>>  static int cpu_callback(
>>      struct notifier_block *nfb, unsigned long action, void *hcpu)
>>  {
>> +    unsigned int cpu = (unsigned long)hcpu;
>> +
>>      switch ( action )
>>      {
>> +    case CPU_UP_PREPARE:
>> +        cpu_mcabank_alloc(cpu);
>> +        break;
>>      case CPU_DYING:
>>          cpu_mcheck_disable();
>>          break;
>>      case CPU_DEAD:
>>          cpu_mcheck_distribute_cmci();
>> +        cpu_mcabank_free(cpu);
>>          break;
> 
> You also need to cpu_mcabank_free(cpu) on CPU_UP_CANCELED. Else there's a
> memory leak on failed CPU bringup.
> 
>>      default:
>>          break;
>> @@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
>>  
>>  static int __init intel_mce_initcall(void)
>>  {
>> +    void *hcpu = (void *)(long)smp_processor_id();
>> +    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
>>      register_cpu_notifier(&cpu_nfb);
>>      return 0;
>>  }
>> diff -r 1a364b17d66a xen/arch/x86/setup.c
>> --- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
>> +++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
>> @@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
>>  
>>      arch_init_memory();
>>  
>> +    do_presmp_initcalls();
>> +
>>      identify_cpu(&boot_cpu_data);
>>      if ( cpu_has_fxsr )
>>          set_in_cr4(X86_CR4_OSFXSR);
>> @@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
>>      initialize_keytable();
>>  
>>      console_init_postirq();
>> -
>> -    do_presmp_initcalls();
> 
> This looks risky, especially so close to 4.1 release. Instead of moving
> do_presmp_initcalls(), please special-case cpu_mcabank_alloc() for CPU0 in
> intel_mcheck_init(), and remove your direct call to
> cpu_callback(...CPU_UP_PREPARE...) in intel_mce_initcall().
> 
>>      for_each_present_cpu ( i )
>>      {
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH] Fix cpu online/offline bug: mce memory leak.
  2011-03-01 10:47   ` Keir Fraser
@ 2011-03-02  6:54     ` Liu, Jinsong
  0 siblings, 0 replies; 11+ messages in thread
From: Liu, Jinsong @ 2011-03-02  6:54 UTC (permalink / raw)
  To: Keir Fraser, xen-devel@lists.xensource.com
  Cc: Haitao Shan, Jiang, Yunhong, Li, Xin

[-- Attachment #1: Type: text/plain, Size: 5947 bytes --]

Keir,

Thanks for comments! Attached is updated patch, and smoke test done over 50,000 round cpu online/offline.

Regards,
Jinsong

Keir Fraser wrote:
> A further comment: Please remove the
> presmp_initcall(intel_mce_initcall) altogether. It will be causing
> unnecessary memory allocations even on non-Intel CPUs. Instead, I
> suggest to register the cpu_notifier from intel_mcheck_init(), at the
> same time you allocate the mcabanks for CPU0. This will ensure that
> the CMCI/allocation work in the notifier is only attempted for
> suitably MCA-capable Intel processors. 
> 
> Um, also, you should fail CPU_UP_PREPARE if cpu_mcabank_alloc()
> fails. CPU bringup will probably fail anyway if you are that low on
> memory. I don't think it makes sense to struggle on with hobbled MCA
> support for the new processor in this case. If cpu_mcabank_alloc()
> fails for CPU0, called from intel_mcheck_init() you can quite
> reasonably BUG() on that. 
> 
>  -- Keir
> 
> On 01/03/2011 09:30, "Keir Fraser" <keir.xen@gmail.com> wrote:
> 
>> Some comments inline below. Overall a good patch to have, but needs
>> some minor adjustments! 
>> 
>>  -- Keir
>> 
>> On 01/03/2011 09:09, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> 
>>> Fix cpu online/offline bug: mce memory leak.
>>> 
>>> Current Xen mce logic didn't free mcabanks. This would be a memory
>>> leak when cpu offline. When repeatly do cpu online/offline, this
>>> memory leak would make xenpool shrink, and at a time point, will
>>> call alloc_heap_pages --> flush_area_mask, which
>>> ASSERT(local_irq_is_enabled()). 
>>> However, cpu online is irq disable, so it finally result in Xen
>>> crash. 
>>> 
>>> This patch fix the memory leak bug, and tested OK over 110,000
>>> round cpu online/offline. 
>>> 
>>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
>>> 
>>> diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
>>> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011
>>> +0800 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20
>>> 2011 +0800 @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
>>>      mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
>>>  mca_error_handler); }
>>> 
>>> -static int intel_init_mca_banks(void)
>>> +static void cpu_mcabank_free(unsigned int cpu)
>>>  {
>>> -    struct mca_banks *mb1, *mb2, * mb3;
>>> +    struct mca_banks *mb1, *mb2, *mb3, *mb4;
>>> +
>>> +    mb1 = per_cpu(mce_clear_banks, cpu);
>>> +    mb2 = per_cpu(no_cmci_banks, cpu);
>>> +    mb3 = per_cpu(mce_banks_owned, cpu);
>>> +    mb4 = per_cpu(poll_bankmask, cpu);
>>> +
>>> +    mcabanks_free(mb1);
>>> +    mcabanks_free(mb2);
>>> +    mcabanks_free(mb3);
>>> +    mcabanks_free(mb4);
>>> +}
>>> +
>>> +static void cpu_mcabank_alloc(unsigned int cpu)
>>> +{
>>> +    struct mca_banks *mb1, *mb2, *mb3;
>>> 
>>>      mb1 = mcabanks_alloc();
>>>      mb2 = mcabanks_alloc();
>>> @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
>>>      if (!mb1 || !mb2 || !mb3)
>>>          goto out;
>>> 
>>> -    __get_cpu_var(mce_clear_banks) = mb1;
>>> -    __get_cpu_var(no_cmci_banks) = mb2;
>>> -    __get_cpu_var(mce_banks_owned) = mb3;
>>> +    per_cpu(mce_clear_banks, cpu) = mb1;
>>> +    per_cpu(no_cmci_banks, cpu) = mb2;
>>> +    per_cpu(mce_banks_owned, cpu) = mb3;
>>> +    return;
>>> 
>>> -    return 0;
>>>  out:
>>>      mcabanks_free(mb1);
>>>      mcabanks_free(mb2);
>>>      mcabanks_free(mb3);
>>> -    return -ENOMEM;
>>>  }
>>> 
>>>  /* p4/p6 family have similar MCA initialization process */
>>>  enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)  {
>>> -    if (intel_init_mca_banks())
>>> +    if ( !this_cpu(mce_clear_banks) ||
>>> +         !this_cpu(no_cmci_banks)   ||
>>> +         !this_cpu(mce_banks_owned) )
>>>           return mcheck_none;
>>> 
>>>      intel_init_mca(c);
>>> @@ -1301,13 +1317,19 @@ static int cpu_callback(
>>>  static int cpu_callback(
>>>      struct notifier_block *nfb, unsigned long action, void *hcpu) 
>>> { +    unsigned int cpu = (unsigned long)hcpu;
>>> +
>>>      switch ( action )
>>>      {
>>> +    case CPU_UP_PREPARE:
>>> +        cpu_mcabank_alloc(cpu);
>>> +        break;
>>>      case CPU_DYING:
>>>          cpu_mcheck_disable();
>>>          break;
>>>      case CPU_DEAD:
>>>          cpu_mcheck_distribute_cmci();
>>> +        cpu_mcabank_free(cpu);
>>>          break;
>> 
>> You also need to cpu_mcabank_free(cpu) on CPU_UP_CANCELED. Else
>> there's a memory leak on failed CPU bringup.
>> 
>>>      default:
>>>          break;
>>> @@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
>>> 
>>>  static int __init intel_mce_initcall(void)
>>>  {
>>> +    void *hcpu = (void *)(long)smp_processor_id();
>>> +    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
>>>      register_cpu_notifier(&cpu_nfb);
>>>      return 0;
>>>  }
>>> diff -r 1a364b17d66a xen/arch/x86/setup.c
>>> --- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
>>> +++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
>>> @@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
>>> 
>>>      arch_init_memory();
>>> 
>>> +    do_presmp_initcalls();
>>> +
>>>      identify_cpu(&boot_cpu_data);
>>>      if ( cpu_has_fxsr )
>>>          set_in_cr4(X86_CR4_OSFXSR);
>>> @@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb     
>>> initialize_keytable(); 
>>> 
>>>      console_init_postirq();
>>> -
>>> -    do_presmp_initcalls();
>> 
>> This looks risky, especially so close to 4.1 release. Instead of
>> moving do_presmp_initcalls(), please special-case
>> cpu_mcabank_alloc() for CPU0 in intel_mcheck_init(), and remove your
>> direct call to cpu_callback(...CPU_UP_PREPARE...) in
>> intel_mce_initcall(). 
>> 
>>>      for_each_present_cpu ( i )
>>>      {


[-- Attachment #2: mce_fix_memory_leak.patch --]
[-- Type: application/octet-stream, Size: 3798 bytes --]

Fix cpu online/offline bug: mce memory leak.

Current Xen mce logic didn't free mcabanks. This would be a memory leak when cpu offline.
When repeatly do cpu online/offline, this memory leak would make xenpool shrink, and at a time point,
will call alloc_heap_pages --> flush_area_mask, which ASSERT(local_irq_is_enabled()).
However, cpu online is irq disable, so it finally result in Xen crash.

This patch fix the memory leak bug, and tested OK over 50,000 round cpu online/offline.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Wed Mar 02 03:30:24 2022 +0800
@@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
     mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct mca_error_handler);
 }
 
-static int intel_init_mca_banks(void)
+static void cpu_mcabank_free(unsigned int cpu)
 {
-    struct mca_banks *mb1, *mb2, * mb3;
+    struct mca_banks *mb1, *mb2, *mb3, *mb4;
+
+    mb1 = per_cpu(mce_clear_banks, cpu);
+    mb2 = per_cpu(no_cmci_banks, cpu);
+    mb3 = per_cpu(mce_banks_owned, cpu);
+    mb4 = per_cpu(poll_bankmask, cpu);
+
+    mcabanks_free(mb1);
+    mcabanks_free(mb2);
+    mcabanks_free(mb3);
+    mcabanks_free(mb4);
+}
+
+static int cpu_mcabank_alloc(unsigned int cpu)
+{
+    struct mca_banks *mb1, *mb2, *mb3;
 
     mb1 = mcabanks_alloc();
     mb2 = mcabanks_alloc();
@@ -1237,9 +1252,9 @@ static int intel_init_mca_banks(void)
     if (!mb1 || !mb2 || !mb3)
         goto out;
 
-    __get_cpu_var(mce_clear_banks) = mb1;
-    __get_cpu_var(no_cmci_banks) = mb2;
-    __get_cpu_var(mce_banks_owned) = mb3;
+    per_cpu(mce_clear_banks, cpu) = mb1;
+    per_cpu(no_cmci_banks, cpu) = mb2;
+    per_cpu(mce_banks_owned, cpu) = mb3;
 
     return 0;
 out:
@@ -1249,11 +1264,49 @@ out:
     return -ENOMEM;
 }
 
+static int cpu_callback(
+    struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+    unsigned int cpu = (unsigned long)hcpu;
+    int rc = 0;
+
+    switch ( action )
+    {
+    /* 
+     * it doesn't make sense to struggle on with hobbled MCA support 
+     * for the new processor in such a low memory case
+     */
+    case CPU_UP_PREPARE:
+        rc = cpu_mcabank_alloc(cpu);
+        break;
+    case CPU_DYING:
+        cpu_mcheck_disable();
+        break;
+    case CPU_DEAD:
+        cpu_mcheck_distribute_cmci();
+    case CPU_UP_CANCELED:
+        cpu_mcabank_free(cpu);
+        break;
+    default:
+        break;
+    }
+
+    return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
+}
+
+static struct notifier_block cpu_nfb = {
+    .notifier_call = cpu_callback
+};
+
 /* p4/p6 family have similar MCA initialization process */
 enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
 {
-    if (intel_init_mca_banks())
-         return mcheck_none;
+    if ( smp_processor_id() == 0 )
+    {
+        /* it's quite reasonable to BUG_ON mcabank alloc fail for cpu0 */
+        BUG_ON(cpu_mcabank_alloc(0));
+        register_cpu_notifier(&cpu_nfb);
+    }
 
     intel_init_mca(c);
 
@@ -1298,31 +1351,3 @@ int intel_mce_rdmsr(uint32_t msr, uint64
     return ret;
 }
 
-static int cpu_callback(
-    struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-    switch ( action )
-    {
-    case CPU_DYING:
-        cpu_mcheck_disable();
-        break;
-    case CPU_DEAD:
-        cpu_mcheck_distribute_cmci();
-        break;
-    default:
-        break;
-    }
-
-    return NOTIFY_DONE;
-}
-
-static struct notifier_block cpu_nfb = {
-    .notifier_call = cpu_callback
-};
-
-static int __init intel_mce_initcall(void)
-{
-    register_cpu_notifier(&cpu_nfb);
-    return 0;
-}
-presmp_initcall(intel_mce_initcall);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2011-03-02  6:54 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-03-01  9:09 [PATCH] Fix cpu online/offline bug: mce memory leak Liu, Jinsong
2011-03-01  9:22 ` Keir Fraser
2011-03-01  9:27   ` Liu, Jinsong
2011-03-01  9:30 ` Keir Fraser
2011-03-01 10:12   ` Haitao Shan
2011-03-01 10:19     ` Keir Fraser
2011-03-01 10:21       ` Haitao Shan
2011-03-01 10:25         ` Keir Fraser
2011-03-01 10:26         ` Liu, Jinsong
2011-03-01 10:47   ` Keir Fraser
2011-03-02  6:54     ` Liu, Jinsong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).