Fix cpu online/offline bug: mce memory leak.
Current Xen mce logic didn't free mcabanks. This would be a memory leak when
cpu offline.
When repeatly do cpu online/offline, this memory leak would make xenpool
shrink, and at a time point,
will call alloc_heap_pages --> flush_area_mask, which
ASSERT(local_irq_is_enabled()).
However, cpu online is irq disable, so it finally result in Xen crash.
This patch fix the memory leak bug, and tested OK over 110,000 round cpu
online/offline.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20 2011 +0800
@@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
mca_error_handler);
}
-static int intel_init_mca_banks(void)
+static void cpu_mcabank_free(unsigned int cpu)
{
- struct mca_banks *mb1, *mb2, * mb3;
+ struct mca_banks *mb1, *mb2, *mb3, *mb4;
+
+ mb1 = per_cpu(mce_clear_banks, cpu);
+ mb2 = per_cpu(no_cmci_banks, cpu);
+ mb3 = per_cpu(mce_banks_owned, cpu);
+ mb4 = per_cpu(poll_bankmask, cpu);
+
+ mcabanks_free(mb1);
+ mcabanks_free(mb2);
+ mcabanks_free(mb3);
+ mcabanks_free(mb4);
+}
+
+static void cpu_mcabank_alloc(unsigned int cpu)
+{
+ struct mca_banks *mb1, *mb2, *mb3;
mb1 = mcabanks_alloc();
mb2 = mcabanks_alloc();
@@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
if (!mb1 || !mb2 || !mb3)
goto out;
- __get_cpu_var(mce_clear_banks) = mb1;
- __get_cpu_var(no_cmci_banks) = mb2;
- __get_cpu_var(mce_banks_owned) = mb3;
+ per_cpu(mce_clear_banks, cpu) = mb1;
+ per_cpu(no_cmci_banks, cpu) = mb2;
+ per_cpu(mce_banks_owned, cpu) = mb3;
+ return;
- return 0;
out:
mcabanks_free(mb1);
mcabanks_free(mb2);
mcabanks_free(mb3);
- return -ENOMEM;
}
/* p4/p6 family have similar MCA initialization process */
enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
{
- if (intel_init_mca_banks())
+ if ( !this_cpu(mce_clear_banks) ||
+ !this_cpu(no_cmci_banks) ||
+ !this_cpu(mce_banks_owned) )
return mcheck_none;
intel_init_mca(c);
@@ -1301,13 +1317,19 @@ static int cpu_callback(
static int cpu_callback(
struct notifier_block *nfb, unsigned long action, void *hcpu)
{
+ unsigned int cpu = (unsigned long)hcpu;
+
switch ( action )
{
+ case CPU_UP_PREPARE:
+ cpu_mcabank_alloc(cpu);
+ break;
case CPU_DYING:
cpu_mcheck_disable();
break;
case CPU_DEAD:
cpu_mcheck_distribute_cmci();
+ cpu_mcabank_free(cpu);
break;
default:
break;
@@ -1322,6 +1344,8 @@ static struct notifier_block cpu_nfb = {
static int __init intel_mce_initcall(void)
{
+ void *hcpu = (void *)(long)smp_processor_id();
+ cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu);
register_cpu_notifier(&cpu_nfb);
return 0;
}
diff -r 1a364b17d66a xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Fri Feb 25 01:26:01 2011 +0800
+++ b/xen/arch/x86/setup.c Mon Feb 28 19:19:20 2011 +0800
@@ -1203,6 +1203,8 @@ void __init __start_xen(unsigned long mb
arch_init_memory();
+ do_presmp_initcalls();
+
identify_cpu(&boot_cpu_data);
if ( cpu_has_fxsr )
set_in_cr4(X86_CR4_OSFXSR);
@@ -1235,8 +1237,6 @@ void __init __start_xen(unsigned long mb
initialize_keytable();
console_init_postirq();
-
- do_presmp_initcalls();
for_each_present_cpu ( i )
{
mce_fix_memory_leak.patch
Description: mce_fix_memory_leak.patch
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|