On 01/03/2011 09:09, "Liu, Jinsong" <
jinsong.liu@xxxxxxxxx> wrote:
> Fix cpu online/offline bug: mce memory leak.
>
> Current Xen mce logic didn't free mcabanks. This would be a memory leak when
> cpu offline.
> When repeatly do cpu online/offline, this memory leak would make xenpool
> shrink, and at a time point,
> will call alloc_heap_pages --> flush_area_mask, which
> ASSERT(local_irq_is_enabled()).
> However, cpu online is irq disable, so it finally result in Xen crash.
>
> This patch fix the memory leak bug, and tested OK over 110,000 round cpu
> online/offline.
>
> Signed-off-by: Liu, Jinsong <
jinsong.liu@xxxxxxxxx>
>
> diff -r 1a364b17d66a xen/arch/x86/cpu/mcheck/mce_intel.c
> --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 25 01:26:01 2011 +0800
> +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Feb 28 19:19:20 2011 +0800
> @@ -1227,9 +1227,24 @@ static void intel_init_mce(void)
> mce_uhandler_num = sizeof(intel_mce_uhandlers)/sizeof(struct
> mca_error_handler);
> }
>
> -static int intel_init_mca_banks(void)
> +static void cpu_mcabank_free(unsigned int cpu)
> {
> - struct mca_banks *mb1, *mb2, * mb3;
> + struct mca_banks *mb1, *mb2, *mb3, *mb4;
> +
> + mb1 = per_cpu(mce_clear_banks, cpu);
> + mb2 = per_cpu(no_cmci_banks, cpu);
> + mb3 = per_cpu(mce_banks_owned, cpu);
> + mb4 = per_cpu(poll_bankmask, cpu);
> +
> + mcabanks_free(mb1);
> + mcabanks_free(mb2);
> + mcabanks_free(mb3);
> + mcabanks_free(mb4);
> +}
> +
> +static void cpu_mcabank_alloc(unsigned int cpu)
> +{
> + struct mca_banks *mb1, *mb2, *mb3;
>
> mb1 = mcabanks_alloc();
> mb2 = mcabanks_alloc();
> @@ -1237,22 +1252,23 @@ static int intel_init_mca_banks(void)
> if (!mb1 || !mb2 || !mb3)
> goto out;
>
> - __get_cpu_var(mce_clear_banks) = mb1;
> - __get_cpu_var(no_cmci_banks) = mb2;
> - __get_cpu_var(mce_banks_owned) = mb3;
> + per_cpu(mce_clear_banks, cpu) = mb1;
> + per_cpu(no_cmci_banks, cpu) = mb2;
> + per_cpu(mce_banks_owned, cpu) = mb3;
> + return;
>
> - return 0;
> out:
> mcabanks_free(mb1);
> mcabanks_free(mb2);
> mcabanks_free(mb3);
> - return -ENOMEM;
> }
>
> /* p4/p6 family have similar MCA initialization process */
> enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
> {
> - if (intel_init_mca_banks())
> + if ( !this_cpu(mce_clear_banks) ||
> + !this_cpu(no_cmci_banks) ||
> + !this_cpu(mce_banks_owned) )
> return mcheck_none;
>
> intel_init_mca(c);
> @@ -1301,13 +1317,19 @@ static int cpu_callback(
> static int cpu_callback(
> struct notifier_block *nfb, unsigned long action, void *hcpu)
> {
> + unsigned int cpu = (unsigned long)hcpu;
> +
> switch ( action )
> {
> + case CPU_UP_PREPARE:
> + cpu_mcabank_alloc(cpu);
> + break;
> case CPU_DYING:
> cpu_mcheck_disable();
> break;
> case CPU_DEAD:
> cpu_mcheck_distribute_cmci();
> + cpu_mcabank_free(cpu);
> break;