[Xen-changelog] [xen-unstable] Enable CMCI for Intel CPUs

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Enable CMCI for Intel CPUs
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 22 Dec 2008 00:20:12 -0800
Delivery-date: Mon, 22 Dec 2008 00:21:14 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1229933553 0
# Node ID 4d5203f95498ff83b4fbcd48500c1d2d20b23f91
# Parent  2dffa6ceb0af954e7f3a9ad7e993b8aee7b7de65
Enable CMCI for Intel CPUs
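
Replace the old P4- and P6-specific machine-check code (p4.c, p6.c) with a
common Intel MCA implementation (mce_intel.c) and add support for the
Corrected Machine Check Interrupt (CMCI):
 - register a CMCI LVT vector and interrupt handler;
 - discover per-bank CMCI ownership across CPUs as recommended by the SDM,
   and redo the discovery through new cpu_down/rollback handlers when CPUs
   are offlined;
 - keep a periodic polling timer for banks that do not support CMCI;
 - report errors to dom0 via VIRQ_MCA, including the Intel extended MSR
   state;
 - rename CONFIG_X86_MCE_P4THERMAL to CONFIG_X86_MCE_THERMAL;
 - change stop_machine_run() to take a cpumask of CPUs to run the callback
   on:

   int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpus);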

Signed-off-by: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Liping Ke <liping.ke@xxxxxxxxx>
---
 xen/arch/x86/cpu/mcheck/p4.c                   |  270 ---------
 xen/arch/x86/cpu/mcheck/p6.c                   |  118 ----
 xen/arch/x86/apic.c                            |   33 +
 xen/arch/x86/cpu/mcheck/Makefile               |    3 
 xen/arch/x86/cpu/mcheck/k7.c                   |    1 
 xen/arch/x86/cpu/mcheck/mce.c                  |   31 -
 xen/arch/x86/cpu/mcheck/mce.h                  |   16 
 xen/arch/x86/cpu/mcheck/mce_intel.c            |  681 +++++++++++++++++++++++++
 xen/arch/x86/cpu/mcheck/non-fatal.c            |   25 
 xen/arch/x86/cpu/mcheck/x86_mca.h              |   19 
 xen/arch/x86/hvm/vmx/vmx.c                     |    8 
 xen/arch/x86/i8259.c                           |    1 
 xen/arch/x86/smpboot.c                         |   34 +
 xen/common/stop_machine.c                      |   31 -
 xen/include/asm-x86/apicdef.h                  |    2 
 xen/include/asm-x86/config.h                   |    2 
 xen/include/asm-x86/irq.h                      |    1 
 xen/include/asm-x86/mach-default/irq_vectors.h |    4 
 xen/include/asm-x86/msr-index.h                |    6 
 xen/include/asm-x86/smp.h                      |    2 
 xen/include/public/arch-x86/xen-mca.h          |   15 
 xen/include/xen/stop_machine.h                 |    4 
 22 files changed, 859 insertions(+), 448 deletions(-)

diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/apic.c       Mon Dec 22 08:12:33 2008 +0000
@@ -99,8 +99,11 @@ void __init apic_intr_init(void)
     /* Performance Counters Interrupt */
     set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
 
-    /* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+    /* CMCI Correctable Machine Check Interrupt */
+    set_intr_gate(CMCI_APIC_VECTOR, cmci_interrupt);
+
+    /* thermal monitor LVT interrupt, for P4 and latest Intel CPU*/
+#ifdef CONFIG_X86_MCE_THERMAL
     set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
 }
@@ -172,12 +175,17 @@ void clear_local_APIC(void)
     }
 
 /* lets not touch this if we didn't frob it */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
     if (maxlvt >= 5) {
         v = apic_read(APIC_LVTTHMR);
         apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
     }
 #endif
+
+    if (maxlvt >= 6) {
+        v = apic_read(APIC_CMCI);
+        apic_write_around(APIC_CMCI, v | APIC_LVT_MASKED);
+    }
     /*
      * Clean APIC state for other OSs:
      */
@@ -189,10 +197,13 @@ void clear_local_APIC(void)
     if (maxlvt >= 4)
         apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
 
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
     if (maxlvt >= 5)
         apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
 #endif
+    if (maxlvt >= 6)
+        apic_write_around(APIC_CMCI, APIC_LVT_MASKED);
+
     v = GET_APIC_VERSION(apic_read(APIC_LVR));
     if (APIC_INTEGRATED(v)) {  /* !82489DX */
         if (maxlvt > 3)        /* Due to Pentium errata 3AP and 11AP. */
@@ -597,6 +608,7 @@ static struct {
     unsigned int apic_spiv;
     unsigned int apic_lvtt;
     unsigned int apic_lvtpc;
+    unsigned int apic_lvtcmci;
     unsigned int apic_lvt0;
     unsigned int apic_lvt1;
     unsigned int apic_lvterr;
@@ -608,7 +620,7 @@ int lapic_suspend(void)
 int lapic_suspend(void)
 {
     unsigned long flags;
-
+    int maxlvt = get_maxlvt();
     if (!apic_pm_state.active)
         return 0;
 
@@ -620,6 +632,11 @@ int lapic_suspend(void)
     apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
     apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
     apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+
+    if (maxlvt >= 6) {
+        apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI);
+    }
+
     apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
     apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
     apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
@@ -637,6 +654,7 @@ int lapic_resume(void)
 {
     unsigned int l, h;
     unsigned long flags;
+    int maxlvt = get_maxlvt();
 
     if (!apic_pm_state.active)
         return 0;
@@ -669,6 +687,11 @@ int lapic_resume(void)
     apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
     apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
     apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+
+    if (maxlvt >= 6) {
+        apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci);
+    }
+
     apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
     apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
     apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/Makefile
--- a/xen/arch/x86/cpu/mcheck/Makefile  Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/Makefile  Mon Dec 22 08:12:33 2008 +0000
@@ -3,8 +3,7 @@ obj-y += amd_k8.o
 obj-y += amd_k8.o
 obj-y += amd_f10.o
 obj-y += mce.o
+obj-y += mce_intel.o
 obj-y += non-fatal.o
-obj-y += p4.o
 obj-$(x86_32) += p5.o
-obj-$(x86_32) += p6.o
 obj-$(x86_32) += winchip.o
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/k7.c
--- a/xen/arch/x86/cpu/mcheck/k7.c      Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/k7.c      Mon Dec 22 08:12:33 2008 +0000
@@ -14,6 +14,7 @@
 #include <asm/msr.h>
 
 #include "mce.h"
+#include "x86_mca.h"
 
 /* Machine Check Handler For AMD Athlon/Duron */
 static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_code)
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Mon Dec 22 08:12:33 2008 +0000
@@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks);      /* non-
  * to physical cpus present in the machine.
  * The more physical cpus are available, the more entries you need.
  */
-#define MAX_MCINFO     10
+#define MAX_MCINFO     20
 
 struct mc_machine_notify {
        struct mc_info mc;
@@ -110,6 +110,22 @@ static void amd_mcheck_init(struct cpuin
        }
 }
 
+/*check the existence of Machine Check*/
+int mce_available(struct cpuinfo_x86 *c)
+{
+       return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
+}
+
+/*Make sure there are no machine check on offlined or suspended CPUs*/
+void mce_disable_cpu(void)
+{
+    if (!mce_available(&current_cpu_data) || mce_disabled == 1)
+         return;
+    printk(KERN_DEBUG "MCE: disable mce on CPU%d\n", smp_processor_id());
+    clear_in_cr4(X86_CR4_MCE);
+}
+
+
 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
 {
@@ -135,11 +151,13 @@ void mcheck_init(struct cpuinfo_x86 *c)
 #ifndef CONFIG_X86_64
                if (c->x86==5)
                        intel_p5_mcheck_init(c);
-               if (c->x86==6)
-                       intel_p6_mcheck_init(c);
 #endif
-               if (c->x86==15)
-                       intel_p4_mcheck_init(c);
+               /*If it is P6 or P4 family, including CORE 2 DUO series*/
+               if (c->x86 == 6 || c->x86==15)
+               {
+                       printk(KERN_DEBUG "MCE: Intel newly family MC Init\n");
+                       intel_mcheck_init(c);
+               }
                break;
 
 #ifndef CONFIG_X86_64
@@ -413,7 +431,7 @@ void x86_mcinfo_dump(struct mc_info *mi)
                if (mic == NULL)
                        return;
                if (mic->type != MC_TYPE_BANK)
-                       continue;
+                       goto next;
 
                mc_bank = (struct mcinfo_bank *)mic;
        
@@ -426,6 +444,7 @@ void x86_mcinfo_dump(struct mc_info *mi)
                        printk(" at %16"PRIx64, mc_bank->mc_addr);
 
                printk("\n");
+next:
                mic = x86_mcinfo_next(mic); /* next entry */
                if ((mic == NULL) || (mic->size == 0))
                        break;
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Mon Dec 22 08:12:33 2008 +0000
@@ -1,14 +1,22 @@
 #include <xen/init.h>
+#include <asm/types.h>
 #include <asm/traps.h>
+#include <asm/atomic.h>
+#include <asm/percpu.h>
+
 
 /* Init functions */
 void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
 void amd_k7_mcheck_init(struct cpuinfo_x86 *c);
 void amd_k8_mcheck_init(struct cpuinfo_x86 *c);
 void amd_f10_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+
+
+void intel_mcheck_timer(struct cpuinfo_x86 *c);
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+void intel_mcheck_init(struct cpuinfo_x86 *c);
+void mce_intel_feature_init(struct cpuinfo_x86 *c);
+
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
 
 /* Function pointer used in the handlers to collect additional information
@@ -19,6 +27,7 @@ extern int (*mc_callback_bank_extended)(
                uint16_t bank, uint64_t status);
 
 
+int mce_available(struct cpuinfo_x86 *c);
 /* Helper functions used for collecting error telemetry */
 struct mc_info *x86_mcinfo_getptr(void);
 void x86_mcinfo_clear(struct mc_info *mi);
@@ -26,6 +35,3 @@ void x86_mcinfo_dump(struct mc_info *mi)
 void x86_mcinfo_dump(struct mc_info *mi);
 void mc_panic(char *s);
 
-/* Global variables */
-extern int mce_disabled;
-extern unsigned int nr_mce_banks;
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/mce_intel.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Mon Dec 22 08:12:33 2008 +0000
@@ -0,0 +1,681 @@
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include "mce.h"
+#include "x86_mca.h"
+
+DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
+
+static int nr_intel_ext_msrs = 0;
+static int cmci_support = 0;
+extern int firstbank;
+
+#ifdef CONFIG_X86_MCE_THERMAL
+static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
+{      
+    printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n",
+                smp_processor_id());
+    add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* P4/Xeon Thermal transition interrupt handler */
+static void intel_thermal_interrupt(struct cpu_user_regs *regs)
+{
+    u32 l, h;
+    unsigned int cpu = smp_processor_id();
+    static s_time_t next[NR_CPUS];
+
+    ack_APIC_irq();
+    if (NOW() < next[cpu])
+        return;
+
+    next[cpu] = NOW() + MILLISECS(5000);
+    rdmsr(MSR_IA32_THERM_STATUS, l, h);
+    if (l & 0x1) {
+        printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
+        printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
+                cpu);
+        add_taint(TAINT_MACHINE_CHECK);
+    } else {
+        printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+    }
+}
+
+/* Thermal interrupt handler for this CPU setup */
+static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) 
+        = unexpected_thermal_interrupt;
+
+fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
+{
+    irq_enter();
+    vendor_thermal_interrupt(regs);
+    irq_exit();
+}
+
+/* P4/Xeon Thermal regulation detect and init */
+static void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+    u32 l, h;
+    int tm2 = 0;
+    unsigned int cpu = smp_processor_id();
+
+    /* Thermal monitoring */
+    if (!cpu_has(c, X86_FEATURE_ACPI))
+        return;        /* -ENODEV */
+
+    /* Clock modulation */
+    if (!cpu_has(c, X86_FEATURE_ACC))
+        return;        /* -ENODEV */
+
+    /* first check if its enabled already, in which case there might
+     * be some SMM goo which handles it, so we can't even put a handler
+     * since it might be delivered via SMI already -zwanem.
+     */
+    rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+    h = apic_read(APIC_LVTTHMR);
+    if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+        printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",cpu);
+        return; /* -EBUSY */
+    }
+
+    if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
+        tm2 = 1;
+
+       /* check whether a vector already exists, temporarily masked? */
+    if (h & APIC_VECTOR_MASK) {
+        printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n",
+                 cpu, (h & APIC_VECTOR_MASK));
+        return; /* -EBUSY */
+    }
+
+    /* The temperature transition interrupt handler setup */
+    h = THERMAL_APIC_VECTOR;           /* our delivery vector */
+    h |= (APIC_DM_FIXED | APIC_LVT_MASKED);    /* we'll mask till we're ready */
+    apic_write_around(APIC_LVTTHMR, h);
+
+    rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
+    wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+
+    /* ok we're good to go... */
+    vendor_thermal_interrupt = intel_thermal_interrupt;
+
+    rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+    wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+
+    l = apic_read (APIC_LVTTHMR);
+    apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+    printk (KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 
+            cpu, tm2 ? "TM2" : "TM1");
+    return;
+}
+#endif /* CONFIG_X86_MCE_THERMAL */
+
+static inline void intel_get_extended_msrs(struct mcinfo_extended *mc_ext)
+{
+    if (nr_intel_ext_msrs == 0)
+        return;
+
+       /*this function will called when CAP(9).MCG_EXT_P = 1*/
+    memset(mc_ext, 0, sizeof(struct mcinfo_extended));
+    mc_ext->common.type = MC_TYPE_EXTENDED;
+    mc_ext->common.size = sizeof(mc_ext);
+    mc_ext->mc_msrs = 10;
+
+    mc_ext->mc_msr[0].reg = MSR_IA32_MCG_EAX;
+    rdmsrl(MSR_IA32_MCG_EAX, mc_ext->mc_msr[0].value);
+    mc_ext->mc_msr[1].reg = MSR_IA32_MCG_EBX;
+    rdmsrl(MSR_IA32_MCG_EBX, mc_ext->mc_msr[1].value);
+    mc_ext->mc_msr[2].reg = MSR_IA32_MCG_ECX;
+    rdmsrl(MSR_IA32_MCG_ECX, mc_ext->mc_msr[2].value);
+
+    mc_ext->mc_msr[3].reg = MSR_IA32_MCG_EDX;
+    rdmsrl(MSR_IA32_MCG_EDX, mc_ext->mc_msr[3].value);
+    mc_ext->mc_msr[4].reg = MSR_IA32_MCG_ESI;
+    rdmsrl(MSR_IA32_MCG_ESI, mc_ext->mc_msr[4].value);
+    mc_ext->mc_msr[5].reg = MSR_IA32_MCG_EDI;
+    rdmsrl(MSR_IA32_MCG_EDI, mc_ext->mc_msr[5].value);
+
+    mc_ext->mc_msr[6].reg = MSR_IA32_MCG_EBP;
+    rdmsrl(MSR_IA32_MCG_EBP, mc_ext->mc_msr[6].value);
+    mc_ext->mc_msr[7].reg = MSR_IA32_MCG_ESP;
+    rdmsrl(MSR_IA32_MCG_ESP, mc_ext->mc_msr[7].value);
+    mc_ext->mc_msr[8].reg = MSR_IA32_MCG_EFLAGS;
+    rdmsrl(MSR_IA32_MCG_EFLAGS, mc_ext->mc_msr[8].value);
+    mc_ext->mc_msr[9].reg = MSR_IA32_MCG_EIP;
+    rdmsrl(MSR_IA32_MCG_EIP, mc_ext->mc_msr[9].value);
+}
+
+/* machine_check_poll might be called by following types:
+ * 1. called when do mcheck_init.
+ * 2. called in cmci interrupt handler
+ * 3. called in polling handler
+ * It will generate a new mc_info item if found CE/UC errors. DOM0 is the 
+ * consumer.
+*/
+static int machine_check_poll(struct mc_info *mi, int calltype)
+{
+    int exceptions = (read_cr4() & X86_CR4_MCE);
+    int i, nr_unit = 0, uc = 0, pcc = 0;
+    uint64_t status, addr;
+    struct mcinfo_global mcg;
+    struct mcinfo_extended mce;
+    unsigned int cpu;
+    struct domain *d;
+
+    cpu = smp_processor_id();
+
+    if (!mi) {
+        printk(KERN_ERR "mcheck_poll: Failed to get mc_info entry\n");
+        return 0;
+    }
+    x86_mcinfo_clear(mi);
+
+    memset(&mcg, 0, sizeof(mcg));
+    mcg.common.type = MC_TYPE_GLOBAL;
+    mcg.common.size = sizeof(mcg);
+    /*If called from cpu-reset check, don't need to fill them.
+     *If called from cmci context, we'll try to fill domid by memory addr
+    */
+    mcg.mc_domid = -1;
+    mcg.mc_vcpuid = -1;
+    if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET)
+        mcg.mc_flags = MC_FLAG_POLLED;
+    else if (calltype == MC_FLAG_CMCI)
+        mcg.mc_flags = MC_FLAG_CMCI;
+    mcg.mc_socketid = phys_proc_id[cpu];
+    mcg.mc_coreid = cpu_core_id[cpu];
+    mcg.mc_apicid = cpu_physical_id(cpu);
+    mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1)); 
+    rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
+
+    for ( i = 0; i < nr_mce_banks; i++ ) {
+        struct mcinfo_bank mcb;
+        /*For CMCI, only owners checks the owned MSRs*/
+        if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) &&
+                       (calltype & MC_FLAG_CMCI) )
+            continue;
+        rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
+
+        if (! (status & MCi_STATUS_VAL) )
+            continue;
+        /*
+         * Uncorrected events are handled by the exception
+         * handler when it is enabled. But when the exception
+         * is disabled such as when mcheck_init, log everything.
+         */
+        if ((status & MCi_STATUS_UC) && exceptions)
+            continue;
+
+        if (status & MCi_STATUS_UC)
+            uc = 1;
+        if (status & MCi_STATUS_PCC)
+            pcc = 1;
+
+        memset(&mcb, 0, sizeof(mcb));
+        mcb.common.type = MC_TYPE_BANK;
+        mcb.common.size = sizeof(mcb);
+        mcb.mc_bank = i;
+        mcb.mc_status = status;
+        if (status & MCi_STATUS_MISCV)
+            rdmsrl(MSR_IA32_MC0_MISC + 4 * i, mcb.mc_misc);
+        if (status & MCi_STATUS_ADDRV) {
+            rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr);
+            d = maddr_get_owner(addr);
+            if ( d && (calltype == MC_FLAG_CMCI || calltype == MC_FLAG_POLLED) 
)
+                mcb.mc_domid = d->domain_id;
+        }
+        if (cmci_support)
+            rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2);
+        if (calltype == MC_FLAG_CMCI)
+            rdtscll(mcb.mc_tsc);
+        x86_mcinfo_add(mi, &mcb);
+        nr_unit++;
+        add_taint(TAINT_MACHINE_CHECK);
+        /*Clear state for this bank */
+        wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0);
+        printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%lx]\n", 
+                i, cpu, status);
+        printk(KERN_DEBUG "mcheck_poll: CPU%d, SOCKET%d, CORE%d, APICID[%d], "
+                "thread[%d]\n", cpu, mcg.mc_socketid, 
+                mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid);
+ 
+    }
+    /*if pcc = 1, uc must be 1*/
+    if (pcc)
+        mcg.mc_flags |= MC_FLAG_UNCORRECTABLE;
+    else if (uc)
+        mcg.mc_flags |= MC_FLAG_RECOVERABLE;
+    else /*correctable*/
+        mcg.mc_flags |= MC_FLAG_CORRECTABLE;
+
+    if (nr_unit && nr_intel_ext_msrs && 
+                    (mcg.mc_gstatus & MCG_STATUS_EIPV)) {
+        intel_get_extended_msrs(&mce);
+        x86_mcinfo_add(mi, &mce);
+    }
+    if (nr_unit) 
+        x86_mcinfo_add(mi, &mcg);
+    /*Clear global state*/
+    return nr_unit;
+}
+
+static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
+{
+    /* MACHINE CHECK Error handler will be sent in another patch,
+     * simply copy old solutions here. This code will be replaced
+     * by upcoming machine check patches
+     */
+
+    int recover=1;
+    u32 alow, ahigh, high, low;
+    u32 mcgstl, mcgsth;
+    int i;
+   
+    rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+    if (mcgstl & (1<<0))       /* Recoverable ? */
+       recover=0;
+    
+    printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+       smp_processor_id(), mcgsth, mcgstl);
+    
+    for (i=0; i<nr_mce_banks; i++) {
+       rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+       if (high & (1<<31)) {
+               if (high & (1<<29))
+                       recover |= 1;
+               if (high & (1<<25))
+                       recover |= 2;
+               printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+               high &= ~(1<<31);
+               if (high & (1<<27)) {
+                       rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+                       printk ("[%08x%08x]", ahigh, alow);
+               }
+               if (high & (1<<26)) {
+                       rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+                       printk (" at %08x%08x", ahigh, alow);
+               }
+               printk ("\n");
+       }
+    }
+    
+    if (recover & 2)
+       mc_panic ("CPU context corrupt");
+    if (recover & 1)
+       mc_panic ("Unable to continue");
+    
+    printk(KERN_EMERG "Attempting to continue.\n");
+    /* 
+     * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
+     * recoverable/continuable.This will allow BIOS to look at the MSRs
+     * for errors if the OS could not log the error.
+     */
+    for (i=0; i<nr_mce_banks; i++) {
+       u32 msr;
+       msr = MSR_IA32_MC0_STATUS+i*4;
+       rdmsr (msr, low, high);
+       if (high&(1<<31)) {
+               /* Clear it */
+               wrmsr(msr, 0UL, 0UL);
+               /* Serialize */
+               wmb();
+               add_taint(TAINT_MACHINE_CHECK);
+       }
+    }
+    mcgstl &= ~(1<<2);
+    wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+extern void (*cpu_down_handler)(int down_cpu);
+extern void (*cpu_down_rollback_handler)(int down_cpu);
+extern void mce_disable_cpu(void);
+static bool_t cmci_clear_lock = 0;
+static DEFINE_SPINLOCK(cmci_discover_lock);
+static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
+
+/*
+ * Discover bank sharing using the algorithm recommended in the SDM.
+ */
+static int do_cmci_discover(int i)
+{
+    unsigned msr = MSR_IA32_MC0_CTL2 + i;
+    u64 val;
+
+    rdmsrl(msr, val);
+    /* Some other CPU already owns this bank. */
+    if (val & CMCI_EN) {
+       clear_bit(i, __get_cpu_var(mce_banks_owned));
+       goto out;
+    }
+    wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
+    rdmsrl(msr, val);
+
+    if (!(val & CMCI_EN)) {
+     /*
+      * This bank does not support CMCI. The polling
+      * timer has to handle it. 
+      */
+       set_bit(i, __get_cpu_var(no_cmci_banks));
+       return 0;
+    }
+    set_bit(i, __get_cpu_var(mce_banks_owned));
+out:
+    clear_bit(i, __get_cpu_var(no_cmci_banks));
+    return 1;
+}
+
+void cmci_discover(void)
+{
+    int i;
+
+    printk(KERN_DEBUG "CMCI: find owner on CPU%d\n", smp_processor_id());
+    spin_lock(&cmci_discover_lock);
+    for (i = 0; i < nr_mce_banks; i++) {
+        /*If the cpu is the bank owner, need not re-discover*/
+        if (test_bit(i, __get_cpu_var(mce_banks_owned)))
+            continue;
+        do_cmci_discover(i);
+    }
+    spin_unlock(&cmci_discover_lock);
+    printk(KERN_DEBUG "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n", 
+            smp_processor_id(), 
+            *((unsigned long *)__get_cpu_var(mce_banks_owned)), 
+            *((unsigned long *)__get_cpu_var(no_cmci_banks)));
+}
+
+/*
+ * Define an owner for each bank. Banks can be shared between CPUs
+ * and to avoid reporting events multiple times always set up one
+ * CPU as owner. 
+ *
+ * The assignment has to be redone when CPUs go offline and
+ * any of the owners goes away. Also pollers run in parallel so we
+ * have to be careful to update the banks in a way that doesn't
+ * lose or duplicate events.
+ */
+
+static void mce_set_owner(void)
+{
+
+    if (!cmci_support || mce_disabled == 1)
+        return;
+
+    cmci_discover();
+}
+
+static void clear_cmci(void)
+{
+    int i;
+
+    if (!cmci_support || mce_disabled == 1)
+        return;
+
+    printk(KERN_DEBUG "CMCI: clear_cmci support on CPU%d\n", 
+            smp_processor_id());
+
+    for (i = 0; i < nr_mce_banks; i++) {
+        unsigned msr = MSR_IA32_MC0_CTL2 + i;
+        u64 val;
+        if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+            continue;
+        rdmsrl(msr, val);
+        if (val & (CMCI_EN|CMCI_THRESHOLD_MASK))
+            wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
+        clear_bit(i, __get_cpu_var(mce_banks_owned));
+    }
+}
+
+/*we need to re-set cmci owners when cpu_down fail or cpu_up*/
+static void cmci_reenable_cpu(void *h)
+{
+    if (!mce_available(&current_cpu_data) || mce_disabled == 1)
+         return;
+    printk(KERN_DEBUG "CMCI: reenable mce on CPU%d\n", smp_processor_id());
+    mce_set_owner();
+    set_in_cr4(X86_CR4_MCE);
+}
+
+/* When take cpu_down, we need to execute the impacted cmci_owner judge algorithm 
+ * First, we need to clear the ownership on the dead CPU
+ * Then,  other CPUs will check whether to take the bank's ownership from down_cpu
+ * CPU0 need not and "never" execute this path
+*/
+void  __cpu_clear_cmci( int down_cpu)
+{
+    int cpu = smp_processor_id();
+
+    if (!cmci_support && mce_disabled == 1)
+        return;
+
+    if (cpu == 0) {
+        printk(KERN_DEBUG "CMCI: CPU0 need not be cleared\n");
+        return;
+    }
+
+    local_irq_disable();
+    if (cpu == down_cpu){
+        mce_disable_cpu();
+        clear_cmci();
+        wmb();
+        test_and_set_bool(cmci_clear_lock);
+        return;
+    }
+    while (!cmci_clear_lock)
+        cpu_relax();
+    if (cpu != down_cpu)
+        mce_set_owner();
+
+    test_and_clear_bool(cmci_clear_lock);
+    local_irq_enable();
+
+}
+
+void  __cpu_clear_cmci_rollback( int down_cpu)
+{
+    cpumask_t down_map;
+    if (!cmci_support || mce_disabled == 1) 
+        return;
+
+    cpus_clear(down_map);
+    cpu_set(down_cpu, down_map);
+    printk(KERN_ERR "CMCI: cpu_down fail. "
+        "Reenable cmci on CPU%d\n", down_cpu);
+    on_selected_cpus(down_map, cmci_reenable_cpu, NULL, 1, 1);
+}
+
+static void intel_init_cmci(struct cpuinfo_x86 *c)
+{
+    u32 l, apic;
+    int cpu = smp_processor_id();
+
+    if (!mce_available(c) || !cmci_support) {
+        printk(KERN_DEBUG "CMCI: CPU%d has no CMCI support\n", cpu);
+        return;
+    }
+
+    apic = apic_read(APIC_CMCI);
+    if ( apic & APIC_VECTOR_MASK )
+    {
+        printk(KERN_WARNING "CPU%d CMCI LVT vector (%#x) already installed\n",
+            cpu, ( apic & APIC_VECTOR_MASK ));
+        return;
+    }
+
+    apic = CMCI_APIC_VECTOR;
+    apic |= (APIC_DM_FIXED | APIC_LVT_MASKED);
+    apic_write_around(APIC_CMCI, apic);
+
+       /*now clear mask flag*/
+    l = apic_read(APIC_CMCI);
+    apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED);
+    cpu_down_handler =  __cpu_clear_cmci;
+    cpu_down_rollback_handler = __cpu_clear_cmci_rollback; 
+}
+
+fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs)
+{
+    int nr_unit;
+    struct mc_info *mi =  x86_mcinfo_getptr();
+    int cpu = smp_processor_id();
+
+    ack_APIC_irq();
+    irq_enter();
+    printk(KERN_DEBUG "CMCI: cmci_intr happen on CPU%d\n", cpu);
+    nr_unit = machine_check_poll(mi, MC_FLAG_CMCI);
+    if (nr_unit) {
+        x86_mcinfo_dump(mi);
+        if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA))
+            send_guest_global_virq(dom0, VIRQ_MCA);
+    }
+    irq_exit();
+}
+
+void mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+
+#ifdef CONFIG_X86_MCE_THERMAL
+    intel_init_thermal(c);
+#endif
+    intel_init_cmci(c);
+}
+
+static void mce_cap_init(struct cpuinfo_x86 *c)
+{
+    u32 l, h;
+
+    rdmsr (MSR_IA32_MCG_CAP, l, h);
+    if ((l & MCG_CMCI_P) && cpu_has_apic)
+        cmci_support = 1;
+
+    nr_mce_banks = l & 0xff;
+    if (nr_mce_banks > MAX_NR_BANKS)
+        printk(KERN_WARNING "MCE: exceed max mce banks\n");
+    if (l & MCG_EXT_P)
+    {
+        nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff;
+        printk (KERN_INFO "CPU%d: Intel Extended MCE MSRs (%d) available\n",
+            smp_processor_id(), nr_intel_ext_msrs);
+    }
+    /* for most of p6 family, bank 0 is an alias bios MSR.
+     * But after model>1a, bank 0 is available*/
+    if ( c->x86 == 6 && c->x86_vendor == X86_VENDOR_INTEL
+            && c->x86_model < 0x1A)
+        firstbank = 1;
+    else
+        firstbank = 0;
+}
+
+static void mce_init(void)
+{
+    u32 l, h;
+    int i, nr_unit;
+    struct mc_info *mi =  x86_mcinfo_getptr();
+    clear_in_cr4(X86_CR4_MCE);
+    /* log the machine checks left over from the previous reset.
+     * This also clears all registers*/
+
+    nr_unit = machine_check_poll(mi, MC_FLAG_RESET);
+    /*in the boot up stage, not expect inject to DOM0, but go print out
+    */
+    if (nr_unit > 0)
+        x86_mcinfo_dump(mi);
+
+    set_in_cr4(X86_CR4_MCE);
+    rdmsr (MSR_IA32_MCG_CAP, l, h);
+    if (l & MCG_CTL_P) /* Control register present ? */
+        wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+    for (i = firstbank; i < nr_mce_banks; i++)
+    {
+        /*Some banks are shared across cores, use MCi_CTRL to judge whether
+         * this bank has been initialized by other cores already.*/
+        rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h);
+        if (!l & !h)
+        {
+            /*if ctl is 0, this bank is never initialized*/
+            printk(KERN_DEBUG "mce_init: init bank%d\n", i);
+            wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff);
+            wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0);
+       }
+    }
+    if (firstbank) /*if cmci enabled, firstbank = 0*/
+        wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+}
+
+/*p4/p6 faimily has similar MCA initialization process*/
+void intel_mcheck_init(struct cpuinfo_x86 *c)
+{
+       
+       mce_cap_init(c);
+       printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+               smp_processor_id());
+       /* machine check is available */
+       machine_check_vector = intel_machine_check;
+       mce_init();
+       mce_intel_feature_init(c);
+       mce_set_owner();
+}
+
+/*
+ * Periodic polling timer for "silent" machine check errors. If the
+ * poller finds an MCE, poll faster. When the poller finds no more 
+ * errors, poll slower
+*/
+static struct timer mce_timer;
+
+#define MCE_PERIOD 4000
+#define MCE_MIN    2000
+#define MCE_MAX    32000
+
+static u64 period = MCE_PERIOD;
+static int adjust = 0;
+
+static void mce_intel_checkregs(void *info)
+{
+    int nr_unit;
+    struct mc_info *mi =  x86_mcinfo_getptr();
+
+    if( !mce_available(&current_cpu_data))
+        return;
+    nr_unit = machine_check_poll(mi, MC_FLAG_POLLED);
+    if (nr_unit)
+    {
+        x86_mcinfo_dump(mi);
+        adjust++;
+        if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA))
+            send_guest_global_virq(dom0, VIRQ_MCA);
+    }
+}
+
+static void mce_intel_work_fn(void *data)
+{
+    on_each_cpu(mce_intel_checkregs, data, 1, 1);
+    if (adjust) {
+        period = period / (adjust + 1);
+        printk(KERN_DEBUG "mcheck_poll: Find error, shorten interval to %ld",
+            period);
+    }
+    else {
+        period *= 2;
+    }
+    if (period > MCE_MAX) 
+        period = MCE_MAX;
+    if (period < MCE_MIN)
+        period = MCE_MIN;
+    set_timer(&mce_timer, NOW() + MILLISECS(period));
+    adjust = 0;
+}
+
+void intel_mcheck_timer(struct cpuinfo_x86 *c)
+{
+    printk(KERN_DEBUG "mcheck_poll: Init_mcheck_timer\n");
+    init_timer(&mce_timer, mce_intel_work_fn, NULL, 0);
+    set_timer(&mce_timer, NOW() + MILLISECS(MCE_PERIOD));
+}
+
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/non-fatal.c
--- a/xen/arch/x86/cpu/mcheck/non-fatal.c       Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c       Mon Dec 22 08:12:33 2008 +0000
@@ -19,8 +19,8 @@
 #include <asm/msr.h>
 
 #include "mce.h"
-
-static int firstbank;
+#include "x86_mca.h"
+int firstbank = 0;
 static struct timer mce_timer;
 
 #define MCE_PERIOD MILLISECS(15000)
@@ -61,13 +61,8 @@ static int __init init_nonfatal_mce_chec
        struct cpuinfo_x86 *c = &boot_cpu_data;
 
        /* Check for MCE support */
-       if (!cpu_has(c, X86_FEATURE_MCE))
+       if (!mce_available(c))
                return -ENODEV;
-
-       /* Check for PPro style MCA */
-       if (!cpu_has(c, X86_FEATURE_MCA))
-               return -ENODEV;
-
        /*
         * Check for non-fatal errors every MCE_RATE s
         */
@@ -85,12 +80,20 @@ static int __init init_nonfatal_mce_chec
                break;
 
        case X86_VENDOR_INTEL:
-               init_timer(&mce_timer, mce_work_fn, NULL, 0);
-               set_timer(&mce_timer, NOW() + MCE_PERIOD);
+               /* p5 family is different. P4/P6 and latest CPUs shares the
+                * same polling methods
+               */
+               if ( c->x86 != 5 )
+               {
+                       /* some CPUs or banks don't support cmci, we need to 
+                        * enable this feature anyway
+                        */
+                       intel_mcheck_timer(c);
+               }
                break;
        }
 
-       printk(KERN_INFO "MCA: Machine check polling timer started.\n");
+       printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n");
        return 0;
 }
 __initcall(init_nonfatal_mce_checker);
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/p4.c
--- a/xen/arch/x86/cpu/mcheck/p4.c      Fri Dec 19 14:56:36 2008 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,270 +0,0 @@
-/*
- * P4 specific Machine Check Exception Reporting
- */
-
-#include <xen/init.h>
-#include <xen/types.h>
-#include <xen/kernel.h>
-#include <xen/config.h>
-#include <xen/smp.h>
-#include <xen/irq.h>
-#include <xen/time.h>
-#include <asm/processor.h> 
-#include <asm/system.h>
-#include <asm/msr.h>
-#include <asm/apic.h>
-
-#include "mce.h"
-
-/* as supported by the P4/Xeon family */
-struct intel_mce_extended_msrs {
-       u32 eax;
-       u32 ebx;
-       u32 ecx;
-       u32 edx;
-       u32 esi;
-       u32 edi;
-       u32 ebp;
-       u32 esp;
-       u32 eflags;
-       u32 eip;
-       /* u32 *reserved[]; */
-};
-
-static int mce_num_extended_msrs = 0;
-
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
-static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
-{      
-       printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
-                       smp_processor_id());
-       add_taint(TAINT_MACHINE_CHECK);
-}
-
-/* P4/Xeon Thermal transition interrupt handler */
-static void intel_thermal_interrupt(struct cpu_user_regs *regs)
-{
-       u32 l, h;
-       unsigned int cpu = smp_processor_id();
-       static s_time_t next[NR_CPUS];
-
-       ack_APIC_irq();
-
-       if (NOW() < next[cpu])
-               return;
-
-       next[cpu] = NOW() + MILLISECS(5000);
-       rdmsr(MSR_IA32_THERM_STATUS, l, h);
-       if (l & 0x1) {
-               printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
-               printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-                               cpu);
-               add_taint(TAINT_MACHINE_CHECK);
-       } else {
-               printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
-       }
-}
-
-/* Thermal interrupt handler for this CPU setup */
-static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) = unexpected_thermal_interrupt;
-
-fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
-{
-       irq_enter();
-       vendor_thermal_interrupt(regs);
-       irq_exit();
-}
-
-/* P4/Xeon Thermal regulation detect and init */
-static void intel_init_thermal(struct cpuinfo_x86 *c)
-{
-       u32 l, h;
-       unsigned int cpu = smp_processor_id();
-
-       /* Thermal monitoring */
-       if (!cpu_has(c, X86_FEATURE_ACPI))
-               return; /* -ENODEV */
-
-       /* Clock modulation */
-       if (!cpu_has(c, X86_FEATURE_ACC))
-               return; /* -ENODEV */
-
-       /* first check if its enabled already, in which case there might
-        * be some SMM goo which handles it, so we can't even put a handler
-        * since it might be delivered via SMI already -zwanem.
-        */
-       rdmsr (MSR_IA32_MISC_ENABLE, l, h);
-       h = apic_read(APIC_LVTTHMR);
-       if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
-               printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
-                               cpu);
-               return; /* -EBUSY */
-       }
-
-       /* check whether a vector already exists, temporarily masked? */        
-       if (h & APIC_VECTOR_MASK) {
-               printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
-                               "installed\n",
-                       cpu, (h & APIC_VECTOR_MASK));
-               return; /* -EBUSY */
-       }
-
-       /* The temperature transition interrupt handler setup */
-       h = THERMAL_APIC_VECTOR;                /* our delivery vector */
-       h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
-       apic_write_around(APIC_LVTTHMR, h);
-
-       rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
-       wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
-
-       /* ok we're good to go... */
-       vendor_thermal_interrupt = intel_thermal_interrupt;
-       
-       rdmsr (MSR_IA32_MISC_ENABLE, l, h);
-       wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
-       
-       l = apic_read (APIC_LVTTHMR);
-       apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-       printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
-       return;
-}
-#endif /* CONFIG_X86_MCE_P4THERMAL */
-
-
-/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
-{
-       u32 h;
-
-       if (mce_num_extended_msrs == 0)
-               goto done;
-
-       rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
-       rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
-       rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
-       rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
-       rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
-       rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
-       rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
-       rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
-       rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
-       rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
-
-       /* can we rely on kmalloc to do a dynamic
-        * allocation for the reserved registers?
-        */
-done:
-       return mce_num_extended_msrs;
-}
-
-static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
-{
-       int recover=1;
-       u32 alow, ahigh, high, low;
-       u32 mcgstl, mcgsth;
-       int i;
-       struct intel_mce_extended_msrs dbg;
-
-       rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-       if (mcgstl & (1<<0))    /* Recoverable ? */
-               recover=0;
-
-       printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
-               smp_processor_id(), mcgsth, mcgstl);
-
-       if (intel_get_extended_msrs(&dbg)) {
-               printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
-                       smp_processor_id(), dbg.eip, dbg.eflags);
-               printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
-                       dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
-               printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
-                       dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
-       }
-
-       for (i=0; i<nr_mce_banks; i++) {
-               rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
-               if (high & (1<<31)) {
-                       if (high & (1<<29))
-                               recover |= 1;
-                       if (high & (1<<25))
-                               recover |= 2;
-                       printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
-                       high &= ~(1<<31);
-                       if (high & (1<<27)) {
-                               rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-                               printk ("[%08x%08x]", ahigh, alow);
-                       }
-                       if (high & (1<<26)) {
-                               rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-                               printk (" at %08x%08x", ahigh, alow);
-                       }
-                       printk ("\n");
-               }
-       }
-
-       if (recover & 2)
-               mc_panic ("CPU context corrupt");
-       if (recover & 1)
-               mc_panic ("Unable to continue");
-
-       printk(KERN_EMERG "Attempting to continue.\n");
-       /* 
-        * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
-        * recoverable/continuable.This will allow BIOS to look at the MSRs
-        * for errors if the OS could not log the error.
-        */
-       for (i=0; i<nr_mce_banks; i++) {
-               u32 msr;
-               msr = MSR_IA32_MC0_STATUS+i*4;
-               rdmsr (msr, low, high);
-               if (high&(1<<31)) {
-                       /* Clear it */
-                       wrmsr(msr, 0UL, 0UL);
-                       /* Serialize */
-                       wmb();
-                       add_taint(TAINT_MACHINE_CHECK);
-               }
-       }
-       mcgstl &= ~(1<<2);
-       wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
-}
-
-
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
-{
-       u32 l, h;
-       int i;
-       
-       machine_check_vector = intel_machine_check;
-       wmb();
-
-       printk (KERN_INFO "Intel machine check architecture supported.\n");
-       rdmsr (MSR_IA32_MCG_CAP, l, h);
-       if (l & (1<<8)) /* Control register present ? */
-               wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
-       nr_mce_banks = l & 0xff;
-
-       for (i=0; i<nr_mce_banks; i++) {
-               wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
-               wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-       }
-
-       set_in_cr4 (X86_CR4_MCE);
-       printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
-               smp_processor_id());
-
-       /* Check for P4/Xeon extended MCE MSRs */
-       rdmsr (MSR_IA32_MCG_CAP, l, h);
-       if (l & (1<<9)) {/* MCG_EXT_P */
-               mce_num_extended_msrs = (l >> 16) & 0xff;
-               printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
-                               " available\n",
-                       smp_processor_id(), mce_num_extended_msrs);
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
-               /* Check for P4/Xeon Thermal monitor */
-               intel_init_thermal(c);
-#endif
-       }
-}
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/p6.c
--- a/xen/arch/x86/cpu/mcheck/p6.c      Fri Dec 19 14:56:36 2008 +0000
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-/*
- * P6 specific Machine Check Exception Reporting
- * (C) Copyright 2002 Alan Cox <alan@xxxxxxxxxx>
- */
-
-#include <xen/init.h>
-#include <xen/types.h>
-#include <xen/kernel.h>
-#include <xen/smp.h>
-
-#include <asm/processor.h> 
-#include <asm/system.h>
-#include <asm/msr.h>
-
-#include "mce.h"
-
-/* Machine Check Handler For PII/PIII */
-static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
-{
-       int recover=1;
-       u32 alow, ahigh, high, low;
-       u32 mcgstl, mcgsth;
-       int i;
-
-       rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-       if (mcgstl & (1<<0))    /* Recoverable ? */
-               recover=0;
-
-       printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
-               smp_processor_id(), mcgsth, mcgstl);
-
-       for (i=0; i<nr_mce_banks; i++) {
-               rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
-               if (high & (1<<31)) {
-                       if (high & (1<<29))
-                               recover |= 1;
-                       if (high & (1<<25))
-                               recover |= 2;
-                       printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
-                       high &= ~(1<<31);
-                       if (high & (1<<27)) {
-                               rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-                               printk ("[%08x%08x]", ahigh, alow);
-                       }
-                       if (high & (1<<26)) {
-                               rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-                               printk (" at %08x%08x", ahigh, alow);
-                       }
-                       printk ("\n");
-               }
-       }
-
-       if (recover & 2)
-               mc_panic ("CPU context corrupt");
-       if (recover & 1)
-               mc_panic ("Unable to continue");
-
-       printk (KERN_EMERG "Attempting to continue.\n");
-       /* 
-        * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
-        * recoverable/continuable.This will allow BIOS to look at the MSRs
-        * for errors if the OS could not log the error.
-        */
-       for (i=0; i<nr_mce_banks; i++) {
-               unsigned int msr;
-               msr = MSR_IA32_MC0_STATUS+i*4;
-               rdmsr (msr,low, high);
-               if (high & (1<<31)) {
-                       /* Clear it */
-                       wrmsr (msr, 0UL, 0UL);
-                       /* Serialize */
-                       wmb();
-                       add_taint(TAINT_MACHINE_CHECK);
-               }
-       }
-       mcgstl &= ~(1<<2);
-       wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
-}
-
-/* Set up machine check reporting for processors with Intel style MCE */
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
-{
-       u32 l, h;
-       int i;
-       
-       /* Check for MCE support */
-       if (!cpu_has(c, X86_FEATURE_MCE))
-               return;
-
-       /* Check for PPro style MCA */
-       if (!cpu_has(c, X86_FEATURE_MCA))
-               return;
-
-       /* Ok machine check is available */
-       machine_check_vector = intel_machine_check;
-       wmb();
-
-       printk (KERN_INFO "Intel machine check architecture supported.\n");
-       rdmsr (MSR_IA32_MCG_CAP, l, h);
-       if (l & (1<<8)) /* Control register present ? */
-               wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
-       nr_mce_banks = l & 0xff;
-
-       /*
-        * Following the example in IA-32 SDM Vol 3:
-        * - MC0_CTL should not be written
-        * - Status registers on all banks should be cleared on reset
-        */
-       for (i=1; i<nr_mce_banks; i++)
-               wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
-
-       for (i=0; i<nr_mce_banks; i++)
-               wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-
-       set_in_cr4 (X86_CR4_MCE);
-       printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
-               smp_processor_id());
-}
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/x86_mca.h
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Mon Dec 22 08:12:33 2008 +0000
@@ -28,7 +28,10 @@
 /* Bitfield of the MSR_IA32_MCG_CAP register */
 #define MCG_CAP_COUNT           0x00000000000000ffULL
 #define MCG_CTL_P               0x0000000000000100ULL
-/* Bits 9-63 are reserved */
+#define MCG_EXT_P              (1UL<<9)
+#define MCG_EXT_CNT            (16)
+#define MCG_CMCI_P             (1UL<<10)
+/* Other bits are reserved */
 
 /* Bitfield of the MSR_IA32_MCG_STATUS register */
 #define MCG_STATUS_RIPV         0x0000000000000001ULL
@@ -70,3 +73,17 @@
 /* reserved bits */
 #define MCi_STATUS_OTHER_RESERVED2      0x0180000000000000ULL
 
+/*Intel Specific bitfield*/
+#define CMCI_THRESHOLD                 0x2
+
+
+#define MAX_NR_BANKS 128
+
+typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS);
+DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned);
+
+/* Global variables */
+extern int mce_disabled;
+extern unsigned int nr_mce_banks;
+extern int firstbank;
+
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Dec 22 08:12:33 2008 +0000
@@ -2030,7 +2030,8 @@ static void vmx_do_extint(struct cpu_use
     fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs);
-#ifdef CONFIG_X86_MCE_P4THERMAL
+    fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs);
+#ifdef CONFIG_X86_MCE_THERMAL
     fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
 #endif
 
@@ -2060,10 +2061,13 @@ static void vmx_do_extint(struct cpu_use
     case ERROR_APIC_VECTOR:
         smp_error_interrupt(regs);
         break;
+    case CMCI_APIC_VECTOR:
+        smp_cmci_interrupt(regs);
+        break;
     case PMU_APIC_VECTOR:
         smp_pmu_apic_interrupt(regs);
         break;
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
     case THERMAL_APIC_VECTOR:
         smp_thermal_interrupt(regs);
         break;
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/i8259.c      Mon Dec 22 08:12:33 2008 +0000
@@ -74,6 +74,7 @@ BUILD_SMP_INTERRUPT(spurious_interrupt,S
 BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 BUILD_SMP_INTERRUPT(pmu_apic_interrupt,PMU_APIC_VECTOR)
 BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
+BUILD_SMP_INTERRUPT(cmci_interrupt, CMCI_APIC_VECTOR)
 
 #define IRQ(x,y) \
     IRQ##x##y##_interrupt
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/smpboot.c    Mon Dec 22 08:12:33 2008 +0000
@@ -1237,11 +1237,25 @@ remove_siblinginfo(int cpu)
 }
 
 extern void fixup_irqs(cpumask_t map);
-int __cpu_disable(void)
+
+/*
+ * Functions called when offline cpu. 
+ * We need to process some new feature such as 
+ * CMCI owner change when do cpu hotplug in latest 
+ * Intel CPU families
+*/
+void (*cpu_down_handler)(int down_cpu) = NULL;
+void (*cpu_down_rollback_handler)(int down_cpu) = NULL;
+
+
+int __cpu_disable(int down_cpu)
 {
        cpumask_t map = cpu_online_map;
        int cpu = smp_processor_id();
 
+       /*Only down_cpu need to execute this function*/
+       if (cpu != down_cpu)
+               return 0;
        /*
         * Perhaps use cpufreq to drop frequency, but that could go
         * into generic code.
@@ -1293,10 +1307,14 @@ void __cpu_die(unsigned int cpu)
        }
        printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }
-
-static int take_cpu_down(void *unused)
-{
-    return __cpu_disable();
+static int take_cpu_down(void *down_cpu)
+{
+
+    if (cpu_down_handler)
+        cpu_down_handler(*(int *)down_cpu);
+    wmb();
+
+    return __cpu_disable(*(int *)down_cpu);
 }
 
 int cpu_down(unsigned int cpu)
@@ -1322,7 +1340,7 @@ int cpu_down(unsigned int cpu)
 
        printk("Prepare to bring CPU%d down...\n", cpu);
 
-       err = stop_machine_run(take_cpu_down, NULL, cpu);
+       err = stop_machine_run(take_cpu_down, &cpu, cpu_online_map);
        if ( err < 0 )
                goto out;
 
@@ -1333,6 +1351,10 @@ int cpu_down(unsigned int cpu)
                err = -EBUSY;
        }
 out:
+       /*if cpu_offline failed, re-check cmci_owner*/
+
+       if ( err < 0 && cpu_down_rollback_handler) 
+               cpu_down_rollback_handler(cpu); 
        spin_unlock(&cpu_add_remove_lock);
        return err;
 }
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/common/stop_machine.c
--- a/xen/common/stop_machine.c Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/common/stop_machine.c Mon Dec 22 08:12:33 2008 +0000
@@ -45,7 +45,7 @@ struct stopmachine_data {
     enum stopmachine_state state;
     atomic_t done;
 
-    unsigned int fn_cpu;
+    cpumask_t fn_cpus;
     int fn_result;
     int (*fn)(void *);
     void *fn_data;
@@ -63,21 +63,22 @@ static void stopmachine_set_state(enum s
         cpu_relax();
 }
 
-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpus)
 {
     cpumask_t allbutself;
     unsigned int i, nr_cpus;
-    int ret;
+    int cur_cpu, ret;
 
     BUG_ON(!local_irq_is_enabled());
 
     allbutself = cpu_online_map;
-    cpu_clear(smp_processor_id(), allbutself);
+    cur_cpu = smp_processor_id();
+    cpu_clear(cur_cpu, allbutself);
     nr_cpus = cpus_weight(allbutself);
 
     if ( nr_cpus == 0 )
     {
-        BUG_ON(cpu != smp_processor_id());
+        BUG_ON(!cpu_isset(cur_cpu, cpus));
         return (*fn)(data);
     }
 
@@ -91,7 +92,8 @@ int stop_machine_run(int (*fn)(void *), 
     stopmachine_data.fn = fn;
     stopmachine_data.fn_data = data;
     stopmachine_data.nr_cpus = nr_cpus;
-    stopmachine_data.fn_cpu = cpu;
+    stopmachine_data.fn_cpus = cpus;
+    stopmachine_data.fn_result = 0;
     atomic_set(&stopmachine_data.done, 0);
     stopmachine_data.state = STOPMACHINE_START;
 
@@ -105,8 +107,13 @@ int stop_machine_run(int (*fn)(void *), 
     local_irq_disable();
     stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
 
-    if ( cpu == smp_processor_id() )
-        stopmachine_data.fn_result = (*fn)(data);
+    /* callback will run on each cpu of the input map.
+     * If callback fails on any CPU, the stop_machine_run
+     * will return the  *ORed* the failure
+     */
+    if ( cpu_isset(cur_cpu, cpus) ){
+        stopmachine_data.fn_result |= (*fn)(data);
+    }
     stopmachine_set_state(STOPMACHINE_INVOKE);
     ret = stopmachine_data.fn_result;
 
@@ -121,7 +128,6 @@ static void stopmachine_softirq(void)
 static void stopmachine_softirq(void)
 {
     enum stopmachine_state state = STOPMACHINE_START;
-
     smp_mb();
 
     while ( state != STOPMACHINE_EXIT )
@@ -136,10 +142,11 @@ static void stopmachine_softirq(void)
             local_irq_disable();
             break;
         case STOPMACHINE_INVOKE:
-            if ( stopmachine_data.fn_cpu == smp_processor_id() )
-                stopmachine_data.fn_result =
+            if ( cpu_isset(smp_processor_id(), stopmachine_data.fn_cpus )) {
+                stopmachine_data.fn_result |= 
                     stopmachine_data.fn(stopmachine_data.fn_data);
-            break;
+            }
+           break;
         default:
             break;
         }
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/apicdef.h
--- a/xen/include/asm-x86/apicdef.h     Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/apicdef.h     Mon Dec 22 08:12:33 2008 +0000
@@ -80,6 +80,8 @@
 #define                APIC_LVTTHMR    0x330
 #define                APIC_LVTPC      0x340
 #define                APIC_LVT0       0x350
+#define                APIC_CMCI       0x2F0
+
 #define                        APIC_LVT_TIMER_BASE_MASK        (0x3<<18)
 #define                        GET_APIC_TIMER_BASE(x)          (((x)>>18)&0x3)
 #define                        SET_APIC_TIMER_BASE(x)          (((x)<<18))
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/config.h      Mon Dec 22 08:12:33 2008 +0000
@@ -22,7 +22,7 @@
 #define CONFIG_X86_IO_APIC 1
 #define CONFIG_X86_PM_TIMER 1
 #define CONFIG_HPET_TIMER 1
-#define CONFIG_X86_MCE_P4THERMAL 1
+#define CONFIG_X86_MCE_THERMAL 1
 #define CONFIG_NUMA 1
 #define CONFIG_DISCONTIGMEM 1
 #define CONFIG_NUMA_EMU 1
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/irq.h Mon Dec 22 08:12:33 2008 +0000
@@ -33,6 +33,7 @@ fastcall void pmu_apic_interrupt(void);
 fastcall void pmu_apic_interrupt(void);
 fastcall void spurious_interrupt(void);
 fastcall void thermal_interrupt(void);
+fastcall void cmci_interrupt(void);
 
 void disable_8259A_irq(unsigned int irq);
 void enable_8259A_irq(unsigned int irq);
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/mach-default/irq_vectors.h
--- a/xen/include/asm-x86/mach-default/irq_vectors.h    Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h    Mon Dec 22 08:12:33 2008 +0000
@@ -10,13 +10,13 @@
 #define THERMAL_APIC_VECTOR    0xfa
 #define LOCAL_TIMER_VECTOR     0xf9
 #define PMU_APIC_VECTOR        0xf8
-
+#define CMCI_APIC_VECTOR       0xf7
 /*
  * High-priority dynamically-allocated vectors. For interrupts that
  * must be higher priority than any guest-bound interrupt.
  */
 #define FIRST_HIPRIORITY_VECTOR        0xf0
-#define LAST_HIPRIORITY_VECTOR  0xf7
+#define LAST_HIPRIORITY_VECTOR  0xf6
 
 /* Legacy PIC uses vectors 0xe0-0xef. */
 #define FIRST_LEGACY_VECTOR    0xe0
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h   Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/msr-index.h   Mon Dec 22 08:12:33 2008 +0000
@@ -92,8 +92,10 @@
 #define MSR_IA32_MC0_STATUS            0x00000401
 #define MSR_IA32_MC0_ADDR              0x00000402
 #define MSR_IA32_MC0_MISC              0x00000403
-
-#define MSR_IA32_MC1_CTL               0x00000404
+#define MSR_IA32_MC0_CTL2              0x00000280
+#define CMCI_EN                        (1UL<<30)
+#define CMCI_THRESHOLD_MASK            0x7FFF
+
 #define MSR_IA32_MC1_STATUS            0x00000405
 #define MSR_IA32_MC1_ADDR              0x00000406
 #define MSR_IA32_MC1_MISC              0x00000407
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/smp.h Mon Dec 22 08:12:33 2008 +0000
@@ -101,7 +101,7 @@ static __inline int logical_smp_processo
 
 #endif
 
-extern int __cpu_disable(void);
+extern int __cpu_disable(int down_cpu);
 extern void __cpu_die(unsigned int cpu);
 #endif /* !__ASSEMBLY__ */
 
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/public/arch-x86/xen-mca.h
--- a/xen/include/public/arch-x86/xen-mca.h     Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/public/arch-x86/xen-mca.h     Mon Dec 22 08:12:33 2008 +0000
@@ -106,7 +106,10 @@ struct mcinfo_common {
 
 #define MC_FLAG_CORRECTABLE     (1 << 0)
 #define MC_FLAG_UNCORRECTABLE   (1 << 1)
-
+#define MC_FLAG_RECOVERABLE    (1 << 2)
+#define MC_FLAG_POLLED         (1 << 3)
+#define MC_FLAG_RESET          (1 << 4)
+#define MC_FLAG_CMCI           (1 << 5)
 /* contains global x86 mc information */
 struct mcinfo_global {
     struct mcinfo_common common;
@@ -115,6 +118,7 @@ struct mcinfo_global {
     uint16_t mc_domid;
     uint32_t mc_socketid; /* physical socket of the physical core */
     uint16_t mc_coreid; /* physical impacted core */
+    uint8_t  mc_apicid;
     uint16_t mc_core_threadid; /* core thread of physical core */
     uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
     uint64_t mc_gstatus; /* global status */
@@ -132,6 +136,8 @@ struct mcinfo_bank {
     uint64_t mc_addr;   /* bank address, only valid
                          * if addr bit is set in mc_status */
     uint64_t mc_misc;
+    uint64_t mc_ctrl2;
+    uint64_t mc_tsc;
 };
 
 
@@ -150,7 +156,12 @@ struct mcinfo_extended {
      * multiple times. */
 
     uint32_t mc_msrs; /* Number of msr with valid values. */
-    struct mcinfo_msr mc_msr[5];
+    /*
+     * Currently Intel extended MSR (32/64) including all gp registers
+     * and E(R)DI, E(R)BP, E(R)SP, E(R)FLAGS, E(R)IP, E(R)MISC, only 10
+     * of them might be useful. So expend this array to 10.
+    */
+    struct mcinfo_msr mc_msr[10];
 };
 
 #define MCINFO_HYPERCALLSIZE   1024
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/xen/stop_machine.h
--- a/xen/include/xen/stop_machine.h    Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/xen/stop_machine.h    Mon Dec 22 08:12:33 2008 +0000
@@ -5,7 +5,7 @@
  * stop_machine_run: freeze the machine on all CPUs and run this function
  * @fn: the function to run
  * @data: the data ptr for the @fn()
- * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS).
+ * @cpus: cpus to run @fn() on.
  *
  * Description: This causes every other cpu to enter a safe point, with
  * each of which disables interrupts, and finally interrupts are disabled
@@ -14,6 +14,6 @@
  *
  * This can be thought of as a very heavy write lock, equivalent to
  * grabbing every spinlock in the kernel. */
-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
+int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpu);
 
 #endif /* __XEN_STOP_MACHINE_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
