# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1229933553 0
# Node ID 4d5203f95498ff83b4fbcd48500c1d2d20b23f91
# Parent 2dffa6ceb0af954e7f3a9ad7e993b8aee7b7de65
Enable CMCI for Intel CPUs
Signed-off-by: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Liping Ke <liping.ke@xxxxxxxxx>
---
xen/arch/x86/cpu/mcheck/p4.c | 270 ---------
xen/arch/x86/cpu/mcheck/p6.c | 118 ----
xen/arch/x86/apic.c | 33 +
xen/arch/x86/cpu/mcheck/Makefile | 3
xen/arch/x86/cpu/mcheck/k7.c | 1
xen/arch/x86/cpu/mcheck/mce.c | 31 -
xen/arch/x86/cpu/mcheck/mce.h | 16
xen/arch/x86/cpu/mcheck/mce_intel.c | 681 +++++++++++++++++++++++++
xen/arch/x86/cpu/mcheck/non-fatal.c | 25
xen/arch/x86/cpu/mcheck/x86_mca.h | 19
xen/arch/x86/hvm/vmx/vmx.c | 8
xen/arch/x86/i8259.c | 1
xen/arch/x86/smpboot.c | 34 +
xen/common/stop_machine.c | 31 -
xen/include/asm-x86/apicdef.h | 2
xen/include/asm-x86/config.h | 2
xen/include/asm-x86/irq.h | 1
xen/include/asm-x86/mach-default/irq_vectors.h | 4
xen/include/asm-x86/msr-index.h | 6
xen/include/asm-x86/smp.h | 2
xen/include/public/arch-x86/xen-mca.h | 15
xen/include/xen/stop_machine.h | 4
22 files changed, 859 insertions(+), 448 deletions(-)
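
For context, the heart of the new mce_intel.c below is the per-bank CMCI ownership
discovery on MSR_IA32_MCi_CTL2 (see do_cmci_discover()): a CPU tries to set CMCI_EN
plus a threshold, and reads the MSR back to see whether the bank latched it. The
following self-contained C sketch simulates only that decision; the fake_* helpers
and discover_sim() are hypothetical stand-ins for the real rdmsrl()/wrmsrl()
accessors and are not part of this changeset.

    #include <stdio.h>
    #include <stdint.h>

    #define CMCI_EN        (1ULL << 30)
    #define CMCI_THRESHOLD 0x2ULL

    /* Simulated MCi_CTL2 register; the real code uses rdmsrl()/wrmsrl(). */
    static uint64_t fake_ctl2;
    static int fake_bank_has_cmci;

    static uint64_t rdmsrl_sim(void) { return fake_ctl2; }
    static void wrmsrl_sim(uint64_t v)
    {
        /* Hardware only latches CMCI_EN if the bank supports CMCI. */
        if (!fake_bank_has_cmci)
            v &= ~CMCI_EN;
        fake_ctl2 = v;
    }

    /* Mirrors the decision in do_cmci_discover(): 1 = we own the bank,
     * 0 = another CPU already owns it, -1 = no CMCI, fall back to polling. */
    static int discover_sim(void)
    {
        uint64_t val = rdmsrl_sim();

        if (val & CMCI_EN)
            return 0;
        wrmsrl_sim(val | CMCI_EN | CMCI_THRESHOLD);
        val = rdmsrl_sim();
        return (val & CMCI_EN) ? 1 : -1;
    }

    int main(void)
    {
        fake_bank_has_cmci = 0; fake_ctl2 = 0;
        printf("bank without CMCI support -> %d\n", discover_sim());

        fake_bank_has_cmci = 1; fake_ctl2 = 0;
        printf("bank claimed by this CPU  -> %d\n", discover_sim());
        printf("bank already owned        -> %d\n", discover_sim());
        return 0;
    }
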
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/apic.c Mon Dec 22 08:12:33 2008 +0000
@@ -99,8 +99,11 @@ void __init apic_intr_init(void)
/* Performance Counters Interrupt */
set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
- /* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+ /* CMCI Correctable Machine Check Interrupt */
+ set_intr_gate(CMCI_APIC_VECTOR, cmci_interrupt);
+
+ /* thermal monitor LVT interrupt, for P4 and later Intel CPUs */
+#ifdef CONFIG_X86_MCE_THERMAL
set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
#endif
}
@@ -172,12 +175,17 @@ void clear_local_APIC(void)
}
/* lets not touch this if we didn't frob it */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
if (maxlvt >= 5) {
v = apic_read(APIC_LVTTHMR);
apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
}
#endif
+
+ if (maxlvt >= 6) {
+ v = apic_read(APIC_CMCI);
+ apic_write_around(APIC_CMCI, v | APIC_LVT_MASKED);
+ }
/*
* Clean APIC state for other OSs:
*/
@@ -189,10 +197,13 @@ void clear_local_APIC(void)
if (maxlvt >= 4)
apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
if (maxlvt >= 5)
apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
#endif
+ if (maxlvt >= 6)
+ apic_write_around(APIC_CMCI, APIC_LVT_MASKED);
+
v = GET_APIC_VERSION(apic_read(APIC_LVR));
if (APIC_INTEGRATED(v)) { /* !82489DX */
if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
@@ -597,6 +608,7 @@ static struct {
unsigned int apic_spiv;
unsigned int apic_lvtt;
unsigned int apic_lvtpc;
+ unsigned int apic_lvtcmci;
unsigned int apic_lvt0;
unsigned int apic_lvt1;
unsigned int apic_lvterr;
@@ -608,7 +620,7 @@ int lapic_suspend(void)
int lapic_suspend(void)
{
unsigned long flags;
-
+ int maxlvt = get_maxlvt();
if (!apic_pm_state.active)
return 0;
@@ -620,6 +632,11 @@ int lapic_suspend(void)
apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+
+ if (maxlvt >= 6) {
+ apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI);
+ }
+
apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
@@ -637,6 +654,7 @@ int lapic_resume(void)
{
unsigned int l, h;
unsigned long flags;
+ int maxlvt = get_maxlvt();
if (!apic_pm_state.active)
return 0;
@@ -669,6 +687,11 @@ int lapic_resume(void)
apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+
+ if (maxlvt >= 6) {
+ apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci);
+ }
+
apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/Makefile
--- a/xen/arch/x86/cpu/mcheck/Makefile Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/Makefile Mon Dec 22 08:12:33 2008 +0000
@@ -3,8 +3,7 @@ obj-y += amd_k8.o
obj-y += amd_k8.o
obj-y += amd_f10.o
obj-y += mce.o
+obj-y += mce_intel.o
obj-y += non-fatal.o
-obj-y += p4.o
obj-$(x86_32) += p5.o
-obj-$(x86_32) += p6.o
obj-$(x86_32) += winchip.o
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/k7.c
--- a/xen/arch/x86/cpu/mcheck/k7.c Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/k7.c Mon Dec 22 08:12:33 2008 +0000
@@ -14,6 +14,7 @@
#include <asm/msr.h>
#include "mce.h"
+#include "x86_mca.h"
/* Machine Check Handler For AMD Athlon/Duron */
static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_code)
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce.c Mon Dec 22 08:12:33 2008 +0000
@@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-
* to physical cpus present in the machine.
* The more physical cpus are available, the more entries you need.
*/
-#define MAX_MCINFO 10
+#define MAX_MCINFO 20
struct mc_machine_notify {
struct mc_info mc;
@@ -110,6 +110,22 @@ static void amd_mcheck_init(struct cpuin
}
}
+/* Check for the existence of Machine Check support */
+int mce_available(struct cpuinfo_x86 *c)
+{
+ return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
+}
+
+/* Make sure there are no machine checks on offlined or suspended CPUs */
+void mce_disable_cpu(void)
+{
+ if (!mce_available(&current_cpu_data) || mce_disabled == 1)
+ return;
+ printk(KERN_DEBUG "MCE: disable mce on CPU%d\n", smp_processor_id());
+ clear_in_cr4(X86_CR4_MCE);
+}
+
+
/* This has to be run for each processor */
void mcheck_init(struct cpuinfo_x86 *c)
{
@@ -135,11 +151,13 @@ void mcheck_init(struct cpuinfo_x86 *c)
#ifndef CONFIG_X86_64
if (c->x86==5)
intel_p5_mcheck_init(c);
- if (c->x86==6)
- intel_p6_mcheck_init(c);
#endif
- if (c->x86==15)
- intel_p4_mcheck_init(c);
+ /* If it is the P6 or P4 family, including the Core 2 Duo series */
+ if (c->x86 == 6 || c->x86==15)
+ {
+ printk(KERN_DEBUG "MCE: Intel new family MC init\n");
+ intel_mcheck_init(c);
+ }
break;
#ifndef CONFIG_X86_64
@@ -413,7 +431,7 @@ void x86_mcinfo_dump(struct mc_info *mi)
if (mic == NULL)
return;
if (mic->type != MC_TYPE_BANK)
- continue;
+ goto next;
mc_bank = (struct mcinfo_bank *)mic;
@@ -426,6 +444,7 @@ void x86_mcinfo_dump(struct mc_info *mi)
printk(" at %16"PRIx64, mc_bank->mc_addr);
printk("\n");
+next:
mic = x86_mcinfo_next(mic); /* next entry */
if ((mic == NULL) || (mic->size == 0))
break;
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce.h Mon Dec 22 08:12:33 2008 +0000
@@ -1,14 +1,22 @@
#include <xen/init.h>
+#include <asm/types.h>
#include <asm/traps.h>
+#include <asm/atomic.h>
+#include <asm/percpu.h>
+
/* Init functions */
void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
void amd_k7_mcheck_init(struct cpuinfo_x86 *c);
void amd_k8_mcheck_init(struct cpuinfo_x86 *c);
void amd_f10_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+
+
+void intel_mcheck_timer(struct cpuinfo_x86 *c);
void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+void intel_mcheck_init(struct cpuinfo_x86 *c);
+void mce_intel_feature_init(struct cpuinfo_x86 *c);
+
void winchip_mcheck_init(struct cpuinfo_x86 *c);
/* Function pointer used in the handlers to collect additional information
@@ -19,6 +27,7 @@ extern int (*mc_callback_bank_extended)(
uint16_t bank, uint64_t status);
+int mce_available(struct cpuinfo_x86 *c);
/* Helper functions used for collecting error telemetry */
struct mc_info *x86_mcinfo_getptr(void);
void x86_mcinfo_clear(struct mc_info *mi);
@@ -26,6 +35,3 @@ void x86_mcinfo_dump(struct mc_info *mi)
void x86_mcinfo_dump(struct mc_info *mi);
void mc_panic(char *s);
-/* Global variables */
-extern int mce_disabled;
-extern unsigned int nr_mce_banks;
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/mce_intel.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Dec 22 08:12:33 2008 +0000
@@ -0,0 +1,681 @@
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+#include "mce.h"
+#include "x86_mca.h"
+
+DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
+
+static int nr_intel_ext_msrs = 0;
+static int cmci_support = 0;
+extern int firstbank;
+
+#ifdef CONFIG_X86_MCE_THERMAL
+static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
+{
+ printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n",
+ smp_processor_id());
+ add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* P4/Xeon Thermal transition interrupt handler */
+static void intel_thermal_interrupt(struct cpu_user_regs *regs)
+{
+ u32 l, h;
+ unsigned int cpu = smp_processor_id();
+ static s_time_t next[NR_CPUS];
+
+ ack_APIC_irq();
+ if (NOW() < next[cpu])
+ return;
+
+ next[cpu] = NOW() + MILLISECS(5000);
+ rdmsr(MSR_IA32_THERM_STATUS, l, h);
+ if (l & 0x1) {
+ printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
+ printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
+ cpu);
+ add_taint(TAINT_MACHINE_CHECK);
+ } else {
+ printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+ }
+}
+
+/* Thermal interrupt handler for this CPU setup */
+static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs)
+ = unexpected_thermal_interrupt;
+
+fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
+{
+ irq_enter();
+ vendor_thermal_interrupt(regs);
+ irq_exit();
+}
+
+/* P4/Xeon Thermal regulation detect and init */
+static void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+ int tm2 = 0;
+ unsigned int cpu = smp_processor_id();
+
+ /* Thermal monitoring */
+ if (!cpu_has(c, X86_FEATURE_ACPI))
+ return; /* -ENODEV */
+
+ /* Clock modulation */
+ if (!cpu_has(c, X86_FEATURE_ACC))
+ return; /* -ENODEV */
+
+ /* first check if its enabled already, in which case there might
+ * be some SMM goo which handles it, so we can't even put a handler
+ * since it might be delivered via SMI already -zwanem.
+ */
+ rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+ h = apic_read(APIC_LVTTHMR);
+ if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+ printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",cpu);
+ return; /* -EBUSY */
+ }
+
+ if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
+ tm2 = 1;
+
+ /* check whether a vector already exists, temporarily masked? */
+ if (h & APIC_VECTOR_MASK) {
+ printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n",
+ cpu, (h & APIC_VECTOR_MASK));
+ return; /* -EBUSY */
+ }
+
+ /* The temperature transition interrupt handler setup */
+ h = THERMAL_APIC_VECTOR; /* our delivery vector */
+ h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
+ apic_write_around(APIC_LVTTHMR, h);
+
+ rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
+ wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+
+ /* ok we're good to go... */
+ vendor_thermal_interrupt = intel_thermal_interrupt;
+
+ rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+ wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+
+ l = apic_read (APIC_LVTTHMR);
+ apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+ printk (KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+ cpu, tm2 ? "TM2" : "TM1");
+ return;
+}
+#endif /* CONFIG_X86_MCE_THERMAL */
+
+static inline void intel_get_extended_msrs(struct mcinfo_extended *mc_ext)
+{
+ if (nr_intel_ext_msrs == 0)
+ return;
+
+ /* This function will be called when CAP(9).MCG_EXT_P = 1 */
+ memset(mc_ext, 0, sizeof(struct mcinfo_extended));
+ mc_ext->common.type = MC_TYPE_EXTENDED;
+ mc_ext->common.size = sizeof(struct mcinfo_extended);
+ mc_ext->mc_msrs = 10;
+
+ mc_ext->mc_msr[0].reg = MSR_IA32_MCG_EAX;
+ rdmsrl(MSR_IA32_MCG_EAX, mc_ext->mc_msr[0].value);
+ mc_ext->mc_msr[1].reg = MSR_IA32_MCG_EBX;
+ rdmsrl(MSR_IA32_MCG_EBX, mc_ext->mc_msr[1].value);
+ mc_ext->mc_msr[2].reg = MSR_IA32_MCG_ECX;
+ rdmsrl(MSR_IA32_MCG_ECX, mc_ext->mc_msr[2].value);
+
+ mc_ext->mc_msr[3].reg = MSR_IA32_MCG_EDX;
+ rdmsrl(MSR_IA32_MCG_EDX, mc_ext->mc_msr[3].value);
+ mc_ext->mc_msr[4].reg = MSR_IA32_MCG_ESI;
+ rdmsrl(MSR_IA32_MCG_ESI, mc_ext->mc_msr[4].value);
+ mc_ext->mc_msr[5].reg = MSR_IA32_MCG_EDI;
+ rdmsrl(MSR_IA32_MCG_EDI, mc_ext->mc_msr[5].value);
+
+ mc_ext->mc_msr[6].reg = MSR_IA32_MCG_EBP;
+ rdmsrl(MSR_IA32_MCG_EBP, mc_ext->mc_msr[6].value);
+ mc_ext->mc_msr[7].reg = MSR_IA32_MCG_ESP;
+ rdmsrl(MSR_IA32_MCG_ESP, mc_ext->mc_msr[7].value);
+ mc_ext->mc_msr[8].reg = MSR_IA32_MCG_EFLAGS;
+ rdmsrl(MSR_IA32_MCG_EFLAGS, mc_ext->mc_msr[8].value);
+ mc_ext->mc_msr[9].reg = MSR_IA32_MCG_EIP;
+ rdmsrl(MSR_IA32_MCG_EIP, mc_ext->mc_msr[9].value);
+}
+
+/* machine_check_poll may be called in the following contexts:
+ * 1. from mcheck_init.
+ * 2. from the CMCI interrupt handler.
+ * 3. from the polling handler.
+ * It generates a new mc_info item if CE/UC errors are found. Dom0 is the
+ * consumer.
+*/
+static int machine_check_poll(struct mc_info *mi, int calltype)
+{
+ int exceptions = (read_cr4() & X86_CR4_MCE);
+ int i, nr_unit = 0, uc = 0, pcc = 0;
+ uint64_t status, addr;
+ struct mcinfo_global mcg;
+ struct mcinfo_extended mce;
+ unsigned int cpu;
+ struct domain *d;
+
+ cpu = smp_processor_id();
+
+ if (!mi) {
+ printk(KERN_ERR "mcheck_poll: Failed to get mc_info entry\n");
+ return 0;
+ }
+ x86_mcinfo_clear(mi);
+
+ memset(&mcg, 0, sizeof(mcg));
+ mcg.common.type = MC_TYPE_GLOBAL;
+ mcg.common.size = sizeof(mcg);
+ /* If called from the cpu-reset check, there is no need to fill them.
+ * If called from CMCI context, we'll try to fill the domid from the memory addr.
+ */
+ mcg.mc_domid = -1;
+ mcg.mc_vcpuid = -1;
+ if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET)
+ mcg.mc_flags = MC_FLAG_POLLED;
+ else if (calltype == MC_FLAG_CMCI)
+ mcg.mc_flags = MC_FLAG_CMCI;
+ mcg.mc_socketid = phys_proc_id[cpu];
+ mcg.mc_coreid = cpu_core_id[cpu];
+ mcg.mc_apicid = cpu_physical_id(cpu);
+ mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1));
+ rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
+
+ for ( i = 0; i < nr_mce_banks; i++ ) {
+ struct mcinfo_bank mcb;
+ /* For CMCI, only the owner checks its owned MSRs */
+ if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) &&
+ (calltype & MC_FLAG_CMCI) )
+ continue;
+ rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
+
+ if (! (status & MCi_STATUS_VAL) )
+ continue;
+ /*
+ * Uncorrected events are handled by the exception
+ * handler when it is enabled. But when the exception
+ * is disabled, such as during mcheck_init, log everything.
+ */
+ if ((status & MCi_STATUS_UC) && exceptions)
+ continue;
+
+ if (status & MCi_STATUS_UC)
+ uc = 1;
+ if (status & MCi_STATUS_PCC)
+ pcc = 1;
+
+ memset(&mcb, 0, sizeof(mcb));
+ mcb.common.type = MC_TYPE_BANK;
+ mcb.common.size = sizeof(mcb);
+ mcb.mc_bank = i;
+ mcb.mc_status = status;
+ if (status & MCi_STATUS_MISCV)
+ rdmsrl(MSR_IA32_MC0_MISC + 4 * i, mcb.mc_misc);
+ if (status & MCi_STATUS_ADDRV) {
+ rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr);
+ d = maddr_get_owner(addr);
+ if ( d && (calltype == MC_FLAG_CMCI || calltype == MC_FLAG_POLLED) )
+ mcb.mc_domid = d->domain_id;
+ }
+ if (cmci_support)
+ rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2);
+ if (calltype == MC_FLAG_CMCI)
+ rdtscll(mcb.mc_tsc);
+ x86_mcinfo_add(mi, &mcb);
+ nr_unit++;
+ add_taint(TAINT_MACHINE_CHECK);
+ /*Clear state for this bank */
+ wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0);
+ printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%lx]\n",
+ i, cpu, status);
+ printk(KERN_DEBUG "mcheck_poll: CPU%d, SOCKET%d, CORE%d, APICID[%d], "
+ "thread[%d]\n", cpu, mcg.mc_socketid,
+ mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid);
+
+ }
+ /*if pcc = 1, uc must be 1*/
+ if (pcc)
+ mcg.mc_flags |= MC_FLAG_UNCORRECTABLE;
+ else if (uc)
+ mcg.mc_flags |= MC_FLAG_RECOVERABLE;
+ else /*correctable*/
+ mcg.mc_flags |= MC_FLAG_CORRECTABLE;
+
+ if (nr_unit && nr_intel_ext_msrs &&
+ (mcg.mc_gstatus & MCG_STATUS_EIPV)) {
+ intel_get_extended_msrs(&mce);
+ x86_mcinfo_add(mi, &mce);
+ }
+ if (nr_unit)
+ x86_mcinfo_add(mi, &mcg);
+ /*Clear global state*/
+ return nr_unit;
+}
+
+static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
+{
+ /* The MACHINE CHECK error handler will be sent in another patch;
+ * simply copy the old solution here. This code will be replaced
+ * by upcoming machine check patches.
+ */
+
+ int recover=1;
+ u32 alow, ahigh, high, low;
+ u32 mcgstl, mcgsth;
+ int i;
+
+ rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+ if (mcgstl & (1<<0)) /* Recoverable ? */
+ recover=0;
+
+ printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+ smp_processor_id(), mcgsth, mcgstl);
+
+ for (i=0; i<nr_mce_banks; i++) {
+ rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+ if (high & (1<<31)) {
+ if (high & (1<<29))
+ recover |= 1;
+ if (high & (1<<25))
+ recover |= 2;
+ printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+ high &= ~(1<<31);
+ if (high & (1<<27)) {
+ rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+ printk ("[%08x%08x]", ahigh, alow);
+ }
+ if (high & (1<<26)) {
+ rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+ printk (" at %08x%08x", ahigh, alow);
+ }
+ printk ("\n");
+ }
+ }
+
+ if (recover & 2)
+ mc_panic ("CPU context corrupt");
+ if (recover & 1)
+ mc_panic ("Unable to continue");
+
+ printk(KERN_EMERG "Attempting to continue.\n");
+ /*
+ * Do not clear the MSR_IA32_MCi_STATUS if the error is not
+ * recoverable/continuable. This will allow BIOS to look at the MSRs
+ * for errors if the OS could not log the error.
+ */
+ for (i=0; i<nr_mce_banks; i++) {
+ u32 msr;
+ msr = MSR_IA32_MC0_STATUS+i*4;
+ rdmsr (msr, low, high);
+ if (high&(1<<31)) {
+ /* Clear it */
+ wrmsr(msr, 0UL, 0UL);
+ /* Serialize */
+ wmb();
+ add_taint(TAINT_MACHINE_CHECK);
+ }
+ }
+ mcgstl &= ~(1<<2);
+ wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+extern void (*cpu_down_handler)(int down_cpu);
+extern void (*cpu_down_rollback_handler)(int down_cpu);
+extern void mce_disable_cpu(void);
+static bool_t cmci_clear_lock = 0;
+static DEFINE_SPINLOCK(cmci_discover_lock);
+static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
+
+/*
+ * Discover bank sharing using the algorithm recommended in the SDM.
+ */
+static int do_cmci_discover(int i)
+{
+ unsigned msr = MSR_IA32_MC0_CTL2 + i;
+ u64 val;
+
+ rdmsrl(msr, val);
+ /* Some other CPU already owns this bank. */
+ if (val & CMCI_EN) {
+ clear_bit(i, __get_cpu_var(mce_banks_owned));
+ goto out;
+ }
+ wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
+ rdmsrl(msr, val);
+
+ if (!(val & CMCI_EN)) {
+ /*
+ * This bank does not support CMCI. The polling
+ * timer has to handle it.
+ */
+ set_bit(i, __get_cpu_var(no_cmci_banks));
+ return 0;
+ }
+ set_bit(i, __get_cpu_var(mce_banks_owned));
+out:
+ clear_bit(i, __get_cpu_var(no_cmci_banks));
+ return 1;
+}
+
+void cmci_discover(void)
+{
+ int i;
+
+ printk(KERN_DEBUG "CMCI: find owner on CPU%d\n", smp_processor_id());
+ spin_lock(&cmci_discover_lock);
+ for (i = 0; i < nr_mce_banks; i++) {
+ /* If this CPU already owns the bank, no need to re-discover */
+ if (test_bit(i, __get_cpu_var(mce_banks_owned)))
+ continue;
+ do_cmci_discover(i);
+ }
+ spin_unlock(&cmci_discover_lock);
+ printk(KERN_DEBUG "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
+ smp_processor_id(),
+ *((unsigned long *)__get_cpu_var(mce_banks_owned)),
+ *((unsigned long *)__get_cpu_var(no_cmci_banks)));
+}
+
+/*
+ * Define an owner for each bank. Banks can be shared between CPUs
+ * and to avoid reporting events multiple times always set up one
+ * CPU as owner.
+ *
+ * The assignment has to be redone when CPUs go offline and
+ * any of the owners goes away. Also pollers run in parallel so we
+ * have to be careful to update the banks in a way that doesn't
+ * lose or duplicate events.
+ */
+
+static void mce_set_owner(void)
+{
+
+ if (!cmci_support || mce_disabled == 1)
+ return;
+
+ cmci_discover();
+}
+
+static void clear_cmci(void)
+{
+ int i;
+
+ if (!cmci_support || mce_disabled == 1)
+ return;
+
+ printk(KERN_DEBUG "CMCI: clear_cmci support on CPU%d\n",
+ smp_processor_id());
+
+ for (i = 0; i < nr_mce_banks; i++) {
+ unsigned msr = MSR_IA32_MC0_CTL2 + i;
+ u64 val;
+ if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+ continue;
+ rdmsrl(msr, val);
+ if (val & (CMCI_EN|CMCI_THRESHOLD_MASK))
+ wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
+ clear_bit(i, __get_cpu_var(mce_banks_owned));
+ }
+}
+
+/* We need to re-set cmci owners when cpu_down fails or on cpu_up */
+static void cmci_reenable_cpu(void *h)
+{
+ if (!mce_available(&current_cpu_data) || mce_disabled == 1)
+ return;
+ printk(KERN_DEBUG "CMCI: reenable mce on CPU%d\n", smp_processor_id());
+ mce_set_owner();
+ set_in_cr4(X86_CR4_MCE);
+}
+
+/* When taking a CPU down, we need to run the affected cmci_owner adjustment
+ * algorithm:
+ * First, we need to clear the ownership on the dying CPU.
+ * Then, other CPUs will check whether to take over bank ownership from down_cpu.
+ * CPU0 need not, and never does, execute this path.
+*/
+void __cpu_clear_cmci( int down_cpu)
+{
+ int cpu = smp_processor_id();
+
+ if (!cmci_support || mce_disabled == 1)
+ return;
+
+ if (cpu == 0) {
+ printk(KERN_DEBUG "CMCI: CPU0 need not be cleared\n");
+ return;
+ }
+
+ local_irq_disable();
+ if (cpu == down_cpu){
+ mce_disable_cpu();
+ clear_cmci();
+ wmb();
+ test_and_set_bool(cmci_clear_lock);
+ return;
+ }
+ while (!cmci_clear_lock)
+ cpu_relax();
+ if (cpu != down_cpu)
+ mce_set_owner();
+
+ test_and_clear_bool(cmci_clear_lock);
+ local_irq_enable();
+
+}
+
+void __cpu_clear_cmci_rollback( int down_cpu)
+{
+ cpumask_t down_map;
+ if (!cmci_support || mce_disabled == 1)
+ return;
+
+ cpus_clear(down_map);
+ cpu_set(down_cpu, down_map);
+ printk(KERN_ERR "CMCI: cpu_down fail. "
+ "Reenable cmci on CPU%d\n", down_cpu);
+ on_selected_cpus(down_map, cmci_reenable_cpu, NULL, 1, 1);
+}
+
+static void intel_init_cmci(struct cpuinfo_x86 *c)
+{
+ u32 l, apic;
+ int cpu = smp_processor_id();
+
+ if (!mce_available(c) || !cmci_support) {
+ printk(KERN_DEBUG "CMCI: CPU%d has no CMCI support\n", cpu);
+ return;
+ }
+
+ apic = apic_read(APIC_CMCI);
+ if ( apic & APIC_VECTOR_MASK )
+ {
+ printk(KERN_WARNING "CPU%d CMCI LVT vector (%#x) already installed\n",
+ cpu, ( apic & APIC_VECTOR_MASK ));
+ return;
+ }
+
+ apic = CMCI_APIC_VECTOR;
+ apic |= (APIC_DM_FIXED | APIC_LVT_MASKED);
+ apic_write_around(APIC_CMCI, apic);
+
+ /*now clear mask flag*/
+ l = apic_read(APIC_CMCI);
+ apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED);
+ cpu_down_handler = __cpu_clear_cmci;
+ cpu_down_rollback_handler = __cpu_clear_cmci_rollback;
+}
+
+fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs)
+{
+ int nr_unit;
+ struct mc_info *mi = x86_mcinfo_getptr();
+ int cpu = smp_processor_id();
+
+ ack_APIC_irq();
+ irq_enter();
+ printk(KERN_DEBUG "CMCI: cmci_intr happen on CPU%d\n", cpu);
+ nr_unit = machine_check_poll(mi, MC_FLAG_CMCI);
+ if (nr_unit) {
+ x86_mcinfo_dump(mi);
+ if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA))
+ send_guest_global_virq(dom0, VIRQ_MCA);
+ }
+ irq_exit();
+}
+
+void mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+
+#ifdef CONFIG_X86_MCE_THERMAL
+ intel_init_thermal(c);
+#endif
+ intel_init_cmci(c);
+}
+
+static void mce_cap_init(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+
+ rdmsr (MSR_IA32_MCG_CAP, l, h);
+ if ((l & MCG_CMCI_P) && cpu_has_apic)
+ cmci_support = 1;
+
+ nr_mce_banks = l & 0xff;
+ if (nr_mce_banks > MAX_NR_BANKS)
+ printk(KERN_WARNING "MCE: exceed max mce banks\n");
+ if (l & MCG_EXT_P)
+ {
+ nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff;
+ printk (KERN_INFO "CPU%d: Intel Extended MCE MSRs (%d) available\n",
+ smp_processor_id(), nr_intel_ext_msrs);
+ }
+ /* For most of the P6 family, bank 0 is an alias to a BIOS MSR.
+ * But from model 0x1a onwards, bank 0 is available. */
+ if ( c->x86 == 6 && c->x86_vendor == X86_VENDOR_INTEL
+ && c->x86_model < 0x1A)
+ firstbank = 1;
+ else
+ firstbank = 0;
+}
+
+static void mce_init(void)
+{
+ u32 l, h;
+ int i, nr_unit;
+ struct mc_info *mi = x86_mcinfo_getptr();
+ clear_in_cr4(X86_CR4_MCE);
+ /* log the machine checks left over from the previous reset.
+ * This also clears all registers*/
+
+ nr_unit = machine_check_poll(mi, MC_FLAG_RESET);
+ /* In the boot-up stage we do not expect to inject to Dom0, but do print it out.
+ */
+ if (nr_unit > 0)
+ x86_mcinfo_dump(mi);
+
+ set_in_cr4(X86_CR4_MCE);
+ rdmsr (MSR_IA32_MCG_CAP, l, h);
+ if (l & MCG_CTL_P) /* Control register present ? */
+ wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+ for (i = firstbank; i < nr_mce_banks; i++)
+ {
+ /* Some banks are shared across cores; use MCi_CTL to judge whether
+ * this bank has already been initialized by another core. */
+ rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h);
+ if (!l && !h)
+ {
+ /* if ctl is 0, this bank has never been initialized */
+ printk(KERN_DEBUG "mce_init: init bank%d\n", i);
+ wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff);
+ wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0);
+ }
+ }
+ if (firstbank) /*if cmci enabled, firstbank = 0*/
+ wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+}
+
+/* The P4/P6 family has a similar MCA initialization process */
+void intel_mcheck_init(struct cpuinfo_x86 *c)
+{
+
+ mce_cap_init(c);
+ printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+ smp_processor_id());
+ /* machine check is available */
+ machine_check_vector = intel_machine_check;
+ mce_init();
+ mce_intel_feature_init(c);
+ mce_set_owner();
+}
+
+/*
+ * Periodic polling timer for "silent" machine check errors. If the
+ * poller finds an MCE, poll faster. When the poller finds no more
+ * errors, poll slower
+*/
+static struct timer mce_timer;
+
+#define MCE_PERIOD 4000
+#define MCE_MIN 2000
+#define MCE_MAX 32000
+
+static u64 period = MCE_PERIOD;
+static int adjust = 0;
+
+static void mce_intel_checkregs(void *info)
+{
+ int nr_unit;
+ struct mc_info *mi = x86_mcinfo_getptr();
+
+ if ( !mce_available(&current_cpu_data) )
+ return;
+ nr_unit = machine_check_poll(mi, MC_FLAG_POLLED);
+ if (nr_unit)
+ {
+ x86_mcinfo_dump(mi);
+ adjust++;
+ if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA))
+ send_guest_global_virq(dom0, VIRQ_MCA);
+ }
+}
+
+static void mce_intel_work_fn(void *data)
+{
+ on_each_cpu(mce_intel_checkregs, data, 1, 1);
+ if (adjust) {
+ period = period / (adjust + 1);
+ printk(KERN_DEBUG "mcheck_poll: Find error, shorten interval to %ld",
+ period);
+ }
+ else {
+ period *= 2;
+ }
+ if (period > MCE_MAX)
+ period = MCE_MAX;
+ if (period < MCE_MIN)
+ period = MCE_MIN;
+ set_timer(&mce_timer, NOW() + MILLISECS(period));
+ adjust = 0;
+}
+
+void intel_mcheck_timer(struct cpuinfo_x86 *c)
+{
+ printk(KERN_DEBUG "mcheck_poll: Init_mcheck_timer\n");
+ init_timer(&mce_timer, mce_intel_work_fn, NULL, 0);
+ set_timer(&mce_timer, NOW() + MILLISECS(MCE_PERIOD));
+}
+
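
The polling path in mce_intel_work_fn() above adapts its interval: the period is
divided when the last pass found errors and doubled otherwise, clamped to
[MCE_MIN, MCE_MAX] milliseconds. A small, self-contained sketch of that adjustment
follows; next_period() is a hypothetical helper written for illustration, not Xen code.

    #include <stdio.h>

    #define MCE_PERIOD 4000
    #define MCE_MIN    2000
    #define MCE_MAX   32000

    /* Shrink the period when errors were found in the last pass,
     * otherwise double it, clamped to [MCE_MIN, MCE_MAX] milliseconds. */
    static unsigned long next_period(unsigned long period, int errors_found)
    {
        if (errors_found)
            period = period / (errors_found + 1);
        else
            period *= 2;
        if (period > MCE_MAX)
            period = MCE_MAX;
        if (period < MCE_MIN)
            period = MCE_MIN;
        return period;
    }

    int main(void)
    {
        unsigned long p = MCE_PERIOD;
        int errors[] = { 0, 0, 3, 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
            p = next_period(p, errors[i]);
            printf("pass %u: %d error(s) -> next poll in %lums\n",
                   i, errors[i], p);
        }
        return 0;
    }
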
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/non-fatal.c
--- a/xen/arch/x86/cpu/mcheck/non-fatal.c Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Mon Dec 22 08:12:33 2008 +0000
@@ -19,8 +19,8 @@
#include <asm/msr.h>
#include "mce.h"
-
-static int firstbank;
+#include "x86_mca.h"
+int firstbank = 0;
static struct timer mce_timer;
#define MCE_PERIOD MILLISECS(15000)
@@ -61,13 +61,8 @@ static int __init init_nonfatal_mce_chec
struct cpuinfo_x86 *c = &boot_cpu_data;
/* Check for MCE support */
- if (!cpu_has(c, X86_FEATURE_MCE))
+ if (!mce_available(c))
return -ENODEV;
-
- /* Check for PPro style MCA */
- if (!cpu_has(c, X86_FEATURE_MCA))
- return -ENODEV;
-
/*
* Check for non-fatal errors every MCE_RATE s
*/
@@ -85,12 +80,20 @@ static int __init init_nonfatal_mce_chec
break;
case X86_VENDOR_INTEL:
- init_timer(&mce_timer, mce_work_fn, NULL, 0);
- set_timer(&mce_timer, NOW() + MCE_PERIOD);
+ /* The P5 family is different. P4/P6 and later CPUs share the
+ * same polling methods.
+ */
+ if ( c->x86 != 5 )
+ {
+ /* Some CPUs or banks don't support CMCI, so we need to
+ * enable the polling timer anyway.
+ */
+ intel_mcheck_timer(c);
+ }
break;
}
- printk(KERN_INFO "MCA: Machine check polling timer started.\n");
+ printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n");
return 0;
}
__initcall(init_nonfatal_mce_checker);
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/p4.c
--- a/xen/arch/x86/cpu/mcheck/p4.c Fri Dec 19 14:56:36 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,270 +0,0 @@
-/*
- * P4 specific Machine Check Exception Reporting
- */
-
-#include <xen/init.h>
-#include <xen/types.h>
-#include <xen/kernel.h>
-#include <xen/config.h>
-#include <xen/smp.h>
-#include <xen/irq.h>
-#include <xen/time.h>
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/msr.h>
-#include <asm/apic.h>
-
-#include "mce.h"
-
-/* as supported by the P4/Xeon family */
-struct intel_mce_extended_msrs {
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 esi;
- u32 edi;
- u32 ebp;
- u32 esp;
- u32 eflags;
- u32 eip;
- /* u32 *reserved[]; */
-};
-
-static int mce_num_extended_msrs = 0;
-
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
-static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
-{
- printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
- smp_processor_id());
- add_taint(TAINT_MACHINE_CHECK);
-}
-
-/* P4/Xeon Thermal transition interrupt handler */
-static void intel_thermal_interrupt(struct cpu_user_regs *regs)
-{
- u32 l, h;
- unsigned int cpu = smp_processor_id();
- static s_time_t next[NR_CPUS];
-
- ack_APIC_irq();
-
- if (NOW() < next[cpu])
- return;
-
- next[cpu] = NOW() + MILLISECS(5000);
- rdmsr(MSR_IA32_THERM_STATUS, l, h);
- if (l & 0x1) {
- printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
- printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
- cpu);
- add_taint(TAINT_MACHINE_CHECK);
- } else {
- printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
- }
-}
-
-/* Thermal interrupt handler for this CPU setup */
-static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) = unexpected_thermal_interrupt;
-
-fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
-{
- irq_enter();
- vendor_thermal_interrupt(regs);
- irq_exit();
-}
-
-/* P4/Xeon Thermal regulation detect and init */
-static void intel_init_thermal(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- unsigned int cpu = smp_processor_id();
-
- /* Thermal monitoring */
- if (!cpu_has(c, X86_FEATURE_ACPI))
- return; /* -ENODEV */
-
- /* Clock modulation */
- if (!cpu_has(c, X86_FEATURE_ACC))
- return; /* -ENODEV */
-
- /* first check if its enabled already, in which case there might
- * be some SMM goo which handles it, so we can't even put a handler
- * since it might be delivered via SMI already -zwanem.
- */
- rdmsr (MSR_IA32_MISC_ENABLE, l, h);
- h = apic_read(APIC_LVTTHMR);
- if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
- printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
- cpu);
- return; /* -EBUSY */
- }
-
- /* check whether a vector already exists, temporarily masked? */
- if (h & APIC_VECTOR_MASK) {
- printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
- "installed\n",
- cpu, (h & APIC_VECTOR_MASK));
- return; /* -EBUSY */
- }
-
- /* The temperature transition interrupt handler setup */
- h = THERMAL_APIC_VECTOR; /* our delivery vector */
- h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
- apic_write_around(APIC_LVTTHMR, h);
-
- rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
- wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
-
- /* ok we're good to go... */
- vendor_thermal_interrupt = intel_thermal_interrupt;
-
- rdmsr (MSR_IA32_MISC_ENABLE, l, h);
- wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
-
- l = apic_read (APIC_LVTTHMR);
- apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
- printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
- return;
-}
-#endif /* CONFIG_X86_MCE_P4THERMAL */
-
-
-/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
-{
- u32 h;
-
- if (mce_num_extended_msrs == 0)
- goto done;
-
- rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
- rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
- rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
- rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
- rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
- rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
- rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
- rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
- rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
- rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
-
- /* can we rely on kmalloc to do a dynamic
- * allocation for the reserved registers?
- */
-done:
- return mce_num_extended_msrs;
-}
-
-static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
-{
- int recover=1;
- u32 alow, ahigh, high, low;
- u32 mcgstl, mcgsth;
- int i;
- struct intel_mce_extended_msrs dbg;
-
- rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
- if (mcgstl & (1<<0)) /* Recoverable ? */
- recover=0;
-
- printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
- smp_processor_id(), mcgsth, mcgstl);
-
- if (intel_get_extended_msrs(&dbg)) {
- printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
- smp_processor_id(), dbg.eip, dbg.eflags);
- printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
- dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
- printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
- dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
- }
-
- for (i=0; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
- if (high & (1<<31)) {
- if (high & (1<<29))
- recover |= 1;
- if (high & (1<<25))
- recover |= 2;
- printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
- high &= ~(1<<31);
- if (high & (1<<27)) {
- rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk ("[%08x%08x]", ahigh, alow);
- }
- if (high & (1<<26)) {
- rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk (" at %08x%08x", ahigh, alow);
- }
- printk ("\n");
- }
- }
-
- if (recover & 2)
- mc_panic ("CPU context corrupt");
- if (recover & 1)
- mc_panic ("Unable to continue");
-
- printk(KERN_EMERG "Attempting to continue.\n");
- /*
- * Do not clear the MSR_IA32_MCi_STATUS if the error is not
- * recoverable/continuable.This will allow BIOS to look at the MSRs
- * for errors if the OS could not log the error.
- */
- for (i=0; i<nr_mce_banks; i++) {
- u32 msr;
- msr = MSR_IA32_MC0_STATUS+i*4;
- rdmsr (msr, low, high);
- if (high&(1<<31)) {
- /* Clear it */
- wrmsr(msr, 0UL, 0UL);
- /* Serialize */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
- }
- mcgstl &= ~(1<<2);
- wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
-}
-
-
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int i;
-
- machine_check_vector = intel_machine_check;
- wmb();
-
- printk (KERN_INFO "Intel machine check architecture supported.\n");
- rdmsr (MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<8)) /* Control register present ? */
- wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- nr_mce_banks = l & 0xff;
-
- for (i=0; i<nr_mce_banks; i++) {
- wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
- wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
- }
-
- set_in_cr4 (X86_CR4_MCE);
- printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
- smp_processor_id());
-
- /* Check for P4/Xeon extended MCE MSRs */
- rdmsr (MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<9)) {/* MCG_EXT_P */
- mce_num_extended_msrs = (l >> 16) & 0xff;
- printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
- " available\n",
- smp_processor_id(), mce_num_extended_msrs);
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
- /* Check for P4/Xeon Thermal monitor */
- intel_init_thermal(c);
-#endif
- }
-}
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/p6.c
--- a/xen/arch/x86/cpu/mcheck/p6.c Fri Dec 19 14:56:36 2008 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,118 +0,0 @@
-/*
- * P6 specific Machine Check Exception Reporting
- * (C) Copyright 2002 Alan Cox <alan@xxxxxxxxxx>
- */
-
-#include <xen/init.h>
-#include <xen/types.h>
-#include <xen/kernel.h>
-#include <xen/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/msr.h>
-
-#include "mce.h"
-
-/* Machine Check Handler For PII/PIII */
-static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
-{
- int recover=1;
- u32 alow, ahigh, high, low;
- u32 mcgstl, mcgsth;
- int i;
-
- rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
- if (mcgstl & (1<<0)) /* Recoverable ? */
- recover=0;
-
- printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
- smp_processor_id(), mcgsth, mcgstl);
-
- for (i=0; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
- if (high & (1<<31)) {
- if (high & (1<<29))
- recover |= 1;
- if (high & (1<<25))
- recover |= 2;
- printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
- high &= ~(1<<31);
- if (high & (1<<27)) {
- rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk ("[%08x%08x]", ahigh, alow);
- }
- if (high & (1<<26)) {
- rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk (" at %08x%08x", ahigh, alow);
- }
- printk ("\n");
- }
- }
-
- if (recover & 2)
- mc_panic ("CPU context corrupt");
- if (recover & 1)
- mc_panic ("Unable to continue");
-
- printk (KERN_EMERG "Attempting to continue.\n");
- /*
- * Do not clear the MSR_IA32_MCi_STATUS if the error is not
- * recoverable/continuable.This will allow BIOS to look at the MSRs
- * for errors if the OS could not log the error.
- */
- for (i=0; i<nr_mce_banks; i++) {
- unsigned int msr;
- msr = MSR_IA32_MC0_STATUS+i*4;
- rdmsr (msr,low, high);
- if (high & (1<<31)) {
- /* Clear it */
- wrmsr (msr, 0UL, 0UL);
- /* Serialize */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
- }
- mcgstl &= ~(1<<2);
- wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
-}
-
-/* Set up machine check reporting for processors with Intel style MCE */
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int i;
-
- /* Check for MCE support */
- if (!cpu_has(c, X86_FEATURE_MCE))
- return;
-
- /* Check for PPro style MCA */
- if (!cpu_has(c, X86_FEATURE_MCA))
- return;
-
- /* Ok machine check is available */
- machine_check_vector = intel_machine_check;
- wmb();
-
- printk (KERN_INFO "Intel machine check architecture supported.\n");
- rdmsr (MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<8)) /* Control register present ? */
- wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- nr_mce_banks = l & 0xff;
-
- /*
- * Following the example in IA-32 SDM Vol 3:
- * - MC0_CTL should not be written
- * - Status registers on all banks should be cleared on reset
- */
- for (i=1; i<nr_mce_banks; i++)
- wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
-
- for (i=0; i<nr_mce_banks; i++)
- wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-
- set_in_cr4 (X86_CR4_MCE);
- printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
- smp_processor_id());
-}
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/cpu/mcheck/x86_mca.h
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Mon Dec 22 08:12:33 2008 +0000
@@ -28,7 +28,10 @@
/* Bitfield of the MSR_IA32_MCG_CAP register */
#define MCG_CAP_COUNT 0x00000000000000ffULL
#define MCG_CTL_P 0x0000000000000100ULL
-/* Bits 9-63 are reserved */
+#define MCG_EXT_P (1UL<<9)
+#define MCG_EXT_CNT (16)
+#define MCG_CMCI_P (1UL<<10)
+/* Other bits are reserved */
/* Bitfield of the MSR_IA32_MCG_STATUS register */
#define MCG_STATUS_RIPV 0x0000000000000001ULL
@@ -70,3 +73,17 @@
/* reserved bits */
#define MCi_STATUS_OTHER_RESERVED2 0x0180000000000000ULL
+/*Intel Specific bitfield*/
+#define CMCI_THRESHOLD 0x2
+
+
+#define MAX_NR_BANKS 128
+
+typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS);
+DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned);
+
+/* Global variables */
+extern int mce_disabled;
+extern unsigned int nr_mce_banks;
+extern int firstbank;
+
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Dec 22 08:12:33 2008 +0000
@@ -2030,7 +2030,8 @@ static void vmx_do_extint(struct cpu_use
fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs);
-#ifdef CONFIG_X86_MCE_P4THERMAL
+ fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs);
+#ifdef CONFIG_X86_MCE_THERMAL
fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
#endif
@@ -2060,10 +2061,13 @@ static void vmx_do_extint(struct cpu_use
case ERROR_APIC_VECTOR:
smp_error_interrupt(regs);
break;
+ case CMCI_APIC_VECTOR:
+ smp_cmci_interrupt(regs);
+ break;
case PMU_APIC_VECTOR:
smp_pmu_apic_interrupt(regs);
break;
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
case THERMAL_APIC_VECTOR:
smp_thermal_interrupt(regs);
break;
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/i8259.c Mon Dec 22 08:12:33 2008 +0000
@@ -74,6 +74,7 @@ BUILD_SMP_INTERRUPT(spurious_interrupt,S
BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
BUILD_SMP_INTERRUPT(pmu_apic_interrupt,PMU_APIC_VECTOR)
BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
+BUILD_SMP_INTERRUPT(cmci_interrupt, CMCI_APIC_VECTOR)
#define IRQ(x,y) \
IRQ##x##y##_interrupt
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/arch/x86/smpboot.c Mon Dec 22 08:12:33 2008 +0000
@@ -1237,11 +1237,25 @@ remove_siblinginfo(int cpu)
}
extern void fixup_irqs(cpumask_t map);
-int __cpu_disable(void)
+
+/*
+ * Functions called when taking a CPU offline.
+ * We need to handle new features such as
+ * CMCI owner changes during CPU hotplug on the latest
+ * Intel CPU families.
+*/
+void (*cpu_down_handler)(int down_cpu) = NULL;
+void (*cpu_down_rollback_handler)(int down_cpu) = NULL;
+
+
+int __cpu_disable(int down_cpu)
{
cpumask_t map = cpu_online_map;
int cpu = smp_processor_id();
+ /* Only down_cpu needs to execute this function */
+ if (cpu != down_cpu)
+ return 0;
/*
* Perhaps use cpufreq to drop frequency, but that could go
* into generic code.
@@ -1293,10 +1307,14 @@ void __cpu_die(unsigned int cpu)
}
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
-
-static int take_cpu_down(void *unused)
-{
- return __cpu_disable();
+static int take_cpu_down(void *down_cpu)
+{
+
+ if (cpu_down_handler)
+ cpu_down_handler(*(int *)down_cpu);
+ wmb();
+
+ return __cpu_disable(*(int *)down_cpu);
}
int cpu_down(unsigned int cpu)
@@ -1322,7 +1340,7 @@ int cpu_down(unsigned int cpu)
printk("Prepare to bring CPU%d down...\n", cpu);
- err = stop_machine_run(take_cpu_down, NULL, cpu);
+ err = stop_machine_run(take_cpu_down, &cpu, cpu_online_map);
if ( err < 0 )
goto out;
@@ -1333,6 +1351,10 @@ int cpu_down(unsigned int cpu)
err = -EBUSY;
}
out:
+ /* if taking the CPU offline failed, re-check the cmci owner */
+
+ if ( err < 0 && cpu_down_rollback_handler)
+ cpu_down_rollback_handler(cpu);
spin_unlock(&cpu_add_remove_lock);
return err;
}
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/common/stop_machine.c
--- a/xen/common/stop_machine.c Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/common/stop_machine.c Mon Dec 22 08:12:33 2008 +0000
@@ -45,7 +45,7 @@ struct stopmachine_data {
enum stopmachine_state state;
atomic_t done;
- unsigned int fn_cpu;
+ cpumask_t fn_cpus;
int fn_result;
int (*fn)(void *);
void *fn_data;
@@ -63,21 +63,22 @@ static void stopmachine_set_state(enum s
cpu_relax();
}
-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpus)
{
cpumask_t allbutself;
unsigned int i, nr_cpus;
- int ret;
+ int cur_cpu, ret;
BUG_ON(!local_irq_is_enabled());
allbutself = cpu_online_map;
- cpu_clear(smp_processor_id(), allbutself);
+ cur_cpu = smp_processor_id();
+ cpu_clear(cur_cpu, allbutself);
nr_cpus = cpus_weight(allbutself);
if ( nr_cpus == 0 )
{
- BUG_ON(cpu != smp_processor_id());
+ BUG_ON(!cpu_isset(cur_cpu, cpus));
return (*fn)(data);
}
@@ -91,7 +92,8 @@ int stop_machine_run(int (*fn)(void *),
stopmachine_data.fn = fn;
stopmachine_data.fn_data = data;
stopmachine_data.nr_cpus = nr_cpus;
- stopmachine_data.fn_cpu = cpu;
+ stopmachine_data.fn_cpus = cpus;
+ stopmachine_data.fn_result = 0;
atomic_set(&stopmachine_data.done, 0);
stopmachine_data.state = STOPMACHINE_START;
@@ -105,8 +107,13 @@ int stop_machine_run(int (*fn)(void *),
local_irq_disable();
stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
- if ( cpu == smp_processor_id() )
- stopmachine_data.fn_result = (*fn)(data);
+ /* The callback will run on each cpu in the input map.
+ * If the callback fails on any CPU, stop_machine_run
+ * will return the ORed failure results.
+ */
+ if ( cpu_isset(cur_cpu, cpus) ){
+ stopmachine_data.fn_result |= (*fn)(data);
+ }
stopmachine_set_state(STOPMACHINE_INVOKE);
ret = stopmachine_data.fn_result;
@@ -121,7 +128,6 @@ static void stopmachine_softirq(void)
static void stopmachine_softirq(void)
{
enum stopmachine_state state = STOPMACHINE_START;
-
smp_mb();
while ( state != STOPMACHINE_EXIT )
@@ -136,10 +142,11 @@ static void stopmachine_softirq(void)
local_irq_disable();
break;
case STOPMACHINE_INVOKE:
- if ( stopmachine_data.fn_cpu == smp_processor_id() )
- stopmachine_data.fn_result =
+ if ( cpu_isset(smp_processor_id(), stopmachine_data.fn_cpus )) {
+ stopmachine_data.fn_result |=
stopmachine_data.fn(stopmachine_data.fn_data);
- break;
+ }
+ break;
default:
break;
}
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/apicdef.h
--- a/xen/include/asm-x86/apicdef.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/apicdef.h Mon Dec 22 08:12:33 2008 +0000
@@ -80,6 +80,8 @@
#define APIC_LVTTHMR 0x330
#define APIC_LVTPC 0x340
#define APIC_LVT0 0x350
+#define APIC_CMCI 0x2F0
+
#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3)
#define SET_APIC_TIMER_BASE(x) (((x)<<18))
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/config.h Mon Dec 22 08:12:33 2008 +0000
@@ -22,7 +22,7 @@
#define CONFIG_X86_IO_APIC 1
#define CONFIG_X86_PM_TIMER 1
#define CONFIG_HPET_TIMER 1
-#define CONFIG_X86_MCE_P4THERMAL 1
+#define CONFIG_X86_MCE_THERMAL 1
#define CONFIG_NUMA 1
#define CONFIG_DISCONTIGMEM 1
#define CONFIG_NUMA_EMU 1
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/irq.h Mon Dec 22 08:12:33 2008 +0000
@@ -33,6 +33,7 @@ fastcall void pmu_apic_interrupt(void);
fastcall void pmu_apic_interrupt(void);
fastcall void spurious_interrupt(void);
fastcall void thermal_interrupt(void);
+fastcall void cmci_interrupt(void);
void disable_8259A_irq(unsigned int irq);
void enable_8259A_irq(unsigned int irq);
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/mach-default/irq_vectors.h
--- a/xen/include/asm-x86/mach-default/irq_vectors.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h Mon Dec 22 08:12:33 2008 +0000
@@ -10,13 +10,13 @@
#define THERMAL_APIC_VECTOR 0xfa
#define LOCAL_TIMER_VECTOR 0xf9
#define PMU_APIC_VECTOR 0xf8
-
+#define CMCI_APIC_VECTOR 0xf7
/*
* High-priority dynamically-allocated vectors. For interrupts that
* must be higher priority than any guest-bound interrupt.
*/
#define FIRST_HIPRIORITY_VECTOR 0xf0
-#define LAST_HIPRIORITY_VECTOR 0xf7
+#define LAST_HIPRIORITY_VECTOR 0xf6
/* Legacy PIC uses vectors 0xe0-0xef. */
#define FIRST_LEGACY_VECTOR 0xe0
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/msr-index.h Mon Dec 22 08:12:33 2008 +0000
@@ -92,8 +92,10 @@
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403
-
-#define MSR_IA32_MC1_CTL 0x00000404
+#define MSR_IA32_MC0_CTL2 0x00000280
+#define CMCI_EN (1UL<<30)
+#define CMCI_THRESHOLD_MASK 0x7FFF
+
#define MSR_IA32_MC1_STATUS 0x00000405
#define MSR_IA32_MC1_ADDR 0x00000406
#define MSR_IA32_MC1_MISC 0x00000407
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/asm-x86/smp.h Mon Dec 22 08:12:33 2008 +0000
@@ -101,7 +101,7 @@ static __inline int logical_smp_processo
#endif
-extern int __cpu_disable(void);
+extern int __cpu_disable(int down_cpu);
extern void __cpu_die(unsigned int cpu);
#endif /* !__ASSEMBLY__ */
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/public/arch-x86/xen-mca.h
--- a/xen/include/public/arch-x86/xen-mca.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/public/arch-x86/xen-mca.h Mon Dec 22 08:12:33 2008 +0000
@@ -106,7 +106,10 @@ struct mcinfo_common {
#define MC_FLAG_CORRECTABLE (1 << 0)
#define MC_FLAG_UNCORRECTABLE (1 << 1)
-
+#define MC_FLAG_RECOVERABLE (1 << 2)
+#define MC_FLAG_POLLED (1 << 3)
+#define MC_FLAG_RESET (1 << 4)
+#define MC_FLAG_CMCI (1 << 5)
/* contains global x86 mc information */
struct mcinfo_global {
struct mcinfo_common common;
@@ -115,6 +118,7 @@ struct mcinfo_global {
uint16_t mc_domid;
uint32_t mc_socketid; /* physical socket of the physical core */
uint16_t mc_coreid; /* physical impacted core */
+ uint8_t mc_apicid;
uint16_t mc_core_threadid; /* core thread of physical core */
uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
uint64_t mc_gstatus; /* global status */
@@ -132,6 +136,8 @@ struct mcinfo_bank {
uint64_t mc_addr; /* bank address, only valid
* if addr bit is set in mc_status */
uint64_t mc_misc;
+ uint64_t mc_ctrl2;
+ uint64_t mc_tsc;
};
@@ -150,7 +156,12 @@ struct mcinfo_extended {
* multiple times. */
uint32_t mc_msrs; /* Number of msr with valid values. */
- struct mcinfo_msr mc_msr[5];
+ /*
+ * Currently the Intel extended MSRs (32/64) include all GP registers
+ * and E(R)DI, E(R)BP, E(R)SP, E(R)FLAGS, E(R)IP, E(R)MISC; only 10
+ * of them might be useful. So expand this array to 10.
+ */
+ struct mcinfo_msr mc_msr[10];
};
#define MCINFO_HYPERCALLSIZE 1024
diff -r 2dffa6ceb0af -r 4d5203f95498 xen/include/xen/stop_machine.h
--- a/xen/include/xen/stop_machine.h Fri Dec 19 14:56:36 2008 +0000
+++ b/xen/include/xen/stop_machine.h Mon Dec 22 08:12:33 2008 +0000
@@ -5,7 +5,7 @@
* stop_machine_run: freeze the machine on all CPUs and run this function
* @fn: the function to run
* @data: the data ptr for the @fn()
- * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS).
+ * @cpus: cpus to run @fn() on.
*
* Description: This causes every other cpu to enter a safe point, with
* each of which disables interrupts, and finally interrupts are disabled
@@ -14,6 +14,6 @@
*
* This can be thought of as a very heavy write lock, equivalent to
* grabbing every spinlock in the kernel. */
-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
+int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpu);
#endif /* __XEN_STOP_MACHINE_H__ */
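
On the stop_machine_run() interface change above (the single fn_cpu replaced by a
cpumask_t, with the callback's return values ORed together so a failure on any CPU
is reported to the caller), the self-contained sketch below illustrates that result
aggregation; stop_machine_run_sim() and fn() are hypothetical stand-ins rather than
the Xen implementation.

    #include <stdio.h>

    #define NR_CPUS 4

    /* Simulated per-CPU callback: one chosen CPU reports failure. */
    static int fn(int cpu, void *data)
    {
        int failing_cpu = *(int *)data;
        return (cpu == failing_cpu) ? -1 : 0;
    }

    /* Run the callback on every CPU in the mask and OR the results,
     * mirroring the fn_result |= ... accumulation in the patch. */
    static int stop_machine_run_sim(unsigned int cpus_mask, void *data)
    {
        int cpu, result = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
            if (cpus_mask & (1u << cpu))
                result |= fn(cpu, data);
        return result;
    }

    int main(void)
    {
        int failing_cpu = 2;

        printf("all CPUs, CPU2 fails -> %d\n",
               stop_machine_run_sim(0xf, &failing_cpu));
        printf("CPU0 only            -> %d\n",
               stop_machine_run_sim(0x1, &failing_cpu));
        return 0;
    }
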