WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] Clean-up mcheck_init handler

To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] Clean-up mcheck_init handler
From: "Jiang, Yunhong" <yunhong.jiang@xxxxxxxxx>
Date: Mon, 7 Jun 2010 16:43:12 +0800
Accept-language: en-US
Acceptlanguage: en-US
Cc: Egger <Christoph.Egger@xxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Mon, 07 Jun 2010 01:45:51 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AcsGHXZ7SMCU4rfRQEukfBUMZIq4Yw==
Thread-topic: [PATCH] Clean-up mcheck_init handler
This patch cleans up the mcheck_init handler.

Firstly and most importantly, the maximum number of MCA banks is hard-coded as 
MAX_NR_BANKS, which is 30. This is not architecturally correct. This patch removes 
that definition, replacing cpu_banks_t with struct mca_banks, and provides some 
basic functions, like set/clear/test/alloc/free, for struct mca_banks.

Secondly, move the broadcast_check code into the Intel-specific file, since only 
Intel platforms support broadcast now.

Thirdly, the X86_FEATURE_MCA check and the CR4_MCE enabling are done in every 
vendor-specific callback; that's redundant, so move them to mcheck_init. Also, we 
should enable CR4_MCE only at the end of mcheck_init, to close the small 
window between enabling CR4 and completing the MCA setup.

We also move the vMCE-specific code to vmce.c as vmce_init, to keep the code clean.

 arch/x86/cpu/mcheck/amd_k8.c    |    7
 arch/x86/cpu/mcheck/k7.c        |   12 -
 arch/x86/cpu/mcheck/mce.c       |  293 ++++++++++++++++++++--------------------
 arch/x86/cpu/mcheck/mce.h       |   20 +-
 arch/x86/cpu/mcheck/mce_intel.c |  126 +++++++++++------
 arch/x86/cpu/mcheck/non-fatal.c |    5
 arch/x86/cpu/mcheck/vmce.c      |   53 +++++++
 arch/x86/cpu/mcheck/x86_mca.h   |   33 ++++
 include/asm-x86/mce.h           |    2
 9 files changed, 333 insertions(+), 218 deletions(-)

Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>

diff -r 45321a57873a xen/arch/x86/cpu/mcheck/amd_k8.c
--- a/xen/arch/x86/cpu/mcheck/amd_k8.c  Mon Jun 07 16:41:39 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/amd_k8.c  Mon Jun 07 16:41:43 2010 +0800
@@ -81,13 +81,8 @@ enum mcheck_type amd_k8_mcheck_init(stru
        uint32_t i;
        enum mcequirk_amd_flags quirkflag;

-       /* Check for PPro style MCA; our caller has confirmed MCE support. */
-       if (!cpu_has(c, X86_FEATURE_MCA))
-               return mcheck_none;
-
        quirkflag = mcequirk_lookup_amd_quirkdata(c);

-       mce_cap_init();
        x86_mce_vector_register(k8_machine_check);

        for (i = 0; i < nr_mce_banks; i++) {
@@ -101,7 +96,5 @@ enum mcheck_type amd_k8_mcheck_init(stru
                }
        }

-       set_in_cr4(X86_CR4_MCE);
-
        return mcheck_amd_k8;
 }
diff -r 45321a57873a xen/arch/x86/cpu/mcheck/k7.c
--- a/xen/arch/x86/cpu/mcheck/k7.c      Mon Jun 07 16:41:39 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/k7.c      Mon Jun 07 16:41:43 2010 +0800
@@ -70,19 +70,9 @@ static fastcall void k7_machine_check(st
 /* AMD K7 machine check */
 enum mcheck_type amd_k7_mcheck_init(struct cpuinfo_x86 *c)
 {
-       u32 l, h;
        int i;

-       /* Check for PPro style MCA; our caller has confirmed MCE support. */
-       if (!cpu_has(c, X86_FEATURE_MCA))
-               return mcheck_none;
-
        x86_mce_vector_register(k7_machine_check);
-
-       rdmsr (MSR_IA32_MCG_CAP, l, h);
-       if (l & (1<<8)) /* Control register present ? */
-               wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
-       nr_mce_banks = l & 0xff;

        /* Clear status for MC index 0 separately, we don't touch CTL,
         * as some Athlons cause spurious MCEs when its enabled. */
@@ -92,7 +82,5 @@ enum mcheck_type amd_k7_mcheck_init(stru
                wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
        }

-       set_in_cr4 (X86_CR4_MCE);
-
        return mcheck_amd_k7;
 }
diff -r 45321a57873a xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Mon Jun 07 16:41:39 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Mon Jun 07 16:43:46 2010 +0800
@@ -25,21 +25,15 @@

 int mce_disabled;
 invbool_param("mce", mce_disabled);
-static int mce_force_broadcast;
-boolean_param("mce_fb", mce_force_broadcast);
 int is_mc_panic;
 unsigned int nr_mce_banks;

 int mce_broadcast = 0;
-uint64_t g_mcg_cap;
-
-/* Real value in physical CTL MSR */
-uint64_t h_mcg_ctl = 0UL;
-uint64_t *h_mci_ctrl;
 int firstbank;

 static void intpose_init(void);
 static void mcinfo_clear(struct mc_info *);
+struct mca_banks *mca_allbanks;

 #define        SEG_PL(segsel)                  ((segsel) & 0x3)
 #define _MC_MSRINJ_F_REQ_HWCR_WREN     (1 << 16)
@@ -54,8 +48,6 @@ static int x86_mcerr(const char *msg, in
 #else
 #define x86_mcerr(msg, err) (err)
 #endif
-
-cpu_banks_t mca_allbanks;

 int mce_verbosity;
 static void __init mce_set_verbosity(char *str)
@@ -113,6 +105,36 @@ void mce_recoverable_register(mce_recove
     mc_recoverable_scan = cbfunc;
 }

+struct mca_banks *mcabanks_alloc(void)
+{
+    struct mca_banks *mb;
+
+    mb = xmalloc(struct mca_banks);
+    if (!mb)
+        return NULL;
+
+    mb->bank_map = xmalloc_array(unsigned long,
+            BITS_TO_LONGS(nr_mce_banks));
+    if (!mb->bank_map)
+    {
+        xfree(mb);
+        return NULL;
+    }
+
+    mb->num = nr_mce_banks;
+    memset(mb->bank_map, 0, sizeof(long) * BITS_TO_LONGS(nr_mce_banks));
+
+    return mb;
+}
+
+void mcabanks_free(struct mca_banks *banks)
+{
+    if (banks == NULL)
+        return;
+    if (banks->bank_map)
+        xfree(banks->bank_map);
+    xfree(banks);
+}
 /* Judging whether to Clear Machine Check error bank callback handler
  * According to Intel latest MCA OS Recovery Writer's Guide,
  * whether the error MCA bank needs to be cleared is decided by the mca_source
@@ -218,8 +240,8 @@ static int mca_init_global(uint32_t flag
  * For Intel latest CPU, whether to clear the error bank status needs to
  * be judged by the callback function defined above.
  */
-mctelem_cookie_t mcheck_mca_logout(enum mca_source who, cpu_banks_t bankmask,
-    struct mca_summary *sp, cpu_banks_t* clear_bank)
+mctelem_cookie_t mcheck_mca_logout(enum mca_source who, struct mca_banks 
*bankmask,
+    struct mca_summary *sp, struct mca_banks* clear_bank)
 {
        uint64_t gstatus, status;
        struct mcinfo_global *mig = NULL;       /* on stack */
@@ -263,7 +285,7 @@ mctelem_cookie_t mcheck_mca_logout(enum
                struct mcinfo_bank *mib;                /* on stack */

                /* Skip bank if corresponding bit in bankmask is clear */
-               if (!test_bit(i, bankmask))
+               if (!mcabanks_test(i, bankmask))
                        continue;

                mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
@@ -318,7 +340,7 @@ mctelem_cookie_t mcheck_mca_logout(enum
                        /* Clear status */
                        mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
                else if ( who == MCA_MCE_SCAN && need_clear)
-                       set_bit(i, clear_bank);
+                       mcabanks_set(i, clear_bank);

                wmb();
        }
@@ -352,7 +374,7 @@ mctelem_cookie_t mcheck_mca_logout(enum

 /* Shared #MC handler. */
 void mcheck_cmn_handler(struct cpu_user_regs *regs, long error_code,
-    cpu_banks_t bankmask)
+    struct mca_banks *bankmask)
 {
        int xen_state_lost, dom0_state_lost, domU_state_lost;
        struct vcpu *v = current;
@@ -568,13 +590,13 @@ cmn_handler_done:
        }
 }

-void mcheck_mca_clearbanks(cpu_banks_t bankmask)
+void mcheck_mca_clearbanks(struct mca_banks *bankmask)
 {
        int i;
        uint64_t status;

        for (i = 0; i < 32 && i < nr_mce_banks; i++) {
-               if (!test_bit(i, bankmask))
+               if (!mcabanks_test(i, bankmask))
                        continue;
                mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
                if (!(status & MCi_STATUS_VAL))
@@ -613,21 +635,6 @@ int mce_available(struct cpuinfo_x86 *c)
        return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
 }

-static int mce_is_broadcast(struct cpuinfo_x86 *c)
-{
-    if (mce_force_broadcast)
-        return 1;
-
-    /* According to Intel SDM Dec, 2009, 15.10.4.1, For processors with
-     * DisplayFamily_DisplayModel encoding of 06H_EH and above,
-     * a MCA signal is broadcast to all logical processors in the system
-     */
-    if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 &&
-        c->x86_model >= 0xe)
-            return 1;
-    return 0;
-}
-
 /*
  * Check if bank 0 is usable for MCE. It isn't for AMD K7,
  * and Intel P6 family before model 0x1a.
@@ -645,77 +652,9 @@ int mce_firstbank(struct cpuinfo_x86 *c)
        return 0;
 }

-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
-{
-       int i, broadcast;
-       enum mcheck_type inited = mcheck_none;
+int show_mca_info(int inited, struct cpuinfo_x86 *c)
+{
        static enum mcheck_type g_type = mcheck_unset;
-    static int broadcast_check;
-
-       if (mce_disabled == 1) {
-               dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n");
-               return;
-       }
-
-    broadcast = mce_is_broadcast(c);
-    if (broadcast_check && (broadcast != mce_broadcast) )
-            dprintk(XENLOG_INFO,
-                "CPUs have mixed broadcast support"
-                "may cause undetermined result!!!\n");
-
-    broadcast_check = 1;
-    if (broadcast)
-        mce_broadcast = broadcast;
-
-       for (i = 0; i < MAX_NR_BANKS; i++)
-               set_bit(i,mca_allbanks);
-
-       /* Enforce at least MCE support in CPUID information.  Individual
-        * families may also need to enforce a check for MCA support. */
-       if (!cpu_has(c, X86_FEATURE_MCE)) {
-               printk(XENLOG_INFO "CPU%i: No machine check support 
available\n",
-                       smp_processor_id());
-               return;
-       }
-
-       intpose_init();
-       mctelem_init(sizeof (struct mc_info));
-
-       switch (c->x86_vendor) {
-       case X86_VENDOR_AMD:
-               inited = amd_mcheck_init(c);
-               break;
-
-       case X86_VENDOR_INTEL:
-               switch (c->x86) {
-               case 6:
-               case 15:
-                       inited = intel_mcheck_init(c);
-                       break;
-               }
-               break;
-
-       default:
-               break;
-       }
-
-    if ( !h_mci_ctrl )
-    {
-        h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks);
-        if (!h_mci_ctrl)
-        {
-            dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n");
-            return;
-        }
-        /* Don't care banks before firstbank */
-        memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl));
-        for (i = firstbank; i < nr_mce_banks; i++)
-            rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]);
-    }
-    if (g_mcg_cap & MCG_CTL_P)
-        rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl);
-    set_poll_bankmask(c);

        if (inited != g_type) {
                char prefix[20];
@@ -744,32 +683,130 @@ void mcheck_init(struct cpuinfo_x86 *c)
                        printk("%sNo machine check initialization\n", prefix);
                        break;
                }
-
-               g_type = inited;
-       }
-}
-
-u64 mce_cap_init(void)
+        g_type = inited;
+       }
+
+    return 0;
+}
+
+int set_poll_bankmask(struct cpuinfo_x86 *c)
+{
+    int cpu = smp_processor_id();
+    struct mca_banks *mb;
+
+    mb = mcabanks_alloc();
+    if (!mb)
+        return -ENOMEM;
+
+    if (cmci_support && !mce_disabled) {
+        mb->num = per_cpu(no_cmci_banks, cpu)->num;
+        bitmap_copy(mb->bank_map, per_cpu(no_cmci_banks, cpu)->bank_map,
+            nr_mce_banks);
+    }
+    else {
+        bitmap_copy(mb->bank_map, mca_allbanks->bank_map, nr_mce_banks);
+        if (mce_firstbank(c))
+            mcabanks_clear(0, mb);
+    }
+    per_cpu(poll_bankmask, cpu) = mb;
+
+    return 0;
+}
+
+/* The perbank ctl/status init is platform specific because of AMD's quirk */
+int mca_cap_init(void)
 {
     u32 l, h;
     u64 value;

     rdmsr(MSR_IA32_MCG_CAP, l, h);
     value = ((u64)h << 32) | l;
-    /* For Guest vMCE usage */
-    g_mcg_cap = value & ~MCG_CMCI_P;

     if (l & MCG_CTL_P) /* Control register present ? */
         wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);

+    if (nr_mce_banks &&  (l & MCG_CAP_COUNT) != nr_mce_banks)
+    {
+        dprintk(XENLOG_WARNING, "Different bank number on cpu %x\n",
+                smp_processor_id());
+        return -ENODEV;
+    }
     nr_mce_banks = l & MCG_CAP_COUNT;
-    if ( nr_mce_banks > MAX_NR_BANKS )
+
+    /* mcabanks_alloc depends on nr_mcebanks */
+    if (!mca_allbanks)
     {
-        printk(KERN_WARNING "MCE: exceed max mce banks\n");
-        g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS;
+        int i;
+
+        mca_allbanks = mcabanks_alloc();
+        for ( i = 0; i < nr_mce_banks; i++)
+            mcabanks_set(i, mca_allbanks);
     }

-    return value;
+    return mca_allbanks ? 0:-ENOMEM;
+}
+
+/* This has to be run for each processor */
+void mcheck_init(struct cpuinfo_x86 *c)
+{
+       enum mcheck_type inited = mcheck_none;
+
+       if (mce_disabled == 1) {
+               dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n");
+               return;
+       }
+
+       if (!mce_available(c))
+       {
+               printk(XENLOG_INFO "CPU%i: No machine check support 
available\n",
+                 smp_processor_id());
+               return;
+       }
+
+       /*Hardware Enable */
+       if (mca_cap_init())
+               return;
+
+       switch (c->x86_vendor) {
+       case X86_VENDOR_AMD:
+               inited = amd_mcheck_init(c);
+               break;
+
+       case X86_VENDOR_INTEL:
+               switch (c->x86) {
+               case 6:
+               case 15:
+                       inited = intel_mcheck_init(c);
+                       break;
+               }
+               break;
+
+       default:
+               break;
+       }
+
+       show_mca_info(inited, c);
+       if (inited == mcheck_none || inited == mcheck_unset)
+               goto out;
+
+       intpose_init();
+
+       mctelem_init(sizeof(struct mc_info));
+
+       vmce_init(c);
+
+    /* Turn on MCE now */
+       set_in_cr4(X86_CR4_MCE);
+
+       set_poll_bankmask(c);
+
+       return;
+out:
+       if (smp_processor_id() == 0)
+       {
+               mcabanks_free(mca_allbanks);
+               mca_allbanks = NULL;
+       }
 }

 static void mcinfo_clear(struct mc_info *mi)
@@ -1040,23 +1077,6 @@ void intpose_inval(unsigned int cpu_nr,
     (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \
     ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */

-int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d)
-{
-    int bank_nr;
-
-    if ( !bank || !d || !h_mci_ctrl )
-        return 1;
-
-    /* Will MCE happen in host if If host mcg_ctl is 0? */
-    if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl )
-        return 1;
-
-    bank_nr = bank->mc_bank;
-    if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
-        return 1;
-    return 0;
-}
-
 static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
 {
        struct cpuinfo_x86 *c;
@@ -1481,19 +1501,6 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
        return ret;
 }

-void set_poll_bankmask(struct cpuinfo_x86 *c)
-{
-
-    if (cmci_support && !mce_disabled) {
-        memcpy(&(__get_cpu_var(poll_bankmask)),
-                &(__get_cpu_var(no_cmci_banks)), sizeof(cpu_banks_t));
-    }
-    else {
-        memcpy(&(get_cpu_var(poll_bankmask)), &mca_allbanks, 
sizeof(cpu_banks_t));
-        if (mce_firstbank(c))
-            clear_bit(0, get_cpu_var(poll_bankmask));
-    }
-}
 void mc_panic(char *s)
 {
     is_mc_panic = 1;
diff -r 45321a57873a xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Mon Jun 07 16:41:39 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Mon Jun 07 16:41:43 2010 +0800
@@ -72,7 +72,7 @@ extern void x86_mce_vector_register(x86_

 /* Common generic MCE handler that implementations may nominate
  * via x86_mce_vector_register. */
-extern void mcheck_cmn_handler(struct cpu_user_regs *, long, cpu_banks_t);
+extern void mcheck_cmn_handler(struct cpu_user_regs *, long, struct mca_banks 
*);

 /* Register a handler for judging whether mce is recoverable. */
 typedef int (*mce_recoverable_t)(u64 status);
@@ -126,18 +126,17 @@ struct mca_summary {
        uint32_t        recoverable;
 };

-extern cpu_banks_t mca_allbanks;
-void set_poll_bankmask(struct cpuinfo_x86 *c);
-DECLARE_PER_CPU(cpu_banks_t, poll_bankmask);
-DECLARE_PER_CPU(cpu_banks_t, no_cmci_banks);
+DECLARE_PER_CPU(struct mca_banks *, poll_bankmask);
+DECLARE_PER_CPU(struct mca_banks *, no_cmci_banks);
+
 extern int cmci_support;
 extern int ser_support;
 extern int is_mc_panic;
 extern int mce_broadcast;
-extern void mcheck_mca_clearbanks(cpu_banks_t);
+extern void mcheck_mca_clearbanks(struct mca_banks *);

-extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, cpu_banks_t,
-    struct mca_summary *, cpu_banks_t*);
+extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, struct mca_banks *,
+    struct mca_summary *, struct mca_banks *);

 /* Register a callback to be made during bank telemetry logout.
  * This callback is only available to those machine check handlers
@@ -170,10 +169,7 @@ int inject_vmce(struct domain *d);
 int inject_vmce(struct domain *d);
 int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct 
mcinfo_global *global);

-extern uint64_t g_mcg_cap;
-/* Real value in physical CTL MSR */
-extern uint64_t h_mcg_ctl;
-extern uint64_t *h_mci_ctrl;
+extern int vmce_init(struct cpuinfo_x86 *c);

 extern unsigned int nr_mce_banks;

diff -r 45321a57873a xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Mon Jun 07 16:41:39 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Mon Jun 07 16:41:43 2010 +0800
@@ -16,10 +16,13 @@
 #include "mce.h"
 #include "x86_mca.h"

-DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
-DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
+DEFINE_PER_CPU(struct mca_banks *, mce_banks_owned);
+DEFINE_PER_CPU(struct mca_banks *, no_cmci_banks);
+DEFINE_PER_CPU(struct mca_banks *, mce_clear_banks);
 int cmci_support = 0;
 int ser_support = 0;
+static int mce_force_broadcast;
+boolean_param("mce_fb", mce_force_broadcast);

 static int nr_intel_ext_msrs = 0;

@@ -532,12 +535,14 @@ static void intel_machine_check(struct c
     uint64_t gstatus;
     mctelem_cookie_t mctc = NULL;
     struct mca_summary bs;
-    cpu_banks_t clear_bank;
+    struct mca_banks *clear_bank;

     mce_spin_lock(&mce_logout_lock);

-    memset( &clear_bank, 0x0, sizeof(cpu_banks_t));
-    mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, &clear_bank);
+    clear_bank = __get_cpu_var(mce_clear_banks);
+    memset( clear_bank->bank_map, 0x0,
+        sizeof(long) * BITS_TO_LONGS(clear_bank->num));
+    mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, clear_bank);

     if (bs.errcnt) {
         /* dump MCE error */
@@ -703,7 +708,7 @@ static int do_cmci_discover(int i)
     rdmsrl(msr, val);
     /* Some other CPU already owns this bank. */
     if (val & CMCI_EN) {
-        clear_bit(i, __get_cpu_var(mce_banks_owned));
+        mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
         goto out;
     }

@@ -713,12 +718,12 @@ static int do_cmci_discover(int i)

     if (!(val & CMCI_EN)) {
         /* This bank does not support CMCI. Polling timer has to handle it. */
-        set_bit(i, __get_cpu_var(no_cmci_banks));
+        mcabanks_set(i, __get_cpu_var(no_cmci_banks));
         return 0;
     }
-    set_bit(i, __get_cpu_var(mce_banks_owned));
+    mcabanks_set(i, __get_cpu_var(mce_banks_owned));
 out:
-    clear_bit(i, __get_cpu_var(no_cmci_banks));
+    mcabanks_clear(i, __get_cpu_var(no_cmci_banks));
     return 1;
 }

@@ -734,7 +739,7 @@ static void cmci_discover(void)
     spin_lock_irqsave(&cmci_discover_lock, flags);

     for (i = 0; i < nr_mce_banks; i++)
-        if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+        if (!mcabanks_test(i, __get_cpu_var(mce_banks_owned)))
             do_cmci_discover(i);

     spin_unlock_irqrestore(&cmci_discover_lock, flags);
@@ -761,8 +766,8 @@ static void cmci_discover(void)

     mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
            smp_processor_id(),
-           *((unsigned long *)__get_cpu_var(mce_banks_owned)),
-           *((unsigned long *)__get_cpu_var(no_cmci_banks)));
+           *((unsigned long *)__get_cpu_var(mce_banks_owned)->bank_map),
+           *((unsigned long *)__get_cpu_var(no_cmci_banks)->bank_map));
 }

 /*
@@ -808,12 +813,12 @@ static void clear_cmci(void)
     for (i = 0; i < nr_mce_banks; i++) {
         unsigned msr = MSR_IA32_MC0_CTL2 + i;
         u64 val;
-        if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+        if (!mcabanks_test(i, __get_cpu_var(mce_banks_owned)))
             continue;
         rdmsrl(msr, val);
         if (val & (CMCI_EN|CMCI_THRESHOLD_MASK))
             wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
-        clear_bit(i, __get_cpu_var(mce_banks_owned));
+        mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
     }
 }

@@ -882,16 +887,44 @@ fastcall void smp_cmci_interrupt(struct

 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
-
 #ifdef CONFIG_X86_MCE_THERMAL
     intel_init_thermal(c);
 #endif
     intel_init_cmci(c);
 }

-static void _mce_cap_init(struct cpuinfo_x86 *c)
-{
-    u32 l = mce_cap_init();
+static int mce_is_broadcast(struct cpuinfo_x86 *c)
+{
+    if (mce_force_broadcast)
+        return 1;
+
+    /* According to Intel SDM Dec, 2009, 15.10.4.1, For processors with
+     * DisplayFamily_DisplayModel encoding of 06H_EH and above,
+     * a MCA signal is broadcast to all logical processors in the system
+     */
+    if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 &&
+        c->x86_model >= 0xe)
+            return 1;
+    return 0;
+}
+
+static void intel_mca_cap_init(struct cpuinfo_x86 *c)
+{
+    static int broadcast_check;
+    int broadcast;
+    u32 l, h;
+
+    broadcast = mce_is_broadcast(c);
+    if (broadcast_check && (broadcast != mce_broadcast) )
+            dprintk(XENLOG_INFO,
+                "CPUs have mixed broadcast support"
+                "may cause undetermined result!!!\n");
+
+    broadcast_check = 1;
+    if (broadcast)
+        mce_broadcast = broadcast;
+
+    rdmsr(MSR_IA32_MCG_CAP, l, h);

     if ((l & MCG_CMCI_P) && cpu_has_apic)
         cmci_support = 1;
@@ -916,8 +949,6 @@ static void mce_init(void)
     mctelem_cookie_t mctc;
     struct mca_summary bs;

-    clear_in_cr4(X86_CR4_MCE);
-
     mce_barrier_init(&mce_inside_bar);
     mce_barrier_init(&mce_severity_bar);
     mce_barrier_init(&mce_trap_bar);
@@ -933,8 +964,6 @@ static void mce_init(void)
         x86_mcinfo_dump(mctelem_dataptr(mctc));
         mctelem_commit(mctc);
     }
-
-    set_in_cr4(X86_CR4_MCE);

     for (i = firstbank; i < nr_mce_banks; i++)
     {
@@ -953,10 +982,35 @@ static void mce_init(void)
         wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
 }

+static int init_mca_banks(void)
+{
+    struct mca_banks *mb1, *mb2, * mb3;
+
+    mb1 = mcabanks_alloc();
+    mb2 = mcabanks_alloc();
+    mb3 = mcabanks_alloc();
+    if (!mb1 || !mb2 || !mb3)
+        goto out;
+
+    __get_cpu_var(mce_clear_banks) = mb1;
+    __get_cpu_var(no_cmci_banks) = mb2;
+    __get_cpu_var(mce_banks_owned) = mb3;
+
+    return 0;
+out:
+    mcabanks_free(mb1);
+    mcabanks_free(mb2);
+    mcabanks_free(mb3);
+    return -ENOMEM;
+}
+
 /* p4/p6 family have similar MCA initialization process */
 enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
 {
-    _mce_cap_init(c);
+    if (init_mca_banks())
+        return mcheck_none;
+
+    intel_mca_cap_init(c);

     /* machine check is available */
     x86_mce_vector_register(intel_machine_check);
@@ -974,17 +1028,14 @@ enum mcheck_type intel_mcheck_init(struc

 int intel_mce_wrmsr(uint32_t msr, uint64_t val)
 {
-    int ret = 1;
-
-    switch ( msr )
+    int ret = 0;
+
+    if (msr > MSR_IA32_MC0_CTL2 &&
+        msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks - 1))
     {
-    case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
         mce_printk(MCE_QUIET, "We have disabled CMCI capability, "
                  "Guest should not write this MSR!\n");
-        break;
-    default:
-        ret = 0;
-        break;
+         ret = 1;
     }

     return ret;
@@ -992,17 +1043,14 @@ int intel_mce_wrmsr(uint32_t msr, uint64

 int intel_mce_rdmsr(uint32_t msr, uint64_t *val)
 {
-    int ret = 1;
-
-    switch ( msr )
+    int ret = 0;
+
+    if (msr > MSR_IA32_MC0_CTL2 &&
+        msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks - 1))
     {
-    case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
         mce_printk(MCE_QUIET, "We have disabled CMCI capability, "
                  "Guest should not read this MSR!\n");
-        break;
-    default:
-        ret = 0;
-        break;
+        ret = 1;
     }

     return ret;
diff -r 45321a57873a xen/arch/x86/cpu/mcheck/non-fatal.c
--- a/xen/arch/x86/cpu/mcheck/non-fatal.c       Mon Jun 07 16:41:39 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c       Mon Jun 07 16:41:43 2010 +0800
@@ -22,7 +22,7 @@

 #include "mce.h"

-DEFINE_PER_CPU(cpu_banks_t, poll_bankmask);
+DEFINE_PER_CPU(struct mca_banks *, poll_bankmask);
 static struct timer mce_timer;

 #define MCE_PERIOD MILLISECS(8000)
@@ -94,6 +94,9 @@ static int __init init_nonfatal_mce_chec
        if (mce_disabled || !mce_available(c))
                return -ENODEV;

+    if ( __get_cpu_var(poll_bankmask) == NULL )
+        return -EINVAL;
+
        /*
         * Check for non-fatal errors every MCE_RATE s
         */
diff -r 45321a57873a xen/arch/x86/cpu/mcheck/vmce.c
--- a/xen/arch/x86/cpu/mcheck/vmce.c    Mon Jun 07 16:41:39 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/vmce.c    Mon Jun 07 16:41:43 2010 +0800
@@ -20,6 +20,12 @@

 #define dom_vmce(x)   ((x)->arch.vmca_msrs)

+uint64_t g_mcg_cap;
+
+/* Real value in physical CTL MSR */
+uint64_t h_mcg_ctl = 0UL;
+uint64_t *h_mci_ctrl;
+
 int vmce_init_msr(struct domain *d)
 {
     dom_vmce(d) = xmalloc(struct domain_mca_msrs);
@@ -431,3 +437,50 @@ int vmce_domain_inject(
     return inject_vmce(d);
 }

+int vmce_init(struct cpuinfo_x86 *c)
+{
+    u32 l, h;
+    u64 value;
+    int i;
+
+    if ( !h_mci_ctrl )
+    {
+        h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks);
+        if (!h_mci_ctrl)
+        {
+            dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n");
+            return -ENOMEM;
+        }
+        /* Don't care banks before firstbank */
+        memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl));
+        for (i = firstbank; i < nr_mce_banks; i++)
+            rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]);
+    }
+
+    if (g_mcg_cap & MCG_CTL_P)
+        rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl);
+
+    rdmsr(MSR_IA32_MCG_CAP, l, h);
+    value = ((u64)h << 32) | l;
+    /* For Guest vMCE usage */
+    g_mcg_cap = value & ~MCG_CMCI_P;
+
+    return 0;
+}
+
+int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d)
+{
+    int bank_nr;
+
+    if ( !bank || !d || !h_mci_ctrl )
+        return 1;
+
+    /* Will MCE happen in host if If host mcg_ctl is 0? */
+    if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl )
+        return 1;
+
+    bank_nr = bank->mc_bank;
+    if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
+        return 1;
+    return 0;
+}
diff -r 45321a57873a xen/arch/x86/cpu/mcheck/x86_mca.h
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h Mon Jun 07 16:41:39 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Mon Jun 07 16:41:43 2010 +0800
@@ -89,8 +89,37 @@
 #define CMCI_THRESHOLD                 0x2

 #include <asm/domain.h>
-typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS);
-DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned);
+
+struct mca_banks
+{
+    int num;
+    unsigned long *bank_map;
+};
+
+static inline void mcabanks_clear(int bit, struct mca_banks *banks)    \
+{
+    if (!banks || !banks->bank_map || bit >= banks->num)
+        return ;
+    clear_bit(bit, banks->bank_map);
+}
+
+static inline void mcabanks_set(int bit, struct mca_banks* banks)
+{
+    if (!banks || !banks->bank_map || bit >= banks->num)
+        return;
+    set_bit(bit, banks->bank_map);
+}
+
+static inline int mcabanks_test(int bit, struct mca_banks* banks)
+{
+    if (!banks || !banks->bank_map || bit >= banks->num)
+        return 0;
+    return test_bit(bit, banks->bank_map);
+}
+
+struct mca_banks *mcabanks_alloc(void);
+void mcabanks_free(struct mca_banks *banks);
+extern struct mca_banks *mca_allbanks;

 /* Below interfaces are defined for MCA internal processing:
  * a. pre_handler will be called early in MCA ISR context, mainly for early
diff -r 45321a57873a xen/include/asm-x86/mce.h
--- a/xen/include/asm-x86/mce.h Mon Jun 07 16:41:39 2010 +0800
+++ b/xen/include/asm-x86/mce.h Mon Jun 07 16:41:43 2010 +0800
@@ -2,8 +2,6 @@
 #include <public/arch-x86/xen-mca.h>
 #ifndef _XEN_X86_MCE_H
 #define _XEN_X86_MCE_H
-/* Define for GUEST MCA handling */
-#define MAX_NR_BANKS 30

 /* This entry is for recording bank nodes for the impacted domain,
  * put into impact_header list. */


Attachment: nr_bank.patch
Description: nr_bank.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>