WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 6] MCA bank clear

To: Keir Fraser <keir.xen@xxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 6] MCA bank clear
From: "Liu, Jinsong" <jinsong.liu@xxxxxxxxx>
Date: Sun, 8 May 2011 04:32:32 +0800
Accept-language: en-US
Acceptlanguage: en-US
Cc: "Jiang, Yunhong" <yunhong.jiang@xxxxxxxxx>, "Li, Xin" <xin.li@xxxxxxxxx>
Delivery-date: Sat, 07 May 2011 13:33:36 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AcwM9eR8s/56D8JmQNGSHAjnrcolqg==
Thread-topic: [PATCH 6] MCA bank clear
MCA bank clear

1). Add explicit detection of fatal errors (UC=1 and PCC=1), since in that case
the state of the S/AR/OVER bits is undefined and the bank may be cleared by
mistake. Returning 0 for a fatal error leaves the sticky MSR bank uncleared, so
it has a chance to be handled by polling after reboot;
2). Fix a small bug in the SRAR case: the bank needs clearing only when the
OVER bit is 0; with OVER=1 the system would already have been reset;
3). When clearing a bank, also clear MCi_ADDR and MCi_MISC if they are valid,
as per the Intel SDM.

Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>

diff -r 9800df3e11dc xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Fri May 06 15:43:35 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Sun May 08 02:58:51 2011 +0800
@@ -135,6 +135,21 @@ void mcabanks_free(struct mca_banks *ban
         xfree(banks->bank_map);
     xfree(banks);
 }
+
+static void mcabank_clear(int banknum)
+{
+    uint64_t status;
+
+    status = mca_rdmsr(MSR_IA32_MCx_STATUS(banknum));
+
+    if (status & MCi_STATUS_ADDRV)
+        mca_wrmsr(MSR_IA32_MCx_ADDR(banknum), 0x0ULL);
+    if (status & MCi_STATUS_MISCV)
+        mca_wrmsr(MSR_IA32_MCx_MISC(banknum), 0x0ULL);
+
+    mca_wrmsr(MSR_IA32_MCx_STATUS(banknum), 0x0ULL);
+}
+
 /* Judging whether to Clear Machine Check error bank callback handler
  * According to Intel latest MCA OS Recovery Writer's Guide, 
  * whether the error MCA bank needs to be cleared is decided by the mca_source
@@ -345,8 +360,8 @@ mctelem_cookie_t mcheck_mca_logout(enum 
 
         /* By default, need_clear = 1 */
         if (who != MCA_MCE_SCAN && need_clear)
-            /* Clear status */
-            mca_wrmsr(MSR_IA32_MCx_STATUS(i), 0x0ULL);
+            /* Clear bank */
+            mcabank_clear(i);
         else if ( who == MCA_MCE_SCAN && need_clear)
             mcabanks_set(i, clear_bank);
 
@@ -601,15 +616,11 @@ void mcheck_mca_clearbanks(struct mca_ba
 void mcheck_mca_clearbanks(struct mca_banks *bankmask)
 {
     int i;
-    uint64_t status;
 
-    for (i = 0; i < 32 && i < nr_mce_banks; i++) {
+    for (i = 0; i < nr_mce_banks; i++) {
         if (!mcabanks_test(i, bankmask))
             continue;
-        status = mca_rdmsr(MSR_IA32_MCx_STATUS(i));
-        if (!(status & MCi_STATUS_VAL))
-            continue;
-        mca_wrmsr(MSR_IA32_MCx_STATUS(i), 0x0ULL);
+        mcabank_clear(i);
     }
 }
 
diff -r 9800df3e11dc xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Fri May 06 15:43:35 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Sun May 08 02:58:51 2011 +0800
@@ -903,17 +903,25 @@ static int intel_need_clearbank_scan(enu
         else return 0;
     }
     else if ( who == MCA_MCE_SCAN) {
+        if ( !ser_support )
+            return 0;
+        /* 
+         * For fatal error, it shouldn't be cleared so that sticky bank
+         * have chance to be handled after reboot by polling
+         */
+        if ( (status & MCi_STATUS_UC) && (status & MCi_STATUS_PCC) )
+            return 0;
         /* Spurious need clear bank */
-        if ( ser_support && !(status & MCi_STATUS_OVER)
+        else if ( !(status & MCi_STATUS_OVER)
                     && (status & MCi_STATUS_UC) && !(status & MCi_STATUS_EN))
             return 1;
         /* SRAR OVER=0 clear bank. OVER = 1 have caused reset */
-        else if ( ser_support && (status & MCi_STATUS_UC)
+        else if ( (status & MCi_STATUS_UC)
                     && (status & MCi_STATUS_S) && (status & MCi_STATUS_AR )
-                    && (status & MCi_STATUS_OVER) )
+                    && !(status & MCi_STATUS_OVER) )
             return 1;
         /* SRAO need clear bank */
-        else if ( ser_support && !(status & MCi_STATUS_AR) 
+        else if ( !(status & MCi_STATUS_AR) 
                     && (status & MCi_STATUS_S) && (status & MCi_STATUS_UC))
             return 1; 
         else

Attachment: mca-cleanup-6.patch
Description: mca-cleanup-6.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH 6] MCA bank clear, Liu, Jinsong <=