WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] x86/mca: MCA bank clear

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86/mca: MCA bank clear
From: Xen patchbot-unstable <patchbot@xxxxxxx>
Date: Sat, 14 May 2011 07:15:48 +0100
Delivery-date: Fri, 13 May 2011 23:16:33 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1305186822 -3600
# Node ID 3d0ba2b4941fd85678043edf996673232fddf407
# Parent  f69f5836d8c21cf949337cf425defbcca81af30d
x86/mca: MCA bank clear

1). Add explicit detection of the uc=pcc=1 fatal error case, since in
that case the state of the s/ar/over bits is undefined and the bank
may hence be mis-cleared. The explicit fatal check returns 0, so that
the sticky MSR bank has a chance to be handled after reboot by polling;
2). Fix a small bug in the SRAR case: the over bit should be 0 when the
bank needs to be cleared; otherwise (over=1) the system would be reset;
3). When clearing a bank, also clear MCi_ADDR/MCi_MISC if they are
valid, according to the Intel SDM;

Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
---


diff -r f69f5836d8c2 -r 3d0ba2b4941f xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Thu May 12 08:52:47 2011 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Thu May 12 08:53:42 2011 +0100
@@ -135,6 +135,21 @@
         xfree(banks->bank_map);
     xfree(banks);
 }
+
+static void mcabank_clear(int banknum)
+{
+    uint64_t status;
+
+    status = mca_rdmsr(MSR_IA32_MCx_STATUS(banknum));
+
+    if (status & MCi_STATUS_ADDRV)
+        mca_wrmsr(MSR_IA32_MCx_ADDR(banknum), 0x0ULL);
+    if (status & MCi_STATUS_MISCV)
+        mca_wrmsr(MSR_IA32_MCx_MISC(banknum), 0x0ULL);
+
+    mca_wrmsr(MSR_IA32_MCx_STATUS(banknum), 0x0ULL);
+}
+
 /* Judging whether to Clear Machine Check error bank callback handler
  * According to Intel latest MCA OS Recovery Writer's Guide, 
  * whether the error MCA bank needs to be cleared is decided by the mca_source
@@ -345,8 +360,8 @@
 
         /* By default, need_clear = 1 */
         if (who != MCA_MCE_SCAN && need_clear)
-            /* Clear status */
-            mca_wrmsr(MSR_IA32_MCx_STATUS(i), 0x0ULL);
+            /* Clear bank */
+            mcabank_clear(i);
         else if ( who == MCA_MCE_SCAN && need_clear)
             mcabanks_set(i, clear_bank);
 
@@ -601,15 +616,11 @@
 void mcheck_mca_clearbanks(struct mca_banks *bankmask)
 {
     int i;
-    uint64_t status;
 
-    for (i = 0; i < 32 && i < nr_mce_banks; i++) {
+    for (i = 0; i < nr_mce_banks; i++) {
         if (!mcabanks_test(i, bankmask))
             continue;
-        status = mca_rdmsr(MSR_IA32_MCx_STATUS(i));
-        if (!(status & MCi_STATUS_VAL))
-            continue;
-        mca_wrmsr(MSR_IA32_MCx_STATUS(i), 0x0ULL);
+        mcabank_clear(i);
     }
 }
 
diff -r f69f5836d8c2 -r 3d0ba2b4941f xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Thu May 12 08:52:47 2011 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Thu May 12 08:53:42 2011 +0100
@@ -903,17 +903,25 @@
         else return 0;
     }
     else if ( who == MCA_MCE_SCAN) {
+        if ( !ser_support )
+            return 0;
+        /* 
+         * For fatal error, it shouldn't be cleared so that sticky bank
+         * have chance to be handled after reboot by polling
+         */
+        if ( (status & MCi_STATUS_UC) && (status & MCi_STATUS_PCC) )
+            return 0;
         /* Spurious need clear bank */
-        if ( ser_support && !(status & MCi_STATUS_OVER)
+        else if ( !(status & MCi_STATUS_OVER)
                     && (status & MCi_STATUS_UC) && !(status & MCi_STATUS_EN))
             return 1;
         /* SRAR OVER=0 clear bank. OVER = 1 have caused reset */
-        else if ( ser_support && (status & MCi_STATUS_UC)
+        else if ( (status & MCi_STATUS_UC)
                     && (status & MCi_STATUS_S) && (status & MCi_STATUS_AR )
-                    && (status & MCi_STATUS_OVER) )
+                    && !(status & MCi_STATUS_OVER) )
             return 1;
         /* SRAO need clear bank */
-        else if ( ser_support && !(status & MCi_STATUS_AR) 
+        else if ( !(status & MCi_STATUS_AR) 
                     && (status & MCi_STATUS_S) && (status & MCi_STATUS_UC))
             return 1; 
         else

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] x86/mca: MCA bank clear, Xen patchbot-unstable <=