# HG changeset patch
# User Liu, Jinsong <jinsong.liu@xxxxxxxxx>
# Date 1305186822 -3600
# Node ID 3d0ba2b4941fd85678043edf996673232fddf407
# Parent f69f5836d8c21cf949337cf425defbcca81af30d
x86/mca: MCA bank clear
1). Add explicit detection of fatal errors (UC=1 and PCC=1), since in
that case the status of the S/AR/OVER bits is undefined and the bank
may therefore be cleared by mistake. The explicit fatal-error check
returns 0 (do not clear), so that the sticky MSR bank has a chance to
be handled by polling after reboot;
2). Fix a small bug in the SRAR case: the OVER bit should be 0 when the
bank needs to be cleared; otherwise (OVER=1) the system would be reset;
3). When clearing a bank, also clear MCi_ADDR/MCi_MISC if they are
valid, according to the Intel SDM;
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
---
diff -r f69f5836d8c2 -r 3d0ba2b4941f xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Thu May 12 08:52:47 2011 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.c Thu May 12 08:53:42 2011 +0100
@@ -135,6 +135,21 @@
xfree(banks->bank_map);
xfree(banks);
}
+
+static void mcabank_clear(int banknum)
+{
+ uint64_t status;
+
+ status = mca_rdmsr(MSR_IA32_MCx_STATUS(banknum));
+
+ if (status & MCi_STATUS_ADDRV)
+ mca_wrmsr(MSR_IA32_MCx_ADDR(banknum), 0x0ULL);
+ if (status & MCi_STATUS_MISCV)
+ mca_wrmsr(MSR_IA32_MCx_MISC(banknum), 0x0ULL);
+
+ mca_wrmsr(MSR_IA32_MCx_STATUS(banknum), 0x0ULL);
+}
+
/* Judging whether to Clear Machine Check error bank callback handler
* According to Intel latest MCA OS Recovery Writer's Guide,
* whether the error MCA bank needs to be cleared is decided by the mca_source
@@ -345,8 +360,8 @@
/* By default, need_clear = 1 */
if (who != MCA_MCE_SCAN && need_clear)
- /* Clear status */
- mca_wrmsr(MSR_IA32_MCx_STATUS(i), 0x0ULL);
+ /* Clear bank */
+ mcabank_clear(i);
else if ( who == MCA_MCE_SCAN && need_clear)
mcabanks_set(i, clear_bank);
@@ -601,15 +616,11 @@
void mcheck_mca_clearbanks(struct mca_banks *bankmask)
{
int i;
- uint64_t status;
- for (i = 0; i < 32 && i < nr_mce_banks; i++) {
+ for (i = 0; i < nr_mce_banks; i++) {
if (!mcabanks_test(i, bankmask))
continue;
- status = mca_rdmsr(MSR_IA32_MCx_STATUS(i));
- if (!(status & MCi_STATUS_VAL))
- continue;
- mca_wrmsr(MSR_IA32_MCx_STATUS(i), 0x0ULL);
+ mcabank_clear(i);
}
}
diff -r f69f5836d8c2 -r 3d0ba2b4941f xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu May 12 08:52:47 2011 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Thu May 12 08:53:42 2011 +0100
@@ -903,17 +903,25 @@
else return 0;
}
else if ( who == MCA_MCE_SCAN) {
+ if ( !ser_support )
+ return 0;
+ /*
+ * For fatal error, it shouldn't be cleared so that sticky bank
+ * have chance to be handled after reboot by polling
+ */
+ if ( (status & MCi_STATUS_UC) && (status & MCi_STATUS_PCC) )
+ return 0;
/* Spurious need clear bank */
- if ( ser_support && !(status & MCi_STATUS_OVER)
+ else if ( !(status & MCi_STATUS_OVER)
&& (status & MCi_STATUS_UC) && !(status & MCi_STATUS_EN))
return 1;
/* SRAR OVER=0 clear bank. OVER = 1 have caused reset */
- else if ( ser_support && (status & MCi_STATUS_UC)
+ else if ( (status & MCi_STATUS_UC)
&& (status & MCi_STATUS_S) && (status & MCi_STATUS_AR )
- && (status & MCi_STATUS_OVER) )
+ && !(status & MCi_STATUS_OVER) )
return 1;
/* SRAO need clear bank */
- else if ( ser_support && !(status & MCi_STATUS_AR)
+ else if ( !(status & MCi_STATUS_AR)
&& (status & MCi_STATUS_S) && (status & MCi_STATUS_UC))
return 1;
else
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|