Provide ERST interface to Xen MCE
This patch is used to provide ERST write/read/clear operation interface to Xen
MCE.
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
b/xen/arch/x86/cpu/mcheck/mce-apei.c | 129 +++++++++++++++++++++++++++++++++++
xen/arch/x86/cpu/mcheck/Makefile | 1
xen/arch/x86/cpu/mcheck/mce.h | 24 ++++++
xen/arch/x86/time.c | 5 +
xen/include/xen/cper.h | 113 ++++++++++++++++++++++++++++++
5 files changed, 272 insertions(+)
diff -r 4c224a1c98c9 xen/arch/x86/cpu/mcheck/Makefile
--- a/xen/arch/x86/cpu/mcheck/Makefile Fri Aug 20 17:38:07 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/Makefile Fri Aug 20 20:59:52 2010 +0800
@@ -4,6 +4,7 @@ obj-y += amd_f10.o
obj-y += amd_f10.o
obj-y += mctelem.o
obj-y += mce.o
+obj-y += mce-apei.o
obj-y += mce_intel.o
obj-y += mce_amd_quirks.o
obj-y += non-fatal.o
diff -r 4c224a1c98c9 xen/arch/x86/cpu/mcheck/mce-apei.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce-apei.c Fri Aug 20 20:59:52 2010 +0800
@@ -0,0 +1,129 @@
+/*
+ * Bridge between MCE and APEI
+ *
+ * On some machine, corrected memory errors are reported via APEI
+ * generic hardware error source (GHES) instead of corrected Machine
+ * Check. These corrected memory errors can be reported to user space
+ * through /dev/mcelog via faking a corrected Machine Check, so that
+ * the error memory page can be offlined by /sbin/mcelog if the error
+ * count for one page is beyond the threshold.
+ *
+ * For fatal MCE, save MCE record into persistent storage via ERST, so
+ * that the MCE record can be logged after reboot via ERST.
+ *
+ * Copyright 2010 Intel Corp.
+ * Author: Huang Ying <ying.huang@xxxxxxxxx>
+ * Ported by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <xen/kernel.h>
+#include <xen/cper.h>
+#include <xen/errno.h>
+#include <acpi/acpi.h>
+#include <acpi/apei.h>
+
+#include "mce.h"
+
+#define CPER_CREATOR_MCE \
+ UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_MCE \
+ UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
+
+#pragma pack(1)
+/*
+ * CPER specification (in UEFI specification 2.3 appendix N) requires
+ * byte-packed.
+ */
+struct cper_mce_record {
+ struct cper_record_header hdr;
+ struct cper_section_descriptor sec_hdr;
+ struct mce mce;
+} __packed;
+/* Reset to default packing */
+#pragma pack()
+
+int apei_write_mce(struct mce *m)
+{
+ struct cper_mce_record rcd;
+
+ if (!m)
+ return -EINVAL;
+
+ memset(&rcd, 0, sizeof(rcd));
+ memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+ rcd.hdr.revision = CPER_RECORD_REV;
+ rcd.hdr.signature_end = CPER_SIG_END;
+ rcd.hdr.section_count = 1;
+ rcd.hdr.error_severity = CPER_SER_FATAL;
+ /* timestamp, platform_id, partition_id are all invalid */
+ rcd.hdr.validation_bits = 0;
+ rcd.hdr.record_length = sizeof(rcd);
+ rcd.hdr.creator_id = CPER_CREATOR_MCE;
+ rcd.hdr.notification_type = CPER_NOTIFY_MCE;
+ rcd.hdr.record_id = cper_next_record_id();
+ rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+ rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
+ rcd.sec_hdr.section_length = sizeof(rcd.mce);
+ rcd.sec_hdr.revision = CPER_SEC_REV;
+ /* fru_id and fru_text is invalid */
+ rcd.sec_hdr.validation_bits = 0;
+ rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
+ rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+ rcd.sec_hdr.section_severity = CPER_SER_FATAL;
+
+ memcpy(&rcd.mce, m, sizeof(*m));
+
+ return erst_write(&rcd.hdr);
+}
+
+size_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+ struct cper_mce_record rcd;
+ size_t len;
+
+ if (!m || !record_id)
+ return -EINVAL;
+
+ len = erst_read_next(&rcd.hdr, sizeof(rcd));
+ if (len <= 0)
+ return len;
+ /* Can not skip other records in storage via ERST unless clear them */
+ else if (len != sizeof(rcd) ||
+ uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
+ printk(KERN_WARNING
+ "MCE-APEI: Can not skip the unknown record in ERST");
+ return -EIO;
+ }
+
+ memcpy(m, &rcd.mce, sizeof(*m));
+ *record_id = rcd.hdr.record_id;
+
+ return sizeof(*m);
+}
+
+/* Check whether there is record in ERST */
+int apei_check_mce(void)
+{
+ return erst_get_record_count();
+}
+
+int apei_clear_mce(u64 record_id)
+{
+ return erst_clear(record_id);
+}
diff -r 4c224a1c98c9 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h Fri Aug 20 17:38:07 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce.h Fri Aug 20 20:59:52 2010 +0800
@@ -186,4 +186,28 @@ static inline int mce_bank_msr(uint32_t
return 1;
return 0;
}
+
+/* Fields are zero when not available */
+struct mce {
+ __u64 status;
+ __u64 misc;
+ __u64 addr;
+ __u64 mcgstatus;
+ __u64 ip;
+ __u64 tsc; /* cpu time stamp counter */
+ __u64 time; /* wall time_t when error was detected */
+ __u8 cpuvendor; /* cpu vendor as encoded in system.h */
+ __u8 inject_flags; /* software inject flags */
+ __u16 pad;
+ __u32 cpuid; /* CPUID 1 EAX */
+ __u8 cs; /* code segment */
+ __u8 bank; /* machine check bank */
+ __u8 cpu; /* cpu number; obsolete; use extcpu now */
+ __u8 finished; /* entry is valid */
+ __u32 extcpu; /* linux cpu number that detected the error */
+ __u32 socketid; /* CPU socket ID */
+ __u32 apicid; /* CPU initial apic ID */
+ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
+};
+
#endif /* _MCE_H */
diff -r 4c224a1c98c9 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Fri Aug 20 17:38:07 2010 +0800
+++ b/xen/arch/x86/time.c Fri Aug 20 20:59:52 2010 +0800
@@ -1501,6 +1501,11 @@ unsigned long get_localtime(struct domai
+ d->time_offset_seconds;
}
+unsigned long get_sec(void)
+{
+ return wc_sec + (wc_nsec + NOW()) / 1000000000ULL;
+}
+
/* "cmos_utc_offset" is the difference between UTC time and CMOS time. */
static long cmos_utc_offset; /* in seconds */
diff -r 4c224a1c98c9 xen/include/xen/cper.h
--- a/xen/include/xen/cper.h Fri Aug 20 17:38:07 2010 +0800
+++ b/xen/include/xen/cper.h Fri Aug 20 20:59:52 2010 +0800
@@ -23,16 +23,129 @@
#define LINUX_CPER_H
#include <xen/types.h>
+#include <xen/string.h>
+
+extern unsigned long get_sec(void);
typedef struct {
__u8 b[16];
} uuid_le;
+
+static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2)
+{
+ return memcmp(&u1, &u2, sizeof(uuid_le));
+}
+
+static inline u64 cper_next_record_id(void)
+{
+ static u64 record_id;
+
+ if (!record_id)
+ record_id = get_sec() << 32;
+
+ return ++record_id;
+}
+
+#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+((uuid_le) \
+{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
+ (b) & 0xff, ((b) >> 8) & 0xff, \
+ (c) & 0xff, ((c) >> 8) & 0xff, \
+ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
/* CPER record signature and the size */
#define CPER_SIG_RECORD "CPER"
#define CPER_SIG_SIZE 4
/* Used in signature_end field in struct cper_record_header */
#define CPER_SIG_END 0xffffffff
+
+/*
+ * CPER record header revision, used in revision field in struct
+ * cper_record_header
+ */
+#define CPER_RECORD_REV 0x0100
+
+/*
+ * Severity difinition for error_severity in struct cper_record_header
+ * and section_severity in struct cper_section_descriptor
+ */
+#define CPER_SER_RECOVERABLE 0x0
+#define CPER_SER_FATAL 0x1
+#define CPER_SER_CORRECTED 0x2
+#define CPER_SER_INFORMATIONAL 0x3
+
+/*
+ * Notification type used to generate error record, used in
+ * notification_type in struct cper_record_header
+ *
+ * Corrected Machine Check
+ */
+#define CPER_NOTIFY_CMC
\
+ UUID_LE(0x2DCE8BB1, 0xBDD7, 0x450e, 0xB9, 0xAD, 0x9C, 0xF4, \
+ 0xEB, 0xD4, 0xF8, 0x90)
+/* Corrected Platform Error */
+#define CPER_NOTIFY_CPE
\
+ UUID_LE(0x4E292F96, 0xD843, 0x4a55, 0xA8, 0xC2, 0xD4, 0x81, \
+ 0xF2, 0x7E, 0xBE, 0xEE)
+/* Machine Check Exception */
+#define CPER_NOTIFY_MCE
\
+ UUID_LE(0xE8F56FFE, 0x919C, 0x4cc5, 0xBA, 0x88, 0x65, 0xAB, \
+ 0xE1, 0x49, 0x13, 0xBB)
+/* PCI Express Error */
+#define CPER_NOTIFY_PCIE \
+ UUID_LE(0xCF93C01F, 0x1A16, 0x4dfc, 0xB8, 0xBC, 0x9C, 0x4D, \
+ 0xAF, 0x67, 0xC1, 0x04)
+/* INIT Record (for IPF) */
+#define CPER_NOTIFY_INIT \
+ UUID_LE(0xCC5263E8, 0x9308, 0x454a, 0x89, 0xD0, 0x34, 0x0B, \
+ 0xD3, 0x9B, 0xC9, 0x8E)
+/* Non-Maskable Interrupt */
+#define CPER_NOTIFY_NMI
\
+ UUID_LE(0x5BAD89FF, 0xB7E6, 0x42c9, 0x81, 0x4A, 0xCF, 0x24, \
+ 0x85, 0xD6, 0xE9, 0x8A)
+/* BOOT Error Record */
+#define CPER_NOTIFY_BOOT \
+ UUID_LE(0x3D61A466, 0xAB40, 0x409a, 0xA6, 0x98, 0xF3, 0x62, \
+ 0xD4, 0x64, 0xB3, 0x8F)
+/* DMA Remapping Error */
+#define CPER_NOTIFY_DMAR \
+ UUID_LE(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \
+ 0x72, 0x2D, 0xEB, 0x41)
+
+/*
+ * Flags bits definitions for flags in struct cper_record_header
+ * If set, the error has been recovered
+ */
+#define CPER_HW_ERROR_FLAGS_RECOVERED 0x1
+/* If set, the error is for previous boot */
+#define CPER_HW_ERROR_FLAGS_PREVERR 0x2
+/* If set, the error is injected for testing */
+#define CPER_HW_ERROR_FLAGS_SIMULATED 0x4
+
+/*
+ * CPER section header revision, used in revision field in struct
+ * cper_section_descriptor
+ */
+#define CPER_SEC_REV 0x0100
+
+/*
+ * Validation bits difinition for validation_bits in struct
+ * cper_section_descriptor. If set, corresponding fields in struct
+ * cper_section_descriptor contain valid information.
+ *
+ * corresponds fru_id
+ */
+#define CPER_SEC_VALID_FRU_ID 0x1
+/* corresponds fru_text */
+#define CPER_SEC_VALID_FRU_TEXT 0x2
+
+/*
+ * Flags bits definitions for flags in struct cper_section_descriptor
+ *
+ * If set, the section is associated with the error condition
+ * directly, and should be focused on
+ */
+#define CPER_SEC_PRIMARY 0x0001
/*
* All tables and structs must be byte-packed to match CPER
mce-erst-2.patch
Description: mce-erst-2.patch
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|