WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] mce: Provide ERST interface

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] mce: Provide ERST interface
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 26 Aug 2010 03:31:03 -0700
Delivery-date: Thu, 26 Aug 2010 03:35:17 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1282467195 -3600
# Node ID 3bd6d29f143504fc76094d78d8d9f44c53fc7e4a
# Parent  1666addd3d95a2f31b5eba3b2a96d5d11f819faf
mce: Provide ERST interface

This patch is used to provide ERST write/read/clear operation
interface to Xen MCE.

Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
---
 xen/arch/x86/cpu/mcheck/Makefile   |    1 
 xen/arch/x86/cpu/mcheck/mce-apei.c |  129 +++++++++++++++++++++++++++++++++++++
 xen/arch/x86/cpu/mcheck/mce.h      |   24 ++++++
 xen/arch/x86/time.c                |    5 +
 xen/include/xen/cper.h             |  113 ++++++++++++++++++++++++++++++++
 5 files changed, 272 insertions(+)

diff -r 1666addd3d95 -r 3bd6d29f1435 xen/arch/x86/cpu/mcheck/Makefile
--- a/xen/arch/x86/cpu/mcheck/Makefile  Sun Aug 22 09:52:18 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/Makefile  Sun Aug 22 09:53:15 2010 +0100
@@ -4,6 +4,7 @@ obj-y += amd_f10.o
 obj-y += amd_f10.o
 obj-y += mctelem.o
 obj-y += mce.o
+obj-y += mce-apei.o
 obj-y += mce_intel.o
 obj-y += mce_amd_quirks.o
 obj-y += non-fatal.o
diff -r 1666addd3d95 -r 3bd6d29f1435 xen/arch/x86/cpu/mcheck/mce-apei.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce-apei.c        Sun Aug 22 09:53:15 2010 +0100
@@ -0,0 +1,129 @@
+/*
+ * Bridge between MCE and APEI
+ *
+ * On some machines, corrected memory errors are reported via APEI
+ * generic hardware error source (GHES) instead of corrected Machine
+ * Check. These corrected memory errors can be reported to user space
+ * through /dev/mcelog via faking a corrected Machine Check, so that
+ * the error memory page can be offlined by /sbin/mcelog if the error
+ * count for one page is beyond the threshold.
+ *
+ * For fatal MCE, save MCE record into persistent storage via ERST, so
+ * that the MCE record can be logged after reboot via ERST.
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@xxxxxxxxx>
+ *   Ported by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <xen/kernel.h>
+#include <xen/cper.h>
+#include <xen/errno.h>
+#include <acpi/acpi.h>
+#include <acpi/apei.h>
+
+#include "mce.h"
+
+#define CPER_CREATOR_MCE                                               \
+       UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,     \
+               0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_MCE                                          \
+       UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,     \
+               0x04, 0x4a, 0x38, 0xfc)
+
+#pragma pack(1)
+/*
+ * Layout of one MCE error record as handed to ERST: a CPER record
+ * header, a single section descriptor, and the struct mce payload that
+ * the descriptor points at (see apei_write_mce()).
+ *
+ * CPER specification (in UEFI specification 2.3 appendix N) requires
+ * byte-packed, so no padding may appear between the members.
+ */
+struct cper_mce_record {
+       struct cper_record_header hdr;
+       struct cper_section_descriptor sec_hdr;
+       struct mce mce;
+} __packed;
+/* Reset to default packing */
+#pragma pack()
+
+/*
+ * Wrap a machine check record in a single-section CPER record and pass
+ * it to erst_write().
+ *
+ * @m: MCE record to store; must not be NULL.
+ *
+ * Returns -EINVAL when @m is NULL, otherwise the result of erst_write().
+ */
+int apei_write_mce(struct mce *m)
+{
+       struct cper_mce_record rec;
+
+       if (m == NULL)
+               return -EINVAL;
+
+       /* Start from an all-zero record so that unset fields read as zero. */
+       memset(&rec, 0, sizeof(rec));
+
+       /* Section body: the caller's MCE record, copied verbatim. */
+       memcpy(&rec.mce, m, sizeof(rec.mce));
+
+       /* Section descriptor for that body. */
+       rec.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+       rec.sec_hdr.section_severity = CPER_SER_FATAL;
+       rec.sec_hdr.flags = CPER_SEC_PRIMARY;
+       rec.sec_hdr.revision = CPER_SEC_REV;
+       /* fru_id and fru_text are invalid */
+       rec.sec_hdr.validation_bits = 0;
+       rec.sec_hdr.section_length = sizeof(rec.mce);
+       rec.sec_hdr.section_offset = (void *)&rec.mce - (void *)&rec;
+
+       /* Record header. */
+       memcpy(rec.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+       rec.hdr.signature_end = CPER_SIG_END;
+       rec.hdr.revision = CPER_RECORD_REV;
+       rec.hdr.record_length = sizeof(rec);
+       rec.hdr.section_count = 1;
+       rec.hdr.error_severity = CPER_SER_FATAL;
+       rec.hdr.creator_id = CPER_CREATOR_MCE;
+       rec.hdr.notification_type = CPER_NOTIFY_MCE;
+       rec.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+       /* timestamp, platform_id, partition_id are all invalid */
+       rec.hdr.validation_bits = 0;
+       rec.hdr.record_id = cper_next_record_id();
+
+       return erst_write(&rec.hdr);
+}
+
+/*
+ * Read the next record from ERST and, if it is an MCE record of the
+ * layout written by apei_write_mce(), copy out its payload.
+ *
+ * @m:         receives the stored MCE record.
+ * @record_id: receives the CPER record ID of the record that was read.
+ *
+ * Returns sizeof(*m) on success.  A zero or negative result from
+ * erst_read_next() is passed through unchanged, -EINVAL is returned for
+ * a NULL argument and -EIO for a record of unexpected size or creator.
+ * NOTE: the return type is unsigned, so callers have to convert the
+ * result to a signed type before testing for an error.
+ */
+size_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+       struct cper_mce_record rcd;
+       /*
+        * Signed on purpose: held in an unsigned variable, a negative
+        * error would fail the "<= 0" test below and be misreported as
+        * an unknown record (-EIO).
+        * NOTE(review): assumes erst_read_next() signals failure with a
+        * negative value -- confirm against its declaration.
+        */
+       long len;
+
+       if (!m || !record_id)
+               return -EINVAL;
+
+       len = erst_read_next(&rcd.hdr, sizeof(rcd));
+       if (len <= 0)
+               return len;
+
+       /*
+        * Records from other creators can not be skipped in ERST unless
+        * they are cleared, so report an error instead.
+        */
+       if ((size_t)len != sizeof(rcd) ||
+           uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
+               printk(KERN_WARNING
+                       "MCE-APEI: Can not skip the unknown record in ERST\n");
+               return -EIO;
+       }
+
+       memcpy(m, &rcd.mce, sizeof(*m));
+       *record_id = rcd.hdr.record_id;
+
+       return sizeof(*m);
+}
+
+/*
+ * Check whether there is any record in ERST.
+ *
+ * Returns the value of erst_get_record_count() unchanged.
+ * NOTE(review): presumably a positive value means records are present
+ * and a negative one is an error -- confirm against the ERST driver.
+ */
+int apei_check_mce(void)
+{
+       return erst_get_record_count();
+}
+
+/*
+ * Ask ERST to clear the record identified by @record_id.
+ *
+ * Thin wrapper: returns the result of erst_clear() unchanged.
+ */
+int apei_clear_mce(u64 record_id)
+{
+       return erst_clear(record_id);
+}
diff -r 1666addd3d95 -r 3bd6d29f1435 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Sun Aug 22 09:52:18 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Sun Aug 22 09:53:15 2010 +0100
@@ -186,4 +186,28 @@ static inline int mce_bank_msr(uint32_t 
         return 1;
     return 0;
 }
+
+/*
+ * Machine check record.  mce-apei.c embeds it as the section body of
+ * struct cper_mce_record, which is what gets written to and read back
+ * from ERST.
+ * NOTE(review): apei_read_mce() rejects records whose length differs
+ * from the current struct size, so resizing this struct presumably
+ * makes records written by an older build unreadable -- confirm before
+ * changing the layout.
+ *
+ * Fields are zero when not available.
+ */
+struct mce {
+    __u64 status;
+    __u64 misc;
+    __u64 addr;
+    __u64 mcgstatus;
+    __u64 ip;
+    __u64 tsc;      /* cpu time stamp counter */
+    __u64 time;     /* wall time_t when error was detected */
+    __u8  cpuvendor;        /* cpu vendor as encoded in system.h */
+    __u8  inject_flags;     /* software inject flags */
+    __u16  pad;
+    __u32 cpuid;    /* CPUID 1 EAX */
+    __u8  cs;               /* code segment */
+    __u8  bank;     /* machine check bank */
+    __u8  cpu;      /* cpu number; obsolete; use extcpu now */
+    __u8  finished;   /* entry is valid */
+    __u32 extcpu;   /* linux cpu number that detected the error */
+    __u32 socketid; /* CPU socket ID */
+    __u32 apicid;   /* CPU initial apic ID */
+    __u64 mcgcap;   /* MCGCAP MSR: machine check capabilities of CPU */
+};
+
 #endif /* _MCE_H */
diff -r 1666addd3d95 -r 3bd6d29f1435 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Sun Aug 22 09:52:18 2010 +0100
+++ b/xen/arch/x86/time.c       Sun Aug 22 09:53:15 2010 +0100
@@ -1506,6 +1506,11 @@ unsigned long get_localtime(struct domai
 {
     return wc_sec + (wc_nsec + NOW()) / 1000000000ULL 
         + d->time_offset_seconds;
+}
+
+/*
+ * Same wc_sec/wc_nsec/NOW() expression as get_localtime() above, but
+ * without the per-domain time_offset_seconds adjustment.
+ * NOTE(review): presumably the current wall-clock time in whole
+ * seconds -- confirm the units of wc_sec, wc_nsec and NOW().
+ */
+unsigned long get_sec(void)
+{
+    return wc_sec + (wc_nsec + NOW()) / 1000000000ULL;
 }
 
 /* "cmos_utc_offset" is the difference between UTC time and CMOS time. */
diff -r 1666addd3d95 -r 3bd6d29f1435 xen/include/xen/cper.h
--- a/xen/include/xen/cper.h    Sun Aug 22 09:52:18 2010 +0100
+++ b/xen/include/xen/cper.h    Sun Aug 22 09:53:15 2010 +0100
@@ -23,16 +23,129 @@
 #define LINUX_CPER_H
 
 #include <xen/types.h>
+#include <xen/string.h>
+
+extern unsigned long get_sec(void);
 
 typedef struct {
        __u8 b[16];
 } uuid_le;
+
+/*
+ * Compare two little-endian UUIDs byte by byte.
+ *
+ * Returns 0 when all bytes match, otherwise the non-zero memcmp()
+ * result for the first differing byte.
+ */
+static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2)
+{
+        const int diff = memcmp(u1.b, u2.b, sizeof(u1.b));
+
+        return diff;
+}
+
+/*
+ * Hand out the next CPER record ID.
+ *
+ * On first use the counter is seeded with get_sec() in its upper 32
+ * bits; every call then returns the pre-incremented counter.
+ *
+ * NOTE: record_id is a function-local static inside a header inline,
+ * so each translation unit that calls this gets its own counter, and
+ * the increment is not atomic.
+ */
+static inline u64 cper_next_record_id(void)
+{
+       static u64 record_id;
+
+       /*
+        * Widen before shifting: get_sec() returns unsigned long, and
+        * shifting a 32-bit unsigned long left by 32 is undefined.
+        */
+       if (!record_id)
+               record_id = (u64)get_sec() << 32;
+
+       return ++record_id;
+}
+
+/*
+ * Build a uuid_le compound literal.  The 32-bit field a and the 16-bit
+ * fields b and c are stored least-significant byte first; d0..d7 are
+ * stored in the order given.
+ */
+#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)               \
+((uuid_le)                                                             \
+{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
+   (b) & 0xff, ((b) >> 8) & 0xff,                                      \
+   (c) & 0xff, ((c) >> 8) & 0xff,                                      \
+   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
 
 /* CPER record signature and the size */
 #define CPER_SIG_RECORD                                "CPER"
 #define CPER_SIG_SIZE                          4
 /* Used in signature_end field in struct cper_record_header */
 #define CPER_SIG_END                           0xffffffff
+
+/*
+ * CPER record header revision, used in revision field in struct
+ * cper_record_header
+ */
+#define CPER_RECORD_REV                                0x0100
+
+/*
+ * Severity definition for error_severity in struct cper_record_header
+ * and section_severity in struct cper_section_descriptor
+ */
+#define CPER_SER_RECOVERABLE                   0x0
+#define CPER_SER_FATAL                         0x1
+#define CPER_SER_CORRECTED                     0x2
+#define CPER_SER_INFORMATIONAL                 0x3
+
+/*
+ * Notification type used to generate error record, used in
+ * notification_type in struct cper_record_header
+ *
+ * Corrected Machine Check
+ */
+#define CPER_NOTIFY_CMC                                                \
+       UUID_LE(0x2DCE8BB1, 0xBDD7, 0x450e, 0xB9, 0xAD, 0x9C, 0xF4,     \
+               0xEB, 0xD4, 0xF8, 0x90)
+/* Corrected Platform Error */
+#define CPER_NOTIFY_CPE                                                \
+       UUID_LE(0x4E292F96, 0xD843, 0x4a55, 0xA8, 0xC2, 0xD4, 0x81,     \
+               0xF2, 0x7E, 0xBE, 0xEE)
+/* Machine Check Exception */
+#define CPER_NOTIFY_MCE                                                \
+       UUID_LE(0xE8F56FFE, 0x919C, 0x4cc5, 0xBA, 0x88, 0x65, 0xAB,     \
+               0xE1, 0x49, 0x13, 0xBB)
+/* PCI Express Error */
+#define CPER_NOTIFY_PCIE                                               \
+       UUID_LE(0xCF93C01F, 0x1A16, 0x4dfc, 0xB8, 0xBC, 0x9C, 0x4D,     \
+               0xAF, 0x67, 0xC1, 0x04)
+/* INIT Record (for IPF) */
+#define CPER_NOTIFY_INIT                                               \
+       UUID_LE(0xCC5263E8, 0x9308, 0x454a, 0x89, 0xD0, 0x34, 0x0B,     \
+               0xD3, 0x9B, 0xC9, 0x8E)
+/* Non-Maskable Interrupt */
+#define CPER_NOTIFY_NMI                                                \
+       UUID_LE(0x5BAD89FF, 0xB7E6, 0x42c9, 0x81, 0x4A, 0xCF, 0x24,     \
+               0x85, 0xD6, 0xE9, 0x8A)
+/* BOOT Error Record */
+#define CPER_NOTIFY_BOOT                                               \
+       UUID_LE(0x3D61A466, 0xAB40, 0x409a, 0xA6, 0x98, 0xF3, 0x62,     \
+               0xD4, 0x64, 0xB3, 0x8F)
+/* DMA Remapping Error */
+#define CPER_NOTIFY_DMAR                                               \
+       UUID_LE(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E,     \
+               0x72, 0x2D, 0xEB, 0x41)
+
+/*
+ * Flags bits definitions for flags in struct cper_record_header
+ * If set, the error has been recovered
+ */
+#define CPER_HW_ERROR_FLAGS_RECOVERED          0x1
+/* If set, the error is for previous boot */
+#define CPER_HW_ERROR_FLAGS_PREVERR            0x2
+/* If set, the error is injected for testing */
+#define CPER_HW_ERROR_FLAGS_SIMULATED          0x4
+
+/*
+ * CPER section header revision, used in revision field in struct
+ * cper_section_descriptor
+ */
+#define CPER_SEC_REV                           0x0100
+
+/*
+ * Validation bits definition for validation_bits in struct
+ * cper_section_descriptor. If set, corresponding fields in struct
+ * cper_section_descriptor contain valid information.
+ *
+ * corresponds fru_id
+ */
+#define CPER_SEC_VALID_FRU_ID                  0x1
+/* corresponds fru_text */
+#define CPER_SEC_VALID_FRU_TEXT                        0x2
+
+/*
+ * Flags bits definitions for flags in struct cper_section_descriptor
+ *
+ * If set, the section is associated with the error condition
+ * directly, and should be focused on
+ */
+#define CPER_SEC_PRIMARY                       0x0001
 
 /*
  * All tables and structs must be byte-packed to match CPER

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] mce: Provide ERST interface, Xen patchbot-unstable <=