WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [linux-2.6.18-xen] x86: add MCA logging support in DOM0

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [linux-2.6.18-xen] x86: add MCA logging support in DOM0
From: "Xen patchbot-linux-2.6.18-xen" <patchbot-linux-2.6.18-xen@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 16 Jun 2009 09:05:08 -0700
Delivery-date: Tue, 16 Jun 2009 09:05:52 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1245149935 -3600
# Node ID 75e5bfa7fbdc175b1e59f27563545421cbe96cd8
# Parent  9242c5b965c181a4eb095570c51b1cc05bd58a33
x86: add MCA logging support in DOM0

When an MCE/CMCI error occurs (or is detected by polling), Xen sends the
related error information to DOM0. This patch fetches the Xen-logged
information via hypercall and converts the Xen-format log into the
Linux MCELOG format, so that the existing mcelog tools for native Linux
can be used in DOM0.

With this patch, once MCE/CMCI error log information has been sent to
DOM0, running the mcelog tools in DOM0 yields the same detailed, decoded
MCE information as on native Linux.

Signed-Off-By: Liping Ke <liping.ke@xxxxxxxxx>
Signed-Off-By: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
Acked-By: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 arch/x86_64/Kconfig                         |   12 +-
 arch/x86_64/kernel/Makefile                 |    1 
 arch/x86_64/kernel/entry-xen.S              |    9 -
 arch/x86_64/kernel/mce.c                    |   24 ++++-
 arch/x86_64/kernel/mce_dom0.c               |  131 ++++++++++++++++++++++++++++
 include/asm-x86_64/mach-xen/asm/hypercall.h |    9 +
 6 files changed, 170 insertions(+), 16 deletions(-)

diff -r 9242c5b965c1 -r 75e5bfa7fbdc arch/x86_64/Kconfig
--- a/arch/x86_64/Kconfig       Tue Jun 16 11:09:39 2009 +0100
+++ b/arch/x86_64/Kconfig       Tue Jun 16 11:58:55 2009 +0100
@@ -471,8 +471,8 @@ config SWIOTLB
        bool
 
 config X86_MCE
-       bool "Machine check support" if EMBEDDED
-       depends on !X86_64_XEN
+       bool "Machine check support"
+       depends on (!XEN_UNPRIVILEGED_GUEST)
        default y
        help
           Include a machine check error handler to report hardware errors.
@@ -482,7 +482,7 @@ config X86_MCE
 
 config X86_MCE_INTEL
        bool "Intel MCE features"
-       depends on X86_MCE && X86_LOCAL_APIC
+       depends on X86_MCE && X86_LOCAL_APIC && !X86_64_XEN
        default y
        help
           Additional support for intel specific MCE features such as
@@ -490,11 +490,15 @@ config X86_MCE_INTEL
 
 config X86_MCE_AMD
        bool "AMD MCE features"
-       depends on X86_MCE && X86_LOCAL_APIC
+       depends on X86_MCE && X86_LOCAL_APIC && !X86_64_XEN
        default y
        help
           Additional support for AMD specific MCE features such as
           the DRAM Error Threshold.
+
+config X86_XEN_MCE
+       def_bool y
+       depends on X86_64_XEN && X86_MCE
 
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
diff -r 9242c5b965c1 -r 75e5bfa7fbdc arch/x86_64/kernel/Makefile
--- a/arch/x86_64/kernel/Makefile       Tue Jun 16 11:09:39 2009 +0100
+++ b/arch/x86_64/kernel/Makefile       Tue Jun 16 11:58:55 2009 +0100
@@ -13,6 +13,7 @@ obj-$(CONFIG_STACKTRACE)      += stacktrace.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE_INTEL)    += mce_intel.o
+obj-$(CONFIG_X86_XEN_MCE)      += mce_dom0.o
 obj-$(CONFIG_X86_MCE_AMD)      += mce_amd.o
 obj-$(CONFIG_MTRR)             += ../../i386/kernel/cpu/mtrr/
 obj-$(CONFIG_ACPI)             += acpi/
diff -r 9242c5b965c1 -r 75e5bfa7fbdc arch/x86_64/kernel/entry-xen.S
--- a/arch/x86_64/kernel/entry-xen.S    Tue Jun 16 11:09:39 2009 +0100
+++ b/arch/x86_64/kernel/entry-xen.S    Tue Jun 16 11:58:55 2009 +0100
@@ -1258,13 +1258,8 @@ END(spurious_interrupt_bug)
 
 #ifdef CONFIG_X86_MCE
        /* runs on exception stack */
-ENTRY(machine_check)
-       INTR_FRAME
-       pushq $0
-       CFI_ADJUST_CFA_OFFSET 8 
-       paranoidentry do_machine_check
-       jmp paranoid_exit1
-       CFI_ENDPROC
+KPROBE_ENTRY(machine_check)
+       zeroentry do_machine_check
 END(machine_check)
 #endif
 
diff -r 9242c5b965c1 -r 75e5bfa7fbdc arch/x86_64/kernel/mce.c
--- a/arch/x86_64/kernel/mce.c  Tue Jun 16 11:09:39 2009 +0100
+++ b/arch/x86_64/kernel/mce.c  Tue Jun 16 11:58:55 2009 +0100
@@ -276,9 +276,16 @@ void do_machine_check(struct pt_regs * r
 
 /*
  * Periodic polling timer for "silent" machine check errors.
- */
-
+ * Polling is disabled in DOM0 because all CMCI/polling
+ * handling is done by Xen for Intel CPUs.
+ */
+
+#if defined (CONFIG_X86_XEN_MCE)
+static int check_interval = 0; /* disable polling */
+#else
 static int check_interval = 5 * 60; /* 5 minutes */
+#endif
+
 static void mcheck_timer(void *data);
 static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);
 
@@ -367,6 +374,7 @@ static void __cpuinit mce_cpu_quirks(str
 
 static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
 {
+#ifndef CONFIG_X86_64_XEN
        switch (c->x86_vendor) {
        case X86_VENDOR_INTEL:
                mce_intel_feature_init(c);
@@ -377,8 +385,8 @@ static void __cpuinit mce_cpu_features(s
        default:
                break;
        }
-}
-
+#endif
+}
 /* 
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off. 
@@ -649,6 +657,7 @@ static struct notifier_block mce_cpu_not
 };
 #endif
 
+extern void bind_virq_for_mce(void);
 static __init int mce_init_device(void)
 {
        int err;
@@ -664,6 +673,13 @@ static __init int mce_init_device(void)
 
        register_hotcpu_notifier(&mce_cpu_notifier);
        misc_register(&mce_log_device);
+
+    /*Register vIRQ handler for MCE LOG processing*/
+#if defined(CONFIG_X86_XEN_MCE)
+    printk(KERN_DEBUG "MCE: bind virq for DOM0 Logging\n");
+    bind_virq_for_mce();
+#endif
+
        return err;
 }
 
diff -r 9242c5b965c1 -r 75e5bfa7fbdc arch/x86_64/kernel/mce_dom0.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/x86_64/kernel/mce_dom0.c     Tue Jun 16 11:58:55 2009 +0100
@@ -0,0 +1,131 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <xen/interface/xen.h>
+#include <xen/evtchn.h>
+#include <xen/interface/vcpu.h>
+#include <asm/hypercall.h>
+#include <asm/mce.h>
+
+/*
+ * Convert one Xen mc_info record into Linux struct mce entries and pass
+ * each of them to mce_log().
+ *
+ * The MC_TYPE_GLOBAL entry supplies mcgstatus and the CPU number, which
+ * are reused for every bank of the record; one struct mce is logged per
+ * MC_TYPE_BANK entry encountered while walking the record.
+ *
+ * Returns 0 on success, or -1 if the record has no MC_TYPE_GLOBAL entry.
+ */
+static int convert_log(struct mc_info *mi)
+{
+       struct mcinfo_common *mic = NULL;
+       struct mcinfo_global *mc_global;
+       struct mcinfo_bank *mc_bank;
+       struct mce m;
+
+       x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
+       if (mic == NULL) {
+               printk(KERN_ERR "DOM0_MCE_LOG: global data is NULL\n");
+               return -1;
+       }
+
+       /*
+        * Only some members are assigned below; clear the rest so that
+        * mce_log() is never handed uninitialised stack contents.
+        */
+       memset(&m, 0, sizeof(m));
+
+       mc_global = (struct mcinfo_global *)mic;
+       m.mcgstatus = mc_global->mc_gstatus;
+       m.cpu = mc_global->mc_coreid; /* for test */
+
+       /* Start at the first bank entry and walk until the list ends. */
+       x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK);
+       for (; mic != NULL && mic->size != 0; mic = x86_mcinfo_next(mic)) {
+               if (mic->type != MC_TYPE_BANK)
+                       continue;
+
+               mc_bank = (struct mcinfo_bank *)mic;
+               m.misc = mc_bank->mc_misc;
+               m.status = mc_bank->mc_status;
+               m.addr = mc_bank->mc_addr;
+               m.tsc = mc_bank->mc_tsc;
+               m.res1 = mc_bank->mc_ctrl2;
+               m.bank = mc_bank->mc_bank;
+
+               /* Argument order matches the format string: CPU, bank. */
+               printk(KERN_DEBUG "[CPU%d, BANK%d, addr %llx, state %llx]\n",
+                      m.cpu, m.bank, m.addr, m.status);
+
+               /* log this record */
+               mce_log(&m);
+       }
+
+       return 0;
+}
+
+static struct mc_info *g_mi;
+
+/*
+ * Fetch, log and acknowledge every pending record of one telemetry class.
+ *
+ * @mc_op: fetch request already set up with cmd, interface version and
+ *         the g_mi data handle; its flags member is rewritten for every
+ *         hypercall.
+ * @queue: XEN_MC_URGENT or XEN_MC_NONURGENT.
+ * @name:  class name used in the log messages.
+ *
+ * Returns 0 once the hypervisor reports no more data (hypercall error,
+ * XEN_MC_NODATA or XEN_MC_FETCHFAILED), non-zero if converting or
+ * acknowledging a record failed.
+ */
+static int mce_dom0_drain(xen_mc_t *mc_op, unsigned int queue,
+                          const char *name)
+{
+       int result;
+
+       for (;;) {
+               mc_op->u.mc_fetch.flags = queue;
+               result = HYPERVISOR_mca(mc_op);
+               if (result ||
+                   (mc_op->u.mc_fetch.flags &
+                    (XEN_MC_NODATA | XEN_MC_FETCHFAILED))) {
+                       printk(KERN_DEBUG "MCE_DOM0_LOG: No more %s data\n",
+                              name);
+                       return 0;
+               }
+
+               result = convert_log(g_mi);
+               if (result) {
+                       printk(KERN_ERR
+                              "MCE_DOM0_LOG: Log conversion failed\n");
+                       return result;
+               }
+
+               /*
+                * After fetching the telemetry entry, its refcnt has to be
+                * dropped and the entry released: it was reserved, with
+                * the refcnt raised, when it was filled in.
+                */
+               mc_op->u.mc_fetch.flags = queue | XEN_MC_ACK;
+               result = HYPERVISOR_mca(mc_op);
+               if (result) {
+                       /*
+                        * Stop here: an entry that was not released would
+                        * presumably be returned by the next fetch again,
+                        * and this loop runs in interrupt context.
+                        */
+                       printk(KERN_ERR
+                              "MCE_DOM0_LOG: Failed to ack %s data\n", name);
+                       return result;
+               }
+       }
+}
+
+/*
+ * DOM0 MCE vIRQ handler: logs the physical MCE error information.
+ *
+ * Drains the urgent records first, then the nonurgent ones. Processing
+ * stops at the first record that cannot be converted or acknowledged.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id,
+                                      struct pt_regs *regs)
+{
+       xen_mc_t mc_op;
+
+       printk(KERN_DEBUG "MCE_DOM0_LOG: enter dom0 mce vIRQ handler\n");
+
+       /* Without a fetch buffer there is nowhere to copy records to. */
+       if (g_mi == NULL) {
+               printk(KERN_ERR "MCE_DOM0_LOG: no buffer for mc_info\n");
+               return IRQ_HANDLED;
+       }
+
+       mc_op.cmd = XEN_MC_fetch;
+       mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+       set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+
+       if (mce_dom0_drain(&mc_op, XEN_MC_URGENT, "urgent") == 0)
+               mce_dom0_drain(&mc_op, XEN_MC_NONURGENT, "nonurgent");
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Set up DOM0 MCE logging: allocate the mc_info fetch buffer (g_mi) and
+ * bind mce_dom0_interrupt() to VIRQ_MCA.
+ *
+ * The buffer is allocated before the handler is bound because the handler
+ * passes g_mi to the hypervisor and to convert_log() as soon as it runs.
+ * On any failure an error is logged, no handler stays bound with a
+ * missing buffer, and g_mi is left NULL.
+ */
+void bind_virq_for_mce(void)
+{
+       int ret;
+
+       g_mi = kmalloc(sizeof(*g_mi), GFP_KERNEL);
+       if (g_mi == NULL) {
+               printk(KERN_ERR
+                      "MCE_DOM0_LOG: failed to allocate mc_info buffer\n");
+               return;
+       }
+
+       ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+               mce_dom0_interrupt, 0, "mce", NULL);
+       if (ret < 0) {
+               printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+               /* No handler will ever use the buffer; give it back. */
+               kfree(g_mi);
+               g_mi = NULL;
+       }
+}
+
diff -r 9242c5b965c1 -r 75e5bfa7fbdc include/asm-x86_64/mach-xen/asm/hypercall.h
--- a/include/asm-x86_64/mach-xen/asm/hypercall.h       Tue Jun 16 11:09:39 2009 +0100
+++ b/include/asm-x86_64/mach-xen/asm/hypercall.h       Tue Jun 16 11:58:55 2009 +0100
@@ -39,6 +39,7 @@
 
 #include <linux/string.h> /* memcpy() */
 #include <linux/stringify.h>
+#include <xen/interface/arch-x86/xen-mca.h>
 
 #ifndef __HYPERVISOR_H__
 # error "please don't include this file directly"
@@ -215,7 +216,13 @@ HYPERVISOR_platform_op(
        platform_op->interface_version = XENPF_INTERFACE_VERSION;
        return _hypercall1(int, platform_op, platform_op);
 }
-
+/*
+ * Issue the "mca" hypercall with mc_op as its single argument.
+ *
+ * mc_op->interface_version is overwritten with XEN_MCA_INTERFACE_VERSION
+ * before the call, so callers do not have to set it themselves. The int
+ * result of the hypercall is returned and must be checked by the caller.
+ */
+static inline int __must_check
+HYPERVISOR_mca(
+       struct xen_mc *mc_op)
+{
+       mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+       return _hypercall1(int, mca, mc_op);
+}
 static inline int __must_check
 HYPERVISOR_set_debugreg(
        unsigned int reg, unsigned long value)

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [linux-2.6.18-xen] x86: add MCA logging support in DOM0, Xen patchbot-linux-2.6.18-xen <=