WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [pvops-dom0] Adding MCA logging support in pv_ops

To: Jan Beulich <JBeulich@xxxxxxxxxx>
Subject: Re: [Xen-devel] [pvops-dom0] Adding MCA logging support in pv_ops
From: "Ke, Liping" <liping.ke@xxxxxxxxx>
Date: Tue, 18 Aug 2009 17:25:02 +0800
Accept-language: en-US
Acceptlanguage: en-US
Cc: "Kleen, Andi" <andi.kleen@xxxxxxxxx>, Christoph Egger <Christoph.Egger@xxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>, Keir Fraser <keir.fraser@xxxxxxxxxxxxx>, Jeremy Fitzhardinge <jeremy@xxxxxxxx>
Delivery-date: Tue, 18 Aug 2009 02:28:12 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <4A8A8A290200007800010437@xxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <E2263E4A5B2284449EEBD0AAB751098402CDF6599F@xxxxxxxxxxxxxxxxxxxxxxxxxxxx> <4A7174D3020000780000D350@xxxxxxxxxxxxxxxxxx> <E2263E4A5B2284449EEBD0AAB751098402CDF65BEB@xxxxxxxxxxxxxxxxxxxxxxxxxxxx> <E2263E4A5B2284449EEBD0AAB751098402CDF65C35@xxxxxxxxxxxxxxxxxxxxxxxxxxxx> <4A71D4F8.70804@xxxxxxxx> <E2263E4A5B2284449EEBD0AAB751098402CE0B5780@xxxxxxxxxxxxxxxxxxxxxxxxxxxx> <4A7A0DF7.5040706@xxxxxxxx> <E2263E4A5B2284449EEBD0AAB751098402CE0B6567@xxxxxxxxxxxxxxxxxxxxxxxxxxxx> <4A7C6D67.7090202@xxxxxxxx> <E2263E4A5B2284449EEBD0AAB751098402CE11BCA8@xxxxxxxxxxxxxxxxxxxxxxxxxxxx> <4A8A8A290200007800010437@xxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: Acof4pAXmC58QjQxQB6RYarw66iYbQAAaoCQ
Thread-topic: [Xen-devel] [pvops-dom0] Adding MCA logging support in pv_ops
Hi, Jan
Yes, the added physinfo fetch op should be placed before
registering the vIRQ handler. Thanks for pointing out.
Below is the updated patch.
Regards,
Criping

Jan Beulich wrote:
>>>> "Ke, Liping" <liping.ke@xxxxxxxxx> 10.08.09 05:08 >>>
>> From e0815ca1fe7fbcc2f1de23608148e8d612974f2b Mon Sep 17 00:00:00
>> 2001 From: Liping Ke <liping.ke@xxxxxxxxx>
>> Date: Mon, 10 Aug 2009 11:04:21 +0800
>> Subject: [PATCH] Add MCA Logging support in pv-ops
>>
>> This patch is backport from previous DOM0(2.6.18) cs: 75e5bfa7fbdc
>>
>> When an MCE/CMCI error happens (or by polling), the related error
>> information will be sent to privileged pv-ops domain by XEN. This
>> patch will help to fetch the xen-logged information by hypercall
>> and then convert XEN-format log into Linux format MCELOG. It makes
>> using current available mcelog tools for native Linux possible.
>>
>> With this patch, after mce/cmci error log information is sent to
>> pv-ops guest, Running mcelog tools in the guest, you will get same
>> detailed decoded mce information as in Native Linux.
>>
>> Signed-off-by: Liping Ke <liping.ke@xxxxxxxxx>
>> Signed-off-by: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
>
> The order of operations in bind_virq_for_mce() needs to change: You
> must not register the vIRQ handler if any of the memory allocations
> failed, otherwise the interrupt handler will de-reference NULL.
>
> Jan

>From c2a612682cd1817c4852492ec077a547cadffbf7 Mon Sep 17 00:00:00 2001
From: Liping Ke <liping.ke@xxxxxxxxx>
Date: Tue, 18 Aug 2009 17:11:36 +0800
Subject: [PATCH] Add MCA Logging support in pv-ops

This patch is backport from previous DOM0(2.6.18) cs: 75e5bfa7fbdc

When an MCE/CMCI error happens (or by polling), the related error
information will be sent to privileged pv-ops domain by XEN. This
patch will help to fetch the xen-logged information by hypercall
and then convert XEN-format log into Linux format MCELOG. It makes
using current available mcelog tools for native Linux possible.

With this patch, after mce/cmci error log information is sent to
pv-ops guest, Running mcelog tools in the guest, you will get same
detailed decoded mce information as in Native Linux.

Signed-off-by: Liping Ke <liping.ke@xxxxxxxxx>
Signed-off-by: Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
---
 arch/x86/include/asm/xen/hypercall.h |    8 +
 arch/x86/xen/enlighten.c             |    6 +
 drivers/xen/Kconfig                  |    6 +-
 drivers/xen/Makefile                 |    3 +-
 drivers/xen/mce.c                    |  213 +++++++++++++++++
 include/xen/interface/xen-mca.h      |  429 ++++++++++++++++++++++++++++++++++
 6 files changed, 663 insertions(+), 2 deletions(-)
 create mode 100644 drivers/xen/mce.c
 create mode 100644 include/xen/interface/xen-mca.h

diff --git a/arch/x86/include/asm/xen/hypercall.h 
b/arch/x86/include/asm/xen/hypercall.h
index 3da450b..42a5601 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -46,6 +46,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/platform.h>
+#include <xen/interface/xen-mca.h>

 /*
  * The hypercall asms have to meet several constraints:
@@ -307,6 +308,13 @@ HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
 }

 static inline int
+HYPERVISOR_mca(struct xen_mc *mc_op)
+{
+       mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+       return _hypercall1(int, mca, mc_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
        return _hypercall2(int, set_debugreg, reg, value);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 78970be..634543e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -224,6 +224,12 @@ static __init void xen_init_cpuid_mask(void)
                  (1 << X86_FEATURE_PAT)  |  /* disable PAT */
                  (1 << X86_FEATURE_ACC));   /* thermal monitoring */

+#ifdef CONFIG_XEN_MCE
+       cpuid_leaf1_edx_mask =
+               ~((1 << X86_FEATURE_PAT)  |  /* disable PAT */
+                 (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+#endif
+
        cpuid_leaf81_edx_mask = ~(1 << (X86_FEATURE_GBPAGES % 32));

        if (!xen_initial_domain())
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 3b1c421..c627fee 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -88,6 +88,10 @@ config XEN_SYS_HYPERVISOR
 config XEN_XENBUS_FRONTEND
        tristate

+config XEN_MCE
+       def_bool y
+       depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
+
 config XEN_S3
        def_bool y
-       depends on XEN_DOM0 && ACPI
\ No newline at end of file
+       depends on XEN_DOM0 && ACPI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 386c775..cd7b185 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND)        += blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
 obj-$(CONFIG_XENFS)                    += xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)       += sys-hypervisor.o
-obj-$(CONFIG_XEN_S3)                   += acpi.o
\ No newline at end of file
+obj-$(CONFIG_XEN_MCE)  += mce.o
+obj-$(CONFIG_XEN_S3)                   += acpi.o
diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
new file mode 100644
index 0000000..b354dc8
--- /dev/null
+++ b/drivers/xen/mce.c
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * mce.c
+ * Add Machine Check event Logging support in DOM0
+ *
+ * Driver for receiving and logging machine check event
+ *
+ * Copyright (c) 2008, 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypercall.h>
+#include <asm/mce.h>
+
+static mc_info_t *g_mi;
+static mcinfo_logical_cpu_t *g_physinfo;
+static uint32_t ncpus;
+
+static int convert_log(struct mc_info *mi)
+{
+       struct mcinfo_common *mic = NULL;
+       struct mcinfo_global *mc_global;
+       struct mcinfo_bank *mc_bank;
+       struct mce m;
+       int i, found = 0;
+
+       x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
+       WARN_ON(!mic);
+
+       mce_setup(&m);
+       mc_global = (struct mcinfo_global *)mic;
+       m.mcgstatus = mc_global->mc_gstatus;
+       m.apicid = mc_global->mc_apicid;
+       for (i = 0; i < ncpus; i++) {
+               if (g_physinfo[i].mc_apicid == m.apicid) {
+                       found = 1;
+                       break;
+               }
+       }
+       WARN_ON(!found);
+
+       m.socketid = g_physinfo[i].mc_chipid;
+       m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+       m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+       x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
+       do {
+               if (mic == NULL || mic->size == 0)
+                       break;
+               if (mic->type == MC_TYPE_BANK) {
+                       mc_bank = (struct mcinfo_bank *)mic;
+                       m.misc = mc_bank->mc_misc;
+                       m.status = mc_bank->mc_status;
+                       m.addr = mc_bank->mc_addr;
+                       m.tsc = mc_bank->mc_tsc;
+                       m.bank = mc_bank->mc_bank;
+                       /*log this record*/
+                       mce_log(&m);
+               }
+               mic = x86_mcinfo_next(mic);
+       } while (1);
+
+       return 0;
+}
+
+/*pv_ops domain mce virq handler, logging physical mce error info*/
+static irqreturn_t mce_dom_interrupt(int irq, void *dev_id)
+{
+       xen_mc_t mc_op;
+       int result = 0;
+
+       mc_op.cmd = XEN_MC_fetch;
+       mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+       set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+urgent:
+       mc_op.u.mc_fetch.flags = XEN_MC_URGENT;
+       result = HYPERVISOR_mca(&mc_op);
+       if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+                       mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+               goto nonurgent;
+       else {
+               result = convert_log(g_mi);
+               if (result)
+                       goto end;
+               /* After fetching the error event log entry from DOM0,
+                * we need to dec the refcnt and release the entry.
+                * The entry is reserved and inc refcnt when filling
+                * the error log entry.
+                */
+               mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
+               result = HYPERVISOR_mca(&mc_op);
+               goto urgent;
+       }
+nonurgent:
+       mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT;
+       result = HYPERVISOR_mca(&mc_op);
+       if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+                       mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+               goto end;
+       else {
+               result = convert_log(g_mi);
+               if (result)
+                       goto end;
+               /* After fetching the error event log entry from DOM0,
+                * we need to dec the refcnt and release the entry. The
+                * entry is reserved and inc refcnt when filling the
+                * error log entry.
+                */
+               mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK;
+               result = HYPERVISOR_mca(&mc_op);
+               goto nonurgent;
+       }
+end:
+       return IRQ_HANDLED;
+}
+
+static int bind_virq_for_mce(void)
+{
+       int ret;
+       xen_mc_t mc_op;
+
+       g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
+
+       if (!g_mi)
+               return -ENOMEM;
+
+       /* Fetch physical CPU Numbers */
+       mc_op.cmd = XEN_MC_physcpuinfo;
+       mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+       set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+       ret = HYPERVISOR_mca(&mc_op);
+       if (ret) {
+               printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPU 
numbers\n");
+               kfree(g_mi);
+               return ret;
+       }
+
+       /* Fetch each CPU Physical Info for later reference*/
+       ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+       g_physinfo = kmalloc(sizeof(struct mcinfo_logical_cpu)*ncpus,
+                                       GFP_KERNEL);
+       if (!g_physinfo) {
+               kfree(g_mi);
+               return -ENOMEM;
+       }
+       set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+       ret = HYPERVISOR_mca(&mc_op);
+       if (ret) {
+               printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPUs 
info\n");
+               kfree(g_mi);
+               kfree(g_physinfo);
+               return ret;
+       }
+
+       ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+               mce_dom_interrupt, 0, "mce", NULL);
+
+       if (ret < 0) {
+               printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+static int __init mcelog_init(void)
+{
+       /* Only DOM0 is responsible for MCE logging */
+       if (xen_initial_domain())
+               return bind_virq_for_mce();
+
+       return 0;
+}
+
+
+static void __exit mcelog_cleanup(void)
+{
+       kfree(g_mi);
+       kfree(g_physinfo);
+}
+module_init(mcelog_init);
+module_exit(mcelog_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 0000000..f31fdab
--- /dev/null
+++ b/include/xen/interface/xen-mca.h
@@ -0,0 +1,429 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ *
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@xxxxxxx>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; out interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent error log entry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previosly-fetched error log entry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall an other
+ *  (most likely) correctable error happened. The fetched data,
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecure:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+       uint16_t type;      /* structure type */
+       uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE    (1 << 2)
+#define MC_FLAG_POLLED         (1 << 3)
+#define MC_FLAG_RESET          (1 << 4)
+#define MC_FLAG_CMCI           (1 << 5)
+#define MC_FLAG_MCE            (1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+       struct mcinfo_common common;
+
+       /* running domain at the time in error (most likely
+        * the impacted one) */
+       uint16_t mc_domid;
+       uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+       uint32_t mc_socketid; /* physical socket of the physical core */
+       uint16_t mc_coreid; /* physical impacted core */
+       uint16_t mc_core_threadid; /* core thread of physical core */
+       uint32_t mc_apicid;
+       uint32_t mc_flags;
+       uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+       struct mcinfo_common common;
+
+       uint16_t mc_bank; /* bank nr */
+       uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on
+                       * privileged pv-ops dom and if mc_addr is valid.
+                       * Never valid on DomU. */
+       uint64_t mc_status; /* bank status */
+       uint64_t mc_addr;   /* bank address, only valid
+                                        * if addr bit is set in mc_status */
+       uint64_t mc_misc;
+       uint64_t mc_ctrl2;
+       uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+       uint64_t reg;   /* MSR */
+       uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */
+struct mcinfo_extended {
+       struct mcinfo_common common;
+
+       /* You can fill up to five registers.
+        * If you need more, then use this structure
+        * multiple times. */
+
+       uint32_t mc_msrs; /* Number of msr with valid values. */
+       /*
+        * Currently Intel extended MSR (32/64) include all gp registers
+        * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+        * useful at present. So expand this array to 16/32 to leave room.
+        */
+       struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some case */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* Below interface used between XEN/DOM0 for passing XEN's recovery action
+ * information to DOM0.
+ * usage Senario: After offlining broken page, XEN might pass its page offline
+ * recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easy broken page earlier when doing next reboot.
+*/
+struct page_offline_action {
+       /* Params for passing the offlined page number to DOM0 */
+       uint64_t mfn;
+       uint64_t status;
+};
+
+struct cpu_offline_action {
+       /* Params for passing the identity of the offlined CPU to DOM0 */
+       uint32_t mc_socketid;
+       uint16_t mc_coreid;
+       uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery {
+       struct mcinfo_common common;
+       uint16_t mc_bank; /* bank nr */
+       /* Recovery Action Flags defined above such as REC_ACTION_DONE */
+       uint8_t action_flags;
+       /* Recovery Action types defined above such as MC_ACTION_PAGE_OFFLINE */
+       uint8_t action_types;
+       /* In future if more than one recovery action permitted per error bank,
+        * a mcinfo_recovery data array will be returned
+        */
+       union {
+               struct page_offline_action page_retire;
+               struct cpu_offline_action cpu_offline;
+               uint8_t pad[MAX_UNION_SIZE];
+       } action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE   1024
+#define MCINFO_MAXSIZE         768
+
+struct mc_info {
+       /* Number of mcinfo_* entries in mi_data */
+       uint32_t mi_nentries;
+       uint32_t _pad0;
+       uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(mc_info);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS       7       /* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX        0       /* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX        1       /* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM     2       /* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX  3       /* Linux-defined */
+#define MC_CAPS_STD_ECX        4       /* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA    5       /* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX        6       /* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+       uint32_t mc_cpunr;
+       uint32_t mc_chipid;
+       uint16_t mc_coreid;
+       uint16_t mc_threadid;
+       uint32_t mc_apicid;
+       uint32_t mc_clusterid;
+       uint32_t mc_ncores;
+       uint32_t mc_ncores_active;
+       uint32_t mc_nthreads;
+       int32_t mc_cpuid_level;
+       uint32_t mc_family;
+       uint32_t mc_vendor;
+       uint32_t mc_model;
+       uint32_t mc_step;
+       char mc_vendorid[16];
+       char mc_brandid[64];
+       uint32_t mc_cpu_caps[MC_NCAPS];
+       uint32_t mc_cache_size;
+       uint32_t mc_cache_alignment;
+       int32_t mc_nmsrvals;
+       struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu mcinfo_logical_cpu_t;
+DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
+
+
+/*
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    ((_mi)->mi_nentries)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+
+static inline void x86_mcinfo_lookup
+       (struct mcinfo_common **ret, struct mc_info *mi, uint16_t type)
+{
+       uint32_t found = 0, i;
+       struct mcinfo_common *mic;
+
+       *ret = NULL;
+       if (!mi)
+               return;
+       mic = x86_mcinfo_first(mi);
+
+       for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
+               if (mic->type == type) {
+                       found = 1;
+                       break;
+               }
+               mic = x86_mcinfo_next(mic);
+       }
+
+       *ret = found ? mic : NULL;
+}
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables.
+        * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+        * XEN_MC_ACK if ack'king an earlier fetch
+        * OUT: XEN_MC_OK, XEN_MC_FETCHAILED,
+        * XEN_MC_NODATA, XEN_MC_NOMATCH
+       */
+       uint32_t flags;
+       uint32_t _pad0;
+       /* OUT: id for ack, IN: id we are ack'ing */
+       uint64_t fetch_id;
+
+       /* OUT variables. */
+       GUEST_HANDLE(mc_info) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+       /* IN variables. */
+       uint16_t mc_domid;/* The unprivileged domain to notify. */
+       uint16_t mc_vcpuid;/* The vcpu in mc_domid to notify.
+                       * Usually echo'd value from the fetch hypercall. */
+
+       /* IN/OUT variables. */
+       uint32_t flags;
+
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+       /* IN/OUT */
+       uint32_t ncpus;
+       uint32_t _pad0;
+       /* OUT */
+       GUEST_HANDLE(mcinfo_logical_cpu) info;
+};
+
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+       /* IN */
+       uint32_t mcinj_cpunr;/* target processor id */
+       uint32_t mcinj_flags;/* see MC_MSRINJ_F_* below */
+       uint32_t mcinj_count;/* 0 .. count-1 in array are valid */
+       uint32_t _pad0;
+       struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+       unsigned int mceinj_cpunr;      /* target processor id */
+};
+
+struct xen_mc {
+       uint32_t cmd;
+       uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+       union {
+               struct xen_mc_fetch        mc_fetch;
+               struct xen_mc_notifydomain mc_notifydomain;
+               struct xen_mc_physcpuinfo  mc_physcpuinfo;
+               struct xen_mc_msrinject    mc_msrinject;
+               struct xen_mc_mceinject    mc_mceinject;
+       } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
--
1.5.1

Attachment: 0001-Add-MCA-Logging-support-in-pv-ops.patch
Description: 0001-Add-MCA-Logging-support-in-pv-ops.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel