WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] Clean up MCA MSR virtualization and vMCE

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Clean up MCA MSR virtualization and vMCE injection
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 19 Apr 2010 18:50:13 -0700
Delivery-date: Mon, 19 Apr 2010 18:51:15 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1271663693 -3600
# Node ID 6233eb0f29ba6b89b4dd14fe3d385e85924cd9f1
# Parent  7ee8bb40200a1922d15036fd8788a364f8aaae8f
Clean up MCA MSR virtualization and vMCE injection

Move all virtual MCE related work into a separate file.
It also tries to do some clean-up of the vMCE code, including:
a) rename some functions, e.g. mce_init_msr/mce_rdmsr become
   vmce_init_msr/vmce_rdmsr, to make the naming more straightforward,
b) make vmca_msrs a pointer in arch_domain,
    to decrease arch_domain's size,
c) extract per-bank MCA MSR access into separate functions
    (bank_mce_wrmsr/bank_mce_rdmsr) to make it a bit cleaner,
d) add a new file, xen/include/asm-x86/mce.h, for the vMCE-related
    declarations.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>
---
 xen/arch/x86/cpu/mcheck/Makefile    |    1 
 xen/arch/x86/cpu/mcheck/mce.c       |  238 ------------------
 xen/arch/x86/cpu/mcheck/mce.h       |   28 ++
 xen/arch/x86/cpu/mcheck/mce_intel.c |  123 ---------
 xen/arch/x86/cpu/mcheck/vmce.c      |  451 ++++++++++++++++++++++++++++++++++++
 xen/arch/x86/domain.c               |    3 
 xen/arch/x86/hvm/hvm.c              |    5 
 xen/arch/x86/traps.c                |   15 -
 xen/common/domain.c                 |    2 
 xen/include/asm-x86/domain.h        |   29 --
 xen/include/asm-x86/mce.h           |   36 ++
 xen/include/asm-x86/traps.h         |    5 
 12 files changed, 540 insertions(+), 396 deletions(-)

diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/Makefile
--- a/xen/arch/x86/cpu/mcheck/Makefile  Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/Makefile  Mon Apr 19 08:54:53 2010 +0100
@@ -7,3 +7,4 @@ obj-y += mce_intel.o
 obj-y += mce_intel.o
 obj-y += mce_amd_quirks.o
 obj-y += non-fatal.o
+obj-y += vmce.o
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Mon Apr 19 08:54:53 2010 +0100
@@ -31,11 +31,11 @@ unsigned int nr_mce_banks;
 unsigned int nr_mce_banks;
 
 int mce_broadcast = 0;
-static uint64_t g_mcg_cap;
+uint64_t g_mcg_cap;
 
 /* Real value in physical CTL MSR */
-static uint64_t h_mcg_ctl = 0UL;
-static uint64_t *h_mci_ctrl;
+uint64_t h_mcg_ctl = 0UL;
+uint64_t *h_mci_ctrl;
 int firstbank;
 
 static void intpose_init(void);
@@ -752,234 +752,6 @@ u64 mce_cap_init(void)
     return value;
 }
 
-/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */
-void mce_init_msr(struct domain *d)
-{
-    d->arch.vmca_msrs.mcg_status = 0x0;
-    d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
-    d->arch.vmca_msrs.mcg_ctl = ~(uint64_t)0x0;
-    d->arch.vmca_msrs.nr_injection = 0;
-    memset(d->arch.vmca_msrs.mci_ctl, ~0,
-           sizeof(d->arch.vmca_msrs.mci_ctl));
-    INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
-    spin_lock_init(&d->arch.vmca_msrs.lock);
-}
-
-int mce_rdmsr(uint32_t msr, uint64_t *val)
-{
-    struct domain *d = current->domain;
-    int ret = 1;
-    unsigned int bank;
-    struct bank_entry *entry = NULL;
-
-    *val = 0;
-    spin_lock(&d->arch.vmca_msrs.lock);
-
-    switch ( msr )
-    {
-    case MSR_IA32_MCG_STATUS:
-        *val = d->arch.vmca_msrs.mcg_status;
-        if (*val)
-            mce_printk(MCE_VERBOSE,
-                "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val);
-        break;
-    case MSR_IA32_MCG_CAP:
-        *val = d->arch.vmca_msrs.mcg_cap;
-        mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n",
-            *val);
-        break;
-    case MSR_IA32_MCG_CTL:
-        /* Always 0 if no CTL support */
-        *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl;
-        mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n",
-            *val);
-        break;
-    case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
-        bank = (msr - MSR_IA32_MC0_CTL) / 4;
-        if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
-        {
-            mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr);
-            ret = 0;
-            break;
-        }
-        switch (msr & (MSR_IA32_MC0_CTL | 3))
-        {
-        case MSR_IA32_MC0_CTL:
-            *val = d->arch.vmca_msrs.mci_ctl[bank] &
-                    (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL);
-            mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n",
-                     bank, *val);
-            break;
-        case MSR_IA32_MC0_STATUS:
-            /* Only error bank is read. Non-error banks simply return. */
-            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
-            {
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
-                                   struct bank_entry, list);
-                if (entry->bank == bank) {
-                    *val = entry->mci_status;
-                    mce_printk(MCE_VERBOSE,
-                             "MCE: rd MC%u_STATUS in vMCE# context "
-                             "value 0x%"PRIx64"\n", bank, *val);
-                }
-                else
-                    entry = NULL;
-            }
-            break;
-        case MSR_IA32_MC0_ADDR:
-            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
-            {
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
-                                   struct bank_entry, list);
-                if ( entry->bank == bank )
-                {
-                    *val = entry->mci_addr;
-                    mce_printk(MCE_VERBOSE,
-                             "MCE: rdmsr MC%u_ADDR in vMCE# context "
-                             "0x%"PRIx64"\n", bank, *val);
-                }
-            }
-            break;
-        case MSR_IA32_MC0_MISC:
-            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
-            {
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
-                                   struct bank_entry, list);
-                if ( entry->bank == bank )
-                {
-                    *val = entry->mci_misc;
-                    mce_printk(MCE_VERBOSE,
-                             "MCE: rd MC%u_MISC in vMCE# context "
-                             "0x%"PRIx64"\n", bank, *val);
-                }
-            }
-            break;
-        }
-        break;
-    default:
-        switch ( boot_cpu_data.x86_vendor )
-        {
-        case X86_VENDOR_INTEL:
-            ret = intel_mce_rdmsr(msr, val);
-            break;
-        default:
-            ret = 0;
-            break;
-        }
-        break;
-    }
-
-    spin_unlock(&d->arch.vmca_msrs.lock);
-    return ret;
-}
-
-int mce_wrmsr(u32 msr, u64 val)
-{
-    struct domain *d = current->domain;
-    struct bank_entry *entry = NULL;
-    unsigned int bank;
-    int ret = 1;
-
-    if ( !g_mcg_cap )
-        return 0;
-
-    spin_lock(&d->arch.vmca_msrs.lock);
-
-    switch ( msr )
-    {
-    case MSR_IA32_MCG_CTL:
-        d->arch.vmca_msrs.mcg_ctl = val;
-        break;
-    case MSR_IA32_MCG_STATUS:
-        d->arch.vmca_msrs.mcg_status = val;
-        mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", val);
-        /* For HVM guest, this is the point for deleting vMCE injection node */
-        if ( d->is_hvm && (d->arch.vmca_msrs.nr_injection > 0) )
-        {
-            d->arch.vmca_msrs.nr_injection--; /* Should be 0 */
-            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
-            {
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
-                    struct bank_entry, list);
-                if ( entry->mci_status & MCi_STATUS_VAL )
-                    mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have "
-                                "been cleared before write MCG_STATUS MSR\n");
-
-                mce_printk(MCE_QUIET, "MCE: Delete HVM last injection "
-                                "Node, nr_injection %u\n",
-                                d->arch.vmca_msrs.nr_injection);
-                list_del(&entry->list);
-                xfree(entry);
-            }
-            else
-                mce_printk(MCE_QUIET, "MCE: Not found HVM guest"
-                    " last injection Node, something Wrong!\n");
-        }
-        break;
-    case MSR_IA32_MCG_CAP:
-        mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n");
-        ret = -1;
-        break;
-    case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
-        bank = (msr - MSR_IA32_MC0_CTL) / 4;
-        if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
-        {
-            mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr);
-            ret = 0;
-            break;
-        }
-        switch ( msr & (MSR_IA32_MC0_CTL | 3) )
-        {
-        case MSR_IA32_MC0_CTL:
-            d->arch.vmca_msrs.mci_ctl[bank] = val;
-            break;
-        case MSR_IA32_MC0_STATUS:
-            /* Give the first entry of the list, it corresponds to current
-             * vMCE# injection. When vMCE# is finished processing by the
-             * the guest, this node will be deleted.
-             * Only error bank is written. Non-error banks simply return.
-             */
-            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
-            {
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
-                                   struct bank_entry, list);
-                if ( entry->bank == bank )
-                    entry->mci_status = val;
-                mce_printk(MCE_VERBOSE,
-                         "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
-                         bank, val);
-            }
-            else
-                mce_printk(MCE_VERBOSE,
-                         "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val);
-            break;
-        case MSR_IA32_MC0_ADDR:
-            mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank);
-            ret = -1;
-            break;
-        case MSR_IA32_MC0_MISC:
-            mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank);
-            ret = -1;
-            break;
-        }
-        break;
-    default:
-        switch ( boot_cpu_data.x86_vendor )
-        {
-        case X86_VENDOR_INTEL:
-            ret = intel_mce_wrmsr(msr, val);
-            break;
-        default:
-            ret = 0;
-            break;
-        }
-        break;
-    }
-
-    spin_unlock(&d->arch.vmca_msrs.lock);
-    return ret;
-}
-
 static void mcinfo_clear(struct mc_info *mi)
 {
        memset(mi, 0, sizeof(struct mc_info));
@@ -1238,11 +1010,11 @@ int mca_ctl_conflict(struct mcinfo_bank 
         return 1;
 
     /* Will MCE happen in host if If host mcg_ctl is 0? */
-    if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl )
+    if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl )
         return 1;
 
     bank_nr = bank->mc_bank;
-    if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
+    if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
         return 1;
     return 0;
 }
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Mon Apr 19 08:54:53 2010 +0100
@@ -164,4 +164,32 @@ int x86_mcinfo_add(struct mc_info *mi, v
 int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
 void x86_mcinfo_dump(struct mc_info *mi);
 
+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
+        uint64_t gstatus);
+int inject_vmce(struct domain *d);
+int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct 
mcinfo_global *global);
+
+extern uint64_t g_mcg_cap;
+/* Real value in physical CTL MSR */
+extern uint64_t h_mcg_ctl;
+extern uint64_t *h_mci_ctrl;
+
+extern unsigned int nr_mce_banks;
+
+static inline int mce_vendor_bank_msr(uint32_t msr)
+{
+    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+        (msr > MSR_IA32_MC0_CTL2 && msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks)) )
+          return 1;
+    return 0;
+}
+
+static inline int mce_bank_msr(uint32_t msr)
+{
+    if ( (msr > MSR_IA32_MC0_CTL2 &&
+         msr < (MSR_IA32_MC0_CTL + 4 * nr_mce_banks - 1)) ||
+        mce_vendor_bank_msr(msr) )
+        return 1;
+    return 0;
+}
 #endif /* _MCE_H */
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Mon Apr 19 08:54:53 2010 +0100
@@ -11,6 +11,7 @@
 #include <asm/system.h>
 #include <asm/msr.h>
 #include <asm/p2m.h>
+#include <asm/mce.h>
 #include "mce.h"
 #include "x86_mca.h"
 
@@ -199,126 +200,6 @@ intel_get_extended_msrs(struct mc_info *
     return MCA_EXTINFO_GLOBAL;
 }
 
-/* This node list records errors impacting a domain. when one
- * MCE# happens, one error bank impacts a domain. This error node
- * will be inserted to the tail of the per_dom data for vMCE# MSR
- * virtualization. When one vMCE# injection is finished processing
- * processed by guest, the corresponding node will be deleted. 
- * This node list is for GUEST vMCE# MSRS virtualization.
- */
-static struct bank_entry* alloc_bank_entry(void) {
-    struct bank_entry *entry;
-
-    entry = xmalloc(struct bank_entry);
-    if (!entry) {
-        printk(KERN_ERR "MCE: malloc bank_entry failed\n");
-        return NULL;
-    }
-    memset(entry, 0x0, sizeof(entry));
-    INIT_LIST_HEAD(&entry->list);
-    return entry;
-}
-
-/* Fill error bank info for #vMCE injection and GUEST vMCE#
- * MSR virtualization data
- * 1) Log down how many nr_injections of the impacted.
- * 2) Copy MCE# error bank to impacted DOM node list, 
-      for vMCE# MSRs virtualization
-*/
-
-static int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
-        uint64_t gstatus) {
-    struct bank_entry *entry;
-
-    /* This error bank impacts one domain, we need to fill domain related
-     * data for vMCE MSRs virtualization and vMCE# injection */
-    if (mc_bank->mc_domid != (uint16_t)~0) {
-        /* For HVM guest, Only when first vMCE is consumed by HVM guest 
successfully,
-         * will we generete another node and inject another vMCE
-         */
-        if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection > 0) )
-        {
-            mce_printk(MCE_QUIET, "MCE: HVM guest has not handled previous"
-                        " vMCE yet!\n");
-            return -1;
-        }
-        entry = alloc_bank_entry();
-        if (entry == NULL)
-            return -1;
-
-        entry->mci_status = mc_bank->mc_status;
-        entry->mci_addr = mc_bank->mc_addr;
-        entry->mci_misc = mc_bank->mc_misc;
-        entry->bank = mc_bank->mc_bank;
-
-        spin_lock(&d->arch.vmca_msrs.lock);
-        /* New error Node, insert to the tail of the per_dom data */
-        list_add_tail(&entry->list, &d->arch.vmca_msrs.impact_header);
-        /* Fill MSR global status */
-        d->arch.vmca_msrs.mcg_status = gstatus;
-        /* New node impact the domain, need another vMCE# injection*/
-        d->arch.vmca_msrs.nr_injection++;
-        spin_unlock(&d->arch.vmca_msrs.lock);
-
-        mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d "
-                "status %"PRIx64" addr %"PRIx64" domid %d]\n ",
-                mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr,
-                mc_bank->mc_domid);
-    }
-    return 0;
-}
-
-static int inject_mce(struct domain *d)
-{
-    int cpu = smp_processor_id();
-    cpumask_t affinity;
-
-    /* PV guest and HVM guest have different vMCE# injection
-     * methods*/
-
-    if ( !test_and_set_bool(d->vcpu[0]->mce_pending) )
-    {
-        if (d->is_hvm)
-        {
-            mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM %d\n", 
-                        d->domain_id);
-            vcpu_kick(d->vcpu[0]);
-        }
-        /* PV guest including DOM0 */
-        else
-        {
-            mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n", 
-                        d->domain_id);
-            if (guest_has_trap_callback
-                   (d, 0, TRAP_machine_check))
-            {
-                d->vcpu[0]->cpu_affinity_tmp =
-                        d->vcpu[0]->cpu_affinity;
-                cpus_clear(affinity);
-                cpu_set(cpu, affinity);
-                mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old %d\n", 
cpu,
-                            d->vcpu[0]->processor);
-                vcpu_set_affinity(d->vcpu[0], &affinity);
-                vcpu_kick(d->vcpu[0]);
-            }
-            else
-            {
-                mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE 
handler\n");
-                domain_crash(d);
-            }
-        }
-    }
-    else {
-        /* new vMCE comes while first one has not been injected yet,
-         * in this case, inject fail. [We can't lose this vMCE for
-         * the mce node's consistency].
-        */
-        mce_printk(MCE_QUIET, "There's a pending vMCE waiting to be injected "
-                    " to this DOM%d!\n", d->domain_id);
-        return -1;
-    }
-    return 0;
-}
 
 static void intel_UCR_handler(struct mcinfo_bank *bank,
              struct mcinfo_global *global,
@@ -377,7 +258,7 @@ static void intel_UCR_handler(struct mci
                               return;
                           }
                           /* We will inject vMCE to DOMU*/
-                          if ( inject_mce(d) < 0 )
+                          if ( inject_vmce(d) < 0 )
                           {
                               mce_printk(MCE_QUIET, "inject vMCE to DOM%d"
                                           " failed\n", d->domain_id);
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/vmce.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/vmce.c    Mon Apr 19 08:54:53 2010 +0100
@@ -0,0 +1,451 @@
+/*
+ * vmce.c - virtual MCE support
+ */
+
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <xen/delay.h>
+#include <xen/smp.h>
+#include <xen/mm.h>
+#include <asm/processor.h>
+#include <public/sysctl.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+#include <asm/p2m.h>
+#include "mce.h"
+#include "x86_mca.h"
+
+int vmce_init_msr(struct domain *d)
+{
+    if ( dom_vmce(d) )
+    {
+        dprintk(XENLOG_G_WARNING, "Domain %d has inited vMCE\n", d->domain_id);
+        return 0;
+    }
+
+    /* Allocate the vmca_msrs and mci_ctl togother */
+    dom_vmce(d) = xmalloc(struct domain_mca_msrs);
+    if ( !dom_vmce(d) )
+        return -ENOMEM;
+
+    dom_vmce(d)->mci_ctl = xmalloc_array(uint64_t, nr_mce_banks);
+    if ( !dom_vmce(d)->mci_ctl )
+    {
+        xfree(dom_vmce(d));
+        return -ENOMEM;
+    }
+    memset(d->arch.vmca_msrs->mci_ctl, ~0,
+           sizeof(d->arch.vmca_msrs->mci_ctl));
+
+    dom_vmce(d)->mcg_status = 0x0;
+    dom_vmce(d)->mcg_cap = g_mcg_cap;
+    dom_vmce(d)->mcg_ctl = ~(uint64_t)0x0;
+    dom_vmce(d)->nr_injection = 0;
+
+    INIT_LIST_HEAD(&d->arch.vmca_msrs->impact_header);
+    spin_lock_init(&d->arch.vmca_msrs->lock);
+
+    return 0;
+}
+
+/*
+ * Caller should make sure msr is bank msr */
+static int bank_mce_rdmsr(struct domain *d, uint32_t msr, uint64_t *val)
+{
+    int bank, ret = 1;
+    struct domain_mca_msrs *vmce;
+    struct bank_entry *entry = NULL;
+
+    if (!d)
+        return -EINVAL;
+    vmce = dom_vmce(d);
+    ASSERT(vmce);
+
+    bank = (msr - MSR_IA32_MC0_CTL) / 4;
+    if (bank >= nr_mce_banks)
+        return -1;
+
+    switch (msr & (MSR_IA32_MC0_CTL | 3))
+    {
+    case MSR_IA32_MC0_CTL:
+        *val = vmce->mci_ctl[bank] &
+          (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL);
+        mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n",
+          bank, *val);
+        break;
+    case MSR_IA32_MC0_STATUS:
+        /* Only error bank is read. Non-error banks simply return. */
+        if ( !list_empty(&vmce->impact_header) )
+        {
+            entry = list_entry(vmce->impact_header.next,
+              struct bank_entry, list);
+            if (entry->bank == bank) {
+                *val = entry->mci_status;
+                mce_printk(MCE_VERBOSE,
+                  "MCE: rd MC%u_STATUS in vMCE# context "
+                  "value 0x%"PRIx64"\n", bank, *val);
+            }
+            else
+                entry = NULL;
+        }
+        break;
+    case MSR_IA32_MC0_ADDR:
+        if ( !list_empty(&vmce->impact_header) )
+        {
+            entry = list_entry(vmce->impact_header.next,
+              struct bank_entry, list);
+            if ( entry->bank == bank )
+            {
+                *val = entry->mci_addr;
+                mce_printk(MCE_VERBOSE,
+                  "MCE: rdmsr MC%u_ADDR in vMCE# context "
+                  "0x%"PRIx64"\n", bank, *val);
+            }
+        }
+        break;
+    case MSR_IA32_MC0_MISC:
+        if ( !list_empty(&vmce->impact_header) )
+        {
+            entry = list_entry(vmce->impact_header.next,
+              struct bank_entry, list);
+            if ( entry->bank == bank )
+            {
+                *val = entry->mci_misc;
+                mce_printk(MCE_VERBOSE,
+                  "MCE: rd MC%u_MISC in vMCE# context "
+                  "0x%"PRIx64"\n", bank, *val);
+            }
+        }
+        break;
+    default:
+        switch ( boot_cpu_data.x86_vendor )
+        {
+            case X86_VENDOR_INTEL:
+                ret = intel_mce_rdmsr(msr, val);
+                break;
+            default:
+                ret = 0;
+                break;
+        }
+        break;
+    }
+
+    return ret;
+}
+
+/*
+ * < 0: Unsupported and will #GP fault to guest
+ * = 0: Not handled, should be handled by other components
+ * > 0: Success
+ */
+int vmce_rdmsr(uint32_t msr, uint64_t *val)
+{
+    struct domain *d = current->domain;
+    struct domain_mca_msrs *vmce;
+    int ret = 1;
+
+    *val = 0;
+
+    vmce = dom_vmce(d);
+    if ( !vmce )
+    {
+        /* XXX more handle here */
+        return 0;
+    }
+
+    spin_lock(&d->arch.vmca_msrs->lock);
+
+    switch ( msr )
+    {
+    case MSR_IA32_MCG_STATUS:
+        *val = vmce->mcg_status;
+        if (*val)
+            mce_printk(MCE_VERBOSE,
+                "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val);
+        break;
+    case MSR_IA32_MCG_CAP:
+        *val = vmce->mcg_cap;
+        mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n",
+            *val);
+        break;
+    case MSR_IA32_MCG_CTL:
+        /* Always 0 if no CTL support */
+        *val = vmce->mcg_ctl & h_mcg_ctl;
+        mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n",
+            *val);
+        break;
+    default:
+        if ( mce_bank_msr(msr) )
+            ret = bank_mce_rdmsr(d, msr, val);
+        else
+            ret = 0;
+        break;
+    }
+
+    spin_unlock(&d->arch.vmca_msrs->lock);
+    return ret;
+}
+
+int bank_mce_wrmsr(struct domain *d, u32 msr, u64 val)
+{
+    int bank, ret = 1;
+    struct domain_mca_msrs *vmce;
+    struct bank_entry *entry = NULL;
+
+    if (!d)
+        return -EINVAL;
+    vmce = dom_vmce(d);
+    ASSERT(vmce && vmce->mci_ctl);
+
+    bank = (msr - MSR_IA32_MC0_CTL) / 4;
+    if (bank >= nr_mce_banks)
+        return -EINVAL;
+
+    switch ( msr & (MSR_IA32_MC0_CTL | 3) )
+    {
+    case MSR_IA32_MC0_CTL:
+        vmce->mci_ctl[bank] = val;
+            break;
+    case MSR_IA32_MC0_STATUS:
+            /* Give the first entry of the list, it corresponds to current
+             * vMCE# injection. When vMCE# is finished processing by the
+             * the guest, this node will be deleted.
+             * Only error bank is written. Non-error banks simply return.
+             */
+            if ( !list_empty(&d->arch.vmca_msrs->impact_header) )
+            {
+                entry = list_entry(d->arch.vmca_msrs->impact_header.next,
+                                   struct bank_entry, list);
+                if ( entry->bank == bank )
+                    entry->mci_status = val;
+                mce_printk(MCE_VERBOSE,
+                         "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
+                         bank, val);
+            }
+            else
+                mce_printk(MCE_VERBOSE,
+                         "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val);
+            break;
+    case MSR_IA32_MC0_ADDR:
+            mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank);
+            ret = -1;
+            break;
+    case MSR_IA32_MC0_MISC:
+            mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank);
+            ret = -1;
+            break;
+    default:
+        switch ( boot_cpu_data.x86_vendor )
+        {
+        case X86_VENDOR_INTEL:
+            ret = intel_mce_wrmsr(msr, val);
+            break;
+        default:
+            ret = 0;
+            break;
+        }
+        break;
+    }
+
+    return ret;
+}
+
+/*
+ * < 0: Unsupported and will #GP fault to guest
+ * = 0: Not handled, should be handled by other components
+ * > 0: Success
+ */
+int vmce_wrmsr(u32 msr, u64 val)
+{
+    struct domain *d = current->domain;
+    struct bank_entry *entry = NULL;
+    struct domain_mca_msrs *vmce;
+    int ret = 1;
+
+    if ( !g_mcg_cap )
+        return 0;
+
+    vmce = dom_vmce(d);
+    spin_lock(&vmce->lock);
+
+    switch ( msr )
+    {
+    case MSR_IA32_MCG_CTL:
+        vmce->mcg_ctl = val;
+        break;
+    case MSR_IA32_MCG_STATUS:
+        vmce->mcg_status = val;
+        mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", val);
+        /* For HVM guest, this is the point for deleting vMCE injection node */
+        if ( d->is_hvm && (vmce->nr_injection > 0) )
+        {
+            vmce->nr_injection--; /* Should be 0 */
+            if ( !list_empty(&vmce->impact_header) )
+            {
+                entry = list_entry(vmce->impact_header.next,
+                    struct bank_entry, list);
+                if ( entry->mci_status & MCi_STATUS_VAL )
+                    mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have "
+                                "been cleared before write MCG_STATUS MSR\n");
+
+                mce_printk(MCE_QUIET, "MCE: Delete HVM last injection "
+                                "Node, nr_injection %u\n",
+                                vmce->nr_injection);
+                list_del(&entry->list);
+                xfree(entry);
+            }
+            else
+                mce_printk(MCE_QUIET, "MCE: Not found HVM guest"
+                    " last injection Node, something Wrong!\n");
+        }
+        break;
+    case MSR_IA32_MCG_CAP:
+        mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n");
+        ret = -1;
+        break;
+    default:
+        if ( mce_bank_msr(msr) )
+            ret = bank_mce_wrmsr(d, msr, val);
+        else
+            ret = 0;
+        break;
+    }
+
+    spin_unlock(&vmce->lock);
+    return ret;
+}
+
+int inject_vmce(struct domain *d)
+{
+    int cpu = smp_processor_id();
+    cpumask_t affinity;
+
+    /* PV guest and HVM guest have different vMCE# injection
+     * methods*/
+    if ( !test_and_set_bool(d->vcpu[0]->mce_pending) )
+    {
+        if (d->is_hvm)
+        {
+            mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM %d\n",
+                        d->domain_id);
+            vcpu_kick(d->vcpu[0]);
+        }
+        /* PV guest including DOM0 */
+        else
+        {
+            mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n",
+                        d->domain_id);
+            if (guest_has_trap_callback
+                   (d, 0, TRAP_machine_check))
+            {
+                d->vcpu[0]->cpu_affinity_tmp =
+                        d->vcpu[0]->cpu_affinity;
+                cpus_clear(affinity);
+                cpu_set(cpu, affinity);
+                mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old %d\n", 
cpu,
+                            d->vcpu[0]->processor);
+                vcpu_set_affinity(d->vcpu[0], &affinity);
+                vcpu_kick(d->vcpu[0]);
+            }
+            else
+            {
+                mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE 
handler\n");
+                domain_crash(d);
+            }
+        }
+    }
+    else {
+        /* new vMCE comes while first one has not been injected yet,
+         * in this case, inject fail. [We can't lose this vMCE for
+         * the mce node's consistency].
+        */
+        mce_printk(MCE_QUIET, "There's a pending vMCE waiting to be injected "
+                    " to this DOM%d!\n", d->domain_id);
+        return -1;
+    }
+    return 0;
+}
+
+/* This node list records errors impacting a domain. when one
+ * MCE# happens, one error bank impacts a domain. This error node
+ * will be inserted to the tail of the per_dom data for vMCE# MSR
+ * virtualization. When one vMCE# injection is finished processing
+ * processed by guest, the corresponding node will be deleted.
+ * This node list is for GUEST vMCE# MSRS virtualization.
+ */
+static struct bank_entry* alloc_bank_entry(void) {
+    struct bank_entry *entry;
+
+    entry = xmalloc(struct bank_entry);
+    if (!entry) {
+        printk(KERN_ERR "MCE: malloc bank_entry failed\n");
+        return NULL;
+    }
+    memset(entry, 0x0, sizeof(entry));
+    INIT_LIST_HEAD(&entry->list);
+    return entry;
+}
+
+/* Fill error bank info for #vMCE injection and GUEST vMCE#
+ * MSR virtualization data
+ * 1) Log down how many nr_injections of the impacted.
+ * 2) Copy MCE# error bank to impacted DOM node list,
+      for vMCE# MSRs virtualization
+*/
+
+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
+        uint64_t gstatus) {
+    struct bank_entry *entry;
+
+    /* This error bank impacts one domain, we need to fill domain related
+     * data for vMCE MSRs virtualization and vMCE# injection */
+    if (mc_bank->mc_domid != (uint16_t)~0) {
+        /* For HVM guest, Only when first vMCE is consumed by HVM guest 
successfully,
+         * will we generete another node and inject another vMCE
+         */
+        if ( (d->is_hvm) && (d->arch.vmca_msrs->nr_injection > 0) )
+        {
+            mce_printk(MCE_QUIET, "MCE: HVM guest has not handled previous"
+                        " vMCE yet!\n");
+            return -1;
+        }
+        entry = alloc_bank_entry();
+        if (entry == NULL)
+            return -1;
+
+        entry->mci_status = mc_bank->mc_status;
+        entry->mci_addr = mc_bank->mc_addr;
+        entry->mci_misc = mc_bank->mc_misc;
+        entry->bank = mc_bank->mc_bank;
+
+        spin_lock(&d->arch.vmca_msrs->lock);
+        /* New error Node, insert to the tail of the per_dom data */
+        list_add_tail(&entry->list, &d->arch.vmca_msrs->impact_header);
+        /* Fill MSR global status */
+        d->arch.vmca_msrs->mcg_status = gstatus;
+        /* New node impact the domain, need another vMCE# injection*/
+        d->arch.vmca_msrs->nr_injection++;
+        spin_unlock(&d->arch.vmca_msrs->lock);
+
+        mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d "
+                "status %"PRIx64" addr %"PRIx64" domid %d]\n ",
+                mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr,
+                mc_bank->mc_domid);
+    }
+    return 0;
+}
+
+int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct 
mcinfo_global *global)
+{
+    int ret;
+
+    ret = fill_vmsr_data(bank, d, global->mc_gstatus);
+    if (ret < 0)
+        return ret;
+
+    return inject_vmce(d);
+}
+
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/domain.c     Mon Apr 19 08:54:53 2010 +0100
@@ -49,6 +49,7 @@
 #include <asm/msr.h>
 #include <asm/traps.h>
 #include <asm/nmi.h>
+#include <asm/mce.h>
 #include <xen/numa.h>
 #include <xen/iommu.h>
 #ifdef CONFIG_COMPAT
@@ -501,7 +502,7 @@ int arch_domain_create(struct domain *d,
             goto fail;
 
         /* For Guest vMCE MSRs virtualization */
-        mce_init_msr(d);
+        vmce_init_msr(d);
     }
 
     if ( is_hvm_domain(d) )
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Mon Apr 19 08:54:53 2010 +0100
@@ -47,6 +47,7 @@
 #include <asm/traps.h>
 #include <asm/mc146818rtc.h>
 #include <asm/spinlock.h>
+#include <asm/mce.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/vpt.h>
 #include <asm/hvm/support.h>
@@ -2061,7 +2062,7 @@ int hvm_msr_read_intercept(struct cpu_us
          break;
 
     default:
-        ret = mce_rdmsr(ecx, &msr_content);
+        ret = vmce_rdmsr(ecx, &msr_content);
         if ( ret < 0 )
             goto gp_fault;
         else if ( ret )
@@ -2160,7 +2161,7 @@ int hvm_msr_write_intercept(struct cpu_u
         break;
 
     default:
-        ret = mce_wrmsr(ecx, msr_content);
+        ret = vmce_wrmsr(ecx, msr_content);
         if ( ret < 0 )
             goto gp_fault;
         else if ( ret )
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/traps.c      Mon Apr 19 08:54:53 2010 +0100
@@ -65,6 +65,7 @@
 #include <asm/traps.h>
 #include <asm/hvm/vpt.h>
 #include <asm/hypercall.h>
+#include <asm/mce.h>
 #include <public/arch-x86/cpuid.h>
 
 /*
@@ -2295,7 +2296,7 @@ static int emulate_privileged_op(struct 
             if ( wrmsr_hypervisor_regs(regs->ecx, val) )
                 break;
 
-            rc = mce_wrmsr(regs->ecx, val);
+            rc = vmce_wrmsr(regs->ecx, val);
             if ( rc < 0 )
                 goto fail;
             if ( rc )
@@ -2388,7 +2389,7 @@ static int emulate_privileged_op(struct 
                 break;
             }
 
-            rc = mce_rdmsr(regs->ecx, &val);
+            rc = vmce_rdmsr(regs->ecx, &val);
             if ( rc < 0 )
                 goto fail;
             if ( rc )
@@ -2947,19 +2948,19 @@ void async_exception_cleanup(struct vcpu
         {
             struct domain *d = curr->domain;
 
-            if ( !d->arch.vmca_msrs.nr_injection )
+            if ( !d->arch.vmca_msrs->nr_injection )
             {
                 printk(XENLOG_WARNING "MCE: ret from vMCE#, "
                        "no injection node\n");
                 goto end;
             }
 
-            d->arch.vmca_msrs.nr_injection--;
-            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
+            d->arch.vmca_msrs->nr_injection--;
+            if ( !list_empty(&d->arch.vmca_msrs->impact_header) )
             {
                 struct bank_entry *entry;
 
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+                entry = list_entry(d->arch.vmca_msrs->impact_header.next,
                                    struct bank_entry, list);
                 gdprintk(XENLOG_DEBUG, "MCE: delete last injection node\n");
                 list_del(&entry->list);
@@ -2968,7 +2969,7 @@ void async_exception_cleanup(struct vcpu
                 printk(XENLOG_ERR "MCE: didn't found last injection node\n");
 
             /* further injection */
-            if ( d->arch.vmca_msrs.nr_injection > 0 &&
+            if ( d->arch.vmca_msrs->nr_injection > 0 &&
                  guest_has_trap_callback(d, 0, TRAP_machine_check) &&
                  !test_and_set_bool(curr->mce_pending) )
             {
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/common/domain.c
--- a/xen/common/domain.c       Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/common/domain.c       Mon Apr 19 08:54:53 2010 +0100
@@ -616,6 +616,8 @@ static void complete_domain_destroy(stru
 
     xfree(d->pirq_mask);
     xfree(d->pirq_to_evtchn);
+    xfree(dom_vmce(d)->mci_ctl);
+    xfree(dom_vmce(d));
 
     xsm_free_security_domain(d);
     free_domain_struct(d);
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/include/asm-x86/domain.h      Mon Apr 19 08:54:53 2010 +0100
@@ -6,6 +6,7 @@
 #include <asm/hvm/vcpu.h>
 #include <asm/hvm/domain.h>
 #include <asm/e820.h>
+#include <asm/mce.h>
 #include <public/vcpu.h>
 
 #define has_32bit_shinfo(d)    ((d)->arch.has_32bit_shinfo)
@@ -214,32 +215,6 @@ typedef xen_domctl_cpuid_t cpuid_input_t
 typedef xen_domctl_cpuid_t cpuid_input_t;
 
 struct p2m_domain;
-
-/* Define for GUEST MCA handling */
-#define MAX_NR_BANKS 30
-
-/* This entry is for recording bank nodes for the impacted domain,
- * put into impact_header list. */
-struct bank_entry {
-    struct list_head list;
-    uint16_t bank;
-    uint64_t mci_status;
-    uint64_t mci_addr;
-    uint64_t mci_misc;
-};
-
-struct domain_mca_msrs
-{
-    /* Guest should not change below values after DOM boot up */
-    uint64_t mcg_cap;
-    uint64_t mcg_ctl;
-    uint64_t mcg_status;
-    uint64_t mci_ctl[MAX_NR_BANKS];
-    uint16_t nr_injection;
-    struct list_head impact_header;
-    spinlock_t lock;
-};
-
 struct time_scale {
     int shift;
     u32 mul_frac;
@@ -311,7 +286,7 @@ struct arch_domain
     cpuid_input_t cpuids[MAX_CPUID_INPUT];
 
     /* For Guest vMCA handling */
-    struct domain_mca_msrs vmca_msrs;
+    struct domain_mca_msrs *vmca_msrs;
 
     /* TSC management (emulation, pv, scaling, stats) */
     int tsc_mode;            /* see include/asm-x86/time.h */
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/include/asm-x86/mce.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/mce.h Mon Apr 19 08:54:53 2010 +0100
@@ -0,0 +1,36 @@
+#include <xen/types.h>
+#include <public/arch-x86/xen-mca.h>
+#ifndef _XEN_X86_MCE_H
+#define _XEN_X86_MCE_H
+/* Define for GUEST MCA handling */
+#define MAX_NR_BANKS 30
+
+/* This entry is for recording bank nodes for the impacted domain,
+ * put into impact_header list. */
+struct bank_entry {
+    struct list_head list;   /* node on domain_mca_msrs.impact_header */
+    uint16_t bank;           /* filled from the reported bank's mc_bank */
+    uint64_t mci_status;     /* filled from the reported bank's mc_status */
+    uint64_t mci_addr;       /* filled from the reported bank's mc_addr */
+    uint64_t mci_misc;       /* filled from the reported bank's mc_misc */
+};
+
+struct domain_mca_msrs  /* per-domain virtual MCA state, see dom_vmce() */
+{
+    /* Guest should not change below values after DOM boot up */
+    uint64_t mcg_cap;
+    uint64_t mcg_ctl;
+    uint64_t mcg_status;     /* overwritten with gstatus per queued error */
+    uint64_t *mci_ctl;       /* xfree()d at domain destroy; size - TODO confirm */
+    uint16_t nr_injection;   /* ++ per queued entry, -- on return from vMCE# */
+    struct list_head impact_header; /* bank_entry nodes, appended at tail */
+    spinlock_t lock;         /* held while queueing an entry (see above) */
+};
+
+#define dom_vmce(x)   ((x)->arch.vmca_msrs) /* x: struct domain *; may be NULL - TODO confirm */
+
+/* Guest vMCE MSRs virtualization */
+extern int vmce_init_msr(struct domain *d); /* NOTE(review): arch_domain_create() ignores the result */
+extern int vmce_wrmsr(uint32_t msr, uint64_t val);  /* < 0: callers take their fault path */
+extern int vmce_rdmsr(uint32_t msr, uint64_t *val); /* < 0: callers take their fault path */
+#endif
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/include/asm-x86/traps.h
--- a/xen/include/asm-x86/traps.h       Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/include/asm-x86/traps.h       Mon Apr 19 08:54:53 2010 +0100
@@ -49,9 +49,4 @@ extern int send_guest_trap(struct domain
 extern int send_guest_trap(struct domain *d, uint16_t vcpuid,
                                unsigned int trap_nr);
 
-/* Guest vMCE MSRs virtualization */
-extern void mce_init_msr(struct domain *d);
-extern int mce_wrmsr(uint32_t msr, uint64_t val);
-extern int mce_rdmsr(uint32_t msr, uint64_t *val);
-
 #endif /* ASM_TRAP_H */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] Clean up MCA MSR virtualization and vMCE injection, Xen patchbot-unstable <=