# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1271663693 -3600
# Node ID 6233eb0f29ba6b89b4dd14fe3d385e85924cd9f1
# Parent 7ee8bb40200a1922d15036fd8788a364f8aaae8f
Clean up MCA MSR virtualization and vMCE injection
Move all virtual MCE related code into a separate file.
Also clean up the vMCE code, including:
a) rename functions such as mce_init_msr/mce_rdmsr to
vmce_init_msr/vmce_rdmsr to make their purpose more obvious,
b) make vmca_msrs a pointer in arch_domain,
to reduce arch_domain's size,
c) extract per-bank MCA MSR access into separate functions
(bank_mce_wrmsr/bank_mce_rdmsr) to make the code a bit cleaner,
d) add a new header, xen/include/asm-x86/mce.h, for vMCE related
declarations.
Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>
---
xen/arch/x86/cpu/mcheck/Makefile | 1
xen/arch/x86/cpu/mcheck/mce.c | 238 ------------------
xen/arch/x86/cpu/mcheck/mce.h | 28 ++
xen/arch/x86/cpu/mcheck/mce_intel.c | 123 ---------
xen/arch/x86/cpu/mcheck/vmce.c | 451 ++++++++++++++++++++++++++++++++++++
xen/arch/x86/domain.c | 3
xen/arch/x86/hvm/hvm.c | 5
xen/arch/x86/traps.c | 15 -
xen/common/domain.c | 2
xen/include/asm-x86/domain.h | 29 --
xen/include/asm-x86/mce.h | 36 ++
xen/include/asm-x86/traps.h | 5
12 files changed, 540 insertions(+), 396 deletions(-)
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/Makefile
--- a/xen/arch/x86/cpu/mcheck/Makefile Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/Makefile Mon Apr 19 08:54:53 2010 +0100
@@ -7,3 +7,4 @@ obj-y += mce_intel.o
obj-y += mce_intel.o
obj-y += mce_amd_quirks.o
obj-y += non-fatal.o
+obj-y += vmce.o
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.c Mon Apr 19 08:54:53 2010 +0100
@@ -31,11 +31,11 @@ unsigned int nr_mce_banks;
unsigned int nr_mce_banks;
int mce_broadcast = 0;
-static uint64_t g_mcg_cap;
+uint64_t g_mcg_cap;
/* Real value in physical CTL MSR */
-static uint64_t h_mcg_ctl = 0UL;
-static uint64_t *h_mci_ctrl;
+uint64_t h_mcg_ctl = 0UL;
+uint64_t *h_mci_ctrl;
int firstbank;
static void intpose_init(void);
@@ -752,234 +752,6 @@ u64 mce_cap_init(void)
return value;
}
-/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */
-void mce_init_msr(struct domain *d)
-{
- d->arch.vmca_msrs.mcg_status = 0x0;
- d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
- d->arch.vmca_msrs.mcg_ctl = ~(uint64_t)0x0;
- d->arch.vmca_msrs.nr_injection = 0;
- memset(d->arch.vmca_msrs.mci_ctl, ~0,
- sizeof(d->arch.vmca_msrs.mci_ctl));
- INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
- spin_lock_init(&d->arch.vmca_msrs.lock);
-}
-
-int mce_rdmsr(uint32_t msr, uint64_t *val)
-{
- struct domain *d = current->domain;
- int ret = 1;
- unsigned int bank;
- struct bank_entry *entry = NULL;
-
- *val = 0;
- spin_lock(&d->arch.vmca_msrs.lock);
-
- switch ( msr )
- {
- case MSR_IA32_MCG_STATUS:
- *val = d->arch.vmca_msrs.mcg_status;
- if (*val)
- mce_printk(MCE_VERBOSE,
- "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val);
- break;
- case MSR_IA32_MCG_CAP:
- *val = d->arch.vmca_msrs.mcg_cap;
- mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n",
- *val);
- break;
- case MSR_IA32_MCG_CTL:
- /* Always 0 if no CTL support */
- *val = d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl;
- mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n",
- *val);
- break;
- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
- bank = (msr - MSR_IA32_MC0_CTL) / 4;
- if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
- {
- mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr);
- ret = 0;
- break;
- }
- switch (msr & (MSR_IA32_MC0_CTL | 3))
- {
- case MSR_IA32_MC0_CTL:
- *val = d->arch.vmca_msrs.mci_ctl[bank] &
- (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL);
- mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n",
- bank, *val);
- break;
- case MSR_IA32_MC0_STATUS:
- /* Only error bank is read. Non-error banks simply return. */
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
- {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if (entry->bank == bank) {
- *val = entry->mci_status;
- mce_printk(MCE_VERBOSE,
- "MCE: rd MC%u_STATUS in vMCE# context "
- "value 0x%"PRIx64"\n", bank, *val);
- }
- else
- entry = NULL;
- }
- break;
- case MSR_IA32_MC0_ADDR:
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
- {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == bank )
- {
- *val = entry->mci_addr;
- mce_printk(MCE_VERBOSE,
- "MCE: rdmsr MC%u_ADDR in vMCE# context "
- "0x%"PRIx64"\n", bank, *val);
- }
- }
- break;
- case MSR_IA32_MC0_MISC:
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
- {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == bank )
- {
- *val = entry->mci_misc;
- mce_printk(MCE_VERBOSE,
- "MCE: rd MC%u_MISC in vMCE# context "
- "0x%"PRIx64"\n", bank, *val);
- }
- }
- break;
- }
- break;
- default:
- switch ( boot_cpu_data.x86_vendor )
- {
- case X86_VENDOR_INTEL:
- ret = intel_mce_rdmsr(msr, val);
- break;
- default:
- ret = 0;
- break;
- }
- break;
- }
-
- spin_unlock(&d->arch.vmca_msrs.lock);
- return ret;
-}
-
-int mce_wrmsr(u32 msr, u64 val)
-{
- struct domain *d = current->domain;
- struct bank_entry *entry = NULL;
- unsigned int bank;
- int ret = 1;
-
- if ( !g_mcg_cap )
- return 0;
-
- spin_lock(&d->arch.vmca_msrs.lock);
-
- switch ( msr )
- {
- case MSR_IA32_MCG_CTL:
- d->arch.vmca_msrs.mcg_ctl = val;
- break;
- case MSR_IA32_MCG_STATUS:
- d->arch.vmca_msrs.mcg_status = val;
- mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", val);
- /* For HVM guest, this is the point for deleting vMCE injection node */
- if ( d->is_hvm && (d->arch.vmca_msrs.nr_injection > 0) )
- {
- d->arch.vmca_msrs.nr_injection--; /* Should be 0 */
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
- {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->mci_status & MCi_STATUS_VAL )
- mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have "
- "been cleared before write MCG_STATUS MSR\n");
-
- mce_printk(MCE_QUIET, "MCE: Delete HVM last injection "
- "Node, nr_injection %u\n",
- d->arch.vmca_msrs.nr_injection);
- list_del(&entry->list);
- xfree(entry);
- }
- else
- mce_printk(MCE_QUIET, "MCE: Not found HVM guest"
- " last injection Node, something Wrong!\n");
- }
- break;
- case MSR_IA32_MCG_CAP:
- mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n");
- ret = -1;
- break;
- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
- bank = (msr - MSR_IA32_MC0_CTL) / 4;
- if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
- {
- mce_printk(MCE_QUIET, "MCE: MSR %x is not MCA MSR\n", msr);
- ret = 0;
- break;
- }
- switch ( msr & (MSR_IA32_MC0_CTL | 3) )
- {
- case MSR_IA32_MC0_CTL:
- d->arch.vmca_msrs.mci_ctl[bank] = val;
- break;
- case MSR_IA32_MC0_STATUS:
- /* Give the first entry of the list, it corresponds to current
- * vMCE# injection. When vMCE# is finished processing by the
- * the guest, this node will be deleted.
- * Only error bank is written. Non-error banks simply return.
- */
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
- {
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
- struct bank_entry, list);
- if ( entry->bank == bank )
- entry->mci_status = val;
- mce_printk(MCE_VERBOSE,
- "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
- bank, val);
- }
- else
- mce_printk(MCE_VERBOSE,
- "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val);
- break;
- case MSR_IA32_MC0_ADDR:
- mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank);
- ret = -1;
- break;
- case MSR_IA32_MC0_MISC:
- mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank);
- ret = -1;
- break;
- }
- break;
- default:
- switch ( boot_cpu_data.x86_vendor )
- {
- case X86_VENDOR_INTEL:
- ret = intel_mce_wrmsr(msr, val);
- break;
- default:
- ret = 0;
- break;
- }
- break;
- }
-
- spin_unlock(&d->arch.vmca_msrs.lock);
- return ret;
-}
-
static void mcinfo_clear(struct mc_info *mi)
{
memset(mi, 0, sizeof(struct mc_info));
@@ -1238,11 +1010,11 @@ int mca_ctl_conflict(struct mcinfo_bank
return 1;
/* Will MCE happen in host if If host mcg_ctl is 0? */
- if ( ~d->arch.vmca_msrs.mcg_ctl & h_mcg_ctl )
+ if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl )
return 1;
bank_nr = bank->mc_bank;
- if (~d->arch.vmca_msrs.mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
+ if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
return 1;
return 0;
}
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.h Mon Apr 19 08:54:53 2010 +0100
@@ -164,4 +164,32 @@ int x86_mcinfo_add(struct mc_info *mi, v
int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
void x86_mcinfo_dump(struct mc_info *mi);
+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
+ uint64_t gstatus);
+int inject_vmce(struct domain *d);
+int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct
mcinfo_global *global);
+
+extern uint64_t g_mcg_cap;
+/* Real value in physical CTL MSR */
+extern uint64_t h_mcg_ctl;
+extern uint64_t *h_mci_ctrl;
+
+extern unsigned int nr_mce_banks;
+
+/*
+ * Return 1 if msr is an Intel specific per-bank MSR, i.e. it lies in
+ * [MSR_IA32_MC0_CTL2, MSR_IA32_MC0_CTL2 + nr_mce_banks), and 0 otherwise.
+ * The lower bound is inclusive so that bank 0's register is recognised too.
+ */
+static inline int mce_vendor_bank_msr(uint32_t msr)
+{
+    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+         msr >= MSR_IA32_MC0_CTL2 &&
+         msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks) )
+        return 1;
+    return 0;
+}
+
+/*
+ * Return 1 if msr is a per-bank machine check MSR, 0 otherwise.
+ *
+ * A per-bank MSR is either one of the four registers per bank that start
+ * at MSR_IA32_MC0_CTL (banks 0 .. nr_mce_banks - 1), or a vendor specific
+ * bank MSR as reported by mce_vendor_bank_msr().
+ */
+static inline int mce_bank_msr(uint32_t msr)
+{
+    /*
+     * The range is [MSR_IA32_MC0_CTL, MSR_IA32_MC0_CTL + 4 * nr_mce_banks).
+     * It must start at MSR_IA32_MC0_CTL itself (not MSR_IA32_MC0_CTL2) so
+     * that unrelated MSRs are not claimed, and it must not stop one
+     * register early or the last bank's fourth register is missed.
+     */
+    if ( (msr >= MSR_IA32_MC0_CTL &&
+          msr < (MSR_IA32_MC0_CTL + 4 * nr_mce_banks)) ||
+         mce_vendor_bank_msr(msr) )
+        return 1;
+    return 0;
+}
#endif /* _MCE_H */
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Apr 19 08:54:53 2010 +0100
@@ -11,6 +11,7 @@
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/p2m.h>
+#include <asm/mce.h>
#include "mce.h"
#include "x86_mca.h"
@@ -199,126 +200,6 @@ intel_get_extended_msrs(struct mc_info *
return MCA_EXTINFO_GLOBAL;
}
-/* This node list records errors impacting a domain. when one
- * MCE# happens, one error bank impacts a domain. This error node
- * will be inserted to the tail of the per_dom data for vMCE# MSR
- * virtualization. When one vMCE# injection is finished processing
- * processed by guest, the corresponding node will be deleted.
- * This node list is for GUEST vMCE# MSRS virtualization.
- */
-static struct bank_entry* alloc_bank_entry(void) {
- struct bank_entry *entry;
-
- entry = xmalloc(struct bank_entry);
- if (!entry) {
- printk(KERN_ERR "MCE: malloc bank_entry failed\n");
- return NULL;
- }
- memset(entry, 0x0, sizeof(entry));
- INIT_LIST_HEAD(&entry->list);
- return entry;
-}
-
-/* Fill error bank info for #vMCE injection and GUEST vMCE#
- * MSR virtualization data
- * 1) Log down how many nr_injections of the impacted.
- * 2) Copy MCE# error bank to impacted DOM node list,
- for vMCE# MSRs virtualization
-*/
-
-static int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
- uint64_t gstatus) {
- struct bank_entry *entry;
-
- /* This error bank impacts one domain, we need to fill domain related
- * data for vMCE MSRs virtualization and vMCE# injection */
- if (mc_bank->mc_domid != (uint16_t)~0) {
- /* For HVM guest, Only when first vMCE is consumed by HVM guest
successfully,
- * will we generete another node and inject another vMCE
- */
- if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection > 0) )
- {
- mce_printk(MCE_QUIET, "MCE: HVM guest has not handled previous"
- " vMCE yet!\n");
- return -1;
- }
- entry = alloc_bank_entry();
- if (entry == NULL)
- return -1;
-
- entry->mci_status = mc_bank->mc_status;
- entry->mci_addr = mc_bank->mc_addr;
- entry->mci_misc = mc_bank->mc_misc;
- entry->bank = mc_bank->mc_bank;
-
- spin_lock(&d->arch.vmca_msrs.lock);
- /* New error Node, insert to the tail of the per_dom data */
- list_add_tail(&entry->list, &d->arch.vmca_msrs.impact_header);
- /* Fill MSR global status */
- d->arch.vmca_msrs.mcg_status = gstatus;
- /* New node impact the domain, need another vMCE# injection*/
- d->arch.vmca_msrs.nr_injection++;
- spin_unlock(&d->arch.vmca_msrs.lock);
-
- mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d "
- "status %"PRIx64" addr %"PRIx64" domid %d]\n ",
- mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr,
- mc_bank->mc_domid);
- }
- return 0;
-}
-
-static int inject_mce(struct domain *d)
-{
- int cpu = smp_processor_id();
- cpumask_t affinity;
-
- /* PV guest and HVM guest have different vMCE# injection
- * methods*/
-
- if ( !test_and_set_bool(d->vcpu[0]->mce_pending) )
- {
- if (d->is_hvm)
- {
- mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM %d\n",
- d->domain_id);
- vcpu_kick(d->vcpu[0]);
- }
- /* PV guest including DOM0 */
- else
- {
- mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n",
- d->domain_id);
- if (guest_has_trap_callback
- (d, 0, TRAP_machine_check))
- {
- d->vcpu[0]->cpu_affinity_tmp =
- d->vcpu[0]->cpu_affinity;
- cpus_clear(affinity);
- cpu_set(cpu, affinity);
- mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old %d\n",
cpu,
- d->vcpu[0]->processor);
- vcpu_set_affinity(d->vcpu[0], &affinity);
- vcpu_kick(d->vcpu[0]);
- }
- else
- {
- mce_printk(MCE_VERBOSE, "MCE: Kill PV guest with No MCE
handler\n");
- domain_crash(d);
- }
- }
- }
- else {
- /* new vMCE comes while first one has not been injected yet,
- * in this case, inject fail. [We can't lose this vMCE for
- * the mce node's consistency].
- */
- mce_printk(MCE_QUIET, "There's a pending vMCE waiting to be injected "
- " to this DOM%d!\n", d->domain_id);
- return -1;
- }
- return 0;
-}
static void intel_UCR_handler(struct mcinfo_bank *bank,
struct mcinfo_global *global,
@@ -377,7 +258,7 @@ static void intel_UCR_handler(struct mci
return;
}
/* We will inject vMCE to DOMU*/
- if ( inject_mce(d) < 0 )
+ if ( inject_vmce(d) < 0 )
{
mce_printk(MCE_QUIET, "inject vMCE to DOM%d"
" failed\n", d->domain_id);
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/cpu/mcheck/vmce.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/vmce.c Mon Apr 19 08:54:53 2010 +0100
@@ -0,0 +1,451 @@
+/*
+ * vmce.c - virtual MCE support
+ */
+
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <xen/delay.h>
+#include <xen/smp.h>
+#include <xen/mm.h>
+#include <asm/processor.h>
+#include <public/sysctl.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+#include <asm/p2m.h>
+#include "mce.h"
+#include "x86_mca.h"
+
+/*
+ * Allocate and initialise the per-domain virtual MCA MSR state.
+ *
+ * Returns 0 on success, or if the domain already has vMCE state, and
+ * -ENOMEM if either allocation fails.  On failure dom_vmce(d) stays NULL.
+ */
+int vmce_init_msr(struct domain *d)
+{
+    struct domain_mca_msrs *vmce;
+
+    if ( dom_vmce(d) )
+    {
+        dprintk(XENLOG_G_WARNING, "Domain %d has inited vMCE\n", d->domain_id);
+        return 0;
+    }
+
+    vmce = xmalloc(struct domain_mca_msrs);
+    if ( !vmce )
+        return -ENOMEM;
+
+    vmce->mci_ctl = xmalloc_array(uint64_t, nr_mce_banks);
+    if ( !vmce->mci_ctl )
+    {
+        /* Nothing was stored in d yet, so no stale pointer is left behind. */
+        xfree(vmce);
+        return -ENOMEM;
+    }
+
+    /*
+     * mci_ctl is a heap array: sizeof(vmce->mci_ctl) would only be the size
+     * of the pointer, so size the fill by the number of banks instead.
+     */
+    memset(vmce->mci_ctl, ~0, nr_mce_banks * sizeof(*vmce->mci_ctl));
+
+    vmce->mcg_status = 0x0;
+    vmce->mcg_cap = g_mcg_cap;
+    vmce->mcg_ctl = ~(uint64_t)0x0;
+    vmce->nr_injection = 0;
+
+    INIT_LIST_HEAD(&vmce->impact_header);
+    spin_lock_init(&vmce->lock);
+
+    /* Attach the state to the domain only once it is fully set up. */
+    dom_vmce(d) = vmce;
+
+    return 0;
+}
+
+/*
+ * Read a per-bank MCA MSR of domain d into *val.
+ * Caller should make sure msr is bank msr.
+ *
+ * MCi_CTL returns the guest's stored value, masked by h_mci_ctrl[bank] when
+ * that array exists. MCi_STATUS/ADDR/MISC return the matching field of the
+ * first entry on vmce->impact_header when that entry belongs to this bank;
+ * otherwise *val is not written.
+ *
+ * Returns -EINVAL if d is NULL, -1 if the decoded bank is >= nr_mce_banks,
+ * otherwise 1 (or the result of the vendor hook in the default case).
+ *
+ * NOTE(review): the bank index is derived from MSR_IA32_MC0_CTL before the
+ * switch, so an MSR accepted only by mce_vendor_bank_msr() looks like it is
+ * rejected by the bank check before the vendor hook can run - confirm.
+ */
+static int bank_mce_rdmsr(struct domain *d, uint32_t msr, uint64_t *val)
+{
+ int bank, ret = 1;
+ struct domain_mca_msrs *vmce;
+ struct bank_entry *entry = NULL;
+
+ if (!d)
+ return -EINVAL;
+ vmce = dom_vmce(d);
+ ASSERT(vmce);
+
+ bank = (msr - MSR_IA32_MC0_CTL) / 4;
+ if (bank >= nr_mce_banks)
+ return -1;
+
+ switch (msr & (MSR_IA32_MC0_CTL | 3))
+ {
+ case MSR_IA32_MC0_CTL:
+ *val = vmce->mci_ctl[bank] &
+ (h_mci_ctrl ? h_mci_ctrl[bank] : ~0UL);
+ mce_printk(MCE_VERBOSE, "MCE: rdmsr MC%u_CTL 0x%"PRIx64"\n",
+ bank, *val);
+ break;
+ case MSR_IA32_MC0_STATUS:
+ /* Only error bank is read. Non-error banks simply return. */
+ if ( !list_empty(&vmce->impact_header) )
+ {
+ entry = list_entry(vmce->impact_header.next,
+ struct bank_entry, list);
+ if (entry->bank == bank) {
+ *val = entry->mci_status;
+ mce_printk(MCE_VERBOSE,
+ "MCE: rd MC%u_STATUS in vMCE# context "
+ "value 0x%"PRIx64"\n", bank, *val);
+ }
+ else
+ entry = NULL;
+ }
+ break;
+ case MSR_IA32_MC0_ADDR:
+ if ( !list_empty(&vmce->impact_header) )
+ {
+ entry = list_entry(vmce->impact_header.next,
+ struct bank_entry, list);
+ if ( entry->bank == bank )
+ {
+ *val = entry->mci_addr;
+ mce_printk(MCE_VERBOSE,
+ "MCE: rdmsr MC%u_ADDR in vMCE# context "
+ "0x%"PRIx64"\n", bank, *val);
+ }
+ }
+ break;
+ case MSR_IA32_MC0_MISC:
+ if ( !list_empty(&vmce->impact_header) )
+ {
+ entry = list_entry(vmce->impact_header.next,
+ struct bank_entry, list);
+ if ( entry->bank == bank )
+ {
+ *val = entry->mci_misc;
+ mce_printk(MCE_VERBOSE,
+ "MCE: rd MC%u_MISC in vMCE# context "
+ "0x%"PRIx64"\n", bank, *val);
+ }
+ }
+ break;
+ default:
+ switch ( boot_cpu_data.x86_vendor )
+ {
+ case X86_VENDOR_INTEL:
+ ret = intel_mce_rdmsr(msr, val);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Emulate a read of an MCA related MSR for the current domain.
+ *
+ * Return value:
+ *   < 0: unsupported, a #GP fault is raised in the guest
+ *   = 0: not handled here, other components should handle it
+ *   > 0: handled, *val holds the result
+ */
+int vmce_rdmsr(uint32_t msr, uint64_t *val)
+{
+    struct domain *d = current->domain;
+    struct domain_mca_msrs *vmce = dom_vmce(d);
+    int ret = 1;
+
+    *val = 0;
+
+    /* XXX more handle here */
+    if ( vmce == NULL )
+        return 0;
+
+    spin_lock(&vmce->lock);
+
+    if ( msr == MSR_IA32_MCG_STATUS )
+    {
+        *val = vmce->mcg_status;
+        if ( *val != 0 )
+            mce_printk(MCE_VERBOSE,
+                       "MCE: rdmsr MCG_STATUS 0x%"PRIx64"\n", *val);
+    }
+    else if ( msr == MSR_IA32_MCG_CAP )
+    {
+        *val = vmce->mcg_cap;
+        mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CAP 0x%"PRIx64"\n",
+                   *val);
+    }
+    else if ( msr == MSR_IA32_MCG_CTL )
+    {
+        /* Always 0 if no CTL support */
+        *val = vmce->mcg_ctl & h_mcg_ctl;
+        mce_printk(MCE_VERBOSE, "MCE: rdmsr MCG_CTL 0x%"PRIx64"\n",
+                   *val);
+    }
+    else
+    {
+        /* Anything that is not a bank MSR is left to other components. */
+        ret = mce_bank_msr(msr) ? bank_mce_rdmsr(d, msr, val) : 0;
+    }
+
+    spin_unlock(&vmce->lock);
+    return ret;
+}
+
+/*
+ * Emulate a write of val to a per-bank MCA MSR of domain d.
+ * Called from vmce_wrmsr() with the domain's vMCE lock held, for MSRs that
+ * passed mce_bank_msr().  File-local, like bank_mce_rdmsr().
+ *
+ * MCi_CTL is stored per bank.  MCi_STATUS updates the first entry on the
+ * impact list when that entry belongs to this bank.  MCi_ADDR and MCi_MISC
+ * are read-only.
+ *
+ * Returns -EINVAL for a NULL domain or a bank >= nr_mce_banks, -1 for a
+ * write to a read-only register, otherwise 1 (or the result of the vendor
+ * hook in the default case).
+ */
+static int bank_mce_wrmsr(struct domain *d, u32 msr, u64 val)
+{
+    int bank, ret = 1;
+    struct domain_mca_msrs *vmce;
+    struct bank_entry *entry = NULL;
+
+    if ( !d )
+        return -EINVAL;
+    vmce = dom_vmce(d);
+    ASSERT(vmce && vmce->mci_ctl);
+
+    bank = (msr - MSR_IA32_MC0_CTL) / 4;
+    if ( bank >= nr_mce_banks )
+        return -EINVAL;
+
+    switch ( msr & (MSR_IA32_MC0_CTL | 3) )
+    {
+    case MSR_IA32_MC0_CTL:
+        vmce->mci_ctl[bank] = val;
+        break;
+    case MSR_IA32_MC0_STATUS:
+        /* Give the first entry of the list, it corresponds to current
+         * vMCE# injection. When vMCE# is finished processing by the
+         * the guest, this node will be deleted.
+         * Only error bank is written. Non-error banks simply return.
+         */
+        if ( !list_empty(&vmce->impact_header) )
+        {
+            entry = list_entry(vmce->impact_header.next,
+                               struct bank_entry, list);
+            if ( entry->bank == bank )
+                entry->mci_status = val;
+            mce_printk(MCE_VERBOSE,
+                       "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
+                       bank, val);
+        }
+        else
+            mce_printk(MCE_VERBOSE,
+                       "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val);
+        break;
+    case MSR_IA32_MC0_ADDR:
+        mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank);
+        ret = -1;
+        break;
+    case MSR_IA32_MC0_MISC:
+        mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank);
+        ret = -1;
+        break;
+    default:
+        switch ( boot_cpu_data.x86_vendor )
+        {
+        case X86_VENDOR_INTEL:
+            ret = intel_mce_wrmsr(msr, val);
+            break;
+        default:
+            ret = 0;
+            break;
+        }
+        break;
+    }
+
+    return ret;
+}
+
+/*
+ * Emulate a write of an MCA related MSR for the current domain.
+ *
+ * Return value:
+ *   < 0: unsupported, a #GP fault is raised in the guest
+ *   = 0: not handled here, other components should handle it
+ *   > 0: handled
+ */
+int vmce_wrmsr(u32 msr, u64 val)
+{
+    struct domain *d = current->domain;
+    struct bank_entry *entry = NULL;
+    struct domain_mca_msrs *vmce;
+    int ret = 1;
+
+    if ( !g_mcg_cap )
+        return 0;
+
+    /*
+     * The domain may have no vMCE state (vmce_init_msr() can fail), so
+     * treat that as "not handled" exactly like vmce_rdmsr() does rather
+     * than dereferencing a NULL pointer to take the lock.
+     */
+    vmce = dom_vmce(d);
+    if ( !vmce )
+        return 0;
+
+    spin_lock(&vmce->lock);
+
+    switch ( msr )
+    {
+    case MSR_IA32_MCG_CTL:
+        vmce->mcg_ctl = val;
+        break;
+    case MSR_IA32_MCG_STATUS:
+        vmce->mcg_status = val;
+        mce_printk(MCE_VERBOSE, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", val);
+        /* For HVM guest, this is the point for deleting vMCE injection node */
+        if ( d->is_hvm && (vmce->nr_injection > 0) )
+        {
+            vmce->nr_injection--; /* Should be 0 */
+            if ( !list_empty(&vmce->impact_header) )
+            {
+                entry = list_entry(vmce->impact_header.next,
+                                   struct bank_entry, list);
+                if ( entry->mci_status & MCi_STATUS_VAL )
+                    mce_printk(MCE_QUIET, "MCE: MCi_STATUS MSR should have "
+                               "been cleared before write MCG_STATUS MSR\n");
+
+                mce_printk(MCE_QUIET, "MCE: Delete HVM last injection "
+                           "Node, nr_injection %u\n",
+                           vmce->nr_injection);
+                list_del(&entry->list);
+                xfree(entry);
+            }
+            else
+                mce_printk(MCE_QUIET, "MCE: Not found HVM guest"
+                           " last injection Node, something Wrong!\n");
+        }
+        break;
+    case MSR_IA32_MCG_CAP:
+        mce_printk(MCE_QUIET, "MCE: MCG_CAP is read-only\n");
+        ret = -1;
+        break;
+    default:
+        if ( mce_bank_msr(msr) )
+            ret = bank_mce_wrmsr(d, msr, val);
+        else
+            ret = 0;
+        break;
+    }
+
+    spin_unlock(&vmce->lock);
+    return ret;
+}
+
+/*
+ * Ask vcpu 0 of domain d to take a virtual machine check.
+ *
+ * HVM: vcpu 0 is kicked.  PV (including DOM0): if the guest registered a
+ * machine check trap callback, vcpu 0's affinity is saved in
+ * cpu_affinity_tmp, the vcpu is pinned to the current CPU and kicked;
+ * without a callback the domain is crashed.
+ *
+ * Returns 0 in those cases and -1 if vcpu 0 already has a vMCE pending.
+ */
+int inject_vmce(struct domain *d)
+{
+    int cpu = smp_processor_id();
+    cpumask_t affinity;
+
+    /* PV guest and HVM guest have different vMCE# injection
+     * methods*/
+    if ( !test_and_set_bool(d->vcpu[0]->mce_pending) )
+    {
+        if ( d->is_hvm )
+        {
+            mce_printk(MCE_VERBOSE, "MCE: inject vMCE to HVM DOM %d\n",
+                       d->domain_id);
+            vcpu_kick(d->vcpu[0]);
+        }
+        /* PV guest including DOM0 */
+        else
+        {
+            mce_printk(MCE_VERBOSE, "MCE: inject vMCE to PV DOM%d\n",
+                       d->domain_id);
+            if ( guest_has_trap_callback(d, 0, TRAP_machine_check) )
+            {
+                d->vcpu[0]->cpu_affinity_tmp =
+                    d->vcpu[0]->cpu_affinity;
+                cpus_clear(affinity);
+                cpu_set(cpu, affinity);
+                mce_printk(MCE_VERBOSE, "MCE: CPU%d set affinity, old %d\n",
+                           cpu, d->vcpu[0]->processor);
+                vcpu_set_affinity(d->vcpu[0], &affinity);
+                vcpu_kick(d->vcpu[0]);
+            }
+            else
+            {
+                mce_printk(MCE_VERBOSE,
+                           "MCE: Kill PV guest with No MCE handler\n");
+                domain_crash(d);
+            }
+        }
+    }
+    else
+    {
+        /* new vMCE comes while first one has not been injected yet,
+         * in this case, inject fail. [We can't lose this vMCE for
+         * the mce node's consistency].
+         */
+        mce_printk(MCE_QUIET, "There's a pending vMCE waiting to be injected "
+                   " to this DOM%d!\n", d->domain_id);
+        return -1;
+    }
+    return 0;
+}
+
+/* This node list records errors impacting a domain. When one
+ * MCE# happens, one error bank impacts a domain. This error node
+ * will be inserted to the tail of the per_dom data for vMCE# MSR
+ * virtualization. When one vMCE# injection has been processed by the
+ * guest, the corresponding node will be deleted.
+ * This node list is for GUEST vMCE# MSRS virtualization.
+ *
+ * alloc_bank_entry() returns a zeroed entry with an initialised list
+ * head, or NULL if the allocation fails.
+ */
+static struct bank_entry *alloc_bank_entry(void)
+{
+    struct bank_entry *entry;
+
+    entry = xmalloc(struct bank_entry);
+    if ( !entry )
+    {
+        printk(KERN_ERR "MCE: malloc bank_entry failed\n");
+        return NULL;
+    }
+    /* Clear the whole object: sizeof(entry) is only the pointer's size. */
+    memset(entry, 0x0, sizeof(*entry));
+    INIT_LIST_HEAD(&entry->list);
+    return entry;
+}
+
+/*
+ * Record an error bank for domain d so that the guest's vMCE# MSR reads
+ * can be served from it:
+ *  1) copy the bank's status/addr/misc into a new node appended to the
+ *     domain's impact list,
+ *  2) store gstatus as the domain's MCG_STATUS,
+ *  3) bump the count of outstanding injections.
+ *
+ * Returns 0 on success or when the bank is not attributed to any domain,
+ * and -1 if an HVM guest still has an unconsumed vMCE or if the node
+ * cannot be allocated.
+ */
+int fill_vmsr_data(struct mcinfo_bank *mc_bank, struct domain *d,
+                   uint64_t gstatus)
+{
+    struct domain_mca_msrs *vmce;
+    struct bank_entry *node;
+
+    /* Only banks that impact a domain need per-domain bookkeeping. */
+    if ( mc_bank->mc_domid == (uint16_t)~0 )
+        return 0;
+
+    vmce = d->arch.vmca_msrs;
+
+    /* An HVM guest gets its next node only after consuming the first vMCE. */
+    if ( d->is_hvm && vmce->nr_injection > 0 )
+    {
+        mce_printk(MCE_QUIET, "MCE: HVM guest has not handled previous"
+                   " vMCE yet!\n");
+        return -1;
+    }
+
+    node = alloc_bank_entry();
+    if ( !node )
+        return -1;
+
+    node->mci_status = mc_bank->mc_status;
+    node->mci_addr = mc_bank->mc_addr;
+    node->mci_misc = mc_bank->mc_misc;
+    node->bank = mc_bank->mc_bank;
+
+    spin_lock(&vmce->lock);
+    /* Newest error goes to the tail of the per-domain list. */
+    list_add_tail(&node->list, &vmce->impact_header);
+    /* Global status the guest will see. */
+    vmce->mcg_status = gstatus;
+    /* One more vMCE# injection is owed to the domain. */
+    vmce->nr_injection++;
+    spin_unlock(&vmce->lock);
+
+    mce_printk(MCE_VERBOSE,"MCE: Found error @[BANK%d "
+               "status %"PRIx64" addr %"PRIx64" domid %d]\n ",
+               mc_bank->mc_bank, mc_bank->mc_status, mc_bank->mc_addr,
+               mc_bank->mc_domid);
+
+    return 0;
+}
+
+/*
+ * Record the error in bank for domain d and, if that worked, inject a vMCE.
+ * Returns fill_vmsr_data()'s negative result when recording fails,
+ * otherwise the result of inject_vmce(d).
+ */
+int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d,
+                       struct mcinfo_global *global)
+{
+    int rc = fill_vmsr_data(bank, d, global->mc_gstatus);
+
+    return (rc < 0) ? rc : inject_vmce(d);
+}
+
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/domain.c Mon Apr 19 08:54:53 2010 +0100
@@ -49,6 +49,7 @@
#include <asm/msr.h>
#include <asm/traps.h>
#include <asm/nmi.h>
+#include <asm/mce.h>
#include <xen/numa.h>
#include <xen/iommu.h>
#ifdef CONFIG_COMPAT
@@ -501,7 +502,7 @@ int arch_domain_create(struct domain *d,
goto fail;
/* For Guest vMCE MSRs virtualization */
- mce_init_msr(d);
+ vmce_init_msr(d);
}
if ( is_hvm_domain(d) )
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c Mon Apr 19 08:54:53 2010 +0100
@@ -47,6 +47,7 @@
#include <asm/traps.h>
#include <asm/mc146818rtc.h>
#include <asm/spinlock.h>
+#include <asm/mce.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/support.h>
@@ -2061,7 +2062,7 @@ int hvm_msr_read_intercept(struct cpu_us
break;
default:
- ret = mce_rdmsr(ecx, &msr_content);
+ ret = vmce_rdmsr(ecx, &msr_content);
if ( ret < 0 )
goto gp_fault;
else if ( ret )
@@ -2160,7 +2161,7 @@ int hvm_msr_write_intercept(struct cpu_u
break;
default:
- ret = mce_wrmsr(ecx, msr_content);
+ ret = vmce_wrmsr(ecx, msr_content);
if ( ret < 0 )
goto gp_fault;
else if ( ret )
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/arch/x86/traps.c Mon Apr 19 08:54:53 2010 +0100
@@ -65,6 +65,7 @@
#include <asm/traps.h>
#include <asm/hvm/vpt.h>
#include <asm/hypercall.h>
+#include <asm/mce.h>
#include <public/arch-x86/cpuid.h>
/*
@@ -2295,7 +2296,7 @@ static int emulate_privileged_op(struct
if ( wrmsr_hypervisor_regs(regs->ecx, val) )
break;
- rc = mce_wrmsr(regs->ecx, val);
+ rc = vmce_wrmsr(regs->ecx, val);
if ( rc < 0 )
goto fail;
if ( rc )
@@ -2388,7 +2389,7 @@ static int emulate_privileged_op(struct
break;
}
- rc = mce_rdmsr(regs->ecx, &val);
+ rc = vmce_rdmsr(regs->ecx, &val);
if ( rc < 0 )
goto fail;
if ( rc )
@@ -2947,19 +2948,19 @@ void async_exception_cleanup(struct vcpu
{
struct domain *d = curr->domain;
- if ( !d->arch.vmca_msrs.nr_injection )
+ if ( !d->arch.vmca_msrs->nr_injection )
{
printk(XENLOG_WARNING "MCE: ret from vMCE#, "
"no injection node\n");
goto end;
}
- d->arch.vmca_msrs.nr_injection--;
- if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
+ d->arch.vmca_msrs->nr_injection--;
+ if ( !list_empty(&d->arch.vmca_msrs->impact_header) )
{
struct bank_entry *entry;
- entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ entry = list_entry(d->arch.vmca_msrs->impact_header.next,
struct bank_entry, list);
gdprintk(XENLOG_DEBUG, "MCE: delete last injection node\n");
list_del(&entry->list);
@@ -2968,7 +2969,7 @@ void async_exception_cleanup(struct vcpu
printk(XENLOG_ERR "MCE: didn't found last injection node\n");
/* further injection */
- if ( d->arch.vmca_msrs.nr_injection > 0 &&
+ if ( d->arch.vmca_msrs->nr_injection > 0 &&
guest_has_trap_callback(d, 0, TRAP_machine_check) &&
!test_and_set_bool(curr->mce_pending) )
{
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/common/domain.c
--- a/xen/common/domain.c Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/common/domain.c Mon Apr 19 08:54:53 2010 +0100
@@ -616,6 +616,8 @@ static void complete_domain_destroy(stru
xfree(d->pirq_mask);
xfree(d->pirq_to_evtchn);
+ xfree(dom_vmce(d)->mci_ctl);
+ xfree(dom_vmce(d));
xsm_free_security_domain(d);
free_domain_struct(d);
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/include/asm-x86/domain.h Mon Apr 19 08:54:53 2010 +0100
@@ -6,6 +6,7 @@
#include <asm/hvm/vcpu.h>
#include <asm/hvm/domain.h>
#include <asm/e820.h>
+#include <asm/mce.h>
#include <public/vcpu.h>
#define has_32bit_shinfo(d) ((d)->arch.has_32bit_shinfo)
@@ -214,32 +215,6 @@ typedef xen_domctl_cpuid_t cpuid_input_t
typedef xen_domctl_cpuid_t cpuid_input_t;
struct p2m_domain;
-
-/* Define for GUEST MCA handling */
-#define MAX_NR_BANKS 30
-
-/* This entry is for recording bank nodes for the impacted domain,
- * put into impact_header list. */
-struct bank_entry {
- struct list_head list;
- uint16_t bank;
- uint64_t mci_status;
- uint64_t mci_addr;
- uint64_t mci_misc;
-};
-
-struct domain_mca_msrs
-{
- /* Guest should not change below values after DOM boot up */
- uint64_t mcg_cap;
- uint64_t mcg_ctl;
- uint64_t mcg_status;
- uint64_t mci_ctl[MAX_NR_BANKS];
- uint16_t nr_injection;
- struct list_head impact_header;
- spinlock_t lock;
-};
-
struct time_scale {
int shift;
u32 mul_frac;
@@ -311,7 +286,7 @@ struct arch_domain
cpuid_input_t cpuids[MAX_CPUID_INPUT];
/* For Guest vMCA handling */
- struct domain_mca_msrs vmca_msrs;
+ struct domain_mca_msrs *vmca_msrs;
/* TSC management (emulation, pv, scaling, stats) */
int tsc_mode; /* see include/asm-x86/time.h */
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/include/asm-x86/mce.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/mce.h Mon Apr 19 08:54:53 2010 +0100
@@ -0,0 +1,36 @@
+#include <xen/types.h>
+#include <public/arch-x86/xen-mca.h>
+#ifndef _XEN_X86_MCE_H
+#define _XEN_X86_MCE_H
+/* Define for GUEST MCA handling */
+#define MAX_NR_BANKS 30
+
+/* This entry is for recording bank nodes for the impacted domain,
+ * put into impact_header list. One entry holds the register values of a
+ * single error bank as they are presented to the guest. */
+struct bank_entry {
+ struct list_head list; /* link in domain_mca_msrs.impact_header */
+ uint16_t bank; /* number of the bank that reported the error */
+ uint64_t mci_status; /* value served for guest MCi_STATUS accesses */
+ uint64_t mci_addr; /* value served for guest MCi_ADDR reads */
+ uint64_t mci_misc; /* value served for guest MCi_MISC reads */
+};
+
+struct domain_mca_msrs
+{
+ /* Per-domain virtual MCA MSR state, reached through dom_vmce(d).
+ * Guest should not change below values after DOM boot up */
+ uint64_t mcg_cap; /* initialised from g_mcg_cap */
+ uint64_t mcg_ctl; /* guest MCG_CTL, initialised to all ones */
+ uint64_t mcg_status; /* guest MCG_STATUS */
+ uint64_t *mci_ctl; /* nr_mce_banks entries, allocated by vmce_init_msr() */
+ uint16_t nr_injection; /* recorded errors whose vMCE is still outstanding */
+ struct list_head impact_header; /* list of struct bank_entry */
+ spinlock_t lock; /* taken by vmce_rdmsr/vmce_wrmsr/fill_vmsr_data */
+};
+
+#define dom_vmce(x) ((x)->arch.vmca_msrs)
+
+/* Guest vMCE MSRs virtualization */
+extern int vmce_init_msr(struct domain *d);
+extern int vmce_wrmsr(uint32_t msr, uint64_t val);
+extern int vmce_rdmsr(uint32_t msr, uint64_t *val);
+#endif
diff -r 7ee8bb40200a -r 6233eb0f29ba xen/include/asm-x86/traps.h
--- a/xen/include/asm-x86/traps.h Thu Apr 15 19:11:16 2010 +0100
+++ b/xen/include/asm-x86/traps.h Mon Apr 19 08:54:53 2010 +0100
@@ -49,9 +49,4 @@ extern int send_guest_trap(struct domain
extern int send_guest_trap(struct domain *d, uint16_t vcpuid,
unsigned int trap_nr);
-/* Guest vMCE MSRs virtualization */
-extern void mce_init_msr(struct domain *d);
-extern int mce_wrmsr(uint32_t msr, uint64_t val);
-extern int mce_rdmsr(uint32_t msr, uint64_t *val);
-
#endif /* ASM_TRAP_H */
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|