Hi,
This is the Xen part of the patch.
Signed-off-by: Kazuhiro Suzuki <kaz@xxxxxxxxxxxxxx>
Thanks,
KAZ
diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/amd_f10.c
--- a/xen/arch/x86/cpu/mcheck/amd_f10.c Tue Sep 23 17:11:33 2008 +0100
+++ b/xen/arch/x86/cpu/mcheck/amd_f10.c Fri Sep 26 14:30:17 2008 +0900
@@ -82,8 +82,6 @@
}
-extern void k8_machine_check(struct cpu_user_regs *regs, long error_code);
-
/* AMD Family10 machine check */
void amd_f10_mcheck_init(struct cpuinfo_x86 *c)
{
@@ -91,7 +89,7 @@
uint32_t i;
int cpu_nr;
- machine_check_vector = k8_machine_check;
+ machine_check_vector = x86_machine_check;
mc_callback_bank_extended = amd_f10_handler;
cpu_nr = smp_processor_id();
wmb();
diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/amd_k8.c
--- a/xen/arch/x86/cpu/mcheck/amd_k8.c Tue Sep 23 17:11:33 2008 +0100
+++ b/xen/arch/x86/cpu/mcheck/amd_k8.c Fri Sep 26 14:30:17 2008 +0900
@@ -70,219 +70,6 @@
#include "x86_mca.h"
-/* Machine Check Handler for AMD K8 family series */
-void k8_machine_check(struct cpu_user_regs *regs, long error_code)
-{
- struct vcpu *vcpu = current;
- struct domain *curdom;
- struct mc_info *mc_data;
- struct mcinfo_global mc_global;
- struct mcinfo_bank mc_info;
- uint64_t status, addrv, miscv, uc;
- uint32_t i;
- unsigned int cpu_nr;
- uint32_t xen_impacted = 0;
-#define DOM_NORMAL 0
-#define DOM0_TRAP 1
-#define DOMU_TRAP 2
-#define DOMU_KILLED 4
- uint32_t dom_state = DOM_NORMAL;
-
- /* This handler runs as interrupt gate. So IPIs from the
- * polling service routine are defered until we finished.
- */
-
- /* Disable interrupts for the _vcpu_. It may not re-scheduled to
- * an other physical CPU or the impacted process in the guest
- * continues running with corrupted data, otherwise. */
- vcpu_schedule_lock_irq(vcpu);
-
- mc_data = x86_mcinfo_getptr();
- cpu_nr = smp_processor_id();
- curdom = vcpu->domain;
-
- memset(&mc_global, 0, sizeof(mc_global));
- mc_global.common.type = MC_TYPE_GLOBAL;
- mc_global.common.size = sizeof(mc_global);
-
- mc_global.mc_domid = curdom->domain_id; /* impacted domain */
- mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
- BUG_ON(cpu_nr != vcpu->processor);
- mc_global.mc_core_threadid = 0;
- mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
-#if 0 /* TODO: on which socket is this physical core?
- It's not clear to me how to figure this out. */
- mc_global.mc_socketid = ???;
-#endif
- mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE;
- rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
-
- /* Quick check, who is impacted */
- xen_impacted = is_idle_domain(curdom);
-
- /* Dom0 */
- x86_mcinfo_clear(mc_data);
- x86_mcinfo_add(mc_data, &mc_global);
-
- for (i = 0; i < nr_mce_banks; i++) {
- struct domain *d;
-
- rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
-
- if (!(status & MCi_STATUS_VAL))
- continue;
-
- /* An error happened in this bank.
- * This is expected to be an uncorrectable error,
- * since correctable errors get polled.
- */
- uc = status & MCi_STATUS_UC;
-
- memset(&mc_info, 0, sizeof(mc_info));
- mc_info.common.type = MC_TYPE_BANK;
- mc_info.common.size = sizeof(mc_info);
- mc_info.mc_bank = i;
- mc_info.mc_status = status;
-
- addrv = 0;
- if (status & MCi_STATUS_ADDRV) {
- rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addrv);
-
- d = maddr_get_owner(addrv);
- if (d != NULL)
- mc_info.mc_domid = d->domain_id;
- }
-
- miscv = 0;
- if (status & MCi_STATUS_MISCV)
- rdmsrl(MSR_IA32_MC0_MISC + 4 * i, miscv);
-
- mc_info.mc_addr = addrv;
- mc_info.mc_misc = miscv;
-
- x86_mcinfo_add(mc_data, &mc_info); /* Dom0 */
-
- if (mc_callback_bank_extended)
- mc_callback_bank_extended(mc_data, i, status);
-
- /* clear status */
- wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
-
- status = mc_global.mc_gstatus;
-
- /* clear MCIP or cpu enters shutdown state
- * in case another MCE occurs. */
- status &= ~MCG_STATUS_MCIP;
- wrmsrl(MSR_IA32_MCG_STATUS, status);
- wmb();
-
- /* For the details see the discussion "MCE/MCA concept" on xen-devel.
- * The thread started here:
- * http://lists.xensource.com/archives/html/xen-devel/2007-05/msg01015.html
- */
-
- /* MCG_STATUS_RIPV:
- * When this bit is not set, then the instruction pointer onto the stack
- * to resume at is not valid. If xen is interrupted, then we panic anyway
- * right below. Otherwise it is up to the guest to figure out if
- * guest kernel or guest userland is affected and should kill either
- * itself or the affected process.
- */
-
- /* MCG_STATUS_EIPV:
- * Evaluation of EIPV is the job of the guest.
- */
-
- if (xen_impacted) {
- /* Now we are going to panic anyway. Allow interrupts, so that
- * printk on serial console can work. */
- vcpu_schedule_unlock_irq(vcpu);
-
- /* Uh, that means, machine check exception
- * inside Xen occured. */
- printk("Machine check exception occured in Xen.\n");
-
- /* if MCG_STATUS_EIPV indicates, the IP on the stack is related
- * to the error then it makes sense to print a stack trace.
- * That can be useful for more detailed error analysis and/or
- * error case studies to figure out, if we can clear
- * xen_impacted and kill a DomU instead
- * (i.e. if a guest only control structure is affected, but then
- * we must ensure the bad pages are not re-used again).
- */
- if (status & MCG_STATUS_EIPV) {
- printk("MCE: Instruction Pointer is related to the error. "
- "Therefore, print the execution state.\n");
- show_execution_state(regs);
- }
- x86_mcinfo_dump(mc_data);
- panic("End of MCE. Use mcelog to decode above error codes.\n");
- }
-
- /* If Dom0 registered a machine check handler, which is only possible
- * with a PV MCA driver, then ... */
- if ( guest_has_trap_callback(dom0, 0, TRAP_machine_check) ) {
- dom_state = DOM0_TRAP;
-
- /* ... deliver machine check trap to Dom0. */
- send_guest_trap(dom0, 0, TRAP_machine_check);
-
- /* Xen may tell Dom0 now to notify the DomU.
- * But this will happen through a hypercall. */
- } else
- /* Dom0 did not register a machine check handler, but if DomU
- * did so, then... */
- if ( guest_has_trap_callback(curdom, vcpu->vcpu_id, TRAP_machine_check) ) {
- dom_state = DOMU_TRAP;
-
- /* ... deliver machine check trap to DomU */
- send_guest_trap(curdom, vcpu->vcpu_id, TRAP_machine_check);
- } else {
- /* hmm... noone feels responsible to handle the error.
- * So, do a quick check if a DomU is impacted or not.
- */
- if (curdom == dom0) {
- /* Dom0 is impacted. Since noone can't handle
- * this error, panic! */
- x86_mcinfo_dump(mc_data);
- panic("MCE occured in Dom0, which it can't handle\n");
-
- /* UNREACHED */
- } else {
- dom_state = DOMU_KILLED;
-
- /* Enable interrupts. This basically results in
- * calling sti on the *physical* cpu. But after
- * domain_crash() the vcpu pointer is invalid.
- * Therefore, we must unlock the irqs before killing
- * it. */
- vcpu_schedule_unlock_irq(vcpu);
-
- /* DomU is impacted. Kill it and continue. */
- domain_crash(curdom);
- }
- }
-
-
- switch (dom_state) {
- case DOM0_TRAP:
- case DOMU_TRAP:
- /* Enable interrupts. */
- vcpu_schedule_unlock_irq(vcpu);
-
- /* guest softirqs and event callbacks are scheduled
- * immediately after this handler exits. */
- break;
- case DOMU_KILLED:
- /* Nothing to do here. */
- break;
- default:
- BUG();
- }
-}
/* AMD K8 machine check */
@@ -292,7 +79,7 @@
uint32_t i;
int cpu_nr;
- machine_check_vector = k8_machine_check;
+ machine_check_vector = x86_machine_check;
cpu_nr = smp_processor_id();
wmb();
diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/amd_nonfatal.c
--- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Tue Sep 23 17:11:33 2008 +0100
+++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Fri Sep 26 14:30:17 2008 +0900
@@ -65,117 +65,12 @@
#include "mce.h"
#include "x86_mca.h"
-static struct timer mce_timer;
+static int hw_threshold = 0;
-#define MCE_PERIOD MILLISECS(15000)
-#define MCE_MIN MILLISECS(2000)
-#define MCE_MAX MILLISECS(30000)
+extern struct timer mce_timer;
-static s_time_t period = MCE_PERIOD;
-static int hw_threshold = 0;
-static int adjust = 0;
-
-/* The polling service routine:
- * Collects information of correctable errors and notifies
- * Dom0 via an event.
- */
-void mce_amd_checkregs(void *info)
-{
- struct vcpu *vcpu = current;
- struct mc_info *mc_data;
- struct mcinfo_global mc_global;
- struct mcinfo_bank mc_info;
- uint64_t status, addrv, miscv;
- unsigned int i;
- unsigned int event_enabled;
- unsigned int cpu_nr;
- int error_found;
-
- /* We don't need a slot yet. Only allocate one on error. */
- mc_data = NULL;
-
- cpu_nr = smp_processor_id();
- event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
- error_found = 0;
-
- memset(&mc_global, 0, sizeof(mc_global));
- mc_global.common.type = MC_TYPE_GLOBAL;
- mc_global.common.size = sizeof(mc_global);
-
- mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */
- mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
- BUG_ON(cpu_nr != vcpu->processor);
- mc_global.mc_core_threadid = 0;
- mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
-#if 0 /* TODO: on which socket is this physical core?
- It's not clear to me how to figure this out. */
- mc_global.mc_socketid = ???;
-#endif
- mc_global.mc_flags |= MC_FLAG_CORRECTABLE;
- rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
-
- for (i = 0; i < nr_mce_banks; i++) {
- struct domain *d;
-
- rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
-
- if (!(status & MCi_STATUS_VAL))
- continue;
-
- if (mc_data == NULL) {
- /* Now we need a slot to fill in error telemetry. */
- mc_data = x86_mcinfo_getptr();
- BUG_ON(mc_data == NULL);
- x86_mcinfo_clear(mc_data);
- x86_mcinfo_add(mc_data, &mc_global);
- }
-
- memset(&mc_info, 0, sizeof(mc_info));
- mc_info.common.type = MC_TYPE_BANK;
- mc_info.common.size = sizeof(mc_info);
- mc_info.mc_bank = i;
- mc_info.mc_status = status;
-
- /* Increase polling frequency */
- error_found = 1;
-
- addrv = 0;
- if (status & MCi_STATUS_ADDRV) {
- rdmsrl(MSR_IA32_MC0_ADDR + i * 4, addrv);
-
- d = maddr_get_owner(addrv);
- if (d != NULL)
- mc_info.mc_domid = d->domain_id;
- }
-
- miscv = 0;
- if (status & MCi_STATUS_MISCV)
- rdmsrl(MSR_IA32_MC0_MISC + i * 4, miscv);
-
- mc_info.mc_addr = addrv;
- mc_info.mc_misc = miscv;
- x86_mcinfo_add(mc_data, &mc_info);
-
- if (mc_callback_bank_extended)
- mc_callback_bank_extended(mc_data, i, status);
-
- /* clear status */
- wrmsrl(MSR_IA32_MC0_STATUS + i * 4, 0x0ULL);
- wmb();
- }
-
- if (error_found > 0) {
- /* If Dom0 enabled the VIRQ_MCA event, then ... */
- if (event_enabled)
- /* ... notify it. */
- send_guest_global_virq(dom0, VIRQ_MCA);
- else
- /* ... or dump it */
- x86_mcinfo_dump(mc_data);
- }
-
- adjust += error_found;
-}
+extern s_time_t period;
+extern int adjust;
/* polling service routine invoker:
* Adjust poll frequency at runtime. No error means slow polling frequency,
@@ -186,7 +81,7 @@
*/
static void mce_amd_work_fn(void *data)
{
- on_each_cpu(mce_amd_checkregs, data, 1, 1);
+ on_each_cpu(x86_mce_checkregs, data, 1, 1);
if (adjust > 0) {
if ( !guest_enabled_event(dom0->vcpu[0], VIRQ_MCA) ) {
diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Tue Sep 23 17:11:33 2008 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.c Fri Sep 26 14:30:17 2008 +0900
@@ -7,6 +7,8 @@
#include <xen/types.h>
#include <xen/kernel.h>
#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/smp.h>
#include <xen/errno.h>
@@ -431,6 +433,226 @@
} while (1);
}
+
+/* Machine Check Handler for AMD K8 family series and Intel P4/Xeon family */
+void x86_machine_check(struct cpu_user_regs *regs, long error_code)
+{
+ struct vcpu *vcpu = current;
+ struct domain *curdom;
+ struct mc_info *mc_data;
+ struct mcinfo_global mc_global;
+ struct mcinfo_bank mc_info;
+ uint64_t status, addrv, miscv, uc;
+ uint32_t i;
+ unsigned int cpu_nr;
+ uint32_t xen_impacted = 0;
+#define DOM_NORMAL 0
+#define DOM0_TRAP 1
+#define DOMU_TRAP 2
+#define DOMU_KILLED 4
+ uint32_t dom_state = DOM_NORMAL;
+
+ /* This handler runs as interrupt gate. So IPIs from the
+ * polling service routine are deferred until we have finished.
+ */
+
+ /* Disable interrupts for the _vcpu_. Otherwise it might be rescheduled
+ * to another physical CPU, or the impacted process in the guest
+ * might continue running with corrupted data. */
+ vcpu_schedule_lock_irq(vcpu);
+
+ mc_data = x86_mcinfo_getptr();
+ cpu_nr = smp_processor_id();
+ curdom = vcpu->domain;
+
+ memset(&mc_global, 0, sizeof(mc_global));
+ mc_global.common.type = MC_TYPE_GLOBAL;
+ mc_global.common.size = sizeof(mc_global);
+
+ mc_global.mc_domid = curdom->domain_id; /* impacted domain */
+ mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
+ BUG_ON(cpu_nr != vcpu->processor);
+ mc_global.mc_core_threadid = 0;
+ mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
+#if 0 /* TODO: on which socket is this physical core?
+ It's not clear to me how to figure this out. */
+ mc_global.mc_socketid = ???;
+#endif
+ mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE;
+ rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
+
+ /* Quick check, who is impacted */
+ xen_impacted = is_idle_domain(curdom);
+
+ /* Dom0 */
+ x86_mcinfo_clear(mc_data);
+ x86_mcinfo_add(mc_data, &mc_global);
+
+ for (i = 0; i < nr_mce_banks; i++) {
+ struct domain *d;
+
+ rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
+
+ if (!(status & MCi_STATUS_VAL))
+ continue;
+
+ /* An error happened in this bank.
+ * This is expected to be an uncorrectable error,
+ * since correctable errors get polled.
+ */
+ uc = status & MCi_STATUS_UC;
+
+ memset(&mc_info, 0, sizeof(mc_info));
+ mc_info.common.type = MC_TYPE_BANK;
+ mc_info.common.size = sizeof(mc_info);
+ mc_info.mc_bank = i;
+ mc_info.mc_status = status;
+
+ addrv = 0;
+ if (status & MCi_STATUS_ADDRV) {
+ rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addrv);
+
+ d = maddr_get_owner(addrv);
+ if (d != NULL)
+ mc_info.mc_domid = d->domain_id;
+ }
+
+ miscv = 0;
+ if (status & MCi_STATUS_MISCV)
+ rdmsrl(MSR_IA32_MC0_MISC + 4 * i, miscv);
+
+ mc_info.mc_addr = addrv;
+ mc_info.mc_misc = miscv;
+
+ x86_mcinfo_add(mc_data, &mc_info); /* Dom0 */
+
+ if (mc_callback_bank_extended)
+ mc_callback_bank_extended(mc_data, i, status);
+
+ /* clear status */
+ wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
+ wmb();
+ add_taint(TAINT_MACHINE_CHECK);
+ }
+
+ /* Never do anything final for the previous reset */
+ if (!regs) {
+ vcpu_schedule_unlock_irq(vcpu);
+ return;
+ }
+
+ status = mc_global.mc_gstatus;
+
+ /* clear MCIP or cpu enters shutdown state
+ * in case another MCE occurs. */
+ status &= ~MCG_STATUS_MCIP;
+ wrmsrl(MSR_IA32_MCG_STATUS, status);
+ wmb();
+
+ /* For the details see the discussion "MCE/MCA concept" on xen-devel.
+ * The thread started here:
+ * http://lists.xensource.com/archives/html/xen-devel/2007-05/msg01015.html
+ */
+
+ /* MCG_STATUS_RIPV:
+ * When this bit is not set, then the instruction pointer onto the stack
+ * to resume at is not valid. If xen is interrupted, then we panic anyway
+ * right below. Otherwise it is up to the guest to figure out if
+ * guest kernel or guest userland is affected and should kill either
+ * itself or the affected process.
+ */
+
+ /* MCG_STATUS_EIPV:
+ * Evaluation of EIPV is the job of the guest.
+ */
+
+ if (xen_impacted) {
+ /* Now we are going to panic anyway. Allow interrupts, so that
+ * printk on serial console can work. */
+ vcpu_schedule_unlock_irq(vcpu);
+
+ /* Uh, that means, machine check exception
+ * inside Xen occured. */
+ printk("Machine check exception occured in Xen.\n");
+
+ /* if MCG_STATUS_EIPV indicates, the IP on the stack is related
+ * to the error then it makes sense to print a stack trace.
+ * That can be useful for more detailed error analysis and/or
+ * error case studies to figure out, if we can clear
+ * xen_impacted and kill a DomU instead
+ * (i.e. if a guest only control structure is affected, but then
+ * we must ensure the bad pages are not re-used again).
+ */
+ if (status & MCG_STATUS_EIPV) {
+ printk("MCE: Instruction Pointer is related to the error. "
+ "Therefore, print the execution state.\n");
+ show_execution_state(regs);
+ }
+ x86_mcinfo_dump(mc_data);
+ panic("End of MCE. Use mcelog to decode above error codes.\n");
+ }
+
+ /* If Dom0 registered a machine check handler, which is only possible
+ * with a PV MCA driver, then ... */
+ if ( guest_has_trap_callback(dom0, 0, TRAP_machine_check) ) {
+ dom_state = DOM0_TRAP;
+
+ /* ... deliver machine check trap to Dom0. */
+ send_guest_trap(dom0, 0, TRAP_machine_check);
+
+ /* Xen may tell Dom0 now to notify the DomU.
+ * But this will happen through a hypercall. */
+ } else
+ /* Dom0 did not register a machine check handler, but if DomU
+ * did so, then... */
+ if ( guest_has_trap_callback(curdom, vcpu->vcpu_id, TRAP_machine_check) ) {
+ dom_state = DOMU_TRAP;
+
+ /* ... deliver machine check trap to DomU */
+ send_guest_trap(curdom, vcpu->vcpu_id, TRAP_machine_check);
+ } else {
+ /* hmm... noone feels responsible to handle the error.
+ * So, do a quick check if a DomU is impacted or not.
+ */
+ if (curdom == dom0) {
+ /* Dom0 is impacted. Since no one can handle
+ * this error, panic! */
+ x86_mcinfo_dump(mc_data);
+ panic("MCE occured in Dom0, which it can't handle\n");
+
+ /* UNREACHED */
+ } else {
+ dom_state = DOMU_KILLED;
+
+ /* Enable interrupts. This basically results in
+ * calling sti on the *physical* cpu. But after
+ * domain_crash() the vcpu pointer is invalid.
+ * Therefore, we must unlock the irqs before killing
+ * it. */
+ vcpu_schedule_unlock_irq(vcpu);
+
+ /* DomU is impacted. Kill it and continue. */
+ domain_crash(curdom);
+ }
+ }
+
+
+ switch (dom_state) {
+ case DOM0_TRAP:
+ case DOMU_TRAP:
+ /* Enable interrupts. */
+ vcpu_schedule_unlock_irq(vcpu);
+
+ /* guest softirqs and event callbacks are scheduled
+ * immediately after this handler exits. */
+ break;
+ case DOMU_KILLED:
+ /* Nothing to do here. */
+ break;
+ default:
+ BUG();
+ }
+}
/* Machine Check Architecture Hypercall */
diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/non-fatal.c
--- a/xen/arch/x86/cpu/mcheck/non-fatal.c Tue Sep 23 17:11:33 2008 +0100
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Fri Sep 26 14:30:17 2008 +0900
@@ -14,16 +14,158 @@
#include <xen/smp.h>
#include <xen/timer.h>
#include <xen/errno.h>
+#include <xen/event.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include "mce.h"
+#include "x86_mca.h"
static int firstbank;
-static struct timer mce_timer;
-#define MCE_PERIOD MILLISECS(15000)
+struct timer mce_timer;
+
+s_time_t period = MCE_PERIOD;
+int adjust = 0;
+
+/* The polling service routine:
+ * Collects information of correctable errors and notifies
+ * Dom0 via an event.
+ */
+void x86_mce_checkregs(void *info)
+{
+ struct vcpu *vcpu = current;
+ struct mc_info *mc_data;
+ struct mcinfo_global mc_global;
+ struct mcinfo_bank mc_info;
+ uint64_t status, addrv, miscv;
+ unsigned int i;
+ unsigned int event_enabled;
+ unsigned int cpu_nr;
+ int error_found;
+
+ /* We don't need a slot yet. Only allocate one on error. */
+ mc_data = NULL;
+
+ cpu_nr = smp_processor_id();
+ event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
+ error_found = 0;
+
+ memset(&mc_global, 0, sizeof(mc_global));
+ mc_global.common.type = MC_TYPE_GLOBAL;
+ mc_global.common.size = sizeof(mc_global);
+
+ mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */
+ mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
+ BUG_ON(cpu_nr != vcpu->processor);
+ mc_global.mc_core_threadid = 0;
+ mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
+#if 0 /* TODO: on which socket is this physical core?
+ It's not clear to me how to figure this out. */
+ mc_global.mc_socketid = ???;
+#endif
+ mc_global.mc_flags |= MC_FLAG_CORRECTABLE;
+ rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
+
+ for (i = 0; i < nr_mce_banks; i++) {
+ struct domain *d;
+
+ rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
+
+ if (!(status & MCi_STATUS_VAL))
+ continue;
+
+ if (mc_data == NULL) {
+ /* Now we need a slot to fill in error telemetry. */
+ mc_data = x86_mcinfo_getptr();
+ BUG_ON(mc_data == NULL);
+ x86_mcinfo_clear(mc_data);
+ x86_mcinfo_add(mc_data, &mc_global);
+ }
+
+ memset(&mc_info, 0, sizeof(mc_info));
+ mc_info.common.type = MC_TYPE_BANK;
+ mc_info.common.size = sizeof(mc_info);
+ mc_info.mc_bank = i;
+ mc_info.mc_status = status;
+
+ /* Increase polling frequency */
+ error_found = 1;
+
+ addrv = 0;
+ if (status & MCi_STATUS_ADDRV) {
+ rdmsrl(MSR_IA32_MC0_ADDR + i * 4, addrv);
+
+ d = maddr_get_owner(addrv);
+ if (d != NULL)
+ mc_info.mc_domid = d->domain_id;
+ }
+
+ miscv = 0;
+ if (status & MCi_STATUS_MISCV)
+ rdmsrl(MSR_IA32_MC0_MISC + i * 4, miscv);
+
+ mc_info.mc_addr = addrv;
+ mc_info.mc_misc = miscv;
+ x86_mcinfo_add(mc_data, &mc_info);
+
+ if (mc_callback_bank_extended)
+ mc_callback_bank_extended(mc_data, i, status);
+
+ /* clear status */
+ wrmsrl(MSR_IA32_MC0_STATUS + i * 4, 0x0ULL);
+ wmb();
+ }
+
+ if (error_found > 0) {
+ /* If Dom0 enabled the VIRQ_MCA event, then ... */
+ if (event_enabled)
+ /* ... notify it. */
+ send_guest_global_virq(dom0, VIRQ_MCA);
+ else
+ /* ... or dump it */
+ x86_mcinfo_dump(mc_data);
+ }
+
+ adjust += error_found;
+}
+
+static void p4_mce_work_fn(void *data)
+{
+ on_each_cpu(x86_mce_checkregs, NULL, 1, 1);
+
+ if (adjust > 0) {
+ if ( !guest_enabled_event(dom0->vcpu[0], VIRQ_MCA) ) {
+ /* Dom0 did not enable VIRQ_MCA, so Xen is reporting. */
+ printk("MCE: polling routine found correctable error. "
+ " Use mcelog to parse above error output.\n");
+ }
+ }
+
+ if (adjust > 0) {
+ /* Increase polling frequency */
+ adjust++; /* adjust == 1 must have an effect */
+ period /= adjust;
+ } else {
+ /* Decrease polling frequency */
+ period *= 2;
+ }
+ if (period > MCE_MAX) {
+ /* limit: Poll at least every 30s */
+ period = MCE_MAX;
+ }
+ if (period < MCE_MIN) {
+ /* limit: Poll every 2s.
+ * When this is reached an uncorrectable error
+ * is expected to happen, if Dom0 does nothing.
+ */
+ period = MCE_MIN;
+ }
+
+ set_timer(&mce_timer, NOW() + period);
+ adjust = 0;
+}
static void mce_checkregs (void *info)
{
@@ -85,6 +227,11 @@
break;
case X86_VENDOR_INTEL:
+ if (c->x86 == 15) { /* P4/Xeon */
+ init_timer(&mce_timer, p4_mce_work_fn, NULL, 0);
+ set_timer(&mce_timer, NOW() + period);
+ break;
+ }
init_timer(&mce_timer, mce_work_fn, NULL, 0);
set_timer(&mce_timer, NOW() + MCE_PERIOD);
break;
diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/p4.c
--- a/xen/arch/x86/cpu/mcheck/p4.c Tue Sep 23 17:11:33 2008 +0100
+++ b/xen/arch/x86/cpu/mcheck/p4.c Fri Sep 26 14:30:17 2008 +0900
@@ -15,6 +15,7 @@
#include <asm/apic.h>
#include "mce.h"
+#include "x86_mca.h"
/* as supported by the P4/Xeon family */
struct intel_mce_extended_msrs {
@@ -32,6 +33,7 @@
};
static int mce_num_extended_msrs = 0;
+static int mce_bootlog = 1;
#ifdef CONFIG_X86_MCE_P4THERMAL
@@ -158,85 +160,13 @@
return mce_num_extended_msrs;
}
-static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
-{
- int recover=1;
- u32 alow, ahigh, high, low;
- u32 mcgstl, mcgsth;
- int i;
- struct intel_mce_extended_msrs dbg;
-
- rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
- if (mcgstl & (1<<0)) /* Recoverable ? */
- recover=0;
-
- printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
- smp_processor_id(), mcgsth, mcgstl);
-
- if (intel_get_extended_msrs(&dbg)) {
- printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
- smp_processor_id(), dbg.eip, dbg.eflags);
- printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
- dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
- printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
- dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
- }
-
- for (i=0; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
- if (high & (1<<31)) {
- if (high & (1<<29))
- recover |= 1;
- if (high & (1<<25))
- recover |= 2;
- printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
- high &= ~(1<<31);
- if (high & (1<<27)) {
- rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- printk ("[%08x%08x]", ahigh, alow);
- }
- if (high & (1<<26)) {
- rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- printk (" at %08x%08x", ahigh, alow);
- }
- printk ("\n");
- }
- }
-
- if (recover & 2)
- panic ("CPU context corrupt");
- if (recover & 1)
- panic ("Unable to continue");
-
- printk(KERN_EMERG "Attempting to continue.\n");
- /*
- * Do not clear the MSR_IA32_MCi_STATUS if the error is not
- * recoverable/continuable.This will allow BIOS to look at the MSRs
- * for errors if the OS could not log the error.
- */
- for (i=0; i<nr_mce_banks; i++) {
- u32 msr;
- msr = MSR_IA32_MC0_STATUS+i*4;
- rdmsr (msr, low, high);
- if (high&(1<<31)) {
- /* Clear it */
- wrmsr(msr, 0UL, 0UL);
- /* Serialize */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
- }
- mcgstl &= ~(1<<2);
- wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
-}
-
void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
- machine_check_vector = intel_machine_check;
+ machine_check_vector = x86_machine_check;
wmb();
printk (KERN_INFO "Intel machine check architecture supported.\n");
@@ -244,6 +174,10 @@
if (l & (1<<8)) /* Control register present ? */
wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
nr_mce_banks = l & 0xff;
+
+ /* Log the machine checks left over from the previous reset.
+ This also clears all registers */
+ x86_machine_check(NULL, mce_bootlog ? -1 : -2);
for (i=0; i<nr_mce_banks; i++) {
wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
diff -r f4552d9f6afb xen/arch/x86/cpu/mcheck/x86_mca.h
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h Tue Sep 23 17:11:33 2008 +0100
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Fri Sep 26 14:30:17 2008 +0900
@@ -70,3 +70,11 @@
/* reserved bits */
#define MCi_STATUS_OTHER_RESERVED2 0x0180000000000000ULL
+/* Polling period */
+#define MCE_PERIOD MILLISECS(15000)
+#define MCE_MIN MILLISECS(2000)
+#define MCE_MAX MILLISECS(30000)
+
+/* Common routines */
+void x86_machine_check(struct cpu_user_regs *regs, long error_code);
+void x86_mce_checkregs(void *info);
diff -r f4552d9f6afb xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Tue Sep 23 17:11:33 2008 +0100
+++ b/xen/arch/x86/traps.c Fri Sep 26 14:30:17 2008 +0900
@@ -713,8 +713,10 @@
__clear_bit(X86_FEATURE_VME, &d);
__clear_bit(X86_FEATURE_PSE, &d);
__clear_bit(X86_FEATURE_PGE, &d);
+#ifndef __x86_64__
__clear_bit(X86_FEATURE_MCE, &d);
__clear_bit(X86_FEATURE_MCA, &d);
+#endif
__clear_bit(X86_FEATURE_PSE36, &d);
}
switch ( (uint32_t)regs->eax )
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|