# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID b9b411b505878f7600871c68de989d33c1994cf7
# Parent fcc833cbaf827327ad9e9348c93aaf7f99f0253f
Upgrade arch/x86/cpu/* files to their equivalents in
linux-2.6.16-rc2/arch/i386/kernel/cpu/*.
Also include kernel taint tracking and include that
information, and Xen release info, in our crash dumps.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/Makefile Tue Feb 14 15:23:43 2006
@@ -4,9 +4,10 @@
OBJS += $(patsubst %.S,%.o,$(wildcard $(TARGET_SUBARCH)/*.S))
OBJS += $(patsubst %.c,%.o,$(wildcard $(TARGET_SUBARCH)/*.c))
OBJS += $(patsubst %.c,%.o,$(wildcard acpi/*.c))
-OBJS += $(patsubst %.c,%.o,$(wildcard mtrr/*.c))
OBJS += $(patsubst %.c,%.o,$(wildcard genapic/*.c))
OBJS += $(patsubst %.c,%.o,$(wildcard cpu/*.c))
+OBJS += $(patsubst %.c,%.o,$(wildcard cpu/mcheck/*.c))
+OBJS += $(patsubst %.c,%.o,$(wildcard cpu/mtrr/*.c))
OBJS += $(patsubst %.c,%.o,$(wildcard hvm/*.c))
OBJS += $(patsubst %.c,%.o,$(wildcard hvm/vmx/*.c))
OBJS += $(patsubst %.S,%.o,$(wildcard hvm/vmx/$(TARGET_SUBARCH)/*.S))
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/apic.c Tue Feb 14 15:23:43 2006
@@ -927,7 +927,7 @@
return 1;
}
-void smp_apic_timer_interrupt(struct cpu_user_regs * regs)
+fastcall void smp_apic_timer_interrupt(struct cpu_user_regs * regs)
{
ack_APIC_irq();
perfc_incrc(apic_timer);
@@ -937,7 +937,7 @@
/*
* This interrupt should _never_ happen with our APIC/SMP architecture
*/
-asmlinkage void smp_spurious_interrupt(struct cpu_user_regs *regs)
+fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs)
{
unsigned long v;
@@ -959,7 +959,7 @@
* This interrupt should never happen with our APIC/SMP architecture
*/
-asmlinkage void smp_error_interrupt(struct cpu_user_regs *regs)
+fastcall void smp_error_interrupt(struct cpu_user_regs *regs)
{
unsigned long v, v1;
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/amd.c Tue Feb 14 15:23:43 2006
@@ -47,6 +47,22 @@
u32 l, h;
int mbytes = num_physpages >> (20-PAGE_SHIFT);
int r;
+
+#ifdef CONFIG_SMP
+ unsigned long long value;
+
+ /* Disable TLB flush filter by setting HWCR.FFDIS on K8
+ * bit 6 of msr C001_0015
+ *
+ * Errata 63 for SH-B3 steppings
+ * Errata 122 for all steppings (F+ have it disabled by default)
+ */
+ if (c->x86 == 15) {
+ rdmsrl(MSR_K7_HWCR, value);
+ value |= 1 << 6;
+ wrmsrl(MSR_K7_HWCR, value);
+ }
+#endif
/*
* FIXME: We should handle the K5 here. Set up the write
@@ -165,8 +181,13 @@
set_bit(X86_FEATURE_K6_MTRR,
c->x86_capability);
break;
}
+
+ if (c->x86_model == 10) {
+ /* AMD Geode LX is model 10 */
+ /* placeholder for any needed mods */
+ break;
+ }
break;
-
case 6: /* An Athlon/Duron */
/* Bit 15 of Athlon specific MSR 15, needs to be 0
@@ -225,9 +246,15 @@
display_cacheinfo(c);
if (cpuid_eax(0x80000000) >= 0x80000008) {
- c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
- if (c->x86_num_cores & (c->x86_num_cores - 1))
- c->x86_num_cores = 1;
+ c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+ if (c->x86_max_cores & (c->x86_max_cores - 1))
+ c->x86_max_cores = 1;
+ }
+
+ if (cpuid_eax(0x80000000) >= 0x80000007) {
+ c->x86_power = cpuid_edx(0x80000007);
+ if (c->x86_power & (1<<8))
+ set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
}
#ifdef CONFIG_X86_HT
@@ -236,15 +263,15 @@
* distingush the cores. Assumes number of cores is a power
* of two.
*/
- if (c->x86_num_cores > 1) {
+ if (c->x86_max_cores > 1) {
int cpu = smp_processor_id();
unsigned bits = 0;
- while ((1 << bits) < c->x86_num_cores)
+ while ((1 << bits) < c->x86_max_cores)
bits++;
cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
phys_proc_id[cpu] >>= bits;
printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
- cpu, c->x86_num_cores, cpu_core_id[cpu]);
+ cpu, c->x86_max_cores, cpu_core_id[cpu]);
}
#endif
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/common.c Tue Feb 14 15:23:43 2006
@@ -17,13 +17,11 @@
#define tsc_disable 0
#define disable_pse 0
-static int cachesize_override __initdata = -1;
-static int disable_x86_fxsr __initdata = 0;
-static int disable_x86_serial_nr __initdata = 1;
+static int cachesize_override __devinitdata = -1;
+static int disable_x86_fxsr __devinitdata = 0;
+static int disable_x86_serial_nr __devinitdata = 0;
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
-
-extern void mcheck_init(struct cpuinfo_x86 *c);
static void default_init(struct cpuinfo_x86 * c)
{
@@ -43,7 +41,9 @@
};
static struct cpu_dev * this_cpu = &default_cpu;
-int __init get_model_name(struct cpuinfo_x86 *c)
+integer_param("cachesize", cachesize_override);
+
+int __devinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
char *p, *q;
@@ -73,7 +73,7 @@
}
-void __init display_cacheinfo(struct cpuinfo_x86 *c)
+void __devinit display_cacheinfo(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ecx, edx, l2size;
@@ -114,7 +114,7 @@
/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
*/
/* Look up CPU names by table lookup. */
-static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+static char __devinit *table_lookup_model(struct cpuinfo_x86 *c)
{
struct cpu_model_info *info;
@@ -135,7 +135,7 @@
}
-void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
{
char *v = c->x86_vendor_id;
int i;
@@ -155,12 +155,7 @@
}
-static int __init x86_fxsr_setup(char * s)
-{
- disable_x86_fxsr = 1;
- return 1;
-}
-__setup("nofxsr", x86_fxsr_setup);
+boolean_param("nofxsr", disable_x86_fxsr);
/* Standard macro to see if a specific flag is changeable */
@@ -186,14 +181,17 @@
/* Probe for the CPUID instruction */
-static int __init have_cpuid_p(void)
+static int __devinit have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
/* Do minimum CPU detection early.
Fields really needed: vendor, cpuid_level, family, model, mask, cache
alignment.
- The others are not touched to avoid unwanted side effects. */
+ The others are not touched to avoid unwanted side effects.
+
+ WARNING: this function is only called on the BP. Don't add code here
+ that is supposed to run on all CPUs. */
static void __init early_cpu_detect(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -217,24 +215,18 @@
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
c->x86 = (tfms >> 8) & 15;
c->x86_model = (tfms >> 4) & 15;
- if (c->x86 == 0xf) {
+ if (c->x86 == 0xf)
c->x86 += (tfms >> 20) & 0xff;
+ if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
- }
c->x86_mask = tfms & 15;
if (cap0 & (1<<19))
c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
c->x86_capability[0] = cap0; /* Added for Xen bootstrap */
}
-
- early_intel_workaround(c);
-
-#ifdef CONFIG_X86_HT
- phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
-#endif
-}
-
-void __init generic_identify(struct cpuinfo_x86 * c)
+}
+
+void __devinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
int junk;
@@ -279,9 +271,15 @@
get_model_name(c); /* Default name */
}
}
-}
-
-static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+
+ early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+ phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
+}
+
+static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
/* Disable processor serial number */
@@ -297,19 +295,14 @@
}
}
-static int __init x86_serial_nr_setup(char *s)
-{
- disable_x86_serial_nr = 0;
- return 1;
-}
-__setup("serialnumber", x86_serial_nr_setup);
+boolean_param("noserialnumber", disable_x86_serial_nr);
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
-void __init identify_cpu(struct cpuinfo_x86 *c)
+void __devinit identify_cpu(struct cpuinfo_x86 *c)
{
int i;
@@ -319,7 +312,7 @@
c->x86_model = c->x86_mask = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
- c->x86_num_cores = 1;
+ c->x86_max_cores = 1;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
if (!have_cpuid_p()) {
@@ -342,6 +335,7 @@
if (this_cpu->c_identify) {
this_cpu->c_identify(c);
+
#ifdef NOISY_CAPS
printk(KERN_DEBUG "CPU: After vendor identify, caps:");
for (i = 0; i < NCAPINTS; i++)
@@ -397,12 +391,14 @@
}
/* Now the feature flags better reflect actual CPU features! */
+
#ifdef NOISY_CAPS
printk(KERN_DEBUG "CPU: After all inits, caps:");
for (i = 0; i < NCAPINTS; i++)
printk(" %08x", c->x86_capability[i]);
printk("\n");
#endif
+
/*
* On SMP, boot_cpu_data holds the common feature set between
* all CPUs; so make sure that we indicate which features are
@@ -416,68 +412,69 @@
}
/* Init Machine Check Exception if available. */
-#ifdef CONFIG_X86_MCE
mcheck_init(c);
-#endif
+
+#if 0
+ if (c == &boot_cpu_data)
+ sysenter_setup();
+ enable_sep_cpu();
+#endif
+
+ if (c == &boot_cpu_data)
+ mtrr_bp_init();
+ else
+ mtrr_ap_init();
}
#ifdef CONFIG_X86_HT
-void __init detect_ht(struct cpuinfo_x86 *c)
+void __devinit detect_ht(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
- int index_msb, tmp;
+ int index_msb, core_bits;
int cpu = smp_processor_id();
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+
+ c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
- cpuid(1, &eax, &ebx, &ecx, &edx);
smp_num_siblings = (ebx & 0xff0000) >> 16;
if (smp_num_siblings == 1) {
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
} else if (smp_num_siblings > 1 ) {
- index_msb = 31;
if (smp_num_siblings > NR_CPUS) {
printk(KERN_WARNING "CPU: Unsupported number of the
siblings %d", smp_num_siblings);
smp_num_siblings = 1;
return;
}
- tmp = smp_num_siblings;
- while ((tmp & 0x80000000 ) == 0) {
- tmp <<=1 ;
- index_msb--;
- }
- if (smp_num_siblings & (smp_num_siblings - 1))
- index_msb++;
+
+ index_msb = get_count_order(smp_num_siblings);
phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
phys_proc_id[cpu]);
- smp_num_siblings = smp_num_siblings / c->x86_num_cores;
-
- tmp = smp_num_siblings;
- index_msb = 31;
- while ((tmp & 0x80000000) == 0) {
- tmp <<=1 ;
- index_msb--;
- }
-
- if (smp_num_siblings & (smp_num_siblings - 1))
- index_msb++;
-
- cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
-
- if (c->x86_num_cores > 1)
+ smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+
+ index_msb = get_count_order(smp_num_siblings) ;
+
+ core_bits = get_count_order(c->x86_max_cores);
+
+ cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+ ((1 << core_bits) - 1);
+
+ if (c->x86_max_cores > 1)
printk(KERN_INFO "CPU: Processor Core ID: %d\n",
cpu_core_id[cpu]);
}
}
#endif
-void __init print_cpu_info(struct cpuinfo_x86 *c)
+void __devinit print_cpu_info(struct cpuinfo_x86 *c)
{
char *vendor = NULL;
@@ -500,7 +497,7 @@
printk("\n");
}
-cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
+cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE;
/* This is hacky. :)
* We're emulating future behavior.
@@ -537,7 +534,7 @@
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
*/
-void __init cpu_init (void)
+void __devinit cpu_init(void)
{
int cpu = smp_processor_id();
struct tss_struct *t = &init_tss[cpu];
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/intel.c Tue Feb 14 15:23:43 2006
@@ -22,10 +22,10 @@
/*
* Alignment at which movsl is preferred for bulk memory copies.
*/
-struct movsl_mask movsl_mask;
+struct movsl_mask movsl_mask __read_mostly;
#endif
-void __init early_intel_workaround(struct cpuinfo_x86 *c)
+void __devinit early_intel_workaround(struct cpuinfo_x86 *c)
{
if (c->x86_vendor != X86_VENDOR_INTEL)
return;
@@ -40,7 +40,7 @@
* This is called before we do cpu ident work
*/
-int __init ppro_with_ram_bug(void)
+int __devinit ppro_with_ram_bug(void)
{
/* Uses data from early_cpu_detect now */
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -58,7 +58,7 @@
* P4 Xeon errata 037 workaround.
* Hardware prefetcher may cause stale data to be loaded into the cache.
*/
-static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c)
+static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
{
unsigned long lo, hi;
@@ -77,25 +77,22 @@
/*
* find out the number of processor cores on the die
*/
-static int __init num_cpu_cores(struct cpuinfo_x86 *c)
-{
- unsigned int eax;
+static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
+{
+ unsigned int eax, ebx, ecx, edx;
if (c->cpuid_level < 4)
return 1;
- __asm__("cpuid"
- : "=a" (eax)
- : "0" (4), "c" (0)
- : "bx", "dx");
-
+ /* Intel has a non-standard dependency on %ecx for this CPUID level. */
+ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
if (eax & 0x1f)
return ((eax >> 26) + 1);
else
return 1;
}
-static void __init init_intel(struct cpuinfo_x86 *c)
+static void __devinit init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
char *p = NULL;
@@ -157,7 +154,7 @@
if ( p )
strcpy(c->x86_model_id, p);
- c->x86_num_cores = num_cpu_cores(c);
+ c->x86_max_cores = num_cpu_cores(c);
detect_ht(c);
@@ -182,10 +179,13 @@
}
#endif
- if (c->x86 == 15)
+ if (c->x86 == 15)
set_bit(X86_FEATURE_P4, c->x86_capability);
if (c->x86 == 6)
set_bit(X86_FEATURE_P3, c->x86_capability);
+ if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+ (c->x86 == 0x6 && c->x86_model >= 0x0e))
+ set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
start_vmx();
}
@@ -203,7 +203,7 @@
return size;
}
-static struct cpu_dev intel_cpu_dev __initdata = {
+static struct cpu_dev intel_cpu_dev __devinitdata = {
.c_vendor = "Intel",
.c_ident = { "GenuineIntel" },
.c_models = {
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/dom0_ops.c Tue Feb 14 15:23:43 2006
@@ -26,7 +26,7 @@
#include <public/sched_ctl.h>
#include <asm/mtrr.h>
-#include "mtrr/mtrr.h"
+#include "cpu/mtrr/mtrr.h"
#define TRC_DOM0OP_ENTER_BASE 0x00020000
#define TRC_DOM0OP_LEAVE_BASE 0x00030000
@@ -39,13 +39,13 @@
static void write_msr_for(void *unused)
{
if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
- (void)wrmsr_user(msr_addr, msr_lo, msr_hi);
+ (void)wrmsr_safe(msr_addr, msr_lo, msr_hi);
}
static void read_msr_for(void *unused)
{
if ( ((1 << smp_processor_id()) & msr_cpu_mask) )
- (void)rdmsr_user(msr_addr, msr_lo, msr_hi);
+ (void)rdmsr_safe(msr_addr, msr_lo, msr_hi);
}
long arch_do_dom0_op(struct dom0_op *op, struct dom0_op *u_dom0_op)
@@ -182,7 +182,7 @@
dom0_physinfo_t *pi = &op->u.physinfo;
pi->threads_per_core = smp_num_siblings;
- pi->cores_per_socket = boot_cpu_data.x86_num_cores;
+ pi->cores_per_socket = boot_cpu_data.x86_max_cores;
pi->sockets_per_node =
num_online_cpus() / (pi->threads_per_core * pi->cores_per_socket);
pi->nr_nodes = 1;
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/hvm/svm/svm.c Tue Feb 14 15:23:43 2006
@@ -69,8 +69,6 @@
extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
int inst_len);
extern asmlinkage void do_IRQ(struct cpu_user_regs *);
-extern void smp_apic_timer_interrupt(struct cpu_user_regs *);
-extern void timer_interrupt(int, void *, struct cpu_user_regs *);
extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
unsigned long count, int size, long value, int dir, int pvalid);
extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
@@ -1761,7 +1759,7 @@
default:
if (long_mode_do_msr_read(regs))
goto done;
- rdmsr_user(regs->ecx, regs->eax, regs->edx);
+ rdmsr_safe(regs->ecx, regs->eax, regs->edx);
break;
}
}
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Feb 14 15:23:43 2006
@@ -1568,7 +1568,7 @@
default:
if(long_mode_do_msr_read(regs))
return;
- rdmsr_user(regs->ecx, regs->eax, regs->edx);
+ rdmsr_safe(regs->ecx, regs->eax, regs->edx);
break;
}
@@ -1658,13 +1658,12 @@
int error;
asmlinkage void do_IRQ(struct cpu_user_regs *);
- void smp_apic_timer_interrupt(struct cpu_user_regs *);
- void timer_interrupt(int, void *, struct cpu_user_regs *);
- void smp_event_check_interrupt(void);
- void smp_invalidate_interrupt(void);
- void smp_call_function_interrupt(void);
- void smp_spurious_interrupt(struct cpu_user_regs *regs);
- void smp_error_interrupt(struct cpu_user_regs *regs);
+ fastcall void smp_apic_timer_interrupt(struct cpu_user_regs *);
+ fastcall void smp_event_check_interrupt(void);
+ fastcall void smp_invalidate_interrupt(void);
+ fastcall void smp_call_function_interrupt(void);
+ fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
+ fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
&& !(vector & INTR_INFO_VALID_MASK))
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/i8259.c Tue Feb 14 15:23:43 2006
@@ -68,9 +68,10 @@
* overflow. Linux uses the local APIC timer interrupt to get
* a much simpler SMP time architecture:
*/
-BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
+BUILD_SMP_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
#define IRQ(x,y) \
IRQ##x##y##_interrupt
@@ -391,6 +392,7 @@
/* IPI vectors for APIC spurious and error interrupts. */
set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+ set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
/* Set the clock to HZ Hz */
#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/setup.c Tue Feb 14 15:23:43 2006
@@ -440,7 +440,7 @@
{
max_cpus = 0;
smp_num_siblings = 1;
- boot_cpu_data.x86_num_cores = 1;
+ boot_cpu_data.x86_max_cores = 1;
}
smp_prepare_cpus(max_cpus);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/smp.c Tue Feb 14 15:23:43 2006
@@ -188,7 +188,7 @@
static cpumask_t flush_cpumask;
static unsigned long flush_va;
-asmlinkage void smp_invalidate_interrupt(void)
+fastcall void smp_invalidate_interrupt(void)
{
ack_APIC_irq();
perfc_incrc(ipis);
@@ -339,13 +339,13 @@
local_irq_enable();
}
-asmlinkage void smp_event_check_interrupt(void)
+fastcall void smp_event_check_interrupt(struct cpu_user_regs *regs)
{
ack_APIC_irq();
perfc_incrc(ipis);
}
-asmlinkage void smp_call_function_interrupt(void)
+fastcall void smp_call_function_interrupt(struct cpu_user_regs *regs)
{
void (*func)(void *info) = call_data->func;
void *info = call_data->info;
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/smpboot.c Tue Feb 14 15:23:43 2006
@@ -53,8 +53,6 @@
static int _foo;
#define set_kernel_exec(x,y) (_foo=0)
#define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */
-int tainted;
-#define TAINT_UNSAFE_SMP 0
/* Set if we find a B stepping CPU */
static int __initdata smp_b_stepping;
@@ -1110,7 +1108,7 @@
smp_num_siblings = siblings;
}
- if (c->x86_num_cores > 1) {
+ if (c->x86_max_cores > 1) {
for (i = 0; i < NR_CPUS; i++) {
if (!cpu_isset(i, cpu_callout_map))
continue;
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/traps.c Tue Feb 14 15:23:43 2006
@@ -981,26 +981,26 @@
{
#ifdef CONFIG_X86_64
case MSR_FS_BASE:
- if ( wrmsr_user(MSR_FS_BASE, regs->eax, regs->edx) )
+ if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) )
goto fail;
v->arch.guest_context.fs_base =
((u64)regs->edx << 32) | regs->eax;
break;
case MSR_GS_BASE:
- if ( wrmsr_user(MSR_GS_BASE, regs->eax, regs->edx) )
+ if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) )
goto fail;
v->arch.guest_context.gs_base_kernel =
((u64)regs->edx << 32) | regs->eax;
break;
case MSR_SHADOW_GS_BASE:
- if ( wrmsr_user(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
+ if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
goto fail;
v->arch.guest_context.gs_base_user =
((u64)regs->edx << 32) | regs->eax;
break;
#endif
default:
- if ( (rdmsr_user(regs->ecx, l, h) != 0) ||
+ if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
(regs->ecx != MSR_EFER) ||
(regs->eax != l) || (regs->edx != h) )
DPRINTK("Domain attempted WRMSR %p from "
@@ -1028,13 +1028,13 @@
break;
#endif
case MSR_EFER:
- if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) )
+ if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
goto fail;
break;
default:
DPRINTK("Domain attempted RDMSR %p.\n", _p(regs->ecx));
/* Everyone can read the MSR space. */
- if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) )
+ if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
goto fail;
break;
}
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/x86_32/traps.c Tue Feb 14 15:23:43 2006
@@ -1,5 +1,6 @@
#include <xen/config.h>
+#include <xen/compile.h>
#include <xen/domain_page.h>
#include <xen/init.h>
#include <xen/sched.h>
@@ -20,6 +21,7 @@
{
struct cpu_user_regs fault_regs = *regs;
unsigned long fault_crs[8];
+ char taint_str[TAINT_STRING_MAX_LEN];
const char *context;
if ( HVM_DOMAIN(current) && GUEST_MODE(regs) )
@@ -49,6 +51,9 @@
fault_crs[3] = read_cr3();
}
+ printk("----[ Xen-%d.%d%s %s ]----\n",
+ XEN_VERSION, XEN_SUBVERSION, XEN_EXTRAVERSION,
+ print_tainted(taint_str));
printk("CPU: %d\nEIP: %04x:[<%08x>]",
smp_processor_id(), fault_regs.cs, fault_regs.eip);
if ( !GUEST_MODE(regs) )
@@ -201,11 +206,11 @@
}
BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
-asmlinkage void smp_deferred_nmi(struct cpu_user_regs regs)
+fastcall void smp_deferred_nmi(struct cpu_user_regs *regs)
{
asmlinkage void do_nmi(struct cpu_user_regs *);
ack_APIC_irq();
- do_nmi(®s);
+ do_nmi(regs);
}
void __init percpu_traps_init(void)
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/x86_64/mm.c Tue Feb 14 15:23:43 2006
@@ -243,21 +243,21 @@
switch ( which )
{
case SEGBASE_FS:
- if ( wrmsr_user(MSR_FS_BASE, base, base>>32) )
+ if ( wrmsr_safe(MSR_FS_BASE, base, base>>32) )
ret = -EFAULT;
else
v->arch.guest_context.fs_base = base;
break;
case SEGBASE_GS_USER:
- if ( wrmsr_user(MSR_SHADOW_GS_BASE, base, base>>32) )
+ if ( wrmsr_safe(MSR_SHADOW_GS_BASE, base, base>>32) )
ret = -EFAULT;
else
v->arch.guest_context.gs_base_user = base;
break;
case SEGBASE_GS_KERNEL:
- if ( wrmsr_user(MSR_GS_BASE, base, base>>32) )
+ if ( wrmsr_safe(MSR_GS_BASE, base, base>>32) )
ret = -EFAULT;
else
v->arch.guest_context.gs_base_kernel = base;
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/x86_64/traps.c Tue Feb 14 15:23:43 2006
@@ -1,5 +1,6 @@
#include <xen/config.h>
+#include <xen/compile.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
@@ -20,6 +21,7 @@
{
struct cpu_user_regs fault_regs = *regs;
unsigned long fault_crs[8];
+ char taint_str[TAINT_STRING_MAX_LEN];
const char *context;
if ( HVM_DOMAIN(current) && GUEST_MODE(regs) )
@@ -35,6 +37,9 @@
fault_crs[3] = read_cr3();
}
+ printk("----[ Xen-%d.%d%s %s ]----\n",
+ XEN_VERSION, XEN_SUBVERSION, XEN_EXTRAVERSION,
+ print_tainted(taint_str));
printk("CPU: %d\nRIP: %04x:[<%016lx>]",
smp_processor_id(), fault_regs.cs, fault_regs.rip);
if ( !GUEST_MODE(regs) )
diff -r fcc833cbaf82 -r b9b411b50587 xen/common/kernel.c
--- a/xen/common/kernel.c Mon Feb 13 16:41:23 2006
+++ b/xen/common/kernel.c Tue Feb 14 15:23:43 2006
@@ -14,6 +14,8 @@
#include <public/nmi.h>
#include <public/version.h>
+int tainted;
+
void cmdline_parse(char *cmdline)
{
char opt[100], *optval, *p = cmdline, *q;
@@ -76,6 +78,37 @@
}
}
}
+}
+
+/**
+ * print_tainted - return a string to represent the kernel taint state.
+ *
+ * 'S' - SMP with CPUs not designed for SMP.
+ * 'M' - Machine had a machine check experience.
+ * 'B' - System has hit bad_page.
+ *
+ * The string is overwritten by the next call to print_taint().
+ */
+char *print_tainted(char *str)
+{
+ if ( tainted )
+ {
+ snprintf(str, TAINT_STRING_MAX_LEN, "Tainted: %c%c%c",
+ tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
+ tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
+ tainted & TAINT_BAD_PAGE ? 'B' : ' ');
+ }
+ else
+ {
+ snprintf(str, TAINT_STRING_MAX_LEN, "Not tainted");
+ }
+
+ return str;
+}
+
+void add_taint(unsigned flag)
+{
+ tainted |= flag;
}
/*
diff -r fcc833cbaf82 -r b9b411b50587 xen/include/asm-x86/bitops.h
--- a/xen/include/asm-x86/bitops.h Mon Feb 13 16:41:23 2006
+++ b/xen/include/asm-x86/bitops.h Tue Feb 14 15:23:43 2006
@@ -322,6 +322,58 @@
}
/**
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static inline unsigned long ffz(unsigned long word)
+{
+ __asm__("bsf %1,%0"
+ :"=r" (word)
+ :"r" (~word));
+ return word;
+}
+
+#define fls64(x) generic_fls64(x)
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static inline int ffs(int x)
+{
+ int r;
+
+ __asm__("bsfl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $-1,%0\n"
+ "1:" : "=r" (r) : "rm" (x));
+ return r+1;
+}
+
+/**
+ * fls - find last bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs.
+ */
+static inline int fls(int x)
+{
+ int r;
+
+ __asm__("bsrl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $-1,%0\n"
+ "1:" : "=r" (r) : "rm" (x));
+ return r+1;
+}
+
+/**
* hweightN - returns the hamming weight of a N-bit word
* @x: the word to weigh
*
diff -r fcc833cbaf82 -r b9b411b50587 xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h Mon Feb 13 16:41:23 2006
+++ b/xen/include/asm-x86/config.h Tue Feb 14 15:23:43 2006
@@ -23,6 +23,7 @@
#define CONFIG_X86_GOOD_APIC 1
#define CONFIG_X86_IO_APIC 1
#define CONFIG_HPET_TIMER 1
+#define CONFIG_X86_MCE_P4THERMAL 1
/* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
#define CONFIG_X86_L1_CACHE_SHIFT 7
diff -r fcc833cbaf82 -r b9b411b50587 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h Mon Feb 13 16:41:23 2006
+++ b/xen/include/asm-x86/cpufeature.h Tue Feb 14 15:23:43 2006
@@ -69,6 +69,7 @@
#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */
#define X86_FEATURE_P3 (3*32+ 6) /* P3 */
#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
+#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
@@ -88,9 +89,9 @@
#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */
/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM (5*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY (5*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVME (5*32+ 2) /* Secure Virtual Machine */
+#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVME (6*32+ 2) /* Secure Virtual Machine */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
diff -r fcc833cbaf82 -r b9b411b50587 xen/include/asm-x86/msr.h
--- a/xen/include/asm-x86/msr.h Mon Feb 13 16:41:23 2006
+++ b/xen/include/asm-x86/msr.h Tue Feb 14 15:23:43 2006
@@ -28,34 +28,36 @@
wrmsr(msr, lo, hi);
}
-#define rdmsr_user(msr,val1,val2) ({\
+/* rdmsr with exception handling */
+#define rdmsr_safe(msr,val1,val2) ({\
int _rc; \
__asm__ __volatile__( \
"1: rdmsr\n2:\n" \
".section .fixup,\"ax\"\n" \
- "3: movl $1,%2\n; jmp 2b\n" \
+ "3: movl %5,%2\n; jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" "__FIXUP_ALIGN"\n" \
" "__FIXUP_WORD" 1b,3b\n" \
".previous\n" \
: "=a" (val1), "=d" (val2), "=&r" (_rc) \
- : "c" (msr), "2" (0)); \
+ : "c" (msr), "2" (0), "i" (-EFAULT)); \
_rc; })
-#define wrmsr_user(msr,val1,val2) ({\
+/* wrmsr with exception handling */
+#define wrmsr_safe(msr,val1,val2) ({\
int _rc; \
__asm__ __volatile__( \
"1: wrmsr\n2:\n" \
".section .fixup,\"ax\"\n" \
- "3: movl $1,%0\n; jmp 2b\n" \
+ "3: movl %5,%0\n; jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" "__FIXUP_ALIGN"\n" \
" "__FIXUP_WORD" 1b,3b\n" \
".previous\n" \
: "=&r" (_rc) \
- : "c" (msr), "a" (val1), "d" (val2), "0" (0)); \
+ : "c" (msr), "a" (val1), "d" (val2), "0" (0), "i" (-EFAULT)); \
_rc; })
#define rdtsc(low,high) \
diff -r fcc833cbaf82 -r b9b411b50587 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Mon Feb 13 16:41:23 2006
+++ b/xen/include/asm-x86/processor.h Tue Feb 14 15:23:43 2006
@@ -147,24 +147,28 @@
#endif
struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
- __u8 x86_mask;
- char wp_works_ok; /* It doesn't on 386's */
- char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
- char hard_math;
- char rfu;
+ __u8 x86; /* CPU family */
+ __u8 x86_vendor; /* CPU vendor */
+ __u8 x86_model;
+ __u8 x86_mask;
+ char wp_works_ok; /* It doesn't on 386's */
+ char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
+ char hard_math;
+ char rfu;
int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
- unsigned int x86_capability[NCAPINTS];
- char x86_vendor_id[16];
- char x86_model_id[64];
- int x86_cache_size; /* in KB - valid for CPUS which support this call
*/
- int x86_cache_alignment; /* In bytes */
- int fdiv_bug;
- int f00f_bug;
- int coma_bug;
- unsigned char x86_num_cores;
+ unsigned int x86_capability[NCAPINTS];
+ char x86_vendor_id[16];
+ char x86_model_id[64];
+ int x86_cache_size; /* in KB - valid for CPUS which support this call */
+ int x86_cache_alignment; /* In bytes */
+ char fdiv_bug;
+ char f00f_bug;
+ char coma_bug;
+ char pad0;
+ int x86_power;
+ unsigned char x86_max_cores; /* cpuid returned max cores value */
+ unsigned char booted_cores; /* number of cores as seen by OS */
+ unsigned char apicid;
} __cacheline_aligned;
/*
@@ -207,6 +211,18 @@
"=c" (*(int *)(_ecx)), \
"=d" (*(int *)(_edx)) \
: "0" (_op), "2" (0))
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
+ int *edx)
+{
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (op), "c" (count));
+}
/*
* CPUID functions returning a single datum
@@ -502,6 +518,11 @@
void show_page_walk(unsigned long addr);
asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
+extern void mtrr_ap_init(void);
+extern void mtrr_bp_init(void);
+
+extern void mcheck_init(struct cpuinfo_x86 *c);
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_X86_PROCESSOR_H */
diff -r fcc833cbaf82 -r b9b411b50587 xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h Mon Feb 13 16:41:23 2006
+++ b/xen/include/asm-x86/x86_32/asm_defns.h Tue Feb 14 15:23:43 2006
@@ -61,22 +61,13 @@
STR(x) ":\n\t" \
"pushl $"#v"<<16\n\t" \
STR(SAVE_ALL(a)) \
- "call "STR(smp_##x)"\n\t" \
- "jmp ret_from_intr\n");
-
-#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v)
-#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \
-asmlinkage void x(struct cpu_user_regs * regs); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-STR(x) ":\n\t" \
- "pushl $"#v"<<16\n\t" \
- STR(SAVE_ALL(a)) \
"movl %esp,%eax\n\t" \
"pushl %eax\n\t" \
"call "STR(smp_##x)"\n\t" \
"addl $4,%esp\n\t" \
"jmp ret_from_intr\n");
+
+#define BUILD_SMP_TIMER_INTERRUPT(x,v) BUILD_SMP_INTERRUPT(x,v)
#define BUILD_COMMON_IRQ() \
__asm__( \
diff -r fcc833cbaf82 -r b9b411b50587 xen/include/asm-x86/x86_64/asm_defns.h
--- a/xen/include/asm-x86/x86_64/asm_defns.h Mon Feb 13 16:41:23 2006
+++ b/xen/include/asm-x86/x86_64/asm_defns.h Tue Feb 14 15:23:43 2006
@@ -69,21 +69,11 @@
"pushq $0\n\t" \
"movl $"#v",4(%rsp)\n\t" \
STR(SAVE_ALL) \
+ "movq %rsp,%rdi\n\t" \
"callq "STR(smp_##x)"\n\t" \
"jmp ret_from_intr\n");
-#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v)
-#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \
-asmlinkage void x(struct cpu_user_regs * regs); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-STR(x) ":\n\t" \
- "pushq $0\n\t" \
- "movl $"#v",4(%rsp)\n\t" \
- STR(SAVE_ALL) \
- "movq %rsp,%rdi\n\t" \
- "callq "STR(smp_##x)"\n\t" \
- "jmp ret_from_intr\n");
+#define BUILD_SMP_TIMER_INTERRUPT(x,v) BUILD_SMP_INTERRUPT(x,v)
#define BUILD_COMMON_IRQ() \
__asm__( \
diff -r fcc833cbaf82 -r b9b411b50587 xen/include/xen/bitops.h
--- a/xen/include/xen/bitops.h Mon Feb 13 16:41:23 2006
+++ b/xen/include/xen/bitops.h Tue Feb 14 15:23:43 2006
@@ -76,6 +76,33 @@
*/
#include <asm/bitops.h>
+
+static inline int generic_fls64(__u64 x)
+{
+ __u32 h = x >> 32;
+ if (h)
+ return fls(x) + 32;
+ return fls(x);
+}
+
+static __inline__ int get_bitmask_order(unsigned int count)
+{
+ int order;
+
+ order = fls(count);
+ return order; /* We could be slightly more clever with -1 here... */
+}
+
+static __inline__ int get_count_order(unsigned int count)
+{
+ int order;
+
+ order = fls(count) - 1;
+ if (count & (count - 1))
+ order++;
+ return order;
+}
+
/*
* hweightN: returns the hamming weight (i.e. the number
* of bits set) of a N-bit word
@@ -126,4 +153,26 @@
return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w);
}
+/*
+ * rol32 - rotate a 32-bit value left
+ *
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> (32 - shift));
+}
+
+/*
+ * ror32 - rotate a 32-bit value right
+ *
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 ror32(__u32 word, unsigned int shift)
+{
+ return (word >> shift) | (word << (32 - shift));
+}
+
#endif
diff -r fcc833cbaf82 -r b9b411b50587 xen/include/xen/init.h
--- a/xen/include/xen/init.h Mon Feb 13 16:41:23 2006
+++ b/xen/include/xen/init.h Tue Feb 14 15:23:43 2006
@@ -100,4 +100,6 @@
#define __devexitdata __exitdata
#endif
+#define fastcall
+
#endif /* _LINUX_INIT_H */
diff -r fcc833cbaf82 -r b9b411b50587 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h Mon Feb 13 16:41:23 2006
+++ b/xen/include/xen/lib.h Tue Feb 14 15:23:43 2006
@@ -75,4 +75,12 @@
unsigned long long parse_size_and_unit(char *s);
+#define TAINT_UNSAFE_SMP (1<<0)
+#define TAINT_MACHINE_CHECK (1<<1)
+#define TAINT_BAD_PAGE (1<<2)
+extern int tainted;
+#define TAINT_STRING_MAX_LEN 20
+extern char *print_tainted(char *str);
+extern void add_taint(unsigned);
+
#endif /* __LIB_H__ */
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mcheck/k7.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mcheck/k7.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,95 @@
+/*
+ * Athlon/Hammer specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Dave Jones <davej@xxxxxxxxxxxxxxxxx>
+ */
+
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/config.h>
+#include <xen/smp.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine Check Handler For AMD Athlon/Duron */
+static fastcall void k7_machine_check(struct cpu_user_regs * regs, long
error_code)
+{
+ int recover=1;
+ u32 alow, ahigh, high, low;
+ u32 mcgstl, mcgsth;
+ int i;
+
+ rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+ if (mcgstl & (1<<0)) /* Recoverable ? */
+ recover=0;
+
+ printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+ smp_processor_id(), mcgsth, mcgstl);
+
+ for (i=1; i<nr_mce_banks; i++) {
+ rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+ if (high&(1<<31)) {
+ if (high & (1<<29))
+ recover |= 1;
+ if (high & (1<<25))
+ recover |= 2;
+ printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+ high &= ~(1<<31);
+ if (high & (1<<27)) {
+ rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+ printk ("[%08x%08x]", ahigh, alow);
+ }
+ if (high & (1<<26)) {
+ rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+ printk (" at %08x%08x", ahigh, alow);
+ }
+ printk ("\n");
+ /* Clear it */
+ wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+ /* Serialize */
+ wmb();
+ add_taint(TAINT_MACHINE_CHECK);
+ }
+ }
+
+ if (recover&2)
+ panic ("CPU context corrupt");
+ if (recover&1)
+ panic ("Unable to continue");
+ printk (KERN_EMERG "Attempting to continue.\n");
+ mcgstl &= ~(1<<2);
+ wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+
+/* AMD K7 machine check is Intel like */
+void amd_mcheck_init(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+ int i;
+
+ machine_check_vector = k7_machine_check;
+ wmb();
+
+ printk (KERN_INFO "Intel machine check architecture supported.\n");
+ rdmsr (MSR_IA32_MCG_CAP, l, h);
+ if (l & (1<<8)) /* Control register present ? */
+ wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+ nr_mce_banks = l & 0xff;
+
+ /* Clear status for MC index 0 separately, we don't touch CTL,
+ * as some Athlons cause spurious MCEs when its enabled. */
+ wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+ for (i=1; i<nr_mce_banks; i++) {
+ wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+ wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+ }
+
+ set_in_cr4 (X86_CR4_MCE);
+ printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+ smp_processor_id());
+}
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mcheck/mce.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mcheck/mce.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,73 @@
+/*
+ * mce.c - x86 Machine Check Exception Reporting
+ * (c) 2002 Alan Cox <alan@xxxxxxxxxx>, Dave Jones <davej@xxxxxxxxxxxxxxxxx>
+ */
+
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/config.h>
+#include <xen/smp.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+
+#include "mce.h"
+
+int mce_disabled = 0;
+int nr_mce_banks;
+
+/* Handle unconfigured int18 (should never happen) */
+static fastcall void unexpected_machine_check(struct cpu_user_regs * regs,
long error_code)
+{
+ printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void fastcall (*machine_check_vector)(struct cpu_user_regs *, long error_code)
= unexpected_machine_check;
+
+/* This has to be run for each processor */
+void mcheck_init(struct cpuinfo_x86 *c)
+{
+ if (mce_disabled==1)
+ return;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (c->x86==6 || c->x86==15)
+ amd_mcheck_init(c);
+ break;
+
+ case X86_VENDOR_INTEL:
+ if (c->x86==5)
+ intel_p5_mcheck_init(c);
+ if (c->x86==6)
+ intel_p6_mcheck_init(c);
+ if (c->x86==15)
+ intel_p4_mcheck_init(c);
+ break;
+
+ case X86_VENDOR_CENTAUR:
+ if (c->x86==5)
+ winchip_mcheck_init(c);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int __init mcheck_disable(char *str)
+{
+ mce_disabled = 1;
+ return 0;
+}
+
+static int __init mcheck_enable(char *str)
+{
+ mce_disabled = -1;
+ return 0;
+}
+
+__setup("nomce", mcheck_disable);
+__setup("mce", mcheck_enable);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mcheck/mce.h
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mcheck/mce.h Tue Feb 14 15:23:43 2006
@@ -0,0 +1,14 @@
+#include <xen/init.h>
+
+void amd_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+
+/* Call the installed machine check handler for this CPU setup. */
+extern fastcall void (*machine_check_vector)(struct cpu_user_regs *, long
error_code);
+
+extern int mce_disabled __initdata;
+extern int nr_mce_banks;
+
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mcheck/non-fatal.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,86 @@
+/*
+ * Non Fatal Machine Check Exception Reporting
+ *
+ * (C) Copyright 2002 Dave Jones. <davej@xxxxxxxxxxxxxxxxx>
+ *
+ * This file contains routines to check for non-fatal MCEs every 15s
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+#include <xen/timer.h>
+#include <xen/errno.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+static int firstbank;
+static struct timer mce_timer;
+
+#define MCE_PERIOD MILLISECS(15000)
+
+static void mce_checkregs (void *info)
+{
+ u32 low, high;
+ int i;
+
+ for (i=firstbank; i<nr_mce_banks; i++) {
+ rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);
+
+ if (high & (1<<31)) {
+ printk(KERN_INFO "MCE: The hardware reports a non "
+ "fatal, correctable incident occurred on "
+ "CPU %d.\n",
+ smp_processor_id());
+ printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
+
+ /* Scrub the error so we don't pick it up in MCE_RATE
seconds time. */
+ wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+
+ /* Serialize */
+ wmb();
+ add_taint(TAINT_MACHINE_CHECK);
+ }
+ }
+}
+
+static void mce_work_fn(void *data)
+{
+ on_each_cpu(mce_checkregs, NULL, 1, 1);
+ set_timer(&mce_timer, NOW() + MCE_PERIOD);
+}
+
+static int __init init_nonfatal_mce_checker(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ /* Check for MCE support */
+ if (!cpu_has(c, X86_FEATURE_MCE))
+ return -ENODEV;
+
+ /* Check for PPro style MCA */
+ if (!cpu_has(c, X86_FEATURE_MCA))
+ return -ENODEV;
+
+ /* Some Athlons misbehave when we frob bank 0 */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ boot_cpu_data.x86 == 6)
+ firstbank = 1;
+ else
+ firstbank = 0;
+
+ /*
+ * Check for non-fatal errors every MCE_RATE s
+ */
+ init_timer(&mce_timer, mce_work_fn, NULL, 0);
+ set_timer(&mce_timer, NOW() + MCE_PERIOD);
+ printk(KERN_INFO "Machine check exception polling timer started.\n");
+ return 0;
+}
+__initcall(init_nonfatal_mce_checker);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mcheck/p4.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mcheck/p4.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,270 @@
+/*
+ * P4 specific Machine Check Exception Reporting
+ */
+
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/config.h>
+#include <xen/smp.h>
+#include <xen/irq.h>
+#include <xen/time.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+
+#include "mce.h"
+
+/* as supported by the P4/Xeon family */
+struct intel_mce_extended_msrs {
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+ u32 esi;
+ u32 edi;
+ u32 ebp;
+ u32 esp;
+ u32 eflags;
+ u32 eip;
+ /* u32 *reserved[]; */
+};
+
+static int mce_num_extended_msrs = 0;
+
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
+{
+ printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
+ smp_processor_id());
+ add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* P4/Xeon Thermal transition interrupt handler */
+static void intel_thermal_interrupt(struct cpu_user_regs *regs)
+{
+ u32 l, h;
+ unsigned int cpu = smp_processor_id();
+ static s_time_t next[NR_CPUS];
+
+ ack_APIC_irq();
+
+ if (NOW() > next[cpu])
+ return;
+
+ next[cpu] = NOW() + MILLISECS(5000);
+ rdmsr(MSR_IA32_THERM_STATUS, l, h);
+ if (l & 0x1) {
+ printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
+ printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
+ cpu);
+ add_taint(TAINT_MACHINE_CHECK);
+ } else {
+ printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+ }
+}
+
+/* Thermal interrupt handler for this CPU setup */
+static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) =
unexpected_thermal_interrupt;
+
+fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
+{
+ irq_enter(smp_processor_id());
+ vendor_thermal_interrupt(regs);
+ irq_exit(smp_processor_id());
+}
+
+/* P4/Xeon Thermal regulation detect and init */
+static void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+ unsigned int cpu = smp_processor_id();
+
+ /* Thermal monitoring */
+ if (!cpu_has(c, X86_FEATURE_ACPI))
+ return; /* -ENODEV */
+
+ /* Clock modulation */
+ if (!cpu_has(c, X86_FEATURE_ACC))
+ return; /* -ENODEV */
+
+ /* first check if its enabled already, in which case there might
+ * be some SMM goo which handles it, so we can't even put a handler
+ * since it might be delivered via SMI already -zwanem.
+ */
+ rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+ h = apic_read(APIC_LVTTHMR);
+ if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+ printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
+ cpu);
+ return; /* -EBUSY */
+ }
+
+ /* check whether a vector already exists, temporarily masked? */
+ if (h & APIC_VECTOR_MASK) {
+ printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
+ "installed\n",
+ cpu, (h & APIC_VECTOR_MASK));
+ return; /* -EBUSY */
+ }
+
+ /* The temperature transition interrupt handler setup */
+ h = THERMAL_APIC_VECTOR; /* our delivery vector */
+ h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready
*/
+ apic_write_around(APIC_LVTTHMR, h);
+
+ rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
+ wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+
+ /* ok we're good to go... */
+ vendor_thermal_interrupt = intel_thermal_interrupt;
+
+ rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+ wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+
+ l = apic_read (APIC_LVTTHMR);
+ apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+ printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
+ return;
+}
+#endif /* CONFIG_X86_MCE_P4THERMAL */
+
+
+/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
+static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+{
+ u32 h;
+
+ if (mce_num_extended_msrs == 0)
+ goto done;
+
+ rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
+ rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
+ rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
+ rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
+ rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
+ rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
+ rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
+ rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
+ rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
+ rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
+
+ /* can we rely on kmalloc to do a dynamic
+ * allocation for the reserved registers?
+ */
+done:
+ return mce_num_extended_msrs;
+}
+
+static fastcall void intel_machine_check(struct cpu_user_regs * regs, long
error_code)
+{
+ int recover=1;
+ u32 alow, ahigh, high, low;
+ u32 mcgstl, mcgsth;
+ int i;
+ struct intel_mce_extended_msrs dbg;
+
+ rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+ if (mcgstl & (1<<0)) /* Recoverable ? */
+ recover=0;
+
+ printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+ smp_processor_id(), mcgsth, mcgstl);
+
+ if (intel_get_extended_msrs(&dbg)) {
+ printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
+ smp_processor_id(), dbg.eip, dbg.eflags);
+ printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx:
%08x\n",
+ dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
+ printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp:
%08x\n",
+ dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
+ }
+
+ for (i=0; i<nr_mce_banks; i++) {
+ rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+ if (high & (1<<31)) {
+ if (high & (1<<29))
+ recover |= 1;
+ if (high & (1<<25))
+ recover |= 2;
+ printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+ high &= ~(1<<31);
+ if (high & (1<<27)) {
+ rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+ printk ("[%08x%08x]", ahigh, alow);
+ }
+ if (high & (1<<26)) {
+ rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+ printk (" at %08x%08x", ahigh, alow);
+ }
+ printk ("\n");
+ }
+ }
+
+ if (recover & 2)
+ panic ("CPU context corrupt");
+ if (recover & 1)
+ panic ("Unable to continue");
+
+ printk(KERN_EMERG "Attempting to continue.\n");
+ /*
+ * Do not clear the MSR_IA32_MCi_STATUS if the error is not
+ * recoverable/continuable.This will allow BIOS to look at the MSRs
+ * for errors if the OS could not log the error.
+ */
+ for (i=0; i<nr_mce_banks; i++) {
+ u32 msr;
+ msr = MSR_IA32_MC0_STATUS+i*4;
+ rdmsr (msr, low, high);
+ if (high&(1<<31)) {
+ /* Clear it */
+ wrmsr(msr, 0UL, 0UL);
+ /* Serialize */
+ wmb();
+ add_taint(TAINT_MACHINE_CHECK);
+ }
+ }
+ mcgstl &= ~(1<<2);
+ wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+ int i;
+
+ machine_check_vector = intel_machine_check;
+ wmb();
+
+ printk (KERN_INFO "Intel machine check architecture supported.\n");
+ rdmsr (MSR_IA32_MCG_CAP, l, h);
+ if (l & (1<<8)) /* Control register present ? */
+ wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+ nr_mce_banks = l & 0xff;
+
+ for (i=0; i<nr_mce_banks; i++) {
+ wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+ wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+ }
+
+ set_in_cr4 (X86_CR4_MCE);
+ printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+ smp_processor_id());
+
+ /* Check for P4/Xeon extended MCE MSRs */
+ rdmsr (MSR_IA32_MCG_CAP, l, h);
+ if (l & (1<<9)) {/* MCG_EXT_P */
+ mce_num_extended_msrs = (l >> 16) & 0xff;
+ printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
+ " available\n",
+ smp_processor_id(), mce_num_extended_msrs);
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+ /* Check for P4/Xeon Thermal monitor */
+ intel_init_thermal(c);
+#endif
+ }
+}
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mcheck/p5.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mcheck/p5.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,52 @@
+/*
+ * P5 specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@xxxxxxxxxx>
+ */
+
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine check handler for Pentium class Intel */
+static fastcall void pentium_machine_check(struct cpu_user_regs * regs, long
error_code)
+{
+ u32 loaddr, hi, lotype;
+ rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
+ rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
+ printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type
0x%8X).\n", smp_processor_id(), loaddr, lotype);
+ if(lotype&(1<<5))
+ printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on
fire ?).\n", smp_processor_id());
+ add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* Set up machine check reporting for processors with Intel style MCE */
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+
+ /*Check for MCE support */
+ if( !cpu_has(c, X86_FEATURE_MCE) )
+ return;
+
+ /* Default P5 to off as its often misconnected */
+ if(mce_disabled != -1)
+ return;
+ machine_check_vector = pentium_machine_check;
+ wmb();
+
+ /* Read registers before enabling */
+ rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
+ rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
+ printk(KERN_INFO "Intel old style machine check architecture
supported.\n");
+
+ /* Enable MCE */
+ set_in_cr4(X86_CR4_MCE);
+ printk(KERN_INFO "Intel old style machine check reporting enabled on
CPU#%d.\n", smp_processor_id());
+}
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mcheck/p6.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mcheck/p6.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,118 @@
+/*
+ * P6 specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@xxxxxxxxxx>
+ */
+
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine Check Handler For PII/PIII */
+static fastcall void intel_machine_check(struct cpu_user_regs * regs, long
error_code)
+{
+ int recover=1;
+ u32 alow, ahigh, high, low;
+ u32 mcgstl, mcgsth;
+ int i;
+
+ rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+ if (mcgstl & (1<<0)) /* Recoverable ? */
+ recover=0;
+
+ printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+ smp_processor_id(), mcgsth, mcgstl);
+
+ for (i=0; i<nr_mce_banks; i++) {
+ rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+ if (high & (1<<31)) {
+ if (high & (1<<29))
+ recover |= 1;
+ if (high & (1<<25))
+ recover |= 2;
+ printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+ high &= ~(1<<31);
+ if (high & (1<<27)) {
+ rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+ printk ("[%08x%08x]", ahigh, alow);
+ }
+ if (high & (1<<26)) {
+ rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+ printk (" at %08x%08x", ahigh, alow);
+ }
+ printk ("\n");
+ }
+ }
+
+ if (recover & 2)
+ panic ("CPU context corrupt");
+ if (recover & 1)
+ panic ("Unable to continue");
+
+ printk (KERN_EMERG "Attempting to continue.\n");
+ /*
+ * Do not clear the MSR_IA32_MCi_STATUS if the error is not
+ * recoverable/continuable.This will allow BIOS to look at the MSRs
+ * for errors if the OS could not log the error.
+ */
+ for (i=0; i<nr_mce_banks; i++) {
+ unsigned int msr;
+ msr = MSR_IA32_MC0_STATUS+i*4;
+ rdmsr (msr,low, high);
+ if (high & (1<<31)) {
+ /* Clear it */
+ wrmsr (msr, 0UL, 0UL);
+ /* Serialize */
+ wmb();
+ add_taint(TAINT_MACHINE_CHECK);
+ }
+ }
+ mcgstl &= ~(1<<2);
+ wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+/* Set up machine check reporting for processors with Intel style MCE */
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+ int i;
+
+ /* Check for MCE support */
+ if (!cpu_has(c, X86_FEATURE_MCE))
+ return;
+
+ /* Check for PPro style MCA */
+ if (!cpu_has(c, X86_FEATURE_MCA))
+ return;
+
+ /* Ok machine check is available */
+ machine_check_vector = intel_machine_check;
+ wmb();
+
+ printk (KERN_INFO "Intel machine check architecture supported.\n");
+ rdmsr (MSR_IA32_MCG_CAP, l, h);
+ if (l & (1<<8)) /* Control register present ? */
+ wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+ nr_mce_banks = l & 0xff;
+
+ /*
+ * Following the example in IA-32 SDM Vol 3:
+ * - MC0_CTL should not be written
+ * - Status registers on all banks should be cleared on reset
+ */
+ for (i=1; i<nr_mce_banks; i++)
+ wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+
+ for (i=0; i<nr_mce_banks; i++)
+ wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+
+ set_in_cr4 (X86_CR4_MCE);
+ printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+ smp_processor_id());
+}
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mcheck/winchip.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mcheck/winchip.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,37 @@
+/*
+ * IDT Winchip specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@xxxxxxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+/* Machine check handler for WinChip C6 */
+static fastcall void winchip_machine_check(struct cpu_user_regs * regs, long
error_code)
+{
+ printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
+ add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* Set up machine check reporting on the Winchip C6 series */
+void winchip_mcheck_init(struct cpuinfo_x86 *c)
+{
+ u32 lo, hi;
+ machine_check_vector = winchip_machine_check;
+ wmb();
+ rdmsr(MSR_IDT_FCR1, lo, hi);
+ lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */
+ lo&= ~(1<<4); /* Enable MCE */
+ wrmsr(MSR_IDT_FCR1, lo, hi);
+ set_in_cr4(X86_CR4_MCE);
+ printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
+}
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mtrr/amd.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mtrr/amd.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,121 @@
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+
+#include "mtrr.h"
+
+static void
+amd_get_mtrr(unsigned int reg, unsigned long *base,
+ unsigned int *size, mtrr_type * type)
+{
+ unsigned long low, high;
+
+ rdmsr(MSR_K6_UWCCR, low, high);
+ /* Upper dword is region 1, lower is region 0 */
+ if (reg == 1)
+ low = high;
+ /* The base masks off on the right alignment */
+ *base = (low & 0xFFFE0000) >> PAGE_SHIFT;
+ *type = 0;
+ if (low & 1)
+ *type = MTRR_TYPE_UNCACHABLE;
+ if (low & 2)
+ *type = MTRR_TYPE_WRCOMB;
+ if (!(low & 3)) {
+ *size = 0;
+ return;
+ }
+ /*
+ * This needs a little explaining. The size is stored as an
+ * inverted mask of bits of 128K granularity 15 bits long offset
+ * 2 bits
+ *
+ * So to get a size we do invert the mask and add 1 to the lowest
+ * mask bit (4 as its 2 bits in). This gives us a size we then shift
+ * to turn into 128K blocks
+ *
+ * eg 111 1111 1111 1100 is 512K
+ *
+ * invert 000 0000 0000 0011
+ * +1 000 0000 0000 0100
+ * *128K ...
+ */
+ low = (~low) & 0x1FFFC;
+ *size = (low + 4) << (15 - PAGE_SHIFT);
+ return;
+}
+
+static void amd_set_mtrr(unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type)
+/* [SUMMARY] Set variable MTRR register on the local CPU.
+ <reg> The register to set.
+ <base> The base address of the region.
+ <size> The size of the region. If this is 0 the region is disabled.
+ <type> The type of the region.
+ <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
+ be done externally.
+ [RETURNS] Nothing.
+*/
+{
+ u32 regs[2];
+
+ /*
+ * Low is MTRR0 , High MTRR 1
+ */
+ rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
+ /*
+ * Blank to disable
+ */
+ if (size == 0)
+ regs[reg] = 0;
+ else
+ /* Set the register to the base, the type (off by one) and an
+ inverted bitmask of the size The size is the only odd
+ bit. We are fed say 512K We invert this and we get 111 1111
+ 1111 1011 but if you subtract one and invert you get the
+ desired 111 1111 1111 1100 mask
+
+ But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */
+ regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
+ | (base << PAGE_SHIFT) | (type + 1);
+
+ /*
+ * The writeback rule is quite specific. See the manual. Its
+ * disable local interrupts, write back the cache, set the mtrr
+ */
+ wbinvd();
+ wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
+}
+
+static int amd_validate_add_page(unsigned long base, unsigned long size,
unsigned int type)
+{
+ /* Apply the K6 block alignment and size rules
+ In order
+ o Uncached or gathering only
+ o 128K or bigger block
+ o Power of 2 block
+ o base suitably aligned to the power
+ */
+ if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
+ || (size & ~(size - 1)) - size || (base & (size - 1)))
+ return -EINVAL;
+ return 0;
+}
+
+static struct mtrr_ops amd_mtrr_ops = {
+ .vendor = X86_VENDOR_AMD,
+ .set = amd_set_mtrr,
+ .get = amd_get_mtrr,
+ .get_free_region = generic_get_free_region,
+ .validate_add_page = amd_validate_add_page,
+ .have_wrcomb = positive_have_wrcomb,
+};
+
+int __init amd_init_mtrr(void)
+{
+ set_mtrr_ops(&amd_mtrr_ops);
+ return 0;
+}
+
+//arch_initcall(amd_mtrr_init);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mtrr/centaur.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mtrr/centaur.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,223 @@
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include "mtrr.h"
+
+static struct {
+ unsigned long high;
+ unsigned long low;
+} centaur_mcr[8];
+
+static u8 centaur_mcr_reserved;
+static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */
+
+/*
+ * Report boot time MCR setups
+ */
+
+static int
+centaur_get_free_region(unsigned long base, unsigned long size)
+/* [SUMMARY] Get a free MTRR.
+ <base> The starting (base) address of the region.
+ <size> The size (in bytes) of the region.
+ [RETURNS] The index of the region on success, else -1 on error.
+*/
+{
+ int i, max;
+ mtrr_type ltype;
+ unsigned long lbase;
+ unsigned int lsize;
+
+ max = num_var_ranges;
+ for (i = 0; i < max; ++i) {
+ if (centaur_mcr_reserved & (1 << i))
+ continue;
+ mtrr_if->get(i, &lbase, &lsize, <ype);
+ if (lsize == 0)
+ return i;
+ }
+ return -ENOSPC;
+}
+
+void
+mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
+{
+ centaur_mcr[mcr].low = lo;
+ centaur_mcr[mcr].high = hi;
+}
+
+static void
+centaur_get_mcr(unsigned int reg, unsigned long *base,
+ unsigned int *size, mtrr_type * type)
+{
+ *base = centaur_mcr[reg].high >> PAGE_SHIFT;
+ *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
+ *type = MTRR_TYPE_WRCOMB; /* If it is there, it is
write-combining */
+ if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
+ *type = MTRR_TYPE_UNCACHABLE;
+ if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
+ *type = MTRR_TYPE_WRBACK;
+ if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
+ *type = MTRR_TYPE_WRBACK;
+
+}
+
+static void centaur_set_mcr(unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type)
+{
+ unsigned long low, high;
+
+ if (size == 0) {
+ /* Disable */
+ high = low = 0;
+ } else {
+ high = base << PAGE_SHIFT;
+ if (centaur_mcr_type == 0)
+ low = -size << PAGE_SHIFT | 0x1f; /* only support
write-combining... */
+ else {
+ if (type == MTRR_TYPE_UNCACHABLE)
+ low = -size << PAGE_SHIFT | 0x02; /* NC */
+ else
+ low = -size << PAGE_SHIFT | 0x09; /*
WWO,WC */
+ }
+ }
+ centaur_mcr[reg].high = high;
+ centaur_mcr[reg].low = low;
+ wrmsr(MSR_IDT_MCR0 + reg, low, high);
+}
+
+#if 0
+/*
+ * Initialise the later (saner) Winchip MCR variant. In this version
+ * the BIOS can pass us the registers it has used (but not their values)
+ * and the control register is read/write
+ */
+
+static void __init
+centaur_mcr1_init(void)
+{
+ unsigned i;
+ u32 lo, hi;
+
+ /* Unfortunately, MCR's are read-only, so there is no way to
+ * find out what the bios might have done.
+ */
+
+ rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */
+ lo &= ~0x1C0; /* clear key */
+ lo |= 0x040; /* set key to 1 */
+ wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */
+ }
+
+ centaur_mcr_type = 1;
+
+ /*
+ * Clear any unconfigured MCR's.
+ */
+
+ for (i = 0; i < 8; ++i) {
+ if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) {
+ if (!(lo & (1 << (9 + i))))
+ wrmsr(MSR_IDT_MCR0 + i, 0, 0);
+ else
+ /*
+ * If the BIOS set up an MCR we cannot see
it
+ * but we don't wish to obliterate it
+ */
+ centaur_mcr_reserved |= (1 << i);
+ }
+ }
+ /*
+ * Throw the main write-combining switch...
+ * However if OOSTORE is enabled then people have already done far
+ * cleverer things and we should behave.
+ */
+
+ lo |= 15; /* Write combine enables */
+ wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+}
+
+/*
+ * Initialise the original winchip with read only MCR registers
+ * no used bitmask for the BIOS to pass on and write only control
+ */
+
+static void __init
+centaur_mcr0_init(void)
+{
+ unsigned i;
+
+ /* Unfortunately, MCR's are read-only, so there is no way to
+ * find out what the bios might have done.
+ */
+
+ /* Clear any unconfigured MCR's.
+ * This way we are sure that the centaur_mcr array contains the actual
+ * values. The disadvantage is that any BIOS tweaks are thus undone.
+ *
+ */
+ for (i = 0; i < 8; ++i) {
+ if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0)
+ wrmsr(MSR_IDT_MCR0 + i, 0, 0);
+ }
+
+ wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */
+}
+
+/*
+ * Initialise Winchip series MCR registers
+ */
+
+static void __init
+centaur_mcr_init(void)
+{
+ struct set_mtrr_context ctxt;
+
+ set_mtrr_prepare_save(&ctxt);
+ set_mtrr_cache_disable(&ctxt);
+
+ if (boot_cpu_data.x86_model == 4)
+ centaur_mcr0_init();
+ else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9)
+ centaur_mcr1_init();
+
+ set_mtrr_done(&ctxt);
+}
+#endif
+
+static int centaur_validate_add_page(unsigned long base,
+ unsigned long size, unsigned int type)
+{
+ /*
+ * FIXME: Winchip2 supports uncached
+ */
+ if (type != MTRR_TYPE_WRCOMB &&
+ (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
+ printk(KERN_WARNING
+ "mtrr: only write-combining%s supported\n",
+ centaur_mcr_type ? " and uncacheable are"
+ : " is");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct mtrr_ops centaur_mtrr_ops = {
+ .vendor = X86_VENDOR_CENTAUR,
+// .init = centaur_mcr_init,
+ .set = centaur_set_mcr,
+ .get = centaur_get_mcr,
+ .get_free_region = centaur_get_free_region,
+ .validate_add_page = centaur_validate_add_page,
+ .have_wrcomb = positive_have_wrcomb,
+};
+
+int __init centaur_init_mtrr(void)
+{
+ set_mtrr_ops(¢aur_mtrr_ops);
+ return 0;
+}
+
+//arch_initcall(centaur_init_mtrr);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mtrr/cyrix.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mtrr/cyrix.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,364 @@
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include "mtrr.h"
+
+int arr3_protected;
+
+static void
+cyrix_get_arr(unsigned int reg, unsigned long *base,
+ unsigned int *size, mtrr_type * type)
+{
+ unsigned long flags;
+ unsigned char arr, ccr3, rcr, shift;
+
+ arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
+
+ /* Save flags and disable interrupts */
+ local_irq_save(flags);
+
+ ccr3 = getCx86(CX86_CCR3);
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
+ ((unsigned char *) base)[3] = getCx86(arr);
+ ((unsigned char *) base)[2] = getCx86(arr + 1);
+ ((unsigned char *) base)[1] = getCx86(arr + 2);
+ rcr = getCx86(CX86_RCR_BASE + reg);
+ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
+
+ /* Enable interrupts if it was enabled previously */
+ local_irq_restore(flags);
+ shift = ((unsigned char *) base)[1] & 0x0f;
+ *base >>= PAGE_SHIFT;
+
+ /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
+ * Note: shift==0xf means 4G, this is unsupported.
+ */
+ if (shift)
+ *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1);
+ else
+ *size = 0;
+
+ /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */
+ if (reg < 7) {
+ switch (rcr) {
+ case 1:
+ *type = MTRR_TYPE_UNCACHABLE;
+ break;
+ case 8:
+ *type = MTRR_TYPE_WRBACK;
+ break;
+ case 9:
+ *type = MTRR_TYPE_WRCOMB;
+ break;
+ case 24:
+ default:
+ *type = MTRR_TYPE_WRTHROUGH;
+ break;
+ }
+ } else {
+ switch (rcr) {
+ case 0:
+ *type = MTRR_TYPE_UNCACHABLE;
+ break;
+ case 8:
+ *type = MTRR_TYPE_WRCOMB;
+ break;
+ case 9:
+ *type = MTRR_TYPE_WRBACK;
+ break;
+ case 25:
+ default:
+ *type = MTRR_TYPE_WRTHROUGH;
+ break;
+ }
+ }
+}
+
+static int
+cyrix_get_free_region(unsigned long base, unsigned long size)
+/* [SUMMARY] Get a free ARR.
+ <base> The starting (base) address of the region.
+ <size> The size (in bytes) of the region.
+ [RETURNS] The index of the region on success, else -1 on error.
+*/
+{
+ int i;
+ mtrr_type ltype;
+ unsigned long lbase;
+ unsigned int lsize;
+
+ /* If we are to set up a region >32M then look at ARR7 immediately */
+ if (size > 0x2000) {
+ cyrix_get_arr(7, &lbase, &lsize, <ype);
+ if (lsize == 0)
+ return 7;
+ /* Else try ARR0-ARR6 first */
+ } else {
+ for (i = 0; i < 7; i++) {
+ cyrix_get_arr(i, &lbase, &lsize, <ype);
+ if ((i == 3) && arr3_protected)
+ continue;
+ if (lsize == 0)
+ return i;
+ }
+ /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least
256K */
+ cyrix_get_arr(i, &lbase, &lsize, <ype);
+ if ((lsize == 0) && (size >= 0x40))
+ return i;
+ }
+ return -ENOSPC;
+}
+
+static u32 cr4 = 0;
+static u32 ccr3;
+
+static void prepare_set(void)
+{
+ u32 cr0;
+
+ /* Save value of CR4 and clear Page Global Enable (bit 7) */
+ if ( cpu_has_pge ) {
+ cr4 = read_cr4();
+ write_cr4(cr4 & (unsigned char) ~(1 << 7));
+ }
+
+ /* Disable and flush caches. Note that wbinvd flushes the TLBs as
+ a side-effect */
+ cr0 = read_cr0() | 0x40000000;
+ wbinvd();
+ write_cr0(cr0);
+ wbinvd();
+
+ /* Cyrix ARRs - everything else were excluded at the top */
+ ccr3 = getCx86(CX86_CCR3);
+
+ /* Cyrix ARRs - everything else were excluded at the top */
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
+
+}
+
+static void post_set(void)
+{
+ /* Flush caches and TLBs */
+ wbinvd();
+
+ /* Cyrix ARRs - everything else was excluded at the top */
+ setCx86(CX86_CCR3, ccr3);
+
+ /* Enable caches */
+ write_cr0(read_cr0() & 0xbfffffff);
+
+ /* Restore value of CR4 */
+ if ( cpu_has_pge )
+ write_cr4(cr4);
+}
+
+static void cyrix_set_arr(unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type)
+{
+ unsigned char arr, arr_type, arr_size;
+
+ arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
+
+ /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */
+ if (reg >= 7)
+ size >>= 6;
+
+ size &= 0x7fff; /* make sure arr_size <= 14 */
+ for (arr_size = 0; size; arr_size++, size >>= 1) ;
+
+ if (reg < 7) {
+ switch (type) {
+ case MTRR_TYPE_UNCACHABLE:
+ arr_type = 1;
+ break;
+ case MTRR_TYPE_WRCOMB:
+ arr_type = 9;
+ break;
+ case MTRR_TYPE_WRTHROUGH:
+ arr_type = 24;
+ break;
+ default:
+ arr_type = 8;
+ break;
+ }
+ } else {
+ switch (type) {
+ case MTRR_TYPE_UNCACHABLE:
+ arr_type = 0;
+ break;
+ case MTRR_TYPE_WRCOMB:
+ arr_type = 8;
+ break;
+ case MTRR_TYPE_WRTHROUGH:
+ arr_type = 25;
+ break;
+ default:
+ arr_type = 9;
+ break;
+ }
+ }
+
+ prepare_set();
+
+ base <<= PAGE_SHIFT;
+ setCx86(arr, ((unsigned char *) &base)[3]);
+ setCx86(arr + 1, ((unsigned char *) &base)[2]);
+ setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size);
+ setCx86(CX86_RCR_BASE + reg, arr_type);
+
+ post_set();
+}
+
+typedef struct {
+ unsigned long base;
+ unsigned int size;
+ mtrr_type type;
+} arr_state_t;
+
+static arr_state_t arr_state[8] __devinitdata = {
+ {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL},
+ {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}
+};
+
+static unsigned char ccr_state[7] __devinitdata = { 0, 0, 0, 0, 0, 0, 0 };
+
+static void cyrix_set_all(void)
+{
+ int i;
+
+ prepare_set();
+
+ /* the CCRs are not contiguous */
+ for (i = 0; i < 4; i++)
+ setCx86(CX86_CCR0 + i, ccr_state[i]);
+ for (; i < 7; i++)
+ setCx86(CX86_CCR4 + i, ccr_state[i]);
+ for (i = 0; i < 8; i++)
+ cyrix_set_arr(i, arr_state[i].base,
+ arr_state[i].size, arr_state[i].type);
+
+ post_set();
+}
+
+#if 0
+/*
+ * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
+ * with the SMM (System Management Mode) mode. So we need the following:
+ * Check whether SMI_LOCK (CCR3 bit 0) is set
+ * if it is set, write a warning message: ARR3 cannot be changed!
+ * (it cannot be changed until the next processor reset)
+ * if it is reset, then we can change it, set all the needed bits:
+ * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
+ * - disable access to SMM memory (CCR1 bit 2 reset)
+ * - disable SMM mode (CCR1 bit 1 reset)
+ * - disable write protection of ARR3 (CCR6 bit 1 reset)
+ * - (maybe) disable ARR3
+ * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
+ */
+static void __init
+cyrix_arr_init(void)
+{
+ struct set_mtrr_context ctxt;
+ unsigned char ccr[7];
+ int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
+#ifdef CONFIG_SMP
+ int i;
+#endif
+
+ /* flush cache and enable MAPEN */
+ set_mtrr_prepare_save(&ctxt);
+ set_mtrr_cache_disable(&ctxt);
+
+ /* Save all CCRs locally */
+ ccr[0] = getCx86(CX86_CCR0);
+ ccr[1] = getCx86(CX86_CCR1);
+ ccr[2] = getCx86(CX86_CCR2);
+ ccr[3] = ctxt.ccr3;
+ ccr[4] = getCx86(CX86_CCR4);
+ ccr[5] = getCx86(CX86_CCR5);
+ ccr[6] = getCx86(CX86_CCR6);
+
+ if (ccr[3] & 1) {
+ ccrc[3] = 1;
+ arr3_protected = 1;
+ } else {
+ /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
+ * access to SMM memory through ARR3 (bit 7).
+ */
+ if (ccr[1] & 0x80) {
+ ccr[1] &= 0x7f;
+ ccrc[1] |= 0x80;
+ }
+ if (ccr[1] & 0x04) {
+ ccr[1] &= 0xfb;
+ ccrc[1] |= 0x04;
+ }
+ if (ccr[1] & 0x02) {
+ ccr[1] &= 0xfd;
+ ccrc[1] |= 0x02;
+ }
+ arr3_protected = 0;
+ if (ccr[6] & 0x02) {
+ ccr[6] &= 0xfd;
+ ccrc[6] = 1; /* Disable write protection of ARR3 */
+ setCx86(CX86_CCR6, ccr[6]);
+ }
+ /* Disable ARR3. This is safe now that we disabled SMM. */
+ /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
+ }
+ /* If we changed CCR1 in memory, change it in the processor, too. */
+ if (ccrc[1])
+ setCx86(CX86_CCR1, ccr[1]);
+
+ /* Enable ARR usage by the processor */
+ if (!(ccr[5] & 0x20)) {
+ ccr[5] |= 0x20;
+ ccrc[5] = 1;
+ setCx86(CX86_CCR5, ccr[5]);
+ }
+#ifdef CONFIG_SMP
+ for (i = 0; i < 7; i++)
+ ccr_state[i] = ccr[i];
+ for (i = 0; i < 8; i++)
+ cyrix_get_arr(i,
+ &arr_state[i].base, &arr_state[i].size,
+ &arr_state[i].type);
+#endif
+
+ set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */
+
+ if (ccrc[5])
+ printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled
manually\n");
+ if (ccrc[3])
+ printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
+/*
+ if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3
disabled\n");
+ if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
+ if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
+*/
+ if (ccrc[6])
+ printk(KERN_INFO "mtrr: ARR3 was write protected,
unprotected\n");
+}
+#endif
+
+static struct mtrr_ops cyrix_mtrr_ops = {
+ .vendor = X86_VENDOR_CYRIX,
+// .init = cyrix_arr_init,
+ .set_all = cyrix_set_all,
+ .set = cyrix_set_arr,
+ .get = cyrix_get_arr,
+ .get_free_region = cyrix_get_free_region,
+ .validate_add_page = generic_validate_add_page,
+ .have_wrcomb = positive_have_wrcomb,
+};
+
+int __init cyrix_init_mtrr(void)
+{
+ set_mtrr_ops(&cyrix_mtrr_ops);
+ return 0;
+}
+
+//arch_initcall(cyrix_init_mtrr);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mtrr/generic.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mtrr/generic.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,418 @@
+/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
+ because MTRRs can span upto 40 bits (36bits on most modern x86) */
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <asm/flushtlb.h>
+#include <asm/io.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include <asm/system.h>
+#include <asm/cpufeature.h>
+#include "mtrr.h"
+
+struct mtrr_state {
+ struct mtrr_var_range *var_ranges;
+ mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+ unsigned char enabled;
+ mtrr_type def_type;
+};
+
+static unsigned long smp_changes_mask;
+static struct mtrr_state mtrr_state = {};
+
+/* Get the MSR pair relating to a var range */
+static void __init
+get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
+{
+ rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
+ rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
+}
+
+static void __init
+get_fixed_ranges(mtrr_type * frs)
+{
+ unsigned int *p = (unsigned int *) frs;
+ int i;
+
+ rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+
+ for (i = 0; i < 2; i++)
+ rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
+ for (i = 0; i < 8; i++)
+ rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
+}
+
+/* Grab all of the MTRR state for this CPU into *state */
+void __init get_mtrr_state(void)
+{
+ unsigned int i;
+ struct mtrr_var_range *vrs;
+ unsigned lo, dummy;
+
+ if (!mtrr_state.var_ranges) {
+ mtrr_state.var_ranges = xmalloc_array(struct mtrr_var_range,
+ num_var_ranges);
+ if (!mtrr_state.var_ranges)
+ return;
+ }
+ vrs = mtrr_state.var_ranges;
+
+ for (i = 0; i < num_var_ranges; i++)
+ get_mtrr_var_range(i, &vrs[i]);
+ get_fixed_ranges(mtrr_state.fixed_ranges);
+
+ rdmsr(MTRRdefType_MSR, lo, dummy);
+ mtrr_state.def_type = (lo & 0xff);
+ mtrr_state.enabled = (lo & 0xc00) >> 10;
+}
+
+/* Some BIOS's are fucked and don't set all MTRRs the same! */
+void __init mtrr_state_warn(void)
+{
+ unsigned long mask = smp_changes_mask;
+
+ if (!mask)
+ return;
+ if (mask & MTRR_CHANGE_MASK_FIXED)
+ printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed
MTRR settings\n");
+ if (mask & MTRR_CHANGE_MASK_VARIABLE)
+ printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable
MTRR settings\n");
+ if (mask & MTRR_CHANGE_MASK_DEFTYPE)
+ printk(KERN_WARNING "mtrr: your CPUs had inconsistent
MTRRdefType settings\n");
+ printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
+ printk(KERN_INFO "mtrr: corrected configuration.\n");
+}
+
+/* Doesn't attempt to pass an error out to MTRR users
+ because it's quite complicated in some cases and probably not
+ worth it because the best error handling is to ignore it. */
+void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
+{
+ if (wrmsr_safe(msr, a, b) < 0)
+ printk(KERN_ERR
+ "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
+ smp_processor_id(), msr, a, b);
+}
+
+int generic_get_free_region(unsigned long base, unsigned long size)
+/* [SUMMARY] Get a free MTRR.
+ <base> The starting (base) address of the region.
+ <size> The size (in bytes) of the region.
+ [RETURNS] The index of the region on success, else -1 on error.
+*/
+{
+ int i, max;
+ mtrr_type ltype;
+ unsigned long lbase;
+ unsigned lsize;
+
+ max = num_var_ranges;
+ for (i = 0; i < max; ++i) {
+ mtrr_if->get(i, &lbase, &lsize, <ype);
+ if (lsize == 0)
+ return i;
+ }
+ return -ENOSPC;
+}
+
+static void generic_get_mtrr(unsigned int reg, unsigned long *base,
+ unsigned int *size, mtrr_type * type)
+{
+ unsigned int mask_lo, mask_hi, base_lo, base_hi;
+
+ rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
+ if ((mask_lo & 0x800) == 0) {
+ /* Invalid (i.e. free) range */
+ *base = 0;
+ *size = 0;
+ *type = 0;
+ return;
+ }
+
+ rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
+
+ /* Work out the shifted address mask. */
+ mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT)
+ | mask_lo >> PAGE_SHIFT;
+
+ /* This works correctly if size is a power of two, i.e. a
+ contiguous range. */
+ *size = -mask_lo;
+ *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+ *type = base_lo & 0xff;
+}
+
+static int set_fixed_ranges(mtrr_type * frs)
+{
+ unsigned int *p = (unsigned int *) frs;
+ int changed = FALSE;
+ int i;
+ unsigned int lo, hi;
+
+ rdmsr(MTRRfix64K_00000_MSR, lo, hi);
+ if (p[0] != lo || p[1] != hi) {
+ mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+ changed = TRUE;
+ }
+
+ for (i = 0; i < 2; i++) {
+ rdmsr(MTRRfix16K_80000_MSR + i, lo, hi);
+ if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
+ mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2],
+ p[3 + i * 2]);
+ changed = TRUE;
+ }
+ }
+
+ for (i = 0; i < 8; i++) {
+ rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi);
+ if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
+ mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2],
+ p[7 + i * 2]);
+ changed = TRUE;
+ }
+ }
+ return changed;
+}
+
+/* Set the MSR pair relating to a var range. Returns TRUE if
+ changes are made */
+static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
+{
+ unsigned int lo, hi;
+ int changed = FALSE;
+
+ rdmsr(MTRRphysBase_MSR(index), lo, hi);
+ if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
+ || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
+ (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
+ mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
+ changed = TRUE;
+ }
+
+ rdmsr(MTRRphysMask_MSR(index), lo, hi);
+
+ if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL)
+ || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
+ (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
+ mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
+ changed = TRUE;
+ }
+ return changed;
+}
+
+static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi)
+/* [SUMMARY] Set the MTRR state for this CPU.
+ <state> The MTRR state information to read.
+ <ctxt> Some relevant CPU context.
+ [NOTE] The CPU must already be in a safe state for MTRR changes.
+ [RETURNS] 0 if no changes made, else a mask indication what was changed.
+*/
+{
+ unsigned int i;
+ unsigned long change_mask = 0;
+
+ for (i = 0; i < num_var_ranges; i++)
+ if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
+ change_mask |= MTRR_CHANGE_MASK_VARIABLE;
+
+ if (set_fixed_ranges(mtrr_state.fixed_ranges))
+ change_mask |= MTRR_CHANGE_MASK_FIXED;
+
+ /* Set_mtrr_restore restores the old value of MTRRdefType,
+ so to set it we fiddle with the saved value */
+ if ((deftype_lo & 0xff) != mtrr_state.def_type
+ || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
+ deftype_lo |= (mtrr_state.def_type | mtrr_state.enabled << 10);
+ change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
+ }
+
+ return change_mask;
+}
+
+
+static unsigned long cr4 = 0;
+static u32 deftype_lo, deftype_hi;
+static DEFINE_SPINLOCK(set_atomicity_lock);
+
+/*
+ * Since we are disabling the cache don't allow any interrupts - they
+ * would run extremely slow and would only increase the pain. The caller must
+ * ensure that local interrupts are disabled and are reenabled after post_set()
+ * has been called.
+ */
+
+static void prepare_set(void)
+{
+ unsigned long cr0;
+
+ /* Note that this is not ideal, since the cache is only
flushed/disabled
+ for this CPU while the MTRRs are changed, but changing this requires
+ more invasive changes to the way the kernel boots */
+
+ spin_lock(&set_atomicity_lock);
+
+ /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
+ cr0 = read_cr0() | 0x40000000; /* set CD flag */
+ write_cr0(cr0);
+ wbinvd();
+
+ /* Save value of CR4 and clear Page Global Enable (bit 7) */
+ if ( cpu_has_pge ) {
+ cr4 = read_cr4();
+ write_cr4(cr4 & ~X86_CR4_PGE);
+ }
+
+ /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
+ local_flush_tlb();
+
+ /* Save MTRR state */
+ rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+
+ /* Disable MTRRs, and set the default type to uncached */
+ mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi);
+}
+
+static void post_set(void)
+{
+ /* Flush TLBs (no need to flush caches - they are disabled) */
+ local_flush_tlb();
+
+ /* Intel (P6) standard MTRRs */
+ mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+
+ /* Enable caches */
+ write_cr0(read_cr0() & 0xbfffffff);
+
+ /* Restore value of CR4 */
+ if ( cpu_has_pge )
+ write_cr4(cr4);
+ spin_unlock(&set_atomicity_lock);
+}
+
+static void generic_set_all(void)
+{
+ unsigned long mask, count;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ prepare_set();
+
+ /* Actually set the state */
+ mask = set_mtrr_state(deftype_lo,deftype_hi);
+
+ post_set();
+ local_irq_restore(flags);
+
+ /* Use the atomic bitops to update the global mask */
+ for (count = 0; count < sizeof mask * 8; ++count) {
+ if (mask & 0x01)
+ set_bit(count, &smp_changes_mask);
+ mask >>= 1;
+ }
+
+}
+
+static void generic_set_mtrr(unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type)
+/* [SUMMARY] Set variable MTRR register on the local CPU.
+ <reg> The register to set.
+ <base> The base address of the region.
+ <size> The size of the region. If this is 0 the region is disabled.
+ <type> The type of the region.
+ <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
+ be done externally.
+ [RETURNS] Nothing.
+*/
+{
+ unsigned long flags;
+ struct mtrr_var_range *vr;
+
+ vr = &mtrr_state.var_ranges[reg];
+
+ local_irq_save(flags);
+ prepare_set();
+
+ if (size == 0) {
+ /* The invalid bit is kept in the mask, so we simply clear the
+ relevant mask register to disable a range. */
+ mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
+ memset(vr, 0, sizeof(struct mtrr_var_range));
+ } else {
+ vr->base_lo = base << PAGE_SHIFT | type;
+ vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
+ vr->mask_lo = -size << PAGE_SHIFT | 0x800;
+ vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
+
+ mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi);
+ mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
+ }
+
+ post_set();
+ local_irq_restore(flags);
+}
+
+int generic_validate_add_page(unsigned long base, unsigned long size, unsigned
int type)
+{
+ unsigned long lbase, last;
+
+ /* For Intel PPro stepping <= 7, must be 4 MiB aligned
+ and not touch 0x70000000->0x7003FFFF */
+ if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
+ boot_cpu_data.x86_model == 1 &&
+ boot_cpu_data.x86_mask <= 7) {
+ if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
+ printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB
aligned\n", base);
+ return -EINVAL;
+ }
+ if (!(base + size < 0x70000000 || base > 0x7003FFFF) &&
+ (type == MTRR_TYPE_WRCOMB
+ || type == MTRR_TYPE_WRBACK)) {
+ printk(KERN_WARNING "mtrr: writable mtrr between
0x70000000 and 0x7003FFFF may hang the CPU.\n");
+ return -EINVAL;
+ }
+ }
+
+ if (base + size < 0x100) {
+ printk(KERN_WARNING "mtrr: cannot set region below 1 MiB
(0x%lx000,0x%lx000)\n",
+ base, size);
+ return -EINVAL;
+ }
+ /* Check upper bits of base and last are equal and lower bits are 0
+ for base and 1 for last */
+ last = base + size - 1;
+ for (lbase = base; !(lbase & 1) && (last & 1);
+ lbase = lbase >> 1, last = last >> 1) ;
+ if (lbase != last) {
+ printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a
size(0x%lx000) boundary\n",
+ base, size);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+
+static int generic_have_wrcomb(void)
+{
+ unsigned long config, dummy;
+ rdmsr(MTRRcap_MSR, config, dummy);
+ return (config & (1 << 10));
+}
+
+int positive_have_wrcomb(void)
+{
+ return 1;
+}
+
+/* generic structure...
+ */
+struct mtrr_ops generic_mtrr_ops = {
+ .use_intel_if = 1,
+ .set_all = generic_set_all,
+ .get = generic_get_mtrr,
+ .get_free_region = generic_get_free_region,
+ .set = generic_set_mtrr,
+ .validate_add_page = generic_validate_add_page,
+ .have_wrcomb = generic_have_wrcomb,
+};
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mtrr/main.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mtrr/main.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,661 @@
+/* Generic MTRR (Memory Type Range Register) driver.
+
+ Copyright (C) 1997-2000 Richard Gooch
+ Copyright (c) 2002 Patrick Mochel
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this library; if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@xxxxxxxxxxxxx
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+
+ Source: "Pentium Pro Family Developer's Manual, Volume 3:
+ Operating System Writer's Guide" (Intel document number 242692),
+ section 11.11.7
+
+ This was cleaned and made readable by Patrick Mochel <mochel@xxxxxxxx>
+ on 6-7 March 2002.
+ Source: Intel Architecture Software Developers Manual, Volume 3:
+ System Programming Guide; Section 9.11. (1997 edition - PPro).
+*/
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/smp.h>
+#include <xen/spinlock.h>
+#include <asm/mtrr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include "mtrr.h"
+
+/* No blocking mutexes in Xen. Spin instead. */
+#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED
+#define down(_m) spin_lock(_m)
+#define up(_m) spin_unlock(_m)
+#define lock_cpu_hotplug() ((void)0)
+#define unlock_cpu_hotplug() ((void)0)
+#define dump_stack() ((void)0)
+
+u32 num_var_ranges = 0;
+
+unsigned int *usage_table;
+static DECLARE_MUTEX(mtrr_sem);
+
+u32 size_or_mask, size_and_mask;
+
+static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
+
+struct mtrr_ops * mtrr_if = NULL;
+
+static void set_mtrr(unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type);
+
+extern int arr3_protected;
+
+static char *mtrr_strings[MTRR_NUM_TYPES] =
+{
+ "uncachable", /* 0 */
+ "write-combining", /* 1 */
+ "?", /* 2 */
+ "?", /* 3 */
+ "write-through", /* 4 */
+ "write-protect", /* 5 */
+ "write-back", /* 6 */
+};
+
+char *mtrr_attrib_to_str(int x)
+{
+ return (x <= 6) ? mtrr_strings[x] : "?";
+}
+
+void set_mtrr_ops(struct mtrr_ops * ops)
+{
+ if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
+ mtrr_ops[ops->vendor] = ops;
+}
+
+/* Returns non-zero if we have the write-combining memory type */
+static int have_wrcomb(void)
+{
+ return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
+}
+
+/* This function returns the number of variable MTRRs */
+static void __init set_num_var_ranges(void)
+{
+ unsigned long config = 0, dummy;
+
+ if (use_intel()) {
+ rdmsr(MTRRcap_MSR, config, dummy);
+ } else if (is_cpu(AMD))
+ config = 2;
+ else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
+ config = 8;
+ num_var_ranges = config & 0xff;
+}
+
+static void __init init_table(void)
+{
+ int i, max;
+
+ max = num_var_ranges;
+ if ((usage_table = xmalloc_array(unsigned int, max)) == NULL) {
+ printk(KERN_ERR "mtrr: could not allocate\n");
+ return;
+ }
+ for (i = 0; i < max; i++)
+ usage_table[i] = 1;
+}
+
+struct set_mtrr_data {
+ atomic_t count;
+ atomic_t gate;
+ unsigned long smp_base;
+ unsigned long smp_size;
+ unsigned int smp_reg;
+ mtrr_type smp_type;
+};
+
+#ifdef CONFIG_SMP
+
+static void ipi_handler(void *info)
+/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
+ [RETURNS] Nothing.
+*/
+{
+ struct set_mtrr_data *data = info;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ atomic_dec(&data->count);
+ while(!atomic_read(&data->gate))
+ cpu_relax();
+
+ /* The master has cleared me to execute */
+ if (data->smp_reg != ~0U)
+ mtrr_if->set(data->smp_reg, data->smp_base,
+ data->smp_size, data->smp_type);
+ else
+ mtrr_if->set_all();
+
+ atomic_dec(&data->count);
+ while(atomic_read(&data->gate))
+ cpu_relax();
+
+ atomic_dec(&data->count);
+ local_irq_restore(flags);
+}
+
+#endif
+
+/**
+ * set_mtrr - update mtrrs on all processors
+ * @reg: mtrr in question
+ * @base: mtrr base
+ * @size: mtrr size
+ * @type: mtrr type
+ *
+ * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
+ *
+ * 1. Send IPI to do the following:
+ * 2. Disable Interrupts
+ * 3. Wait for all procs to do so
+ * 4. Enter no-fill cache mode
+ * 5. Flush caches
+ * 6. Clear PGE bit
+ * 7. Flush all TLBs
+ * 8. Disable all range registers
+ * 9. Update the MTRRs
+ * 10. Enable all range registers
+ * 11. Flush all TLBs and caches again
+ * 12. Enter normal cache mode and reenable caching
+ * 13. Set PGE
+ * 14. Wait for buddies to catch up
+ * 15. Enable interrupts.
+ *
+ * What does that mean for us? Well, first we set data.count to the number
+ * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
+ * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
+ * Meanwhile, they are waiting for that flag to be set. Once it's set, each
+ * CPU goes through the transition of updating MTRRs. The CPU vendors may each
do it
+ * differently, so we call mtrr_if->set() callback and let them take care of
it.
+ * When they're done, they again decrement data->count and wait for data.gate
to
+ * be reset.
+ * When we finish, we wait for data.count to hit 0 and toggle the data.gate
flag.
+ * Everyone then enables interrupts and we all continue on.
+ *
+ * Note that the mechanism is the same for UP systems, too; all the SMP stuff
+ * becomes nops.
+ */
+static void set_mtrr(unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type)
+{
+ struct set_mtrr_data data;
+ unsigned long flags;
+
+ data.smp_reg = reg;
+ data.smp_base = base;
+ data.smp_size = size;
+ data.smp_type = type;
+ atomic_set(&data.count, num_booting_cpus() - 1);
+ atomic_set(&data.gate,0);
+
+ /* Start the ball rolling on other CPUs */
+ if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
+ panic("mtrr: timed out waiting for other CPUs\n");
+
+ local_irq_save(flags);
+
+ while(atomic_read(&data.count))
+ cpu_relax();
+
+ /* ok, reset count and toggle gate */
+ atomic_set(&data.count, num_booting_cpus() - 1);
+ atomic_set(&data.gate,1);
+
+ /* do our MTRR business */
+
+ /* HACK!
+ * We use this same function to initialize the mtrrs on boot.
+ * The state of the boot cpu's mtrrs has been saved, and we want
+ * to replicate across all the APs.
+ * If we're doing that @reg is set to something special...
+ */
+ if (reg != ~0U)
+ mtrr_if->set(reg,base,size,type);
+
+ /* wait for the others */
+ while(atomic_read(&data.count))
+ cpu_relax();
+
+ atomic_set(&data.count, num_booting_cpus() - 1);
+ atomic_set(&data.gate,0);
+
+ /*
+ * Wait here for everyone to have seen the gate change
+ * So we're the last ones to touch 'data'
+ */
+ while(atomic_read(&data.count))
+ cpu_relax();
+
+ local_irq_restore(flags);
+}
+
+/**
+ * mtrr_add_page - Add a memory type region
+ * @base: Physical base address of region in pages (4 KB)
+ * @size: Physical size of region in pages (4 KB)
+ * @type: Type of MTRR desired
+ * @increment: If this is true do usage counting on the region
+ *
+ * Memory type region registers control the caching on newer Intel and
+ * non Intel processors. This function allows drivers to request an
+ * MTRR is added. The details and hardware specifics of each processor's
+ * implementation are hidden from the caller, but nevertheless the
+ * caller should expect to need to provide a power of two size on an
+ * equivalent power of two boundary.
+ *
+ * If the region cannot be added either because all regions are in use
+ * or the CPU cannot support it a negative value is returned. On success
+ * the register number for this entry is returned, but should be treated
+ * as a cookie only.
+ *
+ * On a multiprocessor machine the changes are made to all processors.
+ * This is required on x86 by the Intel processors.
+ *
+ * The available types are
+ *
+ * %MTRR_TYPE_UNCACHABLE - No caching
+ *
+ * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
+ *
+ * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
+ *
+ * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
+ *
+ * BUGS: Needs a quiet flag for the cases where drivers do not mind
+ * failures and do not wish system log messages to be sent.
+ */
+
+int mtrr_add_page(unsigned long base, unsigned long size,
+ unsigned int type, char increment)
+{
+ int i;
+ mtrr_type ltype;
+ unsigned long lbase;
+ unsigned int lsize;
+ int error;
+
+ if (!mtrr_if)
+ return -ENXIO;
+
+ if ((error = mtrr_if->validate_add_page(base,size,type)))
+ return error;
+
+ if (type >= MTRR_NUM_TYPES) {
+ printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
+ return -EINVAL;
+ }
+
+ /* If the type is WC, check that this processor supports it */
+ if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
+ printk(KERN_WARNING
+ "mtrr: your processor doesn't support
write-combining\n");
+ return -ENOSYS;
+ }
+
+ if (base & size_or_mask || size & size_or_mask) {
+ printk(KERN_WARNING "mtrr: base or size exceeds the MTRR
width\n");
+ return -EINVAL;
+ }
+
+ error = -EINVAL;
+
+ /* No CPU hotplug when we change MTRR entries */
+ lock_cpu_hotplug();
+ /* Search for existing MTRR */
+ down(&mtrr_sem);
+ for (i = 0; i < num_var_ranges; ++i) {
+ mtrr_if->get(i, &lbase, &lsize, <ype);
+ if (base >= lbase + lsize)
+ continue;
+ if ((base < lbase) && (base + size <= lbase))
+ continue;
+ /* At this point we know there is some kind of
overlap/enclosure */
+ if ((base < lbase) || (base + size > lbase + lsize)) {
+ printk(KERN_WARNING
+ "mtrr: 0x%lx000,0x%lx000 overlaps existing"
+ " 0x%lx000,0x%x000\n", base, size, lbase,
+ lsize);
+ goto out;
+ }
+ /* New region is enclosed by an existing region */
+ if (ltype != type) {
+ if (type == MTRR_TYPE_UNCACHABLE)
+ continue;
+ printk (KERN_WARNING "mtrr: type mismatch for
%lx000,%lx000 old: %s new: %s\n",
+ base, size, mtrr_attrib_to_str(ltype),
+ mtrr_attrib_to_str(type));
+ goto out;
+ }
+ if (increment)
+ ++usage_table[i];
+ error = i;
+ goto out;
+ }
+ /* Search for an empty MTRR */
+ i = mtrr_if->get_free_region(base, size);
+ if (i >= 0) {
+ set_mtrr(i, base, size, type);
+ usage_table[i] = 1;
+ } else
+ printk(KERN_INFO "mtrr: no more MTRRs available\n");
+ error = i;
+ out:
+ up(&mtrr_sem);
+ unlock_cpu_hotplug();
+ return error;
+}
+
+static int mtrr_check(unsigned long base, unsigned long size)
+{
+ if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+ printk(KERN_WARNING
+ "mtrr: size and base must be multiples of 4 kiB\n");
+ printk(KERN_DEBUG
+ "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
+ dump_stack();
+ return -1;
+ }
+ return 0;
+}
+
+/**
+ * mtrr_add - Add a memory type region
+ * @base: Physical base address of region
+ * @size: Physical size of region
+ * @type: Type of MTRR desired
+ * @increment: If this is true do usage counting on the region
+ *
+ * Memory type region registers control the caching on newer Intel and
+ * non Intel processors. This function allows drivers to request an
+ * MTRR is added. The details and hardware specifics of each processor's
+ * implementation are hidden from the caller, but nevertheless the
+ * caller should expect to need to provide a power of two size on an
+ * equivalent power of two boundary.
+ *
+ * If the region cannot be added either because all regions are in use
+ * or the CPU cannot support it a negative value is returned. On success
+ * the register number for this entry is returned, but should be treated
+ * as a cookie only.
+ *
+ * On a multiprocessor machine the changes are made to all processors.
+ * This is required on x86 by the Intel processors.
+ *
+ * The available types are
+ *
+ * %MTRR_TYPE_UNCACHABLE - No caching
+ *
+ * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
+ *
+ * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
+ *
+ * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
+ *
+ * BUGS: Needs a quiet flag for the cases where drivers do not mind
+ * failures and do not wish system log messages to be sent.
+ */
+
+int
+mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+ char increment)
+{
+ if (mtrr_check(base, size))
+ return -EINVAL;
+ return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
+ increment);
+}
+
+/**
+ * mtrr_del_page - delete a memory type region
+ * @reg: Register returned by mtrr_add
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If register is supplied then base and size are ignored. This is
+ * how drivers should call it.
+ *
+ * Releases an MTRR region. If the usage count drops to zero the
+ * register is freed and the region returns to default state.
+ * On success the register is returned, on failure a negative error
+ * code.
+ */
+
+int mtrr_del_page(int reg, unsigned long base, unsigned long size)
+{
+ int i, max;
+ mtrr_type ltype;
+ unsigned long lbase;
+ unsigned int lsize;
+ int error = -EINVAL;
+
+ if (!mtrr_if)
+ return -ENXIO;
+
+ max = num_var_ranges;
+ /* No CPU hotplug when we change MTRR entries */
+ lock_cpu_hotplug();
+ down(&mtrr_sem);
+ if (reg < 0) {
+ /* Search for existing MTRR */
+ for (i = 0; i < max; ++i) {
+ mtrr_if->get(i, &lbase, &lsize, <ype);
+ if (lbase == base && lsize == size) {
+ reg = i;
+ break;
+ }
+ }
+ if (reg < 0) {
+ printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000
found\n", base,
+ size);
+ goto out;
+ }
+ }
+ if (reg >= max) {
+ printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
+ goto out;
+ }
+ if (is_cpu(CYRIX) && !use_intel()) {
+ if ((reg == 3) && arr3_protected) {
+ printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
+ goto out;
+ }
+ }
+ mtrr_if->get(reg, &lbase, &lsize, <ype);
+ if (lsize < 1) {
+ printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
+ goto out;
+ }
+ if (usage_table[reg] < 1) {
+ printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
+ goto out;
+ }
+ if (--usage_table[reg] < 1)
+ set_mtrr(reg, 0, 0, 0);
+ error = reg;
+ out:
+ up(&mtrr_sem);
+ unlock_cpu_hotplug();
+ return error;
+}
+/**
+ * mtrr_del - delete a memory type region
+ * @reg: Register returned by mtrr_add
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If register is supplied then base and size are ignored. This is
+ * how drivers should call it.
+ *
+ * Releases an MTRR region. If the usage count drops to zero the
+ * register is freed and the region returns to default state.
+ * On success the register is returned, on failure a negative error
+ * code.
+ */
+
+int
+mtrr_del(int reg, unsigned long base, unsigned long size)
+{
+ if (mtrr_check(base, size))
+ return -EINVAL;
+ return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
+}
+
+EXPORT_SYMBOL(mtrr_add);
+EXPORT_SYMBOL(mtrr_del);
+
+/* HACK ALERT!
+ * These should be called implicitly, but we can't yet until all the initcall
+ * stuff is done...
+ */
+extern void amd_init_mtrr(void);
+extern void cyrix_init_mtrr(void);
+extern void centaur_init_mtrr(void);
+
+static void __init init_ifs(void)
+{
+ amd_init_mtrr();
+ cyrix_init_mtrr();
+ centaur_init_mtrr();
+}
+
+/* The suspend/resume methods are only for CPU without MTRR. CPU using generic
+ * MTRR driver doesn't require this
+ */
+struct mtrr_value {
+ mtrr_type ltype;
+ unsigned long lbase;
+ unsigned int lsize;
+};
+
+/**
+ * mtrr_bp_init - initialize mtrrs on the boot CPU
+ *
+ * This needs to be called early; before any of the other CPUs are
+ * initialized (i.e. before smp_init()).
+ *
+ */
+void __init mtrr_bp_init(void)
+{
+ init_ifs();
+
+ if (cpu_has_mtrr) {
+ mtrr_if = &generic_mtrr_ops;
+ size_or_mask = 0xff000000; /* 36 bits */
+ size_and_mask = 0x00f00000;
+
+ /* This is an AMD specific MSR, but we assume(hope?) that
+ Intel will implement it to when they extend the address
+ bus of the Xeon. */
+ if (cpuid_eax(0x80000000) >= 0x80000008) {
+ u32 phys_addr;
+ phys_addr = cpuid_eax(0x80000008) & 0xff;
+ /* CPUID workaround for Intel 0F33/0F34 CPU */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 0xF &&
+ boot_cpu_data.x86_model == 0x3 &&
+ (boot_cpu_data.x86_mask == 0x3 ||
+ boot_cpu_data.x86_mask == 0x4))
+ phys_addr = 36;
+
+ size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
+ size_and_mask = ~size_or_mask & 0xfff00000;
+ } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
+ boot_cpu_data.x86 == 6) {
+ /* VIA C* family have Intel style MTRRs, but
+ don't support PAE */
+ size_or_mask = 0xfff00000; /* 32 bits */
+ size_and_mask = 0;
+ }
+ } else {
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (cpu_has_k6_mtrr) {
+ /* Pre-Athlon (K6) AMD CPU MTRRs */
+ mtrr_if = mtrr_ops[X86_VENDOR_AMD];
+ size_or_mask = 0xfff00000; /* 32 bits */
+ size_and_mask = 0;
+ }
+ break;
+ case X86_VENDOR_CENTAUR:
+ if (cpu_has_centaur_mcr) {
+ mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
+ size_or_mask = 0xfff00000; /* 32 bits */
+ size_and_mask = 0;
+ }
+ break;
+ case X86_VENDOR_CYRIX:
+ if (cpu_has_cyrix_arr) {
+ mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
+ size_or_mask = 0xfff00000; /* 32 bits */
+ size_and_mask = 0;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (mtrr_if) {
+ set_num_var_ranges();
+ init_table();
+ if (use_intel())
+ get_mtrr_state();
+ }
+}
+
+void mtrr_ap_init(void)
+{
+ unsigned long flags;
+
+ if (!mtrr_if || !use_intel())
+ return;
+ /*
+ * Ideally we should hold mtrr_sem here to avoid mtrr entries changed,
+ * but this routine will be called in cpu boot time, holding the lock
+ * breaks it. This routine is called in two cases: 1.very earily time
+ * of software resume, when there absolutely isn't mtrr entry changes;
+ * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
+ * prevent mtrr entry changes
+ */
+ local_irq_save(flags);
+
+ mtrr_if->set_all();
+
+ local_irq_restore(flags);
+}
+
+static int __init mtrr_init_finialize(void)
+{
+ if (!mtrr_if)
+ return 0;
+ if (use_intel())
+ mtrr_state_warn();
+ return 0;
+}
+__initcall(mtrr_init_finialize);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mtrr/mtrr.h
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mtrr/mtrr.h Tue Feb 14 15:23:43 2006
@@ -0,0 +1,97 @@
+/*
+ * local mtrr defines.
+ */
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+#define MTRRcap_MSR 0x0fe
+#define MTRRdefType_MSR 0x2ff
+
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
+#define NUM_FIXED_RANGES 88
+#define MTRRfix64K_00000_MSR 0x250
+#define MTRRfix16K_80000_MSR 0x258
+#define MTRRfix16K_A0000_MSR 0x259
+#define MTRRfix4K_C0000_MSR 0x268
+#define MTRRfix4K_C8000_MSR 0x269
+#define MTRRfix4K_D0000_MSR 0x26a
+#define MTRRfix4K_D8000_MSR 0x26b
+#define MTRRfix4K_E0000_MSR 0x26c
+#define MTRRfix4K_E8000_MSR 0x26d
+#define MTRRfix4K_F0000_MSR 0x26e
+#define MTRRfix4K_F8000_MSR 0x26f
+
+#define MTRR_CHANGE_MASK_FIXED 0x01
+#define MTRR_CHANGE_MASK_VARIABLE 0x02
+#define MTRR_CHANGE_MASK_DEFTYPE 0x04
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+ an 8 bit field: */
+typedef u8 mtrr_type;
+
+struct mtrr_ops {
+ u32 vendor;
+ u32 use_intel_if;
+// void (*init)(void);
+ void (*set)(unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type);
+ void (*set_all)(void);
+
+ void (*get)(unsigned int reg, unsigned long *base,
+ unsigned int *size, mtrr_type * type);
+ int (*get_free_region) (unsigned long base, unsigned long size);
+
+ int (*validate_add_page)(unsigned long base, unsigned long size,
+ unsigned int type);
+ int (*have_wrcomb)(void);
+};
+
+extern int generic_get_free_region(unsigned long base, unsigned long size);
+extern int generic_validate_add_page(unsigned long base, unsigned long size,
+ unsigned int type);
+
+extern struct mtrr_ops generic_mtrr_ops;
+
+extern int positive_have_wrcomb(void);
+
+/* library functions for processor-specific routines */
+struct set_mtrr_context {
+ unsigned long flags;
+ unsigned long deftype_lo;
+ unsigned long deftype_hi;
+ unsigned long cr4val;
+ unsigned long ccr3;
+};
+
+struct mtrr_var_range {
+ unsigned long base_lo;
+ unsigned long base_hi;
+ unsigned long mask_lo;
+ unsigned long mask_hi;
+};
+
+void set_mtrr_done(struct set_mtrr_context *ctxt);
+void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
+void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
+
+void get_mtrr_state(void);
+
+extern void set_mtrr_ops(struct mtrr_ops * ops);
+
+extern u32 size_or_mask, size_and_mask;
+extern struct mtrr_ops * mtrr_if;
+
+#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
+#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
+
+extern unsigned int num_var_ranges;
+
+void mtrr_state_warn(void);
+char *mtrr_attrib_to_str(int x);
+void mtrr_wrmsr(unsigned, unsigned, unsigned);
+
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/cpu/mtrr/state.c
--- /dev/null Mon Feb 13 16:41:23 2006
+++ b/xen/arch/x86/cpu/mtrr/state.c Tue Feb 14 15:23:43 2006
@@ -0,0 +1,78 @@
+#include <xen/mm.h>
+#include <xen/init.h>
+#include <asm/io.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include "mtrr.h"
+
+
+/* Put the processor into a state where MTRRs can be safely set */
+void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
+{
+ unsigned int cr0;
+
+ /* Disable interrupts locally */
+ local_irq_save(ctxt->flags);
+
+ if (use_intel() || is_cpu(CYRIX)) {
+
+ /* Save value of CR4 and clear Page Global Enable (bit 7) */
+ if ( cpu_has_pge ) {
+ ctxt->cr4val = read_cr4();
+ write_cr4(ctxt->cr4val & (unsigned char) ~(1 << 7));
+ }
+
+ /* Disable and flush caches. Note that wbinvd flushes the TLBs
as
+ a side-effect */
+ cr0 = read_cr0() | 0x40000000;
+ wbinvd();
+ write_cr0(cr0);
+ wbinvd();
+
+ if (use_intel())
+ /* Save MTRR state */
+ rdmsr(MTRRdefType_MSR, ctxt->deftype_lo,
ctxt->deftype_hi);
+ else
+ /* Cyrix ARRs - everything else were excluded at the
top */
+ ctxt->ccr3 = getCx86(CX86_CCR3);
+ }
+}
+
+void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
+{
+ if (use_intel())
+ /* Disable MTRRs, and set the default type to uncached */
+ mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
+ ctxt->deftype_hi);
+ else if (is_cpu(CYRIX))
+ /* Cyrix ARRs - everything else were excluded at the top */
+ setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
+}
+
+/* Restore the processor after a set_mtrr_prepare */
+void set_mtrr_done(struct set_mtrr_context *ctxt)
+{
+ if (use_intel() || is_cpu(CYRIX)) {
+
+ /* Flush caches and TLBs */
+ wbinvd();
+
+ /* Restore MTRRdefType */
+ if (use_intel())
+ /* Intel (P6) standard MTRRs */
+ mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo,
ctxt->deftype_hi);
+ else
+ /* Cyrix ARRs - everything else was excluded at the top
*/
+ setCx86(CX86_CCR3, ctxt->ccr3);
+
+ /* Enable caches */
+ write_cr0(read_cr0() & 0xbfffffff);
+
+ /* Restore value of CR4 */
+ if ( cpu_has_pge )
+ write_cr4(ctxt->cr4val);
+ }
+ /* Re-enable interrupts locally (if enabled previously) */
+ local_irq_restore(ctxt->flags);
+}
+
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/mtrr/amd.c
--- a/xen/arch/x86/mtrr/amd.c Mon Feb 13 16:41:23 2006
+++ /dev/null Tue Feb 14 15:23:43 2006
@@ -1,121 +0,0 @@
-#include <xen/init.h>
-#include <xen/mm.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-
-#include "mtrr.h"
-
-static void
-amd_get_mtrr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
-{
- unsigned long low, high;
-
- rdmsr(MSR_K6_UWCCR, low, high);
- /* Upper dword is region 1, lower is region 0 */
- if (reg == 1)
- low = high;
- /* The base masks off on the right alignment */
- *base = (low & 0xFFFE0000) >> PAGE_SHIFT;
- *type = 0;
- if (low & 1)
- *type = MTRR_TYPE_UNCACHABLE;
- if (low & 2)
- *type = MTRR_TYPE_WRCOMB;
- if (!(low & 3)) {
- *size = 0;
- return;
- }
- /*
- * This needs a little explaining. The size is stored as an
- * inverted mask of bits of 128K granularity 15 bits long offset
- * 2 bits
- *
- * So to get a size we do invert the mask and add 1 to the lowest
- * mask bit (4 as its 2 bits in). This gives us a size we then shift
- * to turn into 128K blocks
- *
- * eg 111 1111 1111 1100 is 512K
- *
- * invert 000 0000 0000 0011
- * +1 000 0000 0000 0100
- * *128K ...
- */
- low = (~low) & 0x1FFFC;
- *size = (low + 4) << (15 - PAGE_SHIFT);
- return;
-}
-
-static void amd_set_mtrr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-/* [SUMMARY] Set variable MTRR register on the local CPU.
- <reg> The register to set.
- <base> The base address of the region.
- <size> The size of the region. If this is 0 the region is disabled.
- <type> The type of the region.
- <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
- be done externally.
- [RETURNS] Nothing.
-*/
-{
- u32 regs[2];
-
- /*
- * Low is MTRR0 , High MTRR 1
- */
- rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
- /*
- * Blank to disable
- */
- if (size == 0)
- regs[reg] = 0;
- else
- /* Set the register to the base, the type (off by one) and an
- inverted bitmask of the size The size is the only odd
- bit. We are fed say 512K We invert this and we get 111 1111
- 1111 1011 but if you subtract one and invert you get the
- desired 111 1111 1111 1100 mask
-
- But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */
- regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
- | (base << PAGE_SHIFT) | (type + 1);
-
- /*
- * The writeback rule is quite specific. See the manual. Its
- * disable local interrupts, write back the cache, set the mtrr
- */
- wbinvd();
- wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
-}
-
-static int amd_validate_add_page(unsigned long base, unsigned long size,
unsigned int type)
-{
- /* Apply the K6 block alignment and size rules
- In order
- o Uncached or gathering only
- o 128K or bigger block
- o Power of 2 block
- o base suitably aligned to the power
- */
- if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
- || (size & ~(size - 1)) - size || (base & (size - 1)))
- return -EINVAL;
- return 0;
-}
-
-static struct mtrr_ops amd_mtrr_ops = {
- .vendor = X86_VENDOR_AMD,
- .set = amd_set_mtrr,
- .get = amd_get_mtrr,
- .get_free_region = generic_get_free_region,
- .validate_add_page = amd_validate_add_page,
- .have_wrcomb = positive_have_wrcomb,
-};
-
-int __init amd_init_mtrr(void)
-{
- set_mtrr_ops(&amd_mtrr_ops);
- return 0;
-}
-
-//arch_initcall(amd_mtrr_init);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/mtrr/centaur.c
--- a/xen/arch/x86/mtrr/centaur.c Mon Feb 13 16:41:23 2006
+++ /dev/null Tue Feb 14 15:23:43 2006
@@ -1,220 +0,0 @@
-#include <xen/init.h>
-#include <xen/mm.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include "mtrr.h"
-
-static struct {
- unsigned long high;
- unsigned long low;
-} centaur_mcr[8];
-
-static u8 centaur_mcr_reserved;
-static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */
-
-/*
- * Report boot time MCR setups
- */
-
-static int
-centaur_get_free_region(unsigned long base, unsigned long size)
-/* [SUMMARY] Get a free MTRR.
- <base> The starting (base) address of the region.
- <size> The size (in bytes) of the region.
- [RETURNS] The index of the region on success, else -1 on error.
-*/
-{
- int i, max;
- mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
-
- max = num_var_ranges;
- for (i = 0; i < max; ++i) {
- if (centaur_mcr_reserved & (1 << i))
- continue;
- mtrr_if->get(i, &lbase, &lsize, <ype);
- if (lsize == 0)
- return i;
- }
- return -ENOSPC;
-}
-
-void
-mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
-{
- centaur_mcr[mcr].low = lo;
- centaur_mcr[mcr].high = hi;
-}
-
-static void
-centaur_get_mcr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
-{
- *base = centaur_mcr[reg].high >> PAGE_SHIFT;
- *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
- *type = MTRR_TYPE_WRCOMB; /* If it is there, it is
write-combining */
- if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
- *type = MTRR_TYPE_UNCACHABLE;
- if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
- *type = MTRR_TYPE_WRBACK;
- if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
- *type = MTRR_TYPE_WRBACK;
-
-}
-
-static void centaur_set_mcr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-{
- unsigned long low, high;
-
- if (size == 0) {
- /* Disable */
- high = low = 0;
- } else {
- high = base << PAGE_SHIFT;
- if (centaur_mcr_type == 0)
- low = -size << PAGE_SHIFT | 0x1f; /* only support
write-combining... */
- else {
- if (type == MTRR_TYPE_UNCACHABLE)
- low = -size << PAGE_SHIFT | 0x02; /* NC */
- else
- low = -size << PAGE_SHIFT | 0x09; /*
WWO,WC */
- }
- }
- centaur_mcr[reg].high = high;
- centaur_mcr[reg].low = low;
- wrmsr(MSR_IDT_MCR0 + reg, low, high);
-}
-/*
- * Initialise the later (saner) Winchip MCR variant. In this version
- * the BIOS can pass us the registers it has used (but not their values)
- * and the control register is read/write
- */
-
-static void __init
-centaur_mcr1_init(void)
-{
- unsigned i;
- u32 lo, hi;
-
- /* Unfortunately, MCR's are read-only, so there is no way to
- * find out what the bios might have done.
- */
-
- rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */
- lo &= ~0x1C0; /* clear key */
- lo |= 0x040; /* set key to 1 */
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */
- }
-
- centaur_mcr_type = 1;
-
- /*
- * Clear any unconfigured MCR's.
- */
-
- for (i = 0; i < 8; ++i) {
- if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) {
- if (!(lo & (1 << (9 + i))))
- wrmsr(MSR_IDT_MCR0 + i, 0, 0);
- else
- /*
- * If the BIOS set up an MCR we cannot see
it
- * but we don't wish to obliterate it
- */
- centaur_mcr_reserved |= (1 << i);
- }
- }
- /*
- * Throw the main write-combining switch...
- * However if OOSTORE is enabled then people have already done far
- * cleverer things and we should behave.
- */
-
- lo |= 15; /* Write combine enables */
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-}
-
-/*
- * Initialise the original winchip with read only MCR registers
- * no used bitmask for the BIOS to pass on and write only control
- */
-
-static void __init
-centaur_mcr0_init(void)
-{
- unsigned i;
-
- /* Unfortunately, MCR's are read-only, so there is no way to
- * find out what the bios might have done.
- */
-
- /* Clear any unconfigured MCR's.
- * This way we are sure that the centaur_mcr array contains the actual
- * values. The disadvantage is that any BIOS tweaks are thus undone.
- *
- */
- for (i = 0; i < 8; ++i) {
- if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0)
- wrmsr(MSR_IDT_MCR0 + i, 0, 0);
- }
-
- wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */
-}
-
-/*
- * Initialise Winchip series MCR registers
- */
-
-static void __init
-centaur_mcr_init(void)
-{
- struct set_mtrr_context ctxt;
-
- set_mtrr_prepare_save(&ctxt);
- set_mtrr_cache_disable(&ctxt);
-
- if (boot_cpu_data.x86_model == 4)
- centaur_mcr0_init();
- else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9)
- centaur_mcr1_init();
-
- set_mtrr_done(&ctxt);
-}
-
-static int centaur_validate_add_page(unsigned long base,
- unsigned long size, unsigned int type)
-{
- /*
- * FIXME: Winchip2 supports uncached
- */
- if (type != MTRR_TYPE_WRCOMB &&
- (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
- printk(KERN_WARNING
- "mtrr: only write-combining%s supported\n",
- centaur_mcr_type ? " and uncacheable are"
- : " is");
- return -EINVAL;
- }
- return 0;
-}
-
-static struct mtrr_ops centaur_mtrr_ops = {
- .vendor = X86_VENDOR_CENTAUR,
- .init = centaur_mcr_init,
- .set = centaur_set_mcr,
- .get = centaur_get_mcr,
- .get_free_region = centaur_get_free_region,
- .validate_add_page = centaur_validate_add_page,
- .have_wrcomb = positive_have_wrcomb,
-};
-
-int __init centaur_init_mtrr(void)
-{
- set_mtrr_ops(¢aur_mtrr_ops);
- return 0;
-}
-
-//arch_initcall(centaur_init_mtrr);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/mtrr/cyrix.c
--- a/xen/arch/x86/mtrr/cyrix.c Mon Feb 13 16:41:23 2006
+++ /dev/null Tue Feb 14 15:23:43 2006
@@ -1,362 +0,0 @@
-#include <xen/init.h>
-#include <xen/mm.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include <asm/io.h>
-#include "mtrr.h"
-
-int arr3_protected;
-
-static void
-cyrix_get_arr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
-{
- unsigned long flags;
- unsigned char arr, ccr3, rcr, shift;
-
- arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
-
- /* Save flags and disable interrupts */
- local_irq_save(flags);
-
- ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- ((unsigned char *) base)[3] = getCx86(arr);
- ((unsigned char *) base)[2] = getCx86(arr + 1);
- ((unsigned char *) base)[1] = getCx86(arr + 2);
- rcr = getCx86(CX86_RCR_BASE + reg);
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
-
- /* Enable interrupts if it was enabled previously */
- local_irq_restore(flags);
- shift = ((unsigned char *) base)[1] & 0x0f;
- *base >>= PAGE_SHIFT;
-
- /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
- * Note: shift==0xf means 4G, this is unsupported.
- */
- if (shift)
- *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1);
- else
- *size = 0;
-
- /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */
- if (reg < 7) {
- switch (rcr) {
- case 1:
- *type = MTRR_TYPE_UNCACHABLE;
- break;
- case 8:
- *type = MTRR_TYPE_WRBACK;
- break;
- case 9:
- *type = MTRR_TYPE_WRCOMB;
- break;
- case 24:
- default:
- *type = MTRR_TYPE_WRTHROUGH;
- break;
- }
- } else {
- switch (rcr) {
- case 0:
- *type = MTRR_TYPE_UNCACHABLE;
- break;
- case 8:
- *type = MTRR_TYPE_WRCOMB;
- break;
- case 9:
- *type = MTRR_TYPE_WRBACK;
- break;
- case 25:
- default:
- *type = MTRR_TYPE_WRTHROUGH;
- break;
- }
- }
-}
-
-static int
-cyrix_get_free_region(unsigned long base, unsigned long size)
-/* [SUMMARY] Get a free ARR.
- <base> The starting (base) address of the region.
- <size> The size (in bytes) of the region.
- [RETURNS] The index of the region on success, else -1 on error.
-*/
-{
- int i;
- mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
-
- /* If we are to set up a region >32M then look at ARR7 immediately */
- if (size > 0x2000) {
- cyrix_get_arr(7, &lbase, &lsize, <ype);
- if (lsize == 0)
- return 7;
- /* Else try ARR0-ARR6 first */
- } else {
- for (i = 0; i < 7; i++) {
- cyrix_get_arr(i, &lbase, &lsize, <ype);
- if ((i == 3) && arr3_protected)
- continue;
- if (lsize == 0)
- return i;
- }
- /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least
256K */
- cyrix_get_arr(i, &lbase, &lsize, <ype);
- if ((lsize == 0) && (size >= 0x40))
- return i;
- }
- return -ENOSPC;
-}
-
-static u32 cr4 = 0;
-static u32 ccr3;
-
-static void prepare_set(void)
-{
- u32 cr0;
-
- /* Save value of CR4 and clear Page Global Enable (bit 7) */
- if ( cpu_has_pge ) {
- cr4 = read_cr4();
- write_cr4(cr4 & (unsigned char) ~(1 << 7));
- }
-
- /* Disable and flush caches. Note that wbinvd flushes the TLBs as
- a side-effect */
- cr0 = read_cr0() | 0x40000000;
- wbinvd();
- write_cr0(cr0);
- wbinvd();
-
- /* Cyrix ARRs - everything else were excluded at the top */
- ccr3 = getCx86(CX86_CCR3);
-
- /* Cyrix ARRs - everything else were excluded at the top */
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
-
-}
-
-static void post_set(void)
-{
- /* Flush caches and TLBs */
- wbinvd();
-
- /* Cyrix ARRs - everything else was excluded at the top */
- setCx86(CX86_CCR3, ccr3);
-
- /* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
-
- /* Restore value of CR4 */
- if ( cpu_has_pge )
- write_cr4(cr4);
-}
-
-static void cyrix_set_arr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-{
- unsigned char arr, arr_type, arr_size;
-
- arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
-
- /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */
- if (reg >= 7)
- size >>= 6;
-
- size &= 0x7fff; /* make sure arr_size <= 14 */
- for (arr_size = 0; size; arr_size++, size >>= 1) ;
-
- if (reg < 7) {
- switch (type) {
- case MTRR_TYPE_UNCACHABLE:
- arr_type = 1;
- break;
- case MTRR_TYPE_WRCOMB:
- arr_type = 9;
- break;
- case MTRR_TYPE_WRTHROUGH:
- arr_type = 24;
- break;
- default:
- arr_type = 8;
- break;
- }
- } else {
- switch (type) {
- case MTRR_TYPE_UNCACHABLE:
- arr_type = 0;
- break;
- case MTRR_TYPE_WRCOMB:
- arr_type = 8;
- break;
- case MTRR_TYPE_WRTHROUGH:
- arr_type = 25;
- break;
- default:
- arr_type = 9;
- break;
- }
- }
-
- prepare_set();
-
- base <<= PAGE_SHIFT;
- setCx86(arr, ((unsigned char *) &base)[3]);
- setCx86(arr + 1, ((unsigned char *) &base)[2]);
- setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size);
- setCx86(CX86_RCR_BASE + reg, arr_type);
-
- post_set();
-}
-
-typedef struct {
- unsigned long base;
- unsigned int size;
- mtrr_type type;
-} arr_state_t;
-
-arr_state_t arr_state[8] __initdata = {
- {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL},
- {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}
-};
-
-unsigned char ccr_state[7] __initdata = { 0, 0, 0, 0, 0, 0, 0 };
-
-static void cyrix_set_all(void)
-{
- int i;
-
- prepare_set();
-
- /* the CCRs are not contiguous */
- for (i = 0; i < 4; i++)
- setCx86(CX86_CCR0 + i, ccr_state[i]);
- for (; i < 7; i++)
- setCx86(CX86_CCR4 + i, ccr_state[i]);
- for (i = 0; i < 8; i++)
- cyrix_set_arr(i, arr_state[i].base,
- arr_state[i].size, arr_state[i].type);
-
- post_set();
-}
-
-/*
- * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
- * with the SMM (System Management Mode) mode. So we need the following:
- * Check whether SMI_LOCK (CCR3 bit 0) is set
- * if it is set, write a warning message: ARR3 cannot be changed!
- * (it cannot be changed until the next processor reset)
- * if it is reset, then we can change it, set all the needed bits:
- * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
- * - disable access to SMM memory (CCR1 bit 2 reset)
- * - disable SMM mode (CCR1 bit 1 reset)
- * - disable write protection of ARR3 (CCR6 bit 1 reset)
- * - (maybe) disable ARR3
- * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
- */
-static void __init
-cyrix_arr_init(void)
-{
- struct set_mtrr_context ctxt;
- unsigned char ccr[7];
- int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
-#ifdef CONFIG_SMP
- int i;
-#endif
-
- /* flush cache and enable MAPEN */
- set_mtrr_prepare_save(&ctxt);
- set_mtrr_cache_disable(&ctxt);
-
- /* Save all CCRs locally */
- ccr[0] = getCx86(CX86_CCR0);
- ccr[1] = getCx86(CX86_CCR1);
- ccr[2] = getCx86(CX86_CCR2);
- ccr[3] = ctxt.ccr3;
- ccr[4] = getCx86(CX86_CCR4);
- ccr[5] = getCx86(CX86_CCR5);
- ccr[6] = getCx86(CX86_CCR6);
-
- if (ccr[3] & 1) {
- ccrc[3] = 1;
- arr3_protected = 1;
- } else {
- /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
- * access to SMM memory through ARR3 (bit 7).
- */
- if (ccr[1] & 0x80) {
- ccr[1] &= 0x7f;
- ccrc[1] |= 0x80;
- }
- if (ccr[1] & 0x04) {
- ccr[1] &= 0xfb;
- ccrc[1] |= 0x04;
- }
- if (ccr[1] & 0x02) {
- ccr[1] &= 0xfd;
- ccrc[1] |= 0x02;
- }
- arr3_protected = 0;
- if (ccr[6] & 0x02) {
- ccr[6] &= 0xfd;
- ccrc[6] = 1; /* Disable write protection of ARR3 */
- setCx86(CX86_CCR6, ccr[6]);
- }
- /* Disable ARR3. This is safe now that we disabled SMM. */
- /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
- }
- /* If we changed CCR1 in memory, change it in the processor, too. */
- if (ccrc[1])
- setCx86(CX86_CCR1, ccr[1]);
-
- /* Enable ARR usage by the processor */
- if (!(ccr[5] & 0x20)) {
- ccr[5] |= 0x20;
- ccrc[5] = 1;
- setCx86(CX86_CCR5, ccr[5]);
- }
-#ifdef CONFIG_SMP
- for (i = 0; i < 7; i++)
- ccr_state[i] = ccr[i];
- for (i = 0; i < 8; i++)
- cyrix_get_arr(i,
- &arr_state[i].base, &arr_state[i].size,
- &arr_state[i].type);
-#endif
-
- set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */
-
- if (ccrc[5])
- printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled
manually\n");
- if (ccrc[3])
- printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
-/*
- if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3
disabled\n");
- if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
- if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
-*/
- if (ccrc[6])
- printk(KERN_INFO "mtrr: ARR3 was write protected,
unprotected\n");
-}
-
-static struct mtrr_ops cyrix_mtrr_ops = {
- .vendor = X86_VENDOR_CYRIX,
- .init = cyrix_arr_init,
- .set_all = cyrix_set_all,
- .set = cyrix_set_arr,
- .get = cyrix_get_arr,
- .get_free_region = cyrix_get_free_region,
- .validate_add_page = generic_validate_add_page,
- .have_wrcomb = positive_have_wrcomb,
-};
-
-int __init cyrix_init_mtrr(void)
-{
- set_mtrr_ops(&cyrix_mtrr_ops);
- return 0;
-}
-
-//arch_initcall(cyrix_init_mtrr);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/mtrr/generic.c
--- a/xen/arch/x86/mtrr/generic.c Mon Feb 13 16:41:23 2006
+++ /dev/null Tue Feb 14 15:23:43 2006
@@ -1,407 +0,0 @@
-/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
- because MTRRs can span upto 40 bits (36bits on most modern x86) */
-#include <xen/lib.h>
-#include <xen/init.h>
-#include <xen/mm.h>
-#include <asm/flushtlb.h>
-#include <asm/io.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include <asm/system.h>
-#include <asm/cpufeature.h>
-#include "mtrr.h"
-
-struct mtrr_state {
- struct mtrr_var_range *var_ranges;
- mtrr_type fixed_ranges[NUM_FIXED_RANGES];
- unsigned char enabled;
- mtrr_type def_type;
-};
-
-static unsigned long smp_changes_mask;
-struct mtrr_state mtrr_state = {};
-
-
-/* Get the MSR pair relating to a var range */
-static void __init
-get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
-{
- rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
- rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
-}
-
-static void __init
-get_fixed_ranges(mtrr_type * frs)
-{
- unsigned int *p = (unsigned int *) frs;
- int i;
-
- rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
-
- for (i = 0; i < 2; i++)
- rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
- for (i = 0; i < 8; i++)
- rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
-}
-
-/* Grab all of the MTRR state for this CPU into *state */
-void __init get_mtrr_state(void)
-{
- unsigned int i;
- struct mtrr_var_range *vrs;
- unsigned lo, dummy;
-
- if (!mtrr_state.var_ranges) {
- mtrr_state.var_ranges = xmalloc_array(struct mtrr_var_range,
- num_var_ranges);
- if (!mtrr_state.var_ranges)
- return;
- }
- vrs = mtrr_state.var_ranges;
-
- for (i = 0; i < num_var_ranges; i++)
- get_mtrr_var_range(i, &vrs[i]);
- get_fixed_ranges(mtrr_state.fixed_ranges);
-
- rdmsr(MTRRdefType_MSR, lo, dummy);
- mtrr_state.def_type = (lo & 0xff);
- mtrr_state.enabled = (lo & 0xc00) >> 10;
-}
-
-/* Free resources associated with a struct mtrr_state */
-void __init finalize_mtrr_state(void)
-{
- xfree(mtrr_state.var_ranges);
- mtrr_state.var_ranges = NULL;
-}
-
-/* Some BIOS's are fucked and don't set all MTRRs the same! */
-void __init mtrr_state_warn(void)
-{
- unsigned long mask = smp_changes_mask;
-
- if (!mask)
- return;
- if (mask & MTRR_CHANGE_MASK_FIXED)
- printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed
MTRR settings\n");
- if (mask & MTRR_CHANGE_MASK_VARIABLE)
- printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable
MTRR settings\n");
- if (mask & MTRR_CHANGE_MASK_DEFTYPE)
- printk(KERN_WARNING "mtrr: your CPUs had inconsistent
MTRRdefType settings\n");
- printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
- printk(KERN_INFO "mtrr: corrected configuration.\n");
-}
-
-
-int generic_get_free_region(unsigned long base, unsigned long size)
-/* [SUMMARY] Get a free MTRR.
- <base> The starting (base) address of the region.
- <size> The size (in bytes) of the region.
- [RETURNS] The index of the region on success, else -1 on error.
-*/
-{
- int i, max;
- mtrr_type ltype;
- unsigned long lbase;
- unsigned lsize;
-
- max = num_var_ranges;
- for (i = 0; i < max; ++i) {
- mtrr_if->get(i, &lbase, &lsize, <ype);
- if (lsize == 0)
- return i;
- }
- return -ENOSPC;
-}
-
-void generic_get_mtrr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
-{
- unsigned int mask_lo, mask_hi, base_lo, base_hi;
-
- rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
- if ((mask_lo & 0x800) == 0) {
- /* Invalid (i.e. free) range */
- *base = 0;
- *size = 0;
- *type = 0;
- return;
- }
-
- rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
-
- /* Work out the shifted address mask. */
- mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT)
- | mask_lo >> PAGE_SHIFT;
-
- /* This works correctly if size is a power of two, i.e. a
- contiguous range. */
- *size = -mask_lo;
- *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
- *type = base_lo & 0xff;
-}
-
-static int set_fixed_ranges(mtrr_type * frs)
-{
- unsigned int *p = (unsigned int *) frs;
- int changed = FALSE;
- int i;
- unsigned int lo, hi;
-
- rdmsr(MTRRfix64K_00000_MSR, lo, hi);
- if (p[0] != lo || p[1] != hi) {
- wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
- changed = TRUE;
- }
-
- for (i = 0; i < 2; i++) {
- rdmsr(MTRRfix16K_80000_MSR + i, lo, hi);
- if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
- wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2],
- p[3 + i * 2]);
- changed = TRUE;
- }
- }
-
- for (i = 0; i < 8; i++) {
- rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi);
- if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
- wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2],
- p[7 + i * 2]);
- changed = TRUE;
- }
- }
- return changed;
-}
-
-/* Set the MSR pair relating to a var range. Returns TRUE if
- changes are made */
-static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
-{
- unsigned int lo, hi;
- int changed = FALSE;
-
- rdmsr(MTRRphysBase_MSR(index), lo, hi);
- if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
- || (vr->base_hi & 0xfUL) != (hi & 0xfUL)) {
- wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
- changed = TRUE;
- }
-
- rdmsr(MTRRphysMask_MSR(index), lo, hi);
-
- if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL)
- || (vr->mask_hi & 0xfUL) != (hi & 0xfUL)) {
- wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
- changed = TRUE;
- }
- return changed;
-}
-
-static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi)
-/* [SUMMARY] Set the MTRR state for this CPU.
- <state> The MTRR state information to read.
- <ctxt> Some relevant CPU context.
- [NOTE] The CPU must already be in a safe state for MTRR changes.
- [RETURNS] 0 if no changes made, else a mask indication what was changed.
-*/
-{
- unsigned int i;
- unsigned long change_mask = 0;
-
- for (i = 0; i < num_var_ranges; i++)
- if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
- change_mask |= MTRR_CHANGE_MASK_VARIABLE;
-
- if (set_fixed_ranges(mtrr_state.fixed_ranges))
- change_mask |= MTRR_CHANGE_MASK_FIXED;
-
- /* Set_mtrr_restore restores the old value of MTRRdefType,
- so to set it we fiddle with the saved value */
- if ((deftype_lo & 0xff) != mtrr_state.def_type
- || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
- deftype_lo |= (mtrr_state.def_type | mtrr_state.enabled << 10);
- change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
- }
-
- return change_mask;
-}
-
-
-static unsigned long cr4 = 0;
-static u32 deftype_lo, deftype_hi;
-static spinlock_t set_atomicity_lock = SPIN_LOCK_UNLOCKED;
-
-/*
- * Since we are disabling the cache don't allow any interrupts - they
- * would run extremely slow and would only increase the pain. The caller must
- * ensure that local interrupts are disabled and are reenabled after post_set()
- * has been called.
- */
-
-static void prepare_set(void)
-{
- unsigned long cr0;
-
- /* Note that this is not ideal, since the cache is only
flushed/disabled
- for this CPU while the MTRRs are changed, but changing this requires
- more invasive changes to the way the kernel boots */
-
- spin_lock(&set_atomicity_lock);
-
- /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
- cr0 = read_cr0() | 0x40000000; /* set CD flag */
- write_cr0(cr0);
- wbinvd();
-
- /* Save value of CR4 and clear Page Global Enable (bit 7) */
- if ( cpu_has_pge ) {
- cr4 = read_cr4();
- write_cr4(cr4 & ~X86_CR4_PGE);
- }
-
- /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
- local_flush_tlb();
-
- /* Save MTRR state */
- rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
-
- /* Disable MTRRs, and set the default type to uncached */
- wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi);
-}
-
-static void post_set(void)
-{
- /* Flush TLBs (no need to flush caches - they are disabled) */
- local_flush_tlb();
-
- /* Intel (P6) standard MTRRs */
- wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
-
- /* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
-
- /* Restore value of CR4 */
- if ( cpu_has_pge )
- write_cr4(cr4);
- spin_unlock(&set_atomicity_lock);
-}
-
-static void generic_set_all(void)
-{
- unsigned long mask, count;
- unsigned long flags;
-
- local_irq_save(flags);
- prepare_set();
-
- /* Actually set the state */
- mask = set_mtrr_state(deftype_lo,deftype_hi);
-
- post_set();
- local_irq_restore(flags);
-
- /* Use the atomic bitops to update the global mask */
- for (count = 0; count < sizeof mask * 8; ++count) {
- if (mask & 0x01)
- set_bit(count, &smp_changes_mask);
- mask >>= 1;
- }
-
-}
-
-static void generic_set_mtrr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-/* [SUMMARY] Set variable MTRR register on the local CPU.
- <reg> The register to set.
- <base> The base address of the region.
- <size> The size of the region. If this is 0 the region is disabled.
- <type> The type of the region.
- <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
- be done externally.
- [RETURNS] Nothing.
-*/
-{
- unsigned long flags;
-
- local_irq_save(flags);
- prepare_set();
-
- if (size == 0) {
- /* The invalid bit is kept in the mask, so we simply clear the
- relevant mask register to disable a range. */
- wrmsr(MTRRphysMask_MSR(reg), 0, 0);
- } else {
- wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type,
- (base & size_and_mask) >> (32 - PAGE_SHIFT));
- wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800,
- (-size & size_and_mask) >> (32 - PAGE_SHIFT));
- }
-
- post_set();
- local_irq_restore(flags);
-}
-
-int generic_validate_add_page(unsigned long base, unsigned long size, unsigned
int type)
-{
- unsigned long lbase, last;
-
- /* For Intel PPro stepping <= 7, must be 4 MiB aligned
- and not touch 0x70000000->0x7003FFFF */
- if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
- boot_cpu_data.x86_model == 1 &&
- boot_cpu_data.x86_mask <= 7) {
- if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
- printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB
aligned\n", base);
- return -EINVAL;
- }
- if (!(base + size < 0x70000000 || base > 0x7003FFFF) &&
- (type == MTRR_TYPE_WRCOMB
- || type == MTRR_TYPE_WRBACK)) {
- printk(KERN_WARNING "mtrr: writable mtrr between
0x70000000 and 0x7003FFFF may hang the CPU.\n");
- return -EINVAL;
- }
- }
-
- if (base + size < 0x100) {
- printk(KERN_WARNING "mtrr: cannot set region below 1 MiB
(0x%lx000,0x%lx000)\n",
- base, size);
- return -EINVAL;
- }
- /* Check upper bits of base and last are equal and lower bits are 0
- for base and 1 for last */
- last = base + size - 1;
- for (lbase = base; !(lbase & 1) && (last & 1);
- lbase = lbase >> 1, last = last >> 1) ;
- if (lbase != last) {
- printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a
size(0x%lx000) boundary\n",
- base, size);
- return -EINVAL;
- }
- return 0;
-}
-
-
-int generic_have_wrcomb(void)
-{
- unsigned long config, dummy;
- rdmsr(MTRRcap_MSR, config, dummy);
- return (config & (1 << 10));
-}
-
-int positive_have_wrcomb(void)
-{
- return 1;
-}
-
-/* generic structure...
- */
-struct mtrr_ops generic_mtrr_ops = {
- .use_intel_if = 1,
- .set_all = generic_set_all,
- .get = generic_get_mtrr,
- .get_free_region = generic_get_free_region,
- .set = generic_set_mtrr,
- .validate_add_page = generic_validate_add_page,
- .have_wrcomb = generic_have_wrcomb,
-};
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/mtrr/main.c
--- a/xen/arch/x86/mtrr/main.c Mon Feb 13 16:41:23 2006
+++ /dev/null Tue Feb 14 15:23:43 2006
@@ -1,647 +0,0 @@
-/* Generic MTRR (Memory Type Range Register) driver.
-
- Copyright (C) 1997-2000 Richard Gooch
- Copyright (c) 2002 Patrick Mochel
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
- Richard Gooch may be reached by email at rgooch@xxxxxxxxxxxxx
- The postal address is:
- Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
-
- Source: "Pentium Pro Family Developer's Manual, Volume 3:
- Operating System Writer's Guide" (Intel document number 242692),
- section 11.11.7
-
- This was cleaned and made readable by Patrick Mochel <mochel@xxxxxxxx>
- on 6-7 March 2002.
- Source: Intel Architecture Software Developers Manual, Volume 3:
- System Programming Guide; Section 9.11. (1997 edition - PPro).
-*/
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/smp.h>
-#include <xen/spinlock.h>
-#include <asm/mtrr.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include "mtrr.h"
-
-#define MTRR_VERSION "2.0 (20020519)"
-
-/* No blocking mutexes in Xen. Spin instead. */
-#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED
-#define down(_m) spin_lock(_m)
-#define up(_m) spin_unlock(_m)
-
-u32 num_var_ranges = 0;
-
-unsigned int *usage_table;
-static DECLARE_MUTEX(main_lock);
-
-u32 size_or_mask, size_and_mask;
-
-static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
-
-struct mtrr_ops * mtrr_if = NULL;
-
-__initdata char *mtrr_if_name[] = {
- "none", "Intel", "AMD K6", "Cyrix ARR", "Centaur MCR"
-};
-
-static void set_mtrr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type);
-
-extern int arr3_protected;
-
-static char *mtrr_strings[MTRR_NUM_TYPES] =
-{
- "uncachable", /* 0 */
- "write-combining", /* 1 */
- "?", /* 2 */
- "?", /* 3 */
- "write-through", /* 4 */
- "write-protect", /* 5 */
- "write-back", /* 6 */
-};
-
-char *mtrr_attrib_to_str(int x)
-{
- return (x <= 6) ? mtrr_strings[x] : "?";
-}
-
-void set_mtrr_ops(struct mtrr_ops * ops)
-{
- if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
- mtrr_ops[ops->vendor] = ops;
-}
-
-/* Returns non-zero if we have the write-combining memory type */
-static int have_wrcomb(void)
-{
- return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
-}
-
-/* This function returns the number of variable MTRRs */
-void __init set_num_var_ranges(void)
-{
- unsigned long config = 0, dummy;
-
- if (use_intel()) {
- rdmsr(MTRRcap_MSR, config, dummy);
- } else if (is_cpu(AMD))
- config = 2;
- else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
- config = 8;
- num_var_ranges = config & 0xff;
-}
-
-static void __init init_table(void)
-{
- int i, max;
-
- max = num_var_ranges;
- if ((usage_table = xmalloc_array(unsigned int, max)) == NULL) {
- printk(KERN_ERR "mtrr: could not allocate\n");
- return;
- }
- for (i = 0; i < max; i++)
- usage_table[i] = 1;
-}
-
-struct set_mtrr_data {
- atomic_t count;
- atomic_t gate;
- unsigned long smp_base;
- unsigned long smp_size;
- unsigned int smp_reg;
- mtrr_type smp_type;
-};
-
-#ifdef CONFIG_SMP
-
-static void ipi_handler(void *info)
-/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
- [RETURNS] Nothing.
-*/
-{
- struct set_mtrr_data *data = info;
- unsigned long flags;
-
- local_irq_save(flags);
-
- atomic_dec(&data->count);
- while(!atomic_read(&data->gate))
- cpu_relax();
-
- /* The master has cleared me to execute */
- if (data->smp_reg != ~0U)
- mtrr_if->set(data->smp_reg, data->smp_base,
- data->smp_size, data->smp_type);
- else
- mtrr_if->set_all();
-
- atomic_dec(&data->count);
- while(atomic_read(&data->gate))
- cpu_relax();
-
- atomic_dec(&data->count);
- local_irq_restore(flags);
-}
-
-#endif
-
-/**
- * set_mtrr - update mtrrs on all processors
- * @reg: mtrr in question
- * @base: mtrr base
- * @size: mtrr size
- * @type: mtrr type
- *
- * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
- *
- * 1. Send IPI to do the following:
- * 2. Disable Interrupts
- * 3. Wait for all procs to do so
- * 4. Enter no-fill cache mode
- * 5. Flush caches
- * 6. Clear PGE bit
- * 7. Flush all TLBs
- * 8. Disable all range registers
- * 9. Update the MTRRs
- * 10. Enable all range registers
- * 11. Flush all TLBs and caches again
- * 12. Enter normal cache mode and reenable caching
- * 13. Set PGE
- * 14. Wait for buddies to catch up
- * 15. Enable interrupts.
- *
- * What does that mean for us? Well, first we set data.count to the number
- * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
- * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
- * Meanwhile, they are waiting for that flag to be set. Once it's set, each
- * CPU goes through the transition of updating MTRRs. The CPU vendors may each
do it
- * differently, so we call mtrr_if->set() callback and let them take care of
it.
- * When they're done, they again decrement data->count and wait for data.gate
to
- * be reset.
- * When we finish, we wait for data.count to hit 0 and toggle the data.gate
flag.
- * Everyone then enables interrupts and we all continue on.
- *
- * Note that the mechanism is the same for UP systems, too; all the SMP stuff
- * becomes nops.
- */
-static void set_mtrr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-{
- struct set_mtrr_data data;
- unsigned long flags;
-
- data.smp_reg = reg;
- data.smp_base = base;
- data.smp_size = size;
- data.smp_type = type;
- atomic_set(&data.count, num_booting_cpus() - 1);
- atomic_set(&data.gate,0);
-
- /* Start the ball rolling on other CPUs */
- if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
- panic("mtrr: timed out waiting for other CPUs\n");
-
- local_irq_save(flags);
-
- while(atomic_read(&data.count))
- cpu_relax();
-
- /* ok, reset count and toggle gate */
- atomic_set(&data.count, num_booting_cpus() - 1);
- atomic_set(&data.gate,1);
-
- /* do our MTRR business */
-
- /* HACK!
- * We use this same function to initialize the mtrrs on boot.
- * The state of the boot cpu's mtrrs has been saved, and we want
- * to replicate across all the APs.
- * If we're doing that @reg is set to something special...
- */
- if (reg != ~0U)
- mtrr_if->set(reg,base,size,type);
-
- /* wait for the others */
- while(atomic_read(&data.count))
- cpu_relax();
-
- atomic_set(&data.count, num_booting_cpus() - 1);
- atomic_set(&data.gate,0);
-
- /*
- * Wait here for everyone to have seen the gate change
- * So we're the last ones to touch 'data'
- */
- while(atomic_read(&data.count))
- cpu_relax();
-
- local_irq_restore(flags);
-}
-
-/**
- * mtrr_add_page - Add a memory type region
- * @base: Physical base address of region in pages (4 KB)
- * @size: Physical size of region in pages (4 KB)
- * @type: Type of MTRR desired
- * @increment: If this is true do usage counting on the region
- *
- * Memory type region registers control the caching on newer Intel and
- * non Intel processors. This function allows drivers to request an
- * MTRR is added. The details and hardware specifics of each processor's
- * implementation are hidden from the caller, but nevertheless the
- * caller should expect to need to provide a power of two size on an
- * equivalent power of two boundary.
- *
- * If the region cannot be added either because all regions are in use
- * or the CPU cannot support it a negative value is returned. On success
- * the register number for this entry is returned, but should be treated
- * as a cookie only.
- *
- * On a multiprocessor machine the changes are made to all processors.
- * This is required on x86 by the Intel processors.
- *
- * The available types are
- *
- * %MTRR_TYPE_UNCACHABLE - No caching
- *
- * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
- *
- * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
- *
- * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
- *
- * BUGS: Needs a quiet flag for the cases where drivers do not mind
- * failures and do not wish system log messages to be sent.
- */
-
-int mtrr_add_page(unsigned long base, unsigned long size,
- unsigned int type, char increment)
-{
- int i;
- mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
- int error;
-
- if (!mtrr_if)
- return -ENXIO;
-
- if ((error = mtrr_if->validate_add_page(base,size,type)))
- return error;
-
- if (type >= MTRR_NUM_TYPES) {
- printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
- return -EINVAL;
- }
-
- /* If the type is WC, check that this processor supports it */
- if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
- printk(KERN_WARNING
- "mtrr: your processor doesn't support
write-combining\n");
- return -ENOSYS;
- }
-
- if (base & size_or_mask || size & size_or_mask) {
- printk(KERN_WARNING "mtrr: base or size exceeds the MTRR
width\n");
- return -EINVAL;
- }
-
- error = -EINVAL;
-
- /* Search for existing MTRR */
- down(&main_lock);
- for (i = 0; i < num_var_ranges; ++i) {
- mtrr_if->get(i, &lbase, &lsize, <ype);
- if (base >= lbase + lsize)
- continue;
- if ((base < lbase) && (base + size <= lbase))
- continue;
- /* At this point we know there is some kind of
overlap/enclosure */
- if ((base < lbase) || (base + size > lbase + lsize)) {
- printk(KERN_WARNING
- "mtrr: 0x%lx000,0x%lx000 overlaps existing"
- " 0x%lx000,0x%x000\n", base, size, lbase,
- lsize);
- goto out;
- }
- /* New region is enclosed by an existing region */
- if (ltype != type) {
- if (type == MTRR_TYPE_UNCACHABLE)
- continue;
- printk (KERN_WARNING "mtrr: type mismatch for
%lx000,%lx000 old: %s new: %s\n",
- base, size, mtrr_attrib_to_str(ltype),
- mtrr_attrib_to_str(type));
- goto out;
- }
- if (increment)
- ++usage_table[i];
- error = i;
- goto out;
- }
- /* Search for an empty MTRR */
- i = mtrr_if->get_free_region(base, size);
- if (i >= 0) {
- set_mtrr(i, base, size, type);
- usage_table[i] = 1;
- } else
- printk(KERN_INFO "mtrr: no more MTRRs available\n");
- error = i;
- out:
- up(&main_lock);
- return error;
-}
-
-/**
- * mtrr_add - Add a memory type region
- * @base: Physical base address of region
- * @size: Physical size of region
- * @type: Type of MTRR desired
- * @increment: If this is true do usage counting on the region
- *
- * Memory type region registers control the caching on newer Intel and
- * non Intel processors. This function allows drivers to request an
- * MTRR is added. The details and hardware specifics of each processor's
- * implementation are hidden from the caller, but nevertheless the
- * caller should expect to need to provide a power of two size on an
- * equivalent power of two boundary.
- *
- * If the region cannot be added either because all regions are in use
- * or the CPU cannot support it a negative value is returned. On success
- * the register number for this entry is returned, but should be treated
- * as a cookie only.
- *
- * On a multiprocessor machine the changes are made to all processors.
- * This is required on x86 by the Intel processors.
- *
- * The available types are
- *
- * %MTRR_TYPE_UNCACHABLE - No caching
- *
- * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
- *
- * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
- *
- * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
- *
- * BUGS: Needs a quiet flag for the cases where drivers do not mind
- * failures and do not wish system log messages to be sent.
- */
-
-int
-mtrr_add(unsigned long base, unsigned long size, unsigned int type,
- char increment)
-{
- if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
- printk(KERN_WARNING "mtrr: size and base must be multiples of 4
kiB\n");
- printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size,
base);
- return -EINVAL;
- }
- return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
- increment);
-}
-
-/**
- * mtrr_del_page - delete a memory type region
- * @reg: Register returned by mtrr_add
- * @base: Physical base address
- * @size: Size of region
- *
- * If register is supplied then base and size are ignored. This is
- * how drivers should call it.
- *
- * Releases an MTRR region. If the usage count drops to zero the
- * register is freed and the region returns to default state.
- * On success the register is returned, on failure a negative error
- * code.
- */
-
-int mtrr_del_page(int reg, unsigned long base, unsigned long size)
-{
- int i, max;
- mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
- int error = -EINVAL;
-
- if (!mtrr_if)
- return -ENXIO;
-
- max = num_var_ranges;
- down(&main_lock);
- if (reg < 0) {
- /* Search for existing MTRR */
- for (i = 0; i < max; ++i) {
- mtrr_if->get(i, &lbase, &lsize, <ype);
- if (lbase == base && lsize == size) {
- reg = i;
- break;
- }
- }
- if (reg < 0) {
- printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000
found\n", base,
- size);
- goto out;
- }
- }
- if (reg >= max) {
- printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
- goto out;
- }
- if (is_cpu(CYRIX) && !use_intel()) {
- if ((reg == 3) && arr3_protected) {
- printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
- goto out;
- }
- }
- mtrr_if->get(reg, &lbase, &lsize, <ype);
- if (lsize < 1) {
- printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
- goto out;
- }
- if (usage_table[reg] < 1) {
- printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
- goto out;
- }
- if (--usage_table[reg] < 1)
- set_mtrr(reg, 0, 0, 0);
- error = reg;
- out:
- up(&main_lock);
- return error;
-}
-/**
- * mtrr_del - delete a memory type region
- * @reg: Register returned by mtrr_add
- * @base: Physical base address
- * @size: Size of region
- *
- * If register is supplied then base and size are ignored. This is
- * how drivers should call it.
- *
- * Releases an MTRR region. If the usage count drops to zero the
- * register is freed and the region returns to default state.
- * On success the register is returned, on failure a negative error
- * code.
- */
-
-int
-mtrr_del(int reg, unsigned long base, unsigned long size)
-{
- if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
- printk(KERN_INFO "mtrr: size and base must be multiples of 4
kiB\n");
- printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size,
base);
- return -EINVAL;
- }
- return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
-}
-
-EXPORT_SYMBOL(mtrr_add);
-EXPORT_SYMBOL(mtrr_del);
-
-/* HACK ALERT!
- * These should be called implicitly, but we can't yet until all the initcall
- * stuff is done...
- */
-extern void amd_init_mtrr(void);
-extern void cyrix_init_mtrr(void);
-extern void centaur_init_mtrr(void);
-
-static void __init init_ifs(void)
-{
- amd_init_mtrr();
- cyrix_init_mtrr();
- centaur_init_mtrr();
-}
-
-static void __init init_other_cpus(void)
-{
- if (use_intel())
- get_mtrr_state();
-
- /* bring up the other processors */
- set_mtrr(~0U,0,0,0);
-
- if (use_intel()) {
- finalize_mtrr_state();
- mtrr_state_warn();
- }
-}
-
-
-struct mtrr_value {
- mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
-};
-
-/**
- * mtrr_init - initialize mtrrs on the boot CPU
- *
- * This needs to be called early; before any of the other CPUs are
- * initialized (i.e. before smp_init()).
- *
- */
-static int __init mtrr_init(void)
-{
- init_ifs();
-
- if (cpu_has_mtrr) {
- mtrr_if = &generic_mtrr_ops;
- size_or_mask = 0xff000000; /* 36 bits */
- size_and_mask = 0x00f00000;
-
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- /* The original Athlon docs said that
- total addressable memory is 44 bits wide.
- It was not really clear whether its MTRRs
- follow this or not. (Read: 44 or 36 bits).
- However, "x86-64_overview.pdf" explicitly
- states that "previous implementations support
- 36 bit MTRRs" and also provides a way to
- query the width (in bits) of the physical
- addressable memory on the Hammer family.
- */
- if (boot_cpu_data.x86 == 15
- && (cpuid_eax(0x80000000) >= 0x80000008)) {
- u32 phys_addr;
- phys_addr = cpuid_eax(0x80000008) & 0xff;
- size_or_mask =
- ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
- size_and_mask = ~size_or_mask & 0xfff00000;
- }
- /* Athlon MTRRs use an Intel-compatible interface for
- * getting and setting */
- break;
- case X86_VENDOR_CENTAUR:
- if (boot_cpu_data.x86 == 6) {
- /* VIA Cyrix family have Intel style MTRRs, but
don't support PAE */
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
- break;
-
- default:
- break;
- }
- } else {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (cpu_has_k6_mtrr) {
- /* Pre-Athlon (K6) AMD CPU MTRRs */
- mtrr_if = mtrr_ops[X86_VENDOR_AMD];
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
- break;
- case X86_VENDOR_CENTAUR:
- if (cpu_has_centaur_mcr) {
- mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
- break;
- case X86_VENDOR_CYRIX:
- if (cpu_has_cyrix_arr) {
- mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
- size_or_mask = 0xfff00000; /* 32 bits */
- size_and_mask = 0;
- }
- break;
- default:
- break;
- }
- }
- printk(KERN_INFO "mtrr: v%s\n",MTRR_VERSION);
-
- if (mtrr_if) {
- set_num_var_ranges();
- init_table();
- init_other_cpus();
- return 0;
- }
- return -ENXIO;
-}
-
-__initcall(mtrr_init);
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/mtrr/mtrr.h
--- a/xen/arch/x86/mtrr/mtrr.h Mon Feb 13 16:41:23 2006
+++ /dev/null Tue Feb 14 15:23:43 2006
@@ -1,99 +0,0 @@
-/*
- * local mtrr defines.
- */
-
-#ifndef TRUE
-#define TRUE 1
-#define FALSE 0
-#endif
-
-#define MTRRcap_MSR 0x0fe
-#define MTRRdefType_MSR 0x2ff
-
-#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
-#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-
-#define NUM_FIXED_RANGES 88
-#define MTRRfix64K_00000_MSR 0x250
-#define MTRRfix16K_80000_MSR 0x258
-#define MTRRfix16K_A0000_MSR 0x259
-#define MTRRfix4K_C0000_MSR 0x268
-#define MTRRfix4K_C8000_MSR 0x269
-#define MTRRfix4K_D0000_MSR 0x26a
-#define MTRRfix4K_D8000_MSR 0x26b
-#define MTRRfix4K_E0000_MSR 0x26c
-#define MTRRfix4K_E8000_MSR 0x26d
-#define MTRRfix4K_F0000_MSR 0x26e
-#define MTRRfix4K_F8000_MSR 0x26f
-
-#define MTRR_CHANGE_MASK_FIXED 0x01
-#define MTRR_CHANGE_MASK_VARIABLE 0x02
-#define MTRR_CHANGE_MASK_DEFTYPE 0x04
-
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
- an 8 bit field: */
-typedef u8 mtrr_type;
-
-struct mtrr_ops {
- u32 vendor;
- u32 use_intel_if;
- void (*init)(void);
- void (*set)(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type);
- void (*set_all)(void);
-
- void (*get)(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type);
- int (*get_free_region) (unsigned long base, unsigned long size);
-
- int (*validate_add_page)(unsigned long base, unsigned long size,
- unsigned int type);
- int (*have_wrcomb)(void);
-};
-
-extern int generic_get_free_region(unsigned long base, unsigned long size);
-extern int generic_validate_add_page(unsigned long base, unsigned long size,
- unsigned int type);
-
-extern struct mtrr_ops generic_mtrr_ops;
-
-extern int generic_have_wrcomb(void);
-extern int positive_have_wrcomb(void);
-
-/* library functions for processor-specific routines */
-struct set_mtrr_context {
- unsigned long flags;
- unsigned long deftype_lo;
- unsigned long deftype_hi;
- unsigned long cr4val;
- unsigned long ccr3;
-};
-
-struct mtrr_var_range {
- unsigned long base_lo;
- unsigned long base_hi;
- unsigned long mask_lo;
- unsigned long mask_hi;
-};
-
-void set_mtrr_done(struct set_mtrr_context *ctxt);
-void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
-void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
-
-void get_mtrr_state(void);
-
-extern void set_mtrr_ops(struct mtrr_ops * ops);
-
-extern u32 size_or_mask, size_and_mask;
-extern struct mtrr_ops * mtrr_if;
-
-#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
-#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
-
-extern unsigned int num_var_ranges;
-
-void finalize_mtrr_state(void);
-void mtrr_state_warn(void);
-char *mtrr_attrib_to_str(int x);
-
-extern char * mtrr_if_name[];
diff -r fcc833cbaf82 -r b9b411b50587 xen/arch/x86/mtrr/state.c
--- a/xen/arch/x86/mtrr/state.c Mon Feb 13 16:41:23 2006
+++ /dev/null Tue Feb 14 15:23:43 2006
@@ -1,78 +0,0 @@
-#include <xen/mm.h>
-#include <xen/init.h>
-#include <asm/io.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include "mtrr.h"
-
-
-/* Put the processor into a state where MTRRs can be safely set */
-void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
-{
- unsigned int cr0;
-
- /* Disable interrupts locally */
- local_irq_save(ctxt->flags);
-
- if (use_intel() || is_cpu(CYRIX)) {
-
- /* Save value of CR4 and clear Page Global Enable (bit 7) */
- if ( cpu_has_pge ) {
- ctxt->cr4val = read_cr4();
- write_cr4(ctxt->cr4val & (unsigned char) ~(1 << 7));
- }
-
- /* Disable and flush caches. Note that wbinvd flushes the TLBs
as
- a side-effect */
- cr0 = read_cr0() | 0x40000000;
- wbinvd();
- write_cr0(cr0);
- wbinvd();
-
- if (use_intel())
- /* Save MTRR state */
- rdmsr(MTRRdefType_MSR, ctxt->deftype_lo,
ctxt->deftype_hi);
- else
- /* Cyrix ARRs - everything else were excluded at the
top */
- ctxt->ccr3 = getCx86(CX86_CCR3);
- }
-}
-
-void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
-{
- if (use_intel())
- /* Disable MTRRs, and set the default type to uncached */
- wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
- ctxt->deftype_hi);
- else if (is_cpu(CYRIX))
- /* Cyrix ARRs - everything else were excluded at the top */
- setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
-}
-
-/* Restore the processor after a set_mtrr_prepare */
-void set_mtrr_done(struct set_mtrr_context *ctxt)
-{
- if (use_intel() || is_cpu(CYRIX)) {
-
- /* Flush caches and TLBs */
- wbinvd();
-
- /* Restore MTRRdefType */
- if (use_intel())
- /* Intel (P6) standard MTRRs */
- wrmsr(MTRRdefType_MSR, ctxt->deftype_lo,
ctxt->deftype_hi);
- else
- /* Cyrix ARRs - everything else was excluded at the top
*/
- setCx86(CX86_CCR3, ctxt->ccr3);
-
- /* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
-
- /* Restore value of CR4 */
- if ( cpu_has_pge )
- write_cr4(ctxt->cr4val);
- }
- /* Re-enable interrupts locally (if enabled previously) */
- local_irq_restore(ctxt->flags);
-}
-
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|