# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1193059162 -3600
# Node ID 3e7c86602c70d2d587aafbef957d644f48cd6da9
# Parent 42d8dadb5864eac0140262b9475a7b1ed150b607
hvm/x86: MTRR/PAT virtualisation.
Signed-off-by: Disheng Su <disheng.su@xxxxxxxxx>
---
xen/arch/x86/cpu/common.c | 9
xen/arch/x86/cpu/mtrr/generic.c | 16
xen/arch/x86/cpu/mtrr/main.c | 7
xen/arch/x86/cpu/mtrr/mtrr.h | 11
xen/arch/x86/hvm/Makefile | 1
xen/arch/x86/hvm/hvm.c | 88 ++++
xen/arch/x86/hvm/mtrr.c | 687 ++++++++++++++++++++++++++++++++++++++
xen/arch/x86/hvm/vmx/vmcs.c | 17
xen/arch/x86/hvm/vmx/vmx.c | 70 +++
xen/arch/x86/mm.c | 9
xen/arch/x86/mm/shadow/common.c | 8
xen/arch/x86/mm/shadow/multi.c | 28 +
xen/include/asm-x86/cpufeature.h | 2
xen/include/asm-x86/hvm/domain.h | 6
xen/include/asm-x86/hvm/support.h | 1
xen/include/asm-x86/hvm/vcpu.h | 7
xen/include/asm-x86/msr-index.h | 19 +
xen/include/asm-x86/mtrr.h | 49 ++
18 files changed, 999 insertions(+), 36 deletions(-)
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/cpu/common.c Mon Oct 22 14:19:22 2007 +0100
@@ -22,6 +22,12 @@ static int disable_x86_serial_nr __devin
static int disable_x86_serial_nr __devinitdata = 0;
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+
+/*
+ * Default host IA32_CR_PAT value to cover all memory types.
+ * BIOS usually sets it to 0x07040600070406.
+ */
+u64 host_pat = 0x050100070406;
static void default_init(struct cpuinfo_x86 * c)
{
@@ -557,6 +563,9 @@ void __devinit cpu_init(void)
}
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+ if (cpu_has_pat)
+ wrmsrl(MSR_IA32_CR_PAT, host_pat);
+
*(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
*(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(current);
asm volatile ( "lgdt %0" : "=m" (gdt_load) );
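
The default host_pat above is chosen so that every architectural memory type appears in some PAT entry. A minimal stand-alone sketch (plain C, not part of the patch) that decodes the eight entries; byte n of the MSR holds PAn:

#include <stdint.h>
#include <stdio.h>

static const char *pat_name(uint8_t t)
{
    switch ( t )
    {
    case 0: return "UC";
    case 1: return "WC";
    case 4: return "WT";
    case 5: return "WP";
    case 6: return "WB";
    case 7: return "UC-";
    default: return "reserved";
    }
}

int main(void)
{
    uint64_t host_pat = 0x050100070406ULL;   /* default picked by this patch */
    int i;

    /* Prints WB, WT, UC-, UC, WC, WP, UC, UC: all six valid types appear. */
    for ( i = 0; i < 8; i++ )
        printf("PA%d = %s\n", i, pat_name((host_pat >> (i * 8)) & 0xff));
    return 0;
}
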
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/cpu/mtrr/generic.c
--- a/xen/arch/x86/cpu/mtrr/generic.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/cpu/mtrr/generic.c Mon Oct 22 14:19:22 2007 +0100
@@ -11,14 +11,6 @@
#include <asm/cpufeature.h>
#include "mtrr.h"
-struct mtrr_state {
- struct mtrr_var_range *var_ranges;
- mtrr_type fixed_ranges[NUM_FIXED_RANGES];
- unsigned char enabled;
- unsigned char have_fixed;
- mtrr_type def_type;
-};
-
struct fixed_range_block {
int base_msr; /* start address of an MTRR block */
int ranges; /* number of MTRRs in this block */
@@ -32,7 +24,7 @@ static struct fixed_range_block fixed_ra
};
static unsigned long smp_changes_mask;
-static struct mtrr_state mtrr_state = {};
+struct mtrr_state mtrr_state = {};
/* Get the MSR pair relating to a var range */
static void
@@ -88,6 +80,9 @@ void __init get_mtrr_state(void)
rdmsr(MTRRdefType_MSR, lo, dummy);
mtrr_state.def_type = (lo & 0xff);
mtrr_state.enabled = (lo & 0xc00) >> 10;
+
+ /* Store mtrr_cap for HVM MTRR virtualisation. */
+ rdmsrl(MTRRcap_MSR, mtrr_state.mtrr_cap);
}
/* Some BIOS's are fucked and don't set all MTRRs the same! */
@@ -107,6 +102,7 @@ void __init mtrr_state_warn(void)
printk(KERN_INFO "mtrr: corrected configuration.\n");
}
+extern bool_t is_var_mtrr_overlapped(struct mtrr_state *m);
/* Doesn't attempt to pass an error out to MTRR users
because it's quite complicated in some cases and probably not
worth it because the best error handling is to ignore it. */
@@ -116,6 +112,8 @@ void mtrr_wrmsr(unsigned msr, unsigned a
printk(KERN_ERR
"MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
smp_processor_id(), msr, a, b);
+ /* Cache overlap status for efficient HVM MTRR virtualisation. */
+ mtrr_state.overlapped = is_var_mtrr_overlapped(&mtrr_state);
}
/**
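
mtrr_state.mtrr_cap is stored verbatim so that later code (e.g. is_var_mtrr_overlapped()) can take the variable-range count straight from the low byte. A minimal sketch of the MTRRcap field layout this relies on, using the value 0x508 that reset_mtrr() later exposes to guests (real hardware may of course report a different VCNT):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t mtrr_cap = 0x508;                      /* same value reset_mtrr() uses */
    unsigned int vcnt = mtrr_cap & 0xff;            /* number of variable ranges */
    int          fix  = !!(mtrr_cap & (1 << 8));    /* fixed-range MTRRs supported */
    int          wc   = !!(mtrr_cap & (1 << 10));   /* write-combining supported */

    /* Prints "VCNT=8 FIX=1 WC=1". */
    printf("VCNT=%u FIX=%d WC=%d\n", vcnt, fix, wc);
    return 0;
}
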
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/cpu/mtrr/main.c
--- a/xen/arch/x86/cpu/mtrr/main.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/cpu/mtrr/main.c Mon Oct 22 14:19:22 2007 +0100
@@ -588,6 +588,8 @@ struct mtrr_value {
unsigned long lsize;
};
+extern void global_init_mtrr_pat(void);
+
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
@@ -654,8 +656,11 @@ void __init mtrr_bp_init(void)
if (mtrr_if) {
set_num_var_ranges();
init_table();
- if (use_intel())
+ if (use_intel()) {
get_mtrr_state();
+			/* Initialise some global data for MTRR/PAT virtualisation. */
+ global_init_mtrr_pat();
+ }
}
}
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/cpu/mtrr/mtrr.h
--- a/xen/arch/x86/cpu/mtrr/mtrr.h Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/cpu/mtrr/mtrr.h Mon Oct 22 14:19:22 2007 +0100
@@ -13,7 +13,6 @@
#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-#define NUM_FIXED_RANGES 88
#define MTRRfix64K_00000_MSR 0x250
#define MTRRfix16K_80000_MSR 0x258
#define MTRRfix16K_A0000_MSR 0x259
@@ -30,9 +29,6 @@
#define MTRR_CHANGE_MASK_VARIABLE 0x02
#define MTRR_CHANGE_MASK_DEFTYPE 0x04
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
- an 8 bit field: */
-typedef u8 mtrr_type;
struct mtrr_ops {
u32 vendor;
@@ -69,13 +65,6 @@ struct set_mtrr_context {
u32 ccr3;
};
-struct mtrr_var_range {
- u32 base_lo;
- u32 base_hi;
- u32 mask_lo;
- u32 mask_hi;
-};
-
void set_mtrr_done(struct set_mtrr_context *ctxt);
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/hvm/Makefile
--- a/xen/arch/x86/hvm/Makefile Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/hvm/Makefile Mon Oct 22 14:19:22 2007 +0100
@@ -7,6 +7,7 @@ obj-y += intercept.o
obj-y += intercept.o
obj-y += io.o
obj-y += irq.o
+obj-y += mtrr.o
obj-y += platform.o
obj-y += pmtimer.o
obj-y += rtc.o
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/hvm/hvm.c Mon Oct 22 14:19:22 2007 +0100
@@ -226,6 +226,7 @@ int hvm_domain_initialise(struct domain
spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
spin_lock_init(&d->arch.hvm_domain.irq_lock);
+ spin_lock_init(&d->arch.hvm_domain.uc_lock);
rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
if ( rc != 0 )
@@ -417,27 +418,22 @@ HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_
HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
1, HVMSR_PER_VCPU);
+extern int reset_vmsr(struct mtrr_state *m, u64 *p);
+
int hvm_vcpu_initialise(struct vcpu *v)
{
int rc;
if ( (rc = vlapic_init(v)) != 0 )
- return rc;
+ goto fail1;
if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
- {
- vlapic_destroy(v);
- return rc;
- }
+ goto fail2;
/* Create ioreq event channel. */
rc = alloc_unbound_xen_event_channel(v, 0);
if ( rc < 0 )
- {
- hvm_funcs.vcpu_destroy(v);
- vlapic_destroy(v);
- return rc;
- }
+ goto fail3;
/* Register ioreq event channel. */
v->arch.hvm_vcpu.xen_port = rc;
@@ -448,6 +444,10 @@ int hvm_vcpu_initialise(struct vcpu *v)
spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
+
+ rc = reset_vmsr(&v->arch.hvm_vcpu.mtrr, &v->arch.hvm_vcpu.pat_cr);
+ if ( rc != 0 )
+ goto fail3;
v->arch.guest_context.user_regs.eflags = 2;
@@ -468,6 +468,13 @@ int hvm_vcpu_initialise(struct vcpu *v)
}
return 0;
+
+ fail3:
+ hvm_funcs.vcpu_destroy(v);
+ fail2:
+ vlapic_destroy(v);
+ fail1:
+ return rc;
}
void hvm_vcpu_destroy(struct vcpu *v)
@@ -604,6 +611,32 @@ int hvm_set_efer(uint64_t value)
hvm_update_guest_efer(v);
return 1;
+}
+
+extern void shadow_blow_tables_per_domain(struct domain *d);
+extern bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs);
+
+/* Exit UC mode only if all VCPUs agree on MTRR/PAT and are not in no_fill. */
+static bool_t domain_exit_uc_mode(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ struct vcpu *vs;
+
+ for_each_vcpu ( d, vs )
+ {
+ if ( (vs == v) || !vs->is_initialised )
+ continue;
+ if ( (vs->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) ||
+ mtrr_pat_not_equal(vs, v) )
+ return 0;
+ }
+
+ return 1;
+}
+
+static void local_flush_cache(void *info)
+{
+ wbinvd();
}
int hvm_set_cr0(unsigned long value)
@@ -683,6 +716,41 @@ int hvm_set_cr0(unsigned long value)
{
put_page(pagetable_get_page(v->arch.guest_table));
v->arch.guest_table = pagetable_null();
+ }
+ }
+
+ if ( !list_empty(&(domain_hvm_iommu(v->domain)->pdev_list)) )
+ {
+ if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
+ {
+ /* Entering no fill cache mode. */
+ spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+ v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
+
+ if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
+ {
+ /* Flush physical caches. */
+ on_each_cpu(local_flush_cache, NULL, 1, 1);
+ /* Shadow pagetables must recognise UC mode. */
+ v->domain->arch.hvm_domain.is_in_uc_mode = 1;
+ shadow_blow_tables_per_domain(v->domain);
+ }
+ spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
+ }
+ else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
+ (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
+ {
+ /* Exit from no fill cache mode. */
+ spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+ v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
+
+ if ( domain_exit_uc_mode(v) )
+ {
+ /* Shadow pagetables must recognise normal caching mode. */
+ v->domain->arch.hvm_domain.is_in_uc_mode = 0;
+ shadow_blow_tables_per_domain(v->domain);
+ }
+ spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
}
}
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/hvm/mtrr.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/mtrr.c Mon Oct 22 14:19:22 2007 +0100
@@ -0,0 +1,687 @@
+/*
+ * mtrr.c: MTRR/PAT virtualization
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <public/hvm/e820.h>
+#include <xen/types.h>
+#include <asm/e820.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
+#include <xen/domain_page.h>
+#include <stdbool.h>
+#include <asm/mtrr.h>
+#include <asm/hvm/support.h>
+
+/* Xen holds the native MTRR MSRs */
+extern struct mtrr_state mtrr_state;
+
+static u64 phys_base_msr_mask;
+static u64 phys_mask_msr_mask;
+static u32 size_or_mask;
+static u32 size_and_mask;
+
+static void init_pat_entry_tbl(u64 pat);
+static void init_mtrr_epat_tbl(void);
+static unsigned char get_mtrr_type(struct mtrr_state *m, paddr_t pa);
+/* get page attribute fields (PAn) from PAT MSR */
+#define pat_cr_2_paf(pat_cr,n) ((((u64)pat_cr) >> ((n)<<3)) & 0xff)
+/* pat entry to PTE flags (PAT, PCD, PWT bits) */
+static unsigned char pat_entry_2_pte_flags[8] = {
+ 0, _PAGE_PWT,
+ _PAGE_PCD, _PAGE_PCD | _PAGE_PWT,
+ _PAGE_PAT, _PAGE_PAT | _PAGE_PWT,
+ _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT };
+
+/* effective mm type lookup table, according to MTRR and PAT */
+static u8 mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
+/********PAT(UC,WC,RS,RS,WT,WP,WB,UC-)*/
+/* RS means the reserved types (2 and 3); the type values are hardcoded here */
+ /*MTRR(UC):(UC,WC,RS,RS,UC,UC,UC,UC)*/
+ {0, 1, 2, 2, 0, 0, 0, 0},
+ /*MTRR(WC):(UC,WC,RS,RS,UC,UC,WC,WC)*/
+ {0, 1, 2, 2, 0, 0, 1, 1},
+ /*MTRR(RS):(RS,RS,RS,RS,RS,RS,RS,RS)*/
+ {2, 2, 2, 2, 2, 2, 2, 2},
+ /*MTRR(RS):(RS,RS,RS,RS,RS,RS,RS,RS)*/
+ {2, 2, 2, 2, 2, 2, 2, 2},
+ /*MTRR(WT):(UC,WC,RS,RS,WT,WP,WT,UC)*/
+ {0, 1, 2, 2, 4, 5, 4, 0},
+ /*MTRR(WP):(UC,WC,RS,RS,WT,WP,WP,WC)*/
+ {0, 1, 2, 2, 4, 5, 5, 1},
+ /*MTRR(WB):(UC,WC,RS,RS,WT,WP,WB,UC)*/
+ {0, 1, 2, 2, 4, 5, 6, 0}
+};
+
+/* Reverse lookup table: find a PAT type from an MTRR type and an effective
+ * memory type. This table is generated dynamically.
+ */
+static u8 mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES];
+
+/* Lookup table giving the PAT entry that holds a given memory type in the
+ * host PAT. */
+static u8 pat_entry_tbl[PAT_TYPE_NUMS];
+
+static void get_mtrr_range(uint64_t base_msr, uint64_t mask_msr,
+ uint64_t *base, uint64_t *end)
+{
+ uint32_t mask_lo = (uint32_t)mask_msr;
+ uint32_t mask_hi = (uint32_t)(mask_msr >> 32);
+ uint32_t base_lo = (uint32_t)base_msr;
+ uint32_t base_hi = (uint32_t)(base_msr >> 32);
+ uint32_t size;
+
+ if ( (mask_lo & 0x800) == 0 )
+ {
+ /* Invalid (i.e. free) range */
+ *base = 0;
+ *end = 0;
+ return;
+ }
+
+ /* Work out the shifted address mask. */
+ mask_lo = (size_or_mask | (mask_hi << (32 - PAGE_SHIFT)) |
+ (mask_lo >> PAGE_SHIFT));
+
+ /* This works correctly if size is a power of two (a contiguous range). */
+ size = -mask_lo;
+ *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+ *end = *base + size - 1;
+}
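
A worked example of the decode above, assuming a machine with 36 physical address bits (so size_or_mask is 0xff000000 once global_init_mtrr_pat() has run): a hypothetical variable range describing 1GB of WB memory at 3GB decodes to page frames 0xc0000-0xfffff. A minimal stand-alone sketch:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
    /* Hypothetical pair: base 3GB, type WB (6); mask for a 1GB power-of-two
     * range with the valid bit (bit 11) set. */
    uint64_t base_msr = 0xC0000006ULL;
    uint64_t mask_msr = 0xFC0000800ULL;
    uint32_t size_or_mask = 0xff000000;        /* assumes 36 address bits */

    uint32_t mask_lo = (uint32_t)mask_msr, mask_hi = (uint32_t)(mask_msr >> 32);
    uint32_t base_lo = (uint32_t)base_msr, base_hi = (uint32_t)(base_msr >> 32);
    uint32_t size;
    uint64_t base, end;

    mask_lo = size_or_mask | (mask_hi << (32 - PAGE_SHIFT)) | (mask_lo >> PAGE_SHIFT);
    size = -mask_lo;                            /* power of two => contiguous */
    base = ((uint64_t)base_hi << (32 - PAGE_SHIFT)) | (base_lo >> PAGE_SHIFT);
    end  = base + size - 1;

    /* Prints "pfns 0xc0000..0xfffff", i.e. 3GB up to but not including 4GB. */
    printf("pfns %#llx..%#llx\n", (unsigned long long)base, (unsigned long long)end);
    return 0;
}
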
+
+bool_t is_var_mtrr_overlapped(struct mtrr_state *m)
+{
+ int seg, i;
+ uint64_t phys_base, phys_mask, phys_base_pre, phys_mask_pre;
+ uint64_t base_pre, end_pre, base, end;
+ uint8_t num_var_ranges = (u8)m->mtrr_cap;
+
+ for ( i = 0; i < num_var_ranges; i++ )
+ {
+ phys_base_pre = ((u64*)m->var_ranges)[i*2];
+ phys_mask_pre = ((u64*)m->var_ranges)[i*2 + 1];
+
+ get_mtrr_range(phys_base_pre, phys_mask_pre,
+ &base_pre, &end_pre);
+
+ for ( seg = i + 1; seg < num_var_ranges; seg ++ )
+ {
+ phys_base = ((u64*)m->var_ranges)[seg*2];
+ phys_mask = ((u64*)m->var_ranges)[seg*2 + 1];
+
+ get_mtrr_range(phys_base, phys_mask,
+ &base, &end);
+
+ if ( ((base_pre != end_pre) && (base != end))
+ || ((base >= base_pre) && (base <= end_pre))
+ || ((end >= base_pre) && (end <= end_pre))
+ || ((base_pre >= base) && (base_pre <= end))
+ || ((end_pre >= base) && (end_pre <= end)) )
+ {
+ /* MTRR is overlapped. */
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+/* Number of variable MTRRs kept free for the guest OS's own use. */
+#define RESERVED_MTRR 2
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+bool mtrr_var_range_msr_set(struct mtrr_state *m, u32 msr, u64 msr_content);
+bool mtrr_def_type_msr_set(struct mtrr_state *m, u64 msr_content);
+bool mtrr_fix_range_msr_set(struct mtrr_state *m, int row, u64 msr_content);
+static void set_var_mtrr(unsigned int reg, struct mtrr_state *m,
+ unsigned int base, unsigned int size,
+ unsigned int type)
+{
+ struct mtrr_var_range *vr;
+
+ vr = &m->var_ranges[reg];
+
+ if ( size == 0 )
+ {
+ /* The invalid bit is kept in the mask, so we simply clear the
+ * relevant mask register to disable a range.
+ */
+ mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg), 0);
+ }
+ else
+ {
+ vr->base_lo = base << PAGE_SHIFT | type;
+ vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
+ vr->mask_lo = -size << PAGE_SHIFT | 0x800;
+ vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
+
+ mtrr_var_range_msr_set(m, MTRRphysBase_MSR(reg), *(unsigned long *)vr);
+ mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg),
+ *((unsigned long *)vr + 1));
+ }
+}
+/* From the Intel SDM (Vol. 3, Section 10.11.4), range size and base
+ * alignment must satisfy the following requirements:
+ * 1. The range size must be 2^N bytes for N >= 12 (i.e. 4KB minimum).
+ * 2. The base address must be aligned to 2^N, with the same N as in the
+ *    previous requirement. So an 8KB range must be 8KB aligned, not 4KB
+ *    aligned.
+ */
+static unsigned int range_to_mtrr(unsigned int reg, struct mtrr_state *m,
+ unsigned int range_startk, unsigned int range_sizek, unsigned char type)
+{
+ if ( !range_sizek || (reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR)) )
+ return reg;
+
+ while ( range_sizek )
+ {
+ unsigned int max_align, align, sizek;
+
+ max_align = (range_startk == 0) ? 32 : ffs(range_startk);
+ align = min_t(unsigned int, fls(range_sizek), max_align);
+ sizek = 1 << (align - 1);
+
+ set_var_mtrr(reg++, m, range_startk, sizek, type);
+
+ range_startk += sizek;
+ range_sizek -= sizek;
+
+ if ( reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR) )
+ break;
+ }
+
+ return reg;
+}
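
A minimal trace of the decomposition range_to_mtrr() performs, using the compiler builtins in place of Xen's ffs()/fls() (both 1-based): 3GB of RAM starting at 0 splits into a naturally aligned 2GB chunk followed by a 1GB chunk, so two variable MTRRs suffice. A stand-alone sketch:

#include <stdio.h>

static int fls_(unsigned int x) { return x ? 32 - __builtin_clz(x) : 0; }

int main(void)
{
    unsigned int startk = 0, sizek = 3u << 20;   /* 3GB, expressed in KB */

    while ( sizek )
    {
        unsigned int max_align = (startk == 0) ? 32 : __builtin_ffs(startk);
        unsigned int align = (fls_(sizek) < max_align) ? fls_(sizek) : max_align;
        unsigned int chunk = 1u << (align - 1);

        /* Prints "2097152KB at 0KB" then "1048576KB at 2097152KB". */
        printf("MTRR chunk: %uKB at %uKB\n", chunk, startk);
        startk += chunk;
        sizek  -= chunk;
    }
    return 0;
}
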
+
+static void setup_fixed_mtrrs(struct vcpu *v)
+{
+ uint64_t content;
+ int i;
+ struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;
+
+ /* 1. Map (0~A0000) as WB */
+ content = 0x0606060606060606ull;
+ mtrr_fix_range_msr_set(m, 0, content);
+ mtrr_fix_range_msr_set(m, 1, content);
+ /* 2. Map VRAM(A0000~C0000) as WC */
+ content = 0x0101010101010101;
+ mtrr_fix_range_msr_set(m, 2, content);
+ /* 3. Map (C0000~100000) as UC */
+ for ( i = 3; i < 11; i++)
+ mtrr_fix_range_msr_set(m, i, 0);
+}
+
+static void setup_var_mtrrs(struct vcpu *v)
+{
+ p2m_type_t p2m;
+ unsigned long e820_mfn;
+ char *p = NULL;
+ unsigned char nr = 0;
+ int i;
+ unsigned int reg = 0;
+ unsigned long size = 0;
+ unsigned long addr = 0;
+ struct e820entry *e820_table;
+
+ e820_mfn = mfn_x(gfn_to_mfn(v->domain,
+ HVM_E820_PAGE >> PAGE_SHIFT, &p2m));
+
+ p = (char *)map_domain_page(e820_mfn);
+
+ nr = *(unsigned char*)(p + HVM_E820_NR_OFFSET);
+ e820_table = (struct e820entry*)(p + HVM_E820_OFFSET);
+    /* Search the E820 table and set MTRRs for the RAM regions. */
+ for ( i = 0; i < nr; i++)
+ {
+ if ( (e820_table[i].addr >= 0x100000) &&
+ (e820_table[i].type == E820_RAM) )
+ {
+ if ( e820_table[i].addr == 0x100000 )
+ {
+ size = e820_table[i].size + 0x100000 + PAGE_SIZE * 3;
+ addr = 0;
+ }
+ else
+ {
+ /* Larger than 4G */
+ size = e820_table[i].size;
+ addr = e820_table[i].addr;
+ }
+
+ reg = range_to_mtrr(reg, &v->arch.hvm_vcpu.mtrr,
+ addr >> PAGE_SHIFT, size >> PAGE_SHIFT,
+ MTRR_TYPE_WRBACK);
+ }
+ }
+}
+
+void init_mtrr_in_hyper(struct vcpu *v)
+{
+    /* TODO: MTRRs should be initialised by the BIOS or elsewhere;
+     * as a workaround we do it here.
+     */
+ if ( v->arch.hvm_vcpu.mtrr.is_initialized )
+ return;
+
+ setup_fixed_mtrrs(v);
+ setup_var_mtrrs(v);
+    /* Enable MTRRs (E and FE bits of MTRRdefType). */
+ mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, 0xc00);
+
+ v->arch.hvm_vcpu.mtrr.is_initialized = 1;
+}
+
+static int reset_mtrr(struct mtrr_state *m)
+{
+ m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT);
+ if ( m->var_ranges == NULL )
+ return -ENOMEM;
+ memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range));
+ memset(m->fixed_ranges, 0, sizeof(m->fixed_ranges));
+ m->enabled = 0;
+    m->def_type = 0;                  /* MTRRs disabled. */
+    m->mtrr_cap = (0x5<<8)|MTRR_VCNT; /* WC and fixed ranges supported; VCNT = 8. */
+ m->overlapped = 0;
+ return 0;
+}
+
+/* init global variables for MTRR and PAT */
+void global_init_mtrr_pat(void)
+{
+ extern u64 host_pat;
+ u32 phys_addr;
+
+ init_mtrr_epat_tbl();
+ init_pat_entry_tbl(host_pat);
+    /* Get the maximum physical address width and derive the global masks. */
+ if ( cpuid_eax(0x80000000) < 0x80000008 )
+ phys_addr = 36;
+ else
+ phys_addr = cpuid_eax(0x80000008);
+
+ phys_base_msr_mask = ~((((u64)1) << phys_addr) - 1) | 0xf00UL;
+ phys_mask_msr_mask = ~((((u64)1) << phys_addr) - 1) | 0x7ffUL;
+
+ size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
+ size_and_mask = ~size_or_mask & 0xfff00000;
+}
+
+static void init_pat_entry_tbl(u64 pat)
+{
+ int i, j;
+
+ memset(&pat_entry_tbl, INVALID_MEM_TYPE,
+ PAT_TYPE_NUMS * sizeof(pat_entry_tbl[0]));
+
+ for ( i = 0; i < PAT_TYPE_NUMS; i++ )
+ {
+ for ( j = 0; j < PAT_TYPE_NUMS; j++ )
+ {
+ if ( pat_cr_2_paf(pat, j) == i )
+ {
+ pat_entry_tbl[i] = j;
+ break;
+ }
+ }
+ }
+}
+
+unsigned char pat_type_2_pte_flags(unsigned char pat_type)
+{
+ int pat_entry = pat_entry_tbl[pat_type];
+
+    /* INVALID_MEM_TYPE means no PAT entry holding the given pat_type was
+     * found in the host PAT. This cannot happen if the host PAT covers all
+     * the PAT types.
+     */
+ if ( likely(pat_entry != INVALID_MEM_TYPE) )
+ return pat_entry_2_pte_flags[pat_entry];
+
+ return pat_entry_2_pte_flags[pat_entry_tbl[PAT_TYPE_UNCACHABLE]];
+}
+
+int reset_vmsr(struct mtrr_state *m, u64 *pat_ptr)
+{
+ int rc;
+
+ rc = reset_mtrr(m);
+ if ( rc != 0 )
+ return rc;
+
+ *pat_ptr = ( (u64)PAT_TYPE_WRBACK) | /* PAT0: WB */
+ ( (u64)PAT_TYPE_WRTHROUGH << 8 ) | /* PAT1: WT */
+ ( (u64)PAT_TYPE_UC_MINUS << 16 ) | /* PAT2: UC- */
+ ( (u64)PAT_TYPE_UNCACHABLE << 24 ) | /* PAT3: UC */
+ ( (u64)PAT_TYPE_WRBACK << 32 ) | /* PAT4: WB */
+ ( (u64)PAT_TYPE_WRTHROUGH << 40 ) | /* PAT5: WT */
+ ( (u64)PAT_TYPE_UC_MINUS << 48 ) | /* PAT6: UC- */
+ ( (u64)PAT_TYPE_UNCACHABLE << 56 ); /* PAT7: UC */
+
+ return 0;
+}
+
+/*
+ * Get MTRR memory type for physical address pa.
+ */
+static unsigned char get_mtrr_type(struct mtrr_state *m, paddr_t pa)
+{
+ int addr, seg, index;
+ u8 overlap_mtrr = 0;
+ u8 overlap_mtrr_pos = 0;
+ u64 phys_base;
+ u64 phys_mask;
+ u8 num_var_ranges = m->mtrr_cap & 0xff;
+
+ if ( unlikely(!(m->enabled & 0x2)) )
+ return MTRR_TYPE_UNCACHABLE;
+
+ if ( (pa < 0x100000) && (m->enabled & 1) )
+ {
+        /* The fixed-range MTRRs take effect. */
+ addr = (unsigned int) pa;
+ if ( addr < 0x80000 )
+ {
+ seg = (addr >> 16);
+ return m->fixed_ranges[seg];
+ }
+ else if ( addr < 0xc0000 )
+ {
+ seg = (addr - 0x80000) >> 14;
+ index = (seg >> 3) + 1;
+ seg &= 7; /* select 0-7 segments */
+ return m->fixed_ranges[index*8 + seg];
+ }
+ else
+ {
+ /* 0xC0000 --- 0x100000 */
+ seg = (addr - 0xc0000) >> 12;
+ index = (seg >> 3) + 3;
+ seg &= 7; /* select 0-7 segments */
+ return m->fixed_ranges[index*8 + seg];
+ }
+ }
+
+ /* Match with variable MTRRs. */
+ for ( seg = 0; seg < num_var_ranges; seg++ )
+ {
+ phys_base = ((u64*)m->var_ranges)[seg*2];
+ phys_mask = ((u64*)m->var_ranges)[seg*2 + 1];
+ if ( phys_mask & (1 << MTRR_PHYSMASK_VALID_BIT) )
+ {
+ if ( ((u64) pa & phys_mask) >> MTRR_PHYSMASK_SHIFT ==
+ (phys_base & phys_mask) >> MTRR_PHYSMASK_SHIFT )
+ {
+ if ( unlikely(m->overlapped) )
+ {
+ overlap_mtrr |= 1 << (phys_base & MTRR_PHYSBASE_TYPE_MASK);
+ overlap_mtrr_pos = phys_base & MTRR_PHYSBASE_TYPE_MASK;
+ }
+ else
+ {
+ /* If no overlap, return the found one */
+ return (phys_base & MTRR_PHYSBASE_TYPE_MASK);
+ }
+ }
+ }
+ }
+
+ /* Overlapped or not found. */
+ if ( unlikely(overlap_mtrr == 0) )
+ return m->def_type;
+
+    if ( likely(!(overlap_mtrr & ~( ((u8)1) << overlap_mtrr_pos ))) )
+        /* Covers both a single variable-range match and two or more
+         * matches of the same type.
+         */
+        return overlap_mtrr_pos;
+
+    if ( overlap_mtrr & 0x1 )
+        /* Two or more matches, one of which is UC. */
+        return MTRR_TYPE_UNCACHABLE;
+
+    if ( !(overlap_mtrr & 0xaf) )
+        /* Two or more matches, all of type WT or WB: WT wins. */
+        return MTRR_TYPE_WRTHROUGH;
+
+ /* Behaviour is undefined, but return the last overlapped type. */
+ return overlap_mtrr_pos;
+}
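
The fixed-range indexing above mirrors the MSR layout: row 0 of fixed_ranges[] is MTRRfix64K_00000 (eight 64KB entries covering 0-0x80000), rows 1-2 are the two 16KB MSRs (0x80000-0xC0000), and rows 3-10 are the eight 4KB MSRs (0xC0000-0x100000). A minimal sketch of the same arithmetic, checked against the legacy VGA text buffer at 0xB8000:

#include <stdio.h>

/* Returns the flat index into fixed_ranges[88] for an address below 1MB. */
static int fixed_range_index(unsigned int addr)
{
    int seg, index;

    if ( addr < 0x80000 )
        return addr >> 16;                      /* 64KB granularity, row 0 */
    if ( addr < 0xc0000 )
    {
        seg = (addr - 0x80000) >> 14;           /* 16KB granularity */
        index = (seg >> 3) + 1;
        return index * 8 + (seg & 7);
    }
    seg = (addr - 0xc0000) >> 12;               /* 4KB granularity */
    index = (seg >> 3) + 3;
    return index * 8 + (seg & 7);
}

int main(void)
{
    /* 0xB8000 lands in MTRRfix16K_A0000 (row 2), entry 6, i.e. index 22. */
    printf("index for 0xB8000 = %d\n", fixed_range_index(0xB8000));
    return 0;
}
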
+
+/*
+ * Return the memory type indicated by the PAT for the given PTE flags.
+ * NOTE: valid only when paging is enabled.
+ *       Only 4KB page PTEs are handled at present.
+ */
+static unsigned char page_pat_type(u64 pat_cr, unsigned long pte_flags)
+{
+ int pat_entry;
+
+ /* PCD/PWT -> bit 1/0 of PAT entry */
+ pat_entry = ( pte_flags >> 3 ) & 0x3;
+ /* PAT bits as bit 2 of PAT entry */
+ if ( pte_flags & _PAGE_PAT )
+ pat_entry |= 4;
+
+ return (unsigned char)pat_cr_2_paf(pat_cr, pat_entry);
+}
+
+/*
+ * Effective memory type for leaf page.
+ */
+static u8 effective_mm_type(
+ struct mtrr_state *m,
+ u64 pat,
+ paddr_t gpa,
+ unsigned long pte_flags)
+{
+ unsigned char mtrr_mtype, pat_value, effective;
+
+ mtrr_mtype = get_mtrr_type(m, gpa);
+
+ pat_value = page_pat_type(pat, pte_flags);
+
+ effective = mm_type_tbl[mtrr_mtype][pat_value];
+
+ return effective;
+}
+
+static void init_mtrr_epat_tbl(void)
+{
+ int i, j;
+    /* Default every entry to an invalid type so that conflicts can be detected. */
+ memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
+
+ for ( i = 0; i < MTRR_NUM_TYPES; i++ )
+ {
+ for ( j = 0; j < PAT_TYPE_NUMS; j++ )
+ {
+ int tmp = mm_type_tbl[i][j];
+ if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) )
+ mtrr_epat_tbl[i][tmp] = j;
+ }
+ }
+}
+
+u32 get_pat_flags(struct vcpu *v,
+ u32 gl1e_flags,
+ paddr_t gpaddr,
+ paddr_t spaddr)
+{
+ u8 guest_eff_mm_type;
+ u8 shadow_mtrr_type;
+ u8 pat_entry_value;
+ u64 pat = v->arch.hvm_vcpu.pat_cr;
+ struct mtrr_state *g = &v->arch.hvm_vcpu.mtrr;
+
+    /* 1. Get the effective memory type of the guest physical address,
+     * using the guest MTRRs and the guest PAT.
+     */
+ guest_eff_mm_type = effective_mm_type(g, pat, gpaddr, gl1e_flags);
+    /* 2. Get the memory type of the host physical address, from the host MTRRs. */
+ shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr);
+
+    /* 3. Find the memory type in the PAT, from the host MTRR memory type
+     * and the guest effective memory type.
+     */
+ pat_entry_value = mtrr_epat_tbl[shadow_mtrr_type][guest_eff_mm_type];
+    /* If a conflict occurs (e.g. the host MTRR type is UC but the guest
+     * memory type is WB), set UC as the effective memory type. Returning
+     * PAT_TYPE_UNCACHABLE here always forces the effective memory type to UC.
+     */
+ if ( pat_entry_value == INVALID_MEM_TYPE )
+ {
+ gdprintk(XENLOG_WARNING,
+ "Conflict occurs for a given guest l1e flags:%x "
+ "at %"PRIx64" (the effective mm type:%d), "
+ "because the host mtrr type is:%d\n",
+ gl1e_flags, (uint64_t)gpaddr, guest_eff_mm_type,
+ shadow_mtrr_type);
+ pat_entry_value = PAT_TYPE_UNCACHABLE;
+ }
+ /* 4. Get the pte flags */
+ return pat_type_2_pte_flags(pat_entry_value);
+}
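
To make step 1 concrete, here is a minimal sketch of the PTE-flags-to-PAT-entry decode used by page_pat_type() above, run against the default guest PAT installed by reset_vmsr() (WB, WT, UC-, UC repeated); a hypothetical guest PTE with PCD and PWT clear selects entry 0 (WB = 6), while one with PCD|PWT set selects entry 3 (UC = 0):

#include <stdint.h>
#include <stdio.h>

#define _PAGE_PWT 0x008
#define _PAGE_PCD 0x010
#define _PAGE_PAT 0x080

/* Byte n of the PAT MSR holds the type for PAT entry n. */
#define pat_cr_2_paf(pat, n) (((uint64_t)(pat) >> ((n) << 3)) & 0xff)

static unsigned char page_pat_type(uint64_t pat_cr, unsigned long pte_flags)
{
    int pat_entry = (pte_flags >> 3) & 0x3;     /* PCD/PWT -> bits 1/0 */
    if ( pte_flags & _PAGE_PAT )
        pat_entry |= 4;                         /* PAT bit -> bit 2 */
    return pat_cr_2_paf(pat_cr, pat_entry);
}

int main(void)
{
    uint64_t guest_pat = 0x0007040600070406ULL;  /* default from reset_vmsr() */

    printf("%d\n", page_pat_type(guest_pat, 0));                     /* 6 = WB */
    printf("%d\n", page_pat_type(guest_pat, _PAGE_PCD | _PAGE_PWT)); /* 0 = UC */
    return 0;
}
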
+
+/* Helper functions for setting MTRR/PAT MSRs. */
+bool pat_msr_set(u64 *pat, u64 msr_content)
+{
+ u8 *value = (u8*)&msr_content;
+ int i;
+
+ if ( *pat != msr_content )
+ {
+ for ( i = 0; i < 8; i++ )
+ if ( unlikely(!(value[i] == 0 || value[i] == 1 ||
+ value[i] == 4 || value[i] == 5 ||
+ value[i] == 6 || value[i] == 7)) )
+ return 0;
+
+ *pat = msr_content;
+ }
+
+ return 1;
+}
+
+bool mtrr_def_type_msr_set(struct mtrr_state *m, u64 msr_content)
+{
+ u8 def_type = msr_content & 0xff;
+ u8 enabled = (msr_content >> 10) & 0x3;
+
+ if ( unlikely(!(def_type == 0 || def_type == 1 || def_type == 4 ||
+ def_type == 5 || def_type == 6)) )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid MTRR def type:%x\n", def_type);
+ return 0;
+ }
+
+ if ( unlikely(msr_content && (msr_content & ~0xcffUL)) )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
+ msr_content);
+ return 0;
+ }
+
+ m->enabled = enabled;
+ m->def_type = def_type;
+
+ return 1;
+}
+
+bool mtrr_fix_range_msr_set(struct mtrr_state *m, int row, u64 msr_content)
+{
+ u64 *fixed_range_base = (u64 *)m->fixed_ranges;
+
+ if ( fixed_range_base[row] != msr_content )
+ {
+ u8 *range = (u8*)&msr_content;
+ int i, type;
+
+ for ( i = 0; i < 8; i++ )
+ {
+ type = range[i];
+ if ( unlikely(!(type == 0 || type == 1 ||
+ type == 4 || type == 5 || type == 6)) )
+ return 0;
+ }
+
+ fixed_range_base[row] = msr_content;
+ }
+
+ return 1;
+}
+
+bool mtrr_var_range_msr_set(struct mtrr_state *m, u32 msr, u64 msr_content)
+{
+ u32 index;
+ u64 msr_mask;
+ u64 *var_range_base = (u64*)m->var_ranges;
+
+ index = msr - MSR_IA32_MTRR_PHYSBASE0;
+
+ if ( var_range_base[index] != msr_content )
+ {
+ u32 type = msr_content & 0xff;
+
+ msr_mask = (index & 1) ? phys_mask_msr_mask : phys_base_msr_mask;
+
+ if ( unlikely(!(type == 0 || type == 1 ||
+ type == 4 || type == 5 || type == 6)) )
+ return 0;
+
+ if ( unlikely(msr_content && (msr_content & msr_mask)) )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
+ msr_content);
+ return 0;
+ }
+
+ var_range_base[index] = msr_content;
+ }
+
+ m->overlapped = is_var_mtrr_overlapped(m);
+
+ return 1;
+}
+
+bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs)
+{
+ struct mtrr_state *md = &vd->arch.hvm_vcpu.mtrr;
+ struct mtrr_state *ms = &vs->arch.hvm_vcpu.mtrr;
+ int res;
+ u8 num_var_ranges = (u8)md->mtrr_cap;
+
+ /* Test fixed ranges. */
+ res = memcmp(md->fixed_ranges, ms->fixed_ranges,
+ NUM_FIXED_RANGES*sizeof(mtrr_type));
+ if ( res )
+ return 1;
+
+ /* Test var ranges. */
+ res = memcmp(md->var_ranges, ms->var_ranges,
+ num_var_ranges*sizeof(struct mtrr_var_range));
+ if ( res )
+ return 1;
+
+ /* Test default type MSR. */
+ if ( (md->def_type != ms->def_type)
+ && (md->enabled != ms->enabled) )
+ return 1;
+
+ /* Test PAT. */
+ if ( vd->arch.hvm_vcpu.pat_cr != vs->arch.hvm_vcpu.pat_cr )
+ return 1;
+
+ return 0;
+}
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Mon Oct 22 14:19:22 2007 +0100
@@ -756,6 +756,11 @@ void vm_resume_fail(unsigned long eflags
domain_crash_synchronous();
}
+static void flush_cache(void *info)
+{
+ wbinvd();
+}
+
void vmx_do_resume(struct vcpu *v)
{
bool_t debug_state;
@@ -767,6 +772,18 @@ void vmx_do_resume(struct vcpu *v)
}
else
{
+        /* For a pass-through domain, the guest PCI-E device driver may use
+         * non-snooped I/O and rely on an explicit WBINVD or CLFLUSH to make
+         * a RAM region consistent. If the VCPU migrates to another physical
+         * CPU before that WBINVD/CLFLUSH, we must flush the old CPU's cache
+         * to maintain data consistency.
+         */
+ if ( !list_empty(&(domain_hvm_iommu(v->domain)->pdev_list)) )
+ {
+ int cpu = v->arch.hvm_vmx.active_cpu;
+ if ( cpu != -1 )
+ on_selected_cpus(cpumask_of_cpu(cpu), flush_cache, NULL, 1, 1);
+ }
+
vmx_clear_vmcs(v);
vmx_load_vmcs(v);
hvm_migrate_timers(v);
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Oct 22 14:19:22 2007 +0100
@@ -50,6 +50,7 @@
#include <asm/hvm/vpt.h>
#include <public/hvm/save.h>
#include <asm/hvm/trace.h>
+#include <stdbool.h>
enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
@@ -2285,6 +2286,9 @@ static int vmx_do_msr_read(struct cpu_us
u64 msr_content = 0;
u32 ecx = regs->ecx, eax, edx;
struct vcpu *v = current;
+ int index;
+ u64 *var_range_base = (u64*)v->arch.hvm_vcpu.mtrr.var_ranges;
+ u64 *fixed_range_base = (u64*)v->arch.hvm_vcpu.mtrr.fixed_ranges;
HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
@@ -2304,6 +2308,32 @@ static int vmx_do_msr_read(struct cpu_us
break;
case MSR_IA32_APICBASE:
msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+ break;
+ case MSR_IA32_CR_PAT:
+ msr_content = v->arch.hvm_vcpu.pat_cr;
+ break;
+ case MSR_MTRRcap:
+ msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
+ break;
+ case MSR_MTRRdefType:
+ msr_content = v->arch.hvm_vcpu.mtrr.def_type
+ | (v->arch.hvm_vcpu.mtrr.enabled << 10);
+ break;
+ case MSR_MTRRfix64K_00000:
+ msr_content = fixed_range_base[0];
+ break;
+ case MSR_MTRRfix16K_80000:
+ case MSR_MTRRfix16K_A0000:
+ index = regs->ecx - MSR_MTRRfix16K_80000;
+ msr_content = fixed_range_base[index + 1];
+ break;
+ case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
+ index = regs->ecx - MSR_MTRRfix4K_C0000;
+ msr_content = fixed_range_base[index + 3];
+ break;
+ case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
+ index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
+ msr_content = var_range_base[index];
break;
case MSR_IA32_DEBUGCTLMSR:
if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0 )
@@ -2428,11 +2458,19 @@ void vmx_vlapic_msr_changed(struct vcpu
vmx_vmcs_exit(v);
}
+extern bool mtrr_var_range_msr_set(struct mtrr_state *v,
+ u32 msr, u64 msr_content);
+extern bool mtrr_fix_range_msr_set(struct mtrr_state *v,
+ int row, u64 msr_content);
+extern bool mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
+extern bool pat_msr_set(u64 *pat, u64 msr);
+
static int vmx_do_msr_write(struct cpu_user_regs *regs)
{
u32 ecx = regs->ecx;
u64 msr_content;
struct vcpu *v = current;
+ int index;
HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
ecx, (u32)regs->eax, (u32)regs->edx);
@@ -2459,6 +2497,38 @@ static int vmx_do_msr_write(struct cpu_u
case MSR_IA32_APICBASE:
vlapic_msr_set(vcpu_vlapic(v), msr_content);
break;
+ case MSR_IA32_CR_PAT:
+ if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
+ goto gp_fault;
+ break;
+ case MSR_MTRRdefType:
+ if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
+ goto gp_fault;
+ break;
+ case MSR_MTRRfix64K_00000:
+ if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
+ goto gp_fault;
+ break;
+ case MSR_MTRRfix16K_80000:
+ case MSR_MTRRfix16K_A0000:
+ index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
+ if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+ index, msr_content) )
+ goto gp_fault;
+ break;
+ case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
+ index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
+ if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+ index, msr_content) )
+ goto gp_fault;
+ break;
+ case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
+ if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+ regs->ecx, msr_content) )
+ goto gp_fault;
+ break;
+ case MSR_MTRRcap:
+ goto gp_fault;
case MSR_IA32_DEBUGCTLMSR: {
int i, rc = 0;
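
The +1 and +3 offsets in the fixed-range MSR cases above come from treating fixed_ranges[] as eleven 64-bit rows: row 0 for MTRRfix64K_00000, rows 1-2 for the 16KB MSRs, rows 3-10 for the 4KB MSRs. A minimal sketch of that MSR-to-row mapping, using the architectural MSR numbers (0x250, 0x258-0x259, 0x268-0x26f):

#include <stdio.h>

#define MSR_MTRRfix64K_00000 0x250
#define MSR_MTRRfix16K_80000 0x258
#define MSR_MTRRfix4K_C0000  0x268
#define MSR_MTRRfix4K_F8000  0x26f

/* Map a fixed-range MTRR MSR to its row in fixed_ranges[] (11 x 8 bytes). */
static int fixed_msr_to_row(unsigned int msr)
{
    if ( msr == MSR_MTRRfix64K_00000 )
        return 0;
    if ( msr >= MSR_MTRRfix16K_80000 && msr <= MSR_MTRRfix16K_80000 + 1 )
        return msr - MSR_MTRRfix16K_80000 + 1;      /* rows 1-2 */
    if ( msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000 )
        return msr - MSR_MTRRfix4K_C0000 + 3;       /* rows 3-10 */
    return -1;
}

int main(void)
{
    /* Prints "0 2 10". */
    printf("%d %d %d\n",
           fixed_msr_to_row(0x250),
           fixed_msr_to_row(0x259),
           fixed_msr_to_row(0x26f));
    return 0;
}
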
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/mm.c Mon Oct 22 14:19:22 2007 +0100
@@ -3115,6 +3115,15 @@ long arch_memory_op(int op, XEN_GUEST_HA
case XENMAPSPACE_shared_info:
if ( xatp.idx == 0 )
mfn = virt_to_mfn(d->shared_info);
+        /* XXX: this assumes we are called after the E820 table has been
+         * built, since the E820 table is needed to initialise the MTRRs.
+         */
+ if ( is_hvm_domain(d) ) {
+ extern void init_mtrr_in_hyper(struct vcpu *);
+ struct vcpu *vs;
+ for_each_vcpu(d, vs)
+ init_mtrr_in_hyper(vs);
+ }
break;
case XENMAPSPACE_grant_table:
spin_lock(&d->grant_table->lock);
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/mm/shadow/common.c Mon Oct 22 14:19:22 2007 +0100
@@ -880,6 +880,14 @@ static void shadow_blow_tables(struct do
flush_tlb_mask(d->domain_dirty_cpumask);
}
+void shadow_blow_tables_per_domain(struct domain *d)
+{
+ if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL ) {
+ shadow_lock(d);
+ shadow_blow_tables(d);
+ shadow_unlock(d);
+ }
+}
#ifndef NDEBUG
/* Blow all shadows of all shadowed domains: this can be used to cause the
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c Mon Oct 22 14:19:22 2007 +0100
@@ -33,6 +33,7 @@
#include <asm/shadow.h>
#include <asm/flushtlb.h>
#include <asm/hvm/hvm.h>
+#include <asm/mtrr.h>
#include "private.h"
#include "types.h"
@@ -267,6 +268,11 @@ guest_walk_tables(struct vcpu *v, unsign
* us reflect l2 changes later without touching the l1s. */
int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
_PAGE_ACCESSED|_PAGE_DIRTY);
+        /* Propagate PWT and PCD to level 1 for PSE mappings. */
+ if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PWT) )
+ flags |= _PAGE_PWT;
+ if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PCD) )
+ flags |= _PAGE_PCD;
/* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
* of the level 1 */
if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) )
@@ -614,7 +620,12 @@ shadow_l4_index(mfn_t *smfn, u32 guest_i
#endif // GUEST_PAGING_LEVELS >= 4
-
+extern u32 get_pat_flags(struct vcpu *v,
+ u32 gl1e_flags,
+ paddr_t gpaddr,
+ paddr_t spaddr);
+
+unsigned char pat_type_2_pte_flags(unsigned char pat_type);
/**************************************************************************/
/* Function which computes shadow entries from their corresponding guest
* entries. This is the "heart" of the shadow code. It operates using
@@ -703,6 +714,17 @@ _sh_propagate(struct vcpu *v,
pass_thru_flags |= _PAGE_NX_BIT;
sflags = gflags & pass_thru_flags;
+ /* Only change memory caching type for pass-through domain */
+ if ( (level == 1) && !list_empty(&(domain_hvm_iommu(d)->pdev_list)) ) {
+ if ( v->domain->arch.hvm_domain.is_in_uc_mode )
+ sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
+ else
+ sflags |= get_pat_flags(v,
+ gflags,
+ guest_l1e_get_paddr(*gp),
+ mfn_x(target_mfn) << PAGE_SHIFT);
+ }
+
// Set the A&D bits for higher level shadows.
// Higher level entries do not, strictly speaking, have dirty bits, but
// since we use shadow linear tables, each of these entries may, at some
@@ -773,10 +795,6 @@ _sh_propagate(struct vcpu *v,
{
sflags |= _PAGE_USER;
}
-
- /* MMIO addresses should never be cached */
- if ( p2m_is_mmio(p2mt) )
- sflags |= _PAGE_PCD;
*sp = shadow_l1e_from_mfn(target_mfn, sflags);
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/cpufeature.h Mon Oct 22 14:19:22 2007 +0100
@@ -128,6 +128,7 @@
#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE)
#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
+#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP)
#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR)
@@ -152,6 +153,7 @@
#define cpu_has_tsc 1
#define cpu_has_pae 1
#define cpu_has_pge 1
+#define cpu_has_pat 1
#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
#define cpu_has_sep 0
#define cpu_has_mtrr 1
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/hvm/domain.h Mon Oct 22 14:19:22 2007 +0100
@@ -61,6 +61,12 @@ struct hvm_domain {
unsigned long vmx_apic_access_mfn;
+    /* is_in_uc_mode is set if any VCPU of this domain is in no-fill cache
+     * mode, or if MTRR/PAT state differs between VCPUs.
+     */
+ spinlock_t uc_lock;
+ bool_t is_in_uc_mode;
+
/* Pass-through */
struct hvm_iommu hvm_iommu;
};
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/hvm/support.h Mon Oct 22 14:19:22 2007 +0100
@@ -64,6 +64,7 @@ static inline vcpu_iodata_t *get_ioreq(s
#define DBG_LEVEL_VLAPIC_INTERRUPT (1 << 8)
#define DBG_LEVEL_IOAPIC (1 << 9)
#define DBG_LEVEL_HCALL (1 << 10)
+#define DBG_LEVEL_MSR (1 << 11)
extern unsigned int opt_hvm_debug_level;
#define HVM_DBG_LOG(level, _f, _a...) \
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/hvm/vcpu.h Mon Oct 22 14:19:22 2007 +0100
@@ -24,6 +24,7 @@
#include <asm/hvm/vlapic.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/svm/vmcb.h>
+#include <asm/mtrr.h>
#define HVM_VCPU_INIT_SIPI_SIPI_STATE_NORM 0
#define HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI 1
@@ -62,6 +63,12 @@ struct hvm_vcpu {
struct arch_vmx_struct vmx;
struct arch_svm_struct svm;
} u;
+
+ struct mtrr_state mtrr;
+ u64 pat_cr;
+
+ /* Which cache mode is this VCPU in (CR0:CD/NW)? */
+ u8 cache_mode;
};
#define ARCH_HVM_IO_WAIT 1 /* Waiting for I/O completion */
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/msr-index.h Mon Oct 22 14:19:22 2007 +0100
@@ -68,6 +68,25 @@
#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de
+
+#define MSR_IA32_MTRR_PHYSBASE0 0x00000200
+#define MSR_IA32_MTRR_PHYSMASK0 0x00000201
+#define MSR_IA32_MTRR_PHYSBASE1 0x00000202
+#define MSR_IA32_MTRR_PHYSMASK1 0x00000203
+#define MSR_IA32_MTRR_PHYSBASE2 0x00000204
+#define MSR_IA32_MTRR_PHYSMASK2 0x00000205
+#define MSR_IA32_MTRR_PHYSBASE3 0x00000206
+#define MSR_IA32_MTRR_PHYSMASK3 0x00000207
+#define MSR_IA32_MTRR_PHYSBASE4 0x00000208
+#define MSR_IA32_MTRR_PHYSMASK4 0x00000209
+#define MSR_IA32_MTRR_PHYSBASE5 0x0000020a
+#define MSR_IA32_MTRR_PHYSMASK5 0x0000020b
+#define MSR_IA32_MTRR_PHYSBASE6 0x0000020c
+#define MSR_IA32_MTRR_PHYSMASK6 0x0000020d
+#define MSR_IA32_MTRR_PHYSBASE7 0x0000020e
+#define MSR_IA32_MTRR_PHYSMASK7 0x0000020f
+
+#define MSR_IA32_CR_PAT 0x00000277
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/mtrr.h
--- a/xen/include/asm-x86/mtrr.h Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/mtrr.h Mon Oct 22 14:19:22 2007 +0100
@@ -10,6 +10,55 @@
#define MTRR_TYPE_WRPROT 5
#define MTRR_TYPE_WRBACK 6
#define MTRR_NUM_TYPES 7
+#define MEMORY_NUM_TYPES MTRR_NUM_TYPES
+
+#define MTRR_PHYSMASK_VALID_BIT 11
+#define MTRR_PHYSMASK_SHIFT 12
+
+#define MTRR_PHYSBASE_TYPE_MASK 0xff /* lowest 8 bits */
+#define MTRR_PHYSBASE_SHIFT 12
+#define MTRR_VCNT 8
+
+#define NORMAL_CACHE_MODE 0
+#define NO_FILL_CACHE_MODE 2
+
+enum {
+ PAT_TYPE_UNCACHABLE=0,
+ PAT_TYPE_WRCOMB=1,
+ PAT_TYPE_RESERVED=2,
+ PAT_TYPE_WRTHROUGH=4,
+ PAT_TYPE_WRPROT=5,
+ PAT_TYPE_WRBACK=6,
+ PAT_TYPE_UC_MINUS=7,
+ PAT_TYPE_NUMS
+};
+
+#define INVALID_MEM_TYPE PAT_TYPE_NUMS
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+ an 8 bit field: */
+typedef u8 mtrr_type;
+
+struct mtrr_var_range {
+ u32 base_lo;
+ u32 base_hi;
+ u32 mask_lo;
+ u32 mask_hi;
+};
+
+#define NUM_FIXED_RANGES 88
+struct mtrr_state {
+ struct mtrr_var_range *var_ranges;
+ mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+ unsigned char enabled;
+ unsigned char have_fixed;
+ mtrr_type def_type;
+
+ u64 mtrr_cap;
+    /* Whether any ranges in the variable MSRs overlap (0 = no overlap). */
+ bool_t overlapped;
+ bool_t is_initialized;
+};
extern void mtrr_save_fixed_ranges(void *);
extern void mtrr_save_state(void);