xen-changelog

[Xen-changelog] [xen-unstable] hvm/x86: MTRR/PAT virtualisation.

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] hvm/x86: MTRR/PAT virtualisation.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 22 Oct 2007 14:50:15 -0700
Delivery-date: Mon, 22 Oct 2007 14:51:03 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir@xxxxxxxxxxxxx>
# Date 1193059162 -3600
# Node ID 3e7c86602c70d2d587aafbef957d644f48cd6da9
# Parent  42d8dadb5864eac0140262b9475a7b1ed150b607
hvm/x86: MTRR/PAT virtualisation.
Signed-off-by: Disheng Su <disheng.su@xxxxxxxxx>
---
 xen/arch/x86/cpu/common.c         |    9 
 xen/arch/x86/cpu/mtrr/generic.c   |   16 
 xen/arch/x86/cpu/mtrr/main.c      |    7 
 xen/arch/x86/cpu/mtrr/mtrr.h      |   11 
 xen/arch/x86/hvm/Makefile         |    1 
 xen/arch/x86/hvm/hvm.c            |   88 ++++
 xen/arch/x86/hvm/mtrr.c           |  687 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/vmx/vmcs.c       |   17 
 xen/arch/x86/hvm/vmx/vmx.c        |   70 +++
 xen/arch/x86/mm.c                 |    9 
 xen/arch/x86/mm/shadow/common.c   |    8 
 xen/arch/x86/mm/shadow/multi.c    |   28 +
 xen/include/asm-x86/cpufeature.h  |    2 
 xen/include/asm-x86/hvm/domain.h  |    6 
 xen/include/asm-x86/hvm/support.h |    1 
 xen/include/asm-x86/hvm/vcpu.h    |    7 
 xen/include/asm-x86/msr-index.h   |   19 +
 xen/include/asm-x86/mtrr.h        |   49 ++
 18 files changed, 999 insertions(+), 36 deletions(-)

diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/cpu/common.c Mon Oct 22 14:19:22 2007 +0100
@@ -22,6 +22,12 @@ static int disable_x86_serial_nr __devin
 static int disable_x86_serial_nr __devinitdata = 0;
 
 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+
+/*
+ * Default host IA32_CR_PAT value to cover all memory types.
+ * BIOS usually sets it to 0x07040600070406.
+ */
+u64 host_pat = 0x050100070406;
 
 static void default_init(struct cpuinfo_x86 * c)
 {
@@ -557,6 +563,9 @@ void __devinit cpu_init(void)
        }
        printk(KERN_INFO "Initializing CPU#%d\n", cpu);
 
+       if (cpu_has_pat)
+               wrmsrl(MSR_IA32_CR_PAT, host_pat);
+
        *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
        *(unsigned long  *)(&gdt_load[2]) = GDT_VIRT_START(current);
        asm volatile ( "lgdt %0" : "=m" (gdt_load) );
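
The hunk above programs a default IA32_CR_PAT on each CPU during cpu_init(). As a minimal standalone sketch (plain C, not part of the changeset), the PAT MSR packs eight page-attribute fields one byte per entry, which is the same layout the pat_cr_2_paf() macro in hvm/mtrr.c below relies on:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Value taken from the host_pat default in the hunk above. */
    uint64_t pat = 0x050100070406ULL;
    int i;

    /* Each PAT entry PAn occupies bits [8n+7:8n] of the MSR. */
    for (i = 0; i < 8; i++)
        printf("PA%d = 0x%02x\n", i, (unsigned)((pat >> (i * 8)) & 0xff));

    return 0;
}

With this default, PA0/PA1/PA2 decode to WB/WT/UC- and PA4/PA5 to WC/WP, so every memory type needed by the MTRR/PAT combination logic is reachable from some PAT entry.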
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/cpu/mtrr/generic.c
--- a/xen/arch/x86/cpu/mtrr/generic.c   Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/cpu/mtrr/generic.c   Mon Oct 22 14:19:22 2007 +0100
@@ -11,14 +11,6 @@
 #include <asm/cpufeature.h>
 #include "mtrr.h"
 
-struct mtrr_state {
-       struct mtrr_var_range *var_ranges;
-       mtrr_type fixed_ranges[NUM_FIXED_RANGES];
-       unsigned char enabled;
-       unsigned char have_fixed;
-       mtrr_type def_type;
-};
-
 struct fixed_range_block {
        int base_msr; /* start address of an MTRR block */
        int ranges;   /* number of MTRRs in this block  */
@@ -32,7 +24,7 @@ static struct fixed_range_block fixed_ra
 };
 
 static unsigned long smp_changes_mask;
-static struct mtrr_state mtrr_state = {};
+struct mtrr_state mtrr_state = {};
 
 /*  Get the MSR pair relating to a var range  */
 static void
@@ -88,6 +80,9 @@ void __init get_mtrr_state(void)
        rdmsr(MTRRdefType_MSR, lo, dummy);
        mtrr_state.def_type = (lo & 0xff);
        mtrr_state.enabled = (lo & 0xc00) >> 10;
+
+       /* Store mtrr_cap for HVM MTRR virtualisation. */
+       rdmsrl(MTRRcap_MSR, mtrr_state.mtrr_cap);
 }
 
 /*  Some BIOS's are fucked and don't set all MTRRs the same!  */
@@ -107,6 +102,7 @@ void __init mtrr_state_warn(void)
        printk(KERN_INFO "mtrr: corrected configuration.\n");
 }
 
+extern bool_t is_var_mtrr_overlapped(struct mtrr_state *m);
 /* Doesn't attempt to pass an error out to MTRR users
    because it's quite complicated in some cases and probably not
    worth it because the best error handling is to ignore it. */
@@ -116,6 +112,8 @@ void mtrr_wrmsr(unsigned msr, unsigned a
                printk(KERN_ERR
                        "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
                        smp_processor_id(), msr, a, b);
+       /* Cache overlap status for efficient HVM MTRR virtualisation. */
+       mtrr_state.overlapped = is_var_mtrr_overlapped(&mtrr_state);
 }
 
 /**
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/cpu/mtrr/main.c
--- a/xen/arch/x86/cpu/mtrr/main.c      Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/cpu/mtrr/main.c      Mon Oct 22 14:19:22 2007 +0100
@@ -588,6 +588,8 @@ struct mtrr_value {
        unsigned long   lsize;
 };
 
+extern void global_init_mtrr_pat(void);
+
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
  *
@@ -654,8 +656,11 @@ void __init mtrr_bp_init(void)
        if (mtrr_if) {
                set_num_var_ranges();
                init_table();
-               if (use_intel())
+               if (use_intel()) {
                        get_mtrr_state();
+                       /* Initialize some global data for MTRR/PAT virtualization. */
+                       global_init_mtrr_pat();
+               }
        }
 }
 
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/cpu/mtrr/mtrr.h
--- a/xen/arch/x86/cpu/mtrr/mtrr.h      Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/cpu/mtrr/mtrr.h      Mon Oct 22 14:19:22 2007 +0100
@@ -13,7 +13,6 @@
 #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
 #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
 
-#define NUM_FIXED_RANGES 88
 #define MTRRfix64K_00000_MSR 0x250
 #define MTRRfix16K_80000_MSR 0x258
 #define MTRRfix16K_A0000_MSR 0x259
@@ -30,9 +29,6 @@
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
 #define MTRR_CHANGE_MASK_DEFTYPE   0x04
 
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
-   an 8 bit field: */
-typedef u8 mtrr_type;
 
 struct mtrr_ops {
        u32     vendor;
@@ -69,13 +65,6 @@ struct set_mtrr_context {
        u32 ccr3;
 };
 
-struct mtrr_var_range {
-       u32 base_lo;
-       u32 base_hi;
-       u32 mask_lo;
-       u32 mask_hi;
-};
-
 void set_mtrr_done(struct set_mtrr_context *ctxt);
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/hvm/Makefile
--- a/xen/arch/x86/hvm/Makefile Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/hvm/Makefile Mon Oct 22 14:19:22 2007 +0100
@@ -7,6 +7,7 @@ obj-y += intercept.o
 obj-y += intercept.o
 obj-y += io.o
 obj-y += irq.o
+obj-y += mtrr.o
 obj-y += platform.o
 obj-y += pmtimer.o
 obj-y += rtc.o
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Mon Oct 22 14:19:22 2007 +0100
@@ -226,6 +226,7 @@ int hvm_domain_initialise(struct domain 
 
     spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
+    spin_lock_init(&d->arch.hvm_domain.uc_lock);
 
     rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
     if ( rc != 0 )
@@ -417,27 +418,22 @@ HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_
 HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
                           1, HVMSR_PER_VCPU);
 
+extern int reset_vmsr(struct mtrr_state *m, u64 *p);
+
 int hvm_vcpu_initialise(struct vcpu *v)
 {
     int rc;
 
     if ( (rc = vlapic_init(v)) != 0 )
-        return rc;
+        goto fail1;
 
     if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
-    {
-        vlapic_destroy(v);
-        return rc;
-    }
+        goto fail2;
 
     /* Create ioreq event channel. */
     rc = alloc_unbound_xen_event_channel(v, 0);
     if ( rc < 0 )
-    {
-        hvm_funcs.vcpu_destroy(v);
-        vlapic_destroy(v);
-        return rc;
-    }
+        goto fail3;
 
     /* Register ioreq event channel. */
     v->arch.hvm_vcpu.xen_port = rc;
@@ -448,6 +444,10 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
     spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
     INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
+
+    rc = reset_vmsr(&v->arch.hvm_vcpu.mtrr, &v->arch.hvm_vcpu.pat_cr);
+    if ( rc != 0 )
+        goto fail3;
 
     v->arch.guest_context.user_regs.eflags = 2;
 
@@ -468,6 +468,13 @@ int hvm_vcpu_initialise(struct vcpu *v)
     }
 
     return 0;
+
+ fail3:
+    hvm_funcs.vcpu_destroy(v);
+ fail2:
+    vlapic_destroy(v);
+ fail1:
+    return rc;
 }
 
 void hvm_vcpu_destroy(struct vcpu *v)
@@ -604,6 +611,32 @@ int hvm_set_efer(uint64_t value)
     hvm_update_guest_efer(v);
 
     return 1;
+}
+
+extern void shadow_blow_tables_per_domain(struct domain *d);
+extern bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs);
+
+/* Exit UC mode only if all VCPUs agree on MTRR/PAT and are not in no_fill. */
+static bool_t domain_exit_uc_mode(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    struct vcpu *vs;
+
+    for_each_vcpu ( d, vs )
+    {
+        if ( (vs == v) || !vs->is_initialised )
+            continue;
+        if ( (vs->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) ||
+             mtrr_pat_not_equal(vs, v) )
+            return 0;
+    }
+
+    return 1;
+}
+
+static void local_flush_cache(void *info)
+{
+    wbinvd();
 }
 
 int hvm_set_cr0(unsigned long value)
@@ -683,6 +716,41 @@ int hvm_set_cr0(unsigned long value)
         {
             put_page(pagetable_get_page(v->arch.guest_table));
             v->arch.guest_table = pagetable_null();
+        }
+    }
+
+    if ( !list_empty(&(domain_hvm_iommu(v->domain)->pdev_list)) )
+    {
+        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
+        {
+            /* Entering no fill cache mode. */
+            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
+
+            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
+            {
+                /* Flush physical caches. */
+                on_each_cpu(local_flush_cache, NULL, 1, 1);
+                /* Shadow pagetables must recognise UC mode. */
+                v->domain->arch.hvm_domain.is_in_uc_mode = 1;
+                shadow_blow_tables_per_domain(v->domain);
+            }
+            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
+        }
+        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
+                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
+        {
+            /* Exit from no fill cache mode. */
+            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
+
+            if ( domain_exit_uc_mode(v) )
+            {
+                /* Shadow pagetables must recognise normal caching mode. */
+                v->domain->arch.hvm_domain.is_in_uc_mode = 0;
+                shadow_blow_tables_per_domain(v->domain);
+            }
+            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
         }
     }
 
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/hvm/mtrr.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/mtrr.c   Mon Oct 22 14:19:22 2007 +0100
@@ -0,0 +1,687 @@
+/*
+ * mtrr.c: MTRR/PAT virtualization
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <public/hvm/e820.h>
+#include <xen/types.h>
+#include <asm/e820.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
+#include <xen/domain_page.h>
+#include <stdbool.h>
+#include <asm/mtrr.h>
+#include <asm/hvm/support.h>
+
+/* Xen holds the native MTRR MSRs */
+extern struct mtrr_state mtrr_state;
+
+static u64 phys_base_msr_mask;
+static u64 phys_mask_msr_mask;
+static u32 size_or_mask;
+static u32 size_and_mask;
+
+static void init_pat_entry_tbl(u64 pat);
+static void init_mtrr_epat_tbl(void);
+static unsigned char get_mtrr_type(struct mtrr_state *m, paddr_t pa);
+/* get page attribute fields (PAn) from PAT MSR */
+#define pat_cr_2_paf(pat_cr,n)  ((((u64)pat_cr) >> ((n)<<3)) & 0xff)
+/* pat entry to PTE flags (PAT, PCD, PWT bits) */
+static unsigned char pat_entry_2_pte_flags[8] = {
+    0,           _PAGE_PWT,
+    _PAGE_PCD,   _PAGE_PCD | _PAGE_PWT,
+    _PAGE_PAT,   _PAGE_PAT | _PAGE_PWT,
+    _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT };
+
+/* effective mm type lookup table, according to MTRR and PAT */
+static u8 mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
+/********PAT(UC,WC,RS,RS,WT,WP,WB,UC-)*/
+/* RS means reserved type(2,3), and type is hardcoded here */
+ /*MTRR(UC):(UC,WC,RS,RS,UC,UC,UC,UC)*/
+            {0, 1, 2, 2, 0, 0, 0, 0},
+ /*MTRR(WC):(UC,WC,RS,RS,UC,UC,WC,WC)*/
+            {0, 1, 2, 2, 0, 0, 1, 1},
+ /*MTRR(RS):(RS,RS,RS,RS,RS,RS,RS,RS)*/
+            {2, 2, 2, 2, 2, 2, 2, 2},
+ /*MTRR(RS):(RS,RS,RS,RS,RS,RS,RS,RS)*/
+            {2, 2, 2, 2, 2, 2, 2, 2},
+ /*MTRR(WT):(UC,WC,RS,RS,WT,WP,WT,UC)*/
+            {0, 1, 2, 2, 4, 5, 4, 0},
+ /*MTRR(WP):(UC,WC,RS,RS,WT,WP,WP,WC)*/
+            {0, 1, 2, 2, 4, 5, 5, 1},
+ /*MTRR(WB):(UC,WC,RS,RS,WT,WP,WB,UC)*/
+            {0, 1, 2, 2, 4, 5, 6, 0}
+};
+
+/* reverse lookup table, to find a pat type according to MTRR and effective
+ * memory type. This table is dynamically generated
+ */
+static u8 mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES];
+
+/* lookup table for PAT entry of a given PAT value in host pat */
+static u8 pat_entry_tbl[PAT_TYPE_NUMS];
+
+static void get_mtrr_range(uint64_t base_msr, uint64_t mask_msr,
+                           uint64_t *base, uint64_t *end)
+{
+    uint32_t mask_lo = (uint32_t)mask_msr;
+    uint32_t mask_hi = (uint32_t)(mask_msr >> 32);
+    uint32_t base_lo = (uint32_t)base_msr;
+    uint32_t base_hi = (uint32_t)(base_msr >> 32);
+    uint32_t size;
+
+    if ( (mask_lo & 0x800) == 0 )
+    {
+        /* Invalid (i.e. free) range */
+        *base = 0;
+        *end = 0;
+        return;
+    }
+
+    /* Work out the shifted address mask. */
+    mask_lo = (size_or_mask | (mask_hi << (32 - PAGE_SHIFT)) |
+               (mask_lo >> PAGE_SHIFT));
+
+    /* This works correctly if size is a power of two (a contiguous range). */
+    size = -mask_lo;
+    *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+    *end = *base + size - 1;
+}
+
+bool_t is_var_mtrr_overlapped(struct mtrr_state *m)
+{
+    int seg, i;
+    uint64_t phys_base, phys_mask, phys_base_pre, phys_mask_pre;
+    uint64_t base_pre, end_pre, base, end;
+    uint8_t num_var_ranges = (u8)m->mtrr_cap;
+
+    for ( i = 0; i < num_var_ranges; i++ )
+    {
+        phys_base_pre = ((u64*)m->var_ranges)[i*2];
+        phys_mask_pre = ((u64*)m->var_ranges)[i*2 + 1];
+
+        get_mtrr_range(phys_base_pre, phys_mask_pre,
+                        &base_pre, &end_pre);
+
+        for ( seg = i + 1; seg < num_var_ranges; seg ++ )
+        {
+            phys_base = ((u64*)m->var_ranges)[seg*2];
+            phys_mask = ((u64*)m->var_ranges)[seg*2 + 1];
+
+            get_mtrr_range(phys_base, phys_mask,
+                            &base, &end);
+
+            if ( ((base_pre != end_pre) && (base != end))
+                 || ((base >= base_pre) && (base <= end_pre))
+                 || ((end >= base_pre) && (end <= end_pre))
+                 || ((base_pre >= base) && (base_pre <= end))
+                 || ((end_pre >= base) && (end_pre <= end)) )
+            {
+                /* MTRR is overlapped. */
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
+
+/* reserved mtrr for guest OS */
+#define RESERVED_MTRR 2
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+bool mtrr_var_range_msr_set(struct mtrr_state *m, u32 msr, u64 msr_content);
+bool mtrr_def_type_msr_set(struct mtrr_state *m, u64 msr_content);
+bool mtrr_fix_range_msr_set(struct mtrr_state *m, int row, u64 msr_content);
+static void set_var_mtrr(unsigned int reg, struct mtrr_state *m,
+                    unsigned int base, unsigned int size,
+                    unsigned int type)
+{
+    struct mtrr_var_range *vr;
+
+    vr = &m->var_ranges[reg];
+
+    if ( size == 0 )
+    {
+        /* The invalid bit is kept in the mask, so we simply clear the
+         * relevant mask register to disable a range.
+         */
+        mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg), 0);
+    }
+    else
+    {
+        vr->base_lo = base << PAGE_SHIFT | type;
+        vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
+        vr->mask_lo = -size << PAGE_SHIFT | 0x800;
+        vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
+
+        mtrr_var_range_msr_set(m, MTRRphysBase_MSR(reg), *(unsigned long *)vr);
+        mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg),
+                               *((unsigned long *)vr + 1));
+    }
+}
+/* From Intel Vol. III Section 10.11.4, range size and base alignment have the
+ * following requirements:
+ * 1. The range size must be 2^N bytes for N >= 12 (i.e. 4KB minimum).
+ * 2. The base address must be 2^N aligned, where N is the same as in the
+ *    previous requirement. So an 8K range must be 8K-aligned, not 4K-aligned.
+ */
+static unsigned int range_to_mtrr(unsigned int reg, struct mtrr_state *m,
+    unsigned int range_startk, unsigned int range_sizek, unsigned char type)
+{
+    if ( !range_sizek || (reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR)) )
+        return reg;
+
+    while ( range_sizek )
+    {
+        unsigned int max_align, align, sizek;
+
+        max_align = (range_startk == 0) ? 32 : ffs(range_startk);
+        align = min_t(unsigned int, fls(range_sizek), max_align);
+        sizek = 1 << (align - 1);
+
+        set_var_mtrr(reg++, m, range_startk, sizek, type);
+
+        range_startk += sizek;
+        range_sizek  -= sizek;
+
+        if ( reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR) )
+            break;
+    }
+
+    return reg;
+}
+
+static void setup_fixed_mtrrs(struct vcpu *v)
+{
+    uint64_t content;
+    int i;
+    struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;
+
+    /* 1. Map (0~A0000) as WB */
+    content = 0x0606060606060606ull;
+    mtrr_fix_range_msr_set(m, 0, content);
+    mtrr_fix_range_msr_set(m, 1, content);
+    /* 2. Map VRAM(A0000~C0000) as WC */
+    content = 0x0101010101010101;
+    mtrr_fix_range_msr_set(m, 2, content);
+    /* 3. Map (C0000~100000) as UC */
+    for ( i = 3; i < 11; i++)
+        mtrr_fix_range_msr_set(m, i, 0);
+}
+
+static void setup_var_mtrrs(struct vcpu *v)
+{
+    p2m_type_t p2m;
+    unsigned long e820_mfn;
+    char *p = NULL;
+    unsigned char nr = 0;
+    int i;
+    unsigned int reg = 0;
+    unsigned long size = 0;
+    unsigned long addr = 0;
+    struct e820entry *e820_table;
+
+    e820_mfn = mfn_x(gfn_to_mfn(v->domain,
+                    HVM_E820_PAGE >> PAGE_SHIFT, &p2m));
+
+    p = (char *)map_domain_page(e820_mfn);
+
+    nr = *(unsigned char*)(p + HVM_E820_NR_OFFSET);
+    e820_table = (struct e820entry*)(p + HVM_E820_OFFSET);
+    /* search E820 table, set MTRR for RAM */
+    for ( i = 0; i < nr; i++)
+    {
+        if ( (e820_table[i].addr >= 0x100000) &&
+             (e820_table[i].type == E820_RAM) )
+        {
+            if ( e820_table[i].addr == 0x100000 )
+            {
+                size = e820_table[i].size + 0x100000 + PAGE_SIZE * 3;
+                addr = 0;
+            }
+            else
+            {
+                /* Larger than 4G */
+                size = e820_table[i].size;
+                addr = e820_table[i].addr;
+            }
+
+            reg = range_to_mtrr(reg, &v->arch.hvm_vcpu.mtrr,
+                                addr >> PAGE_SHIFT, size >> PAGE_SHIFT,
+                                MTRR_TYPE_WRBACK);
+        }
+    }
+}
+
+void init_mtrr_in_hyper(struct vcpu *v)
+{
+    /* TODO: MTRRs should be initialized by the BIOS or elsewhere;
+     * as a workaround we do it here.
+     */
+    if ( v->arch.hvm_vcpu.mtrr.is_initialized )
+        return;
+
+    setup_fixed_mtrrs(v);
+    setup_var_mtrrs(v);
+    /* enable mtrr */
+    mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, 0xc00);
+
+    v->arch.hvm_vcpu.mtrr.is_initialized = 1;
+}
+
+static int reset_mtrr(struct mtrr_state *m)
+{
+    m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT);
+    if ( m->var_ranges == NULL )
+        return -ENOMEM;
+    memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range));
+    memset(m->fixed_ranges, 0, sizeof(m->fixed_ranges));
+    m->enabled = 0;
+    m->def_type = 0;/*mtrr is disabled*/
+    m->mtrr_cap = (0x5<<8)|MTRR_VCNT;/*wc,fix enabled, and vcnt=8*/
+    m->overlapped = 0;
+    return 0;
+}
+
+/* init global variables for MTRR and PAT */
+void global_init_mtrr_pat(void)
+{
+    extern u64 host_pat;
+    u32 phys_addr;
+
+    init_mtrr_epat_tbl();
+    init_pat_entry_tbl(host_pat);
+    /* Get max physical address, set some global variable */
+    if ( cpuid_eax(0x80000000) < 0x80000008 )
+        phys_addr = 36;
+    else
+        phys_addr = cpuid_eax(0x80000008);
+
+    phys_base_msr_mask = ~((((u64)1) << phys_addr) - 1) | 0xf00UL;
+    phys_mask_msr_mask = ~((((u64)1) << phys_addr) - 1) | 0x7ffUL;
+
+    size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
+    size_and_mask = ~size_or_mask & 0xfff00000;
+}
+
+static void init_pat_entry_tbl(u64 pat)
+{
+    int i, j;
+
+    memset(&pat_entry_tbl, INVALID_MEM_TYPE,
+           PAT_TYPE_NUMS * sizeof(pat_entry_tbl[0]));
+
+    for ( i = 0; i < PAT_TYPE_NUMS; i++ )
+    {
+        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
+        {
+            if ( pat_cr_2_paf(pat, j) == i )
+            {
+                pat_entry_tbl[i] = j;
+                break;
+            }
+        }
+    }
+}
+
+unsigned char pat_type_2_pte_flags(unsigned char pat_type)
+{
+    int pat_entry = pat_entry_tbl[pat_type];
+
+    /* INVALID_MEM_TYPE means no entry in the host PAT matches the given
+     * pat_type.  This cannot happen if the host PAT covers all PAT types.
+     */
+    if ( likely(pat_entry != INVALID_MEM_TYPE) )
+        return pat_entry_2_pte_flags[pat_entry];
+
+    return pat_entry_2_pte_flags[pat_entry_tbl[PAT_TYPE_UNCACHABLE]];
+}
+
+int reset_vmsr(struct mtrr_state *m, u64 *pat_ptr)
+{
+    int rc;
+
+    rc = reset_mtrr(m);
+    if ( rc != 0 )
+        return rc;
+
+    *pat_ptr = ( (u64)PAT_TYPE_WRBACK) |                /* PAT0: WB */
+        ( (u64)PAT_TYPE_WRTHROUGH << 8 ) |              /* PAT1: WT */
+        ( (u64)PAT_TYPE_UC_MINUS << 16 ) |              /* PAT2: UC- */
+        ( (u64)PAT_TYPE_UNCACHABLE << 24 ) |            /* PAT3: UC */
+        ( (u64)PAT_TYPE_WRBACK << 32 ) |                /* PAT4: WB */
+        ( (u64)PAT_TYPE_WRTHROUGH << 40 ) |             /* PAT5: WT */
+        ( (u64)PAT_TYPE_UC_MINUS << 48 ) |              /* PAT6: UC- */
+        ( (u64)PAT_TYPE_UNCACHABLE << 56 );             /* PAT7: UC */
+
+    return 0;
+}
+
+/*
+ * Get MTRR memory type for physical address pa.
+ */
+static unsigned char get_mtrr_type(struct mtrr_state *m, paddr_t pa)
+{
+   int    addr, seg, index;
+   u8     overlap_mtrr = 0;
+   u8     overlap_mtrr_pos = 0;
+   u64    phys_base;
+   u64    phys_mask;
+   u8     num_var_ranges = m->mtrr_cap & 0xff;
+
+   if ( unlikely(!(m->enabled & 0x2)) )
+       return MTRR_TYPE_UNCACHABLE;
+
+   if ( (pa < 0x100000) && (m->enabled & 1) )
+   {
+       /* Fixed-range MTRRs take effect. */
+       addr = (unsigned int) pa;
+       if ( addr < 0x80000 )
+       {
+           seg = (addr >> 16);
+           return m->fixed_ranges[seg];
+       }
+       else if ( addr < 0xc0000 )
+       {
+           seg = (addr - 0x80000) >> 14;
+           index = (seg >> 3) + 1;
+           seg &= 7;            /* select 0-7 segments */
+           return m->fixed_ranges[index*8 + seg];
+       }
+       else
+       {
+           /* 0xC0000 --- 0x100000 */
+           seg = (addr - 0xc0000) >> 12;
+           index = (seg >> 3) + 3;
+           seg &= 7;            /* select 0-7 segments */
+           return m->fixed_ranges[index*8 + seg];
+       }
+   }
+
+   /* Match with variable MTRRs. */
+   for ( seg = 0; seg < num_var_ranges; seg++ )
+   {
+       phys_base = ((u64*)m->var_ranges)[seg*2];
+       phys_mask = ((u64*)m->var_ranges)[seg*2 + 1];
+       if ( phys_mask & (1 << MTRR_PHYSMASK_VALID_BIT) )
+       {
+           if ( ((u64) pa & phys_mask) >> MTRR_PHYSMASK_SHIFT ==
+                (phys_base & phys_mask) >> MTRR_PHYSMASK_SHIFT )
+           {
+               if ( unlikely(m->overlapped) )
+               {
+                    overlap_mtrr |= 1 << (phys_base & MTRR_PHYSBASE_TYPE_MASK);
+                    overlap_mtrr_pos = phys_base & MTRR_PHYSBASE_TYPE_MASK;
+               }
+               else
+               {
+                   /* If no overlap, return the found one */
+                   return (phys_base & MTRR_PHYSBASE_TYPE_MASK);
+               }
+           }
+       }
+   }
+
+   /* Overlapped or not found. */
+   if ( unlikely(overlap_mtrr == 0) )
+       return m->def_type;
+
+   if ( likely(!(overlap_mtrr & ~( ((u8)1) << overlap_mtrr_pos ))) )
+       /* Covers both a single matching variable range and
+        * two or more matches with the same type.
+        */
+       return overlap_mtrr_pos;
+
+   if ( overlap_mtrr & 0x1 )
+       /* Two or more match, one is UC. */
+       return MTRR_TYPE_UNCACHABLE;
+   
+   if ( !(overlap_mtrr & 0xaf) )
+       /* Two or more match, WT and WB. */
+       return MTRR_TYPE_WRTHROUGH;
+
+   /* Behaviour is undefined, but return the last overlapped type. */
+   return overlap_mtrr_pos;
+}
+
+/*
+ * return the memory type from PAT.
+ * NOTE: valid only when paging is enabled.
+ *       Only 4K page PTE is supported now.
+ */
+static unsigned char page_pat_type(u64 pat_cr, unsigned long pte_flags)
+{
+    int pat_entry;
+
+    /* PCD/PWT -> bit 1/0 of PAT entry */
+    pat_entry = ( pte_flags >> 3 ) & 0x3;
+    /* PAT bits as bit 2 of PAT entry */
+    if ( pte_flags & _PAGE_PAT )
+        pat_entry |= 4;
+
+    return (unsigned char)pat_cr_2_paf(pat_cr, pat_entry);
+}
+
+/*
+ * Effective memory type for leaf page.
+ */
+static u8 effective_mm_type(
+        struct mtrr_state *m,
+        u64 pat,
+        paddr_t gpa,
+        unsigned long pte_flags)
+{
+    unsigned char mtrr_mtype, pat_value, effective;
+
+    mtrr_mtype = get_mtrr_type(m, gpa);
+
+    pat_value = page_pat_type(pat, pte_flags);
+
+    effective = mm_type_tbl[mtrr_mtype][pat_value];
+
+    return effective;
+}
+
+static void init_mtrr_epat_tbl(void)
+{
+    int i, j;
+    /* set default value to an invalid type, just for checking conflict */
+    memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
+
+    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
+    {
+        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
+        {
+            int tmp = mm_type_tbl[i][j];
+            if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) )
+                mtrr_epat_tbl[i][tmp] = j;
+        }
+    }
+}
+
+u32 get_pat_flags(struct vcpu *v,
+                  u32 gl1e_flags,
+                  paddr_t gpaddr,
+                  paddr_t spaddr)
+{
+    u8 guest_eff_mm_type;
+    u8 shadow_mtrr_type;
+    u8 pat_entry_value;
+    u64 pat = v->arch.hvm_vcpu.pat_cr;
+    struct mtrr_state *g = &v->arch.hvm_vcpu.mtrr;
+
+    /* 1. Get the effective memory type of guest physical address,
+     * with the pair of guest MTRR and PAT
+     */
+    guest_eff_mm_type = effective_mm_type(g, pat, gpaddr, gl1e_flags);
+    /* 2. Get the memory type of host physical address, with MTRR */
+    shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr);
+
+    /* 3. Find the memory type in PAT, with host MTRR memory type
+     * and guest effective memory type.
+     */
+    pat_entry_value = mtrr_epat_tbl[shadow_mtrr_type][guest_eff_mm_type];
+    /* If a conflict occurs (e.g. the host MTRR type is UC while the guest
+     * memory type is WB), use UC as the effective memory type.  Returning
+     * PAT_TYPE_UNCACHABLE here always yields an effective type of UC.
+     */
+    if ( pat_entry_value == INVALID_MEM_TYPE )
+    {
+        gdprintk(XENLOG_WARNING,
+                 "Conflict occurs for a given guest l1e flags:%x "
+                 "at %"PRIx64" (the effective mm type:%d), "
+                 "because the host mtrr type is:%d\n",
+                 gl1e_flags, (uint64_t)gpaddr, guest_eff_mm_type,
+                 shadow_mtrr_type);
+        pat_entry_value = PAT_TYPE_UNCACHABLE;
+    }
+    /* 4. Get the pte flags */
+    return pat_type_2_pte_flags(pat_entry_value);
+}
+
+/* Helper functions for setting MTRR/PAT. */
+bool pat_msr_set(u64 *pat, u64 msr_content)
+{
+    u8 *value = (u8*)&msr_content;
+    int i;
+
+    if ( *pat != msr_content )
+    {
+        for ( i = 0; i < 8; i++ )
+            if ( unlikely(!(value[i] == 0 || value[i] == 1 ||
+                            value[i] == 4 || value[i] == 5 ||
+                            value[i] == 6 || value[i] == 7)) )
+                return 0;
+
+        *pat = msr_content;
+    }
+
+    return 1;
+}
+
+bool mtrr_def_type_msr_set(struct mtrr_state *m, u64 msr_content)
+{
+    u8 def_type = msr_content & 0xff;
+    u8 enabled = (msr_content >> 10) & 0x3;
+
+    if ( unlikely(!(def_type == 0 || def_type == 1 || def_type == 4 ||
+                    def_type == 5 || def_type == 6)) )
+    {
+         HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid MTRR def type:%x\n", def_type);
+         return 0;
+    }
+
+    if ( unlikely(msr_content && (msr_content & ~0xcffUL)) )
+    {
+         HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
+                     msr_content);
+         return 0;
+    }
+
+    m->enabled = enabled;
+    m->def_type = def_type;
+
+    return 1;
+}
+
+bool mtrr_fix_range_msr_set(struct mtrr_state *m, int row, u64 msr_content)
+{
+    u64 *fixed_range_base = (u64 *)m->fixed_ranges;
+
+    if ( fixed_range_base[row] != msr_content )
+    {
+        u8 *range = (u8*)&msr_content;
+        int i, type;
+
+        for ( i = 0; i < 8; i++ )
+        {
+            type = range[i];
+            if ( unlikely(!(type == 0 || type == 1 ||
+                            type == 4 || type == 5 || type == 6)) )
+                return 0;
+        }
+
+        fixed_range_base[row] = msr_content;
+    }
+
+    return 1;
+}
+
+bool mtrr_var_range_msr_set(struct mtrr_state *m, u32 msr, u64 msr_content)
+{
+    u32 index;
+    u64 msr_mask;
+    u64 *var_range_base = (u64*)m->var_ranges;
+
+    index = msr - MSR_IA32_MTRR_PHYSBASE0;
+
+    if ( var_range_base[index] != msr_content )
+    {
+        u32 type = msr_content & 0xff;
+
+        msr_mask = (index & 1) ? phys_mask_msr_mask : phys_base_msr_mask;
+
+        if ( unlikely(!(type == 0 || type == 1 ||
+                        type == 4 || type == 5 || type == 6)) )
+            return 0;
+
+        if ( unlikely(msr_content && (msr_content & msr_mask)) )
+        {
+            HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
+                        msr_content);
+            return 0;
+        }
+
+        var_range_base[index] = msr_content;
+    }
+
+    m->overlapped = is_var_mtrr_overlapped(m);
+
+    return 1;
+}
+
+bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs)
+{
+    struct mtrr_state *md = &vd->arch.hvm_vcpu.mtrr;
+    struct mtrr_state *ms = &vs->arch.hvm_vcpu.mtrr;
+    int res;
+    u8 num_var_ranges = (u8)md->mtrr_cap;
+
+    /* Test fixed ranges. */
+    res = memcmp(md->fixed_ranges, ms->fixed_ranges,
+            NUM_FIXED_RANGES*sizeof(mtrr_type));
+    if ( res )
+        return 1;
+
+    /* Test var ranges. */
+    res = memcmp(md->var_ranges, ms->var_ranges,
+            num_var_ranges*sizeof(struct mtrr_var_range));
+    if ( res )
+        return 1;
+
+    /* Test default type MSR. */
+    if ( (md->def_type != ms->def_type)
+            && (md->enabled != ms->enabled) )
+        return 1;
+
+    /* Test PAT. */
+    if ( vd->arch.hvm_vcpu.pat_cr != vs->arch.hvm_vcpu.pat_cr )
+        return 1;
+
+    return 0;
+}
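
The new hvm/mtrr.c above is driven by two tables: mm_type_tbl[][] gives the effective memory type produced by a (guest MTRR type, guest PAT type) pair, and mtrr_epat_tbl[][] inverts it so that get_pat_flags() can pick a shadow PAT entry which, under the host MTRR type, reproduces that effective type. A reduced standalone sketch (not part of the changeset) of the forward lookup:

#include <stdio.h>
#include <stdint.h>

#define MTRR_NUM_TYPES 7
#define PAT_TYPE_NUMS  8

/* Same encoding as the patch: 0=UC, 1=WC, 4=WT, 5=WP, 6=WB, 2=reserved. */
static const uint8_t mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
    {0, 1, 2, 2, 0, 0, 0, 0},   /* MTRR UC */
    {0, 1, 2, 2, 0, 0, 1, 1},   /* MTRR WC */
    {2, 2, 2, 2, 2, 2, 2, 2},   /* reserved */
    {2, 2, 2, 2, 2, 2, 2, 2},   /* reserved */
    {0, 1, 2, 2, 4, 5, 4, 0},   /* MTRR WT */
    {0, 1, 2, 2, 4, 5, 5, 1},   /* MTRR WP */
    {0, 1, 2, 2, 4, 5, 6, 0},   /* MTRR WB */
};

int main(void)
{
    int guest_mtrr = 6;   /* guest MTRR says WB    */
    int guest_pat  = 7;   /* guest PTE selects UC- */
    int effective  = mm_type_tbl[guest_mtrr][guest_pat];

    /* WB + UC- combine to UC (type 0), which the shadow PTE must preserve. */
    printf("effective type = %d\n", effective);
    return 0;
}

get_pat_flags() then maps the effective type back through mtrr_epat_tbl and pat_entry_tbl into PAT/PCD/PWT bits for the shadow l1e, falling back to UC when the host and guest requirements conflict.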
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Mon Oct 22 14:19:22 2007 +0100
@@ -756,6 +756,11 @@ void vm_resume_fail(unsigned long eflags
     domain_crash_synchronous();
 }
 
+static void flush_cache(void *info)
+{
+    wbinvd();
+}
+
 void vmx_do_resume(struct vcpu *v)
 {
     bool_t debug_state;
@@ -767,6 +772,18 @@ void vmx_do_resume(struct vcpu *v)
     }
     else
     {
+        /* For a pass-through domain, the guest PCI-E device driver may use
+         * "Non-Snoop" I/O and rely on an explicit WBINVD or CLFLUSH to make
+         * a RAM region coherent.  If the VCPU migrates to another CPU before
+         * that WBINVD or CLFLUSH, we must flush the cache ourselves to
+         * maintain data consistency.
+         */
+        if ( !list_empty(&(domain_hvm_iommu(v->domain)->pdev_list)) )
+        {
+            int cpu = v->arch.hvm_vmx.active_cpu;
+            if ( cpu != -1 )
+                on_selected_cpus(cpumask_of_cpu(cpu), flush_cache, NULL, 1, 1);
+        }
+
         vmx_clear_vmcs(v);
         vmx_load_vmcs(v);
         hvm_migrate_timers(v);
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Oct 22 14:19:22 2007 +0100
@@ -50,6 +50,7 @@
 #include <asm/hvm/vpt.h>
 #include <public/hvm/save.h>
 #include <asm/hvm/trace.h>
+#include <stdbool.h>
 
 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
 
@@ -2285,6 +2286,9 @@ static int vmx_do_msr_read(struct cpu_us
     u64 msr_content = 0;
     u32 ecx = regs->ecx, eax, edx;
     struct vcpu *v = current;
+    int index;
+    u64 *var_range_base = (u64*)v->arch.hvm_vcpu.mtrr.var_ranges;
+    u64 *fixed_range_base =  (u64*)v->arch.hvm_vcpu.mtrr.fixed_ranges;
 
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
 
@@ -2304,6 +2308,32 @@ static int vmx_do_msr_read(struct cpu_us
         break;
     case MSR_IA32_APICBASE:
         msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+        break;
+    case MSR_IA32_CR_PAT:
+        msr_content = v->arch.hvm_vcpu.pat_cr;
+        break;
+    case MSR_MTRRcap:
+        msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
+        break;
+    case MSR_MTRRdefType:
+        msr_content = v->arch.hvm_vcpu.mtrr.def_type
+                        | (v->arch.hvm_vcpu.mtrr.enabled << 10);
+        break;
+    case MSR_MTRRfix64K_00000:
+        msr_content = fixed_range_base[0];
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        index = regs->ecx - MSR_MTRRfix16K_80000;
+        msr_content = fixed_range_base[index + 1];
+        break;
+    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
+        index = regs->ecx - MSR_MTRRfix4K_C0000;
+        msr_content = fixed_range_base[index + 3];
+        break;
+    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
+        index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
+        msr_content = var_range_base[index];
         break;
     case MSR_IA32_DEBUGCTLMSR:
         if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0 )
@@ -2428,11 +2458,19 @@ void vmx_vlapic_msr_changed(struct vcpu 
     vmx_vmcs_exit(v);
 }
 
+extern bool mtrr_var_range_msr_set(struct mtrr_state *v,
+        u32 msr, u64 msr_content);
+extern bool mtrr_fix_range_msr_set(struct mtrr_state *v,
+        int row, u64 msr_content);
+extern bool mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
+extern bool pat_msr_set(u64 *pat, u64 msr);
+
 static int vmx_do_msr_write(struct cpu_user_regs *regs)
 {
     u32 ecx = regs->ecx;
     u64 msr_content;
     struct vcpu *v = current;
+    int index;
 
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
                 ecx, (u32)regs->eax, (u32)regs->edx);
@@ -2459,6 +2497,38 @@ static int vmx_do_msr_write(struct cpu_u
     case MSR_IA32_APICBASE:
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
         break;
+    case MSR_IA32_CR_PAT:
+        if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
+           goto gp_fault;
+        break;
+    case MSR_MTRRdefType:
+        if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
+           goto gp_fault;
+        break;
+    case MSR_MTRRfix64K_00000:
+        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
+            goto gp_fault;
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
+        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+                                     index, msr_content) )
+            goto gp_fault;
+        break;
+    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
+        index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
+        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+                                     index, msr_content) )
+            goto gp_fault;
+        break;
+    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
+        if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+                                     regs->ecx, msr_content) )
+            goto gp_fault;
+        break;
+    case MSR_MTRRcap:
+        goto gp_fault;
     case MSR_IA32_DEBUGCTLMSR: {
         int i, rc = 0;
 
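
The MSR handlers above fold the eleven fixed-range MTRR MSRs into rows 0-10 of mtrr.fixed_ranges[]: row 0 for the single 64K register, rows 1-2 for the two 16K registers, and rows 3-10 for the eight 4K registers (hence the "+ 1" and "+ 3" offsets). A hypothetical standalone sketch of that index arithmetic, using the MSR numbers from the MTRR code:

#include <stdio.h>
#include <stdint.h>

#define MSR_MTRRfix64K_00000 0x250
#define MSR_MTRRfix16K_80000 0x258
#define MSR_MTRRfix16K_A0000 0x259
#define MSR_MTRRfix4K_C0000  0x268
#define MSR_MTRRfix4K_F8000  0x26f

/* Map a fixed-range MTRR MSR to the row used by mtrr_fix_range_msr_set(). */
static int fixed_msr_to_row(uint32_t msr)
{
    if (msr == MSR_MTRRfix64K_00000)
        return 0;                                /* 0-512K in 64K chunks    */
    if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
        return 1 + (msr - MSR_MTRRfix16K_80000); /* 512K-768K in 16K chunks */
    if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
        return 3 + (msr - MSR_MTRRfix4K_C0000);  /* 768K-1M in 4K chunks    */
    return -1;                                   /* not a fixed-range MSR   */
}

int main(void)
{
    printf("MSR 0x259 -> row %d, MSR 0x26f -> row %d\n",
           fixed_msr_to_row(MSR_MTRRfix16K_A0000),
           fixed_msr_to_row(MSR_MTRRfix4K_F8000));
    return 0;
}

Each row holds one 64-bit MSR value, i.e. eight memory-type bytes, which matches setup_fixed_mtrrs() writing rows 0-10 and NUM_FIXED_RANGES being 88 (11 * 8) entries.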
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/mm.c Mon Oct 22 14:19:22 2007 +0100
@@ -3115,6 +3115,15 @@ long arch_memory_op(int op, XEN_GUEST_HA
         case XENMAPSPACE_shared_info:
             if ( xatp.idx == 0 )
                 mfn = virt_to_mfn(d->shared_info);
+            /* XXX: we assume this is called after the E820 table has been
+             * built, since the E820 map is needed to initialize the MTRRs.
+             */
+            if ( is_hvm_domain(d) ) {
+                extern void init_mtrr_in_hyper(struct vcpu *);
+                struct vcpu *vs;
+                for_each_vcpu(d, vs)
+                    init_mtrr_in_hyper(vs);
+            }
             break;
         case XENMAPSPACE_grant_table:
             spin_lock(&d->grant_table->lock);
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Oct 22 14:19:22 2007 +0100
@@ -880,6 +880,14 @@ static void shadow_blow_tables(struct do
     flush_tlb_mask(d->domain_dirty_cpumask);
 }
 
+void shadow_blow_tables_per_domain(struct domain *d)
+{
+    if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL ) {
+        shadow_lock(d);
+        shadow_blow_tables(d);
+        shadow_unlock(d);
+    }
+}
 
 #ifndef NDEBUG
 /* Blow all shadows of all shadowed domains: this can be used to cause the
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Mon Oct 22 14:19:22 2007 +0100
@@ -33,6 +33,7 @@
 #include <asm/shadow.h>
 #include <asm/flushtlb.h>
 #include <asm/hvm/hvm.h>
+#include <asm/mtrr.h>
 #include "private.h"
 #include "types.h"
 
@@ -267,6 +268,11 @@ guest_walk_tables(struct vcpu *v, unsign
          * us reflect l2 changes later without touching the l1s. */
         int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
                      _PAGE_ACCESSED|_PAGE_DIRTY);
+        /* propagate PWT PCD to level 1 for PSE */
+        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PWT) )
+            flags |= _PAGE_PWT;
+        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PCD) )
+            flags |= _PAGE_PCD;
         /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
          * of the level 1 */
         if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) ) 
@@ -614,7 +620,12 @@ shadow_l4_index(mfn_t *smfn, u32 guest_i
 
 #endif // GUEST_PAGING_LEVELS >= 4
 
-
+extern u32 get_pat_flags(struct vcpu *v,
+                  u32 gl1e_flags,
+                  paddr_t gpaddr,
+                  paddr_t spaddr);
+
+unsigned char pat_type_2_pte_flags(unsigned char pat_type);
 /**************************************************************************/
 /* Function which computes shadow entries from their corresponding guest
  * entries.  This is the "heart" of the shadow code. It operates using
@@ -703,6 +714,17 @@ _sh_propagate(struct vcpu *v,
         pass_thru_flags |= _PAGE_NX_BIT;
     sflags = gflags & pass_thru_flags;
 
+    /* Only change memory caching type for pass-through domain */
+    if ( (level == 1) && !list_empty(&(domain_hvm_iommu(d)->pdev_list)) ) {
+        if ( v->domain->arch.hvm_domain.is_in_uc_mode )
+            sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
+        else
+            sflags |= get_pat_flags(v,
+                                    gflags,
+                                    guest_l1e_get_paddr(*gp),
+                                    mfn_x(target_mfn) << PAGE_SHIFT);
+    }
+
     // Set the A&D bits for higher level shadows.
     // Higher level entries do not, strictly speaking, have dirty bits, but
     // since we use shadow linear tables, each of these entries may, at some
@@ -773,10 +795,6 @@ _sh_propagate(struct vcpu *v,
     {
         sflags |= _PAGE_USER;
     }
-
-    /* MMIO addresses should never be cached */
-    if ( p2m_is_mmio(p2mt) )
-        sflags |= _PAGE_PCD;
 
     *sp = shadow_l1e_from_mfn(target_mfn, sflags);
 
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h  Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/cpufeature.h  Mon Oct 22 14:19:22 2007 +0100
@@ -128,6 +128,7 @@
 #define cpu_has_tsc            boot_cpu_has(X86_FEATURE_TSC)
 #define cpu_has_pae            boot_cpu_has(X86_FEATURE_PAE)
 #define cpu_has_pge            boot_cpu_has(X86_FEATURE_PGE)
+#define cpu_has_pat            boot_cpu_has(X86_FEATURE_PAT)
 #define cpu_has_apic           boot_cpu_has(X86_FEATURE_APIC)
 #define cpu_has_sep            boot_cpu_has(X86_FEATURE_SEP)
 #define cpu_has_mtrr           boot_cpu_has(X86_FEATURE_MTRR)
@@ -152,6 +153,7 @@
 #define cpu_has_tsc            1
 #define cpu_has_pae            1
 #define cpu_has_pge            1
+#define cpu_has_pat            1
 #define cpu_has_apic           boot_cpu_has(X86_FEATURE_APIC)
 #define cpu_has_sep            0
 #define cpu_has_mtrr           1
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h  Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/hvm/domain.h  Mon Oct 22 14:19:22 2007 +0100
@@ -61,6 +61,12 @@ struct hvm_domain {
 
     unsigned long          vmx_apic_access_mfn;
 
+    /* is_in_uc_mode is set if any VCPU of this domain is in no-fill cache
+     * mode, or if the VCPUs' MTRR/PAT settings differ.
+     */
+    spinlock_t       uc_lock;
+    bool_t           is_in_uc_mode;
+
     /* Pass-through */
     struct hvm_iommu       hvm_iommu;
 };
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/hvm/support.h Mon Oct 22 14:19:22 2007 +0100
@@ -64,6 +64,7 @@ static inline vcpu_iodata_t *get_ioreq(s
 #define DBG_LEVEL_VLAPIC_INTERRUPT  (1 << 8)
 #define DBG_LEVEL_IOAPIC            (1 << 9)
 #define DBG_LEVEL_HCALL             (1 << 10)
+#define DBG_LEVEL_MSR               (1 << 11)
 
 extern unsigned int opt_hvm_debug_level;
 #define HVM_DBG_LOG(level, _f, _a...)                                         \
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/hvm/vcpu.h    Mon Oct 22 14:19:22 2007 +0100
@@ -24,6 +24,7 @@
 #include <asm/hvm/vlapic.h>
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/hvm/svm/vmcb.h>
+#include <asm/mtrr.h>
 
 #define HVM_VCPU_INIT_SIPI_SIPI_STATE_NORM          0
 #define HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI     1
@@ -62,6 +63,12 @@ struct hvm_vcpu {
         struct arch_vmx_struct vmx;
         struct arch_svm_struct svm;
     } u;
+
+    struct mtrr_state   mtrr;
+    u64                 pat_cr;
+
+    /* Which cache mode is this VCPU in (CR0:CD/NW)? */
+    u8                  cache_mode;
 };
 
 #define ARCH_HVM_IO_WAIT         1   /* Waiting for I/O completion */
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h   Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/msr-index.h   Mon Oct 22 14:19:22 2007 +0100
@@ -68,6 +68,25 @@
 #define MSR_IA32_LASTBRANCHTOIP                0x000001dc
 #define MSR_IA32_LASTINTFROMIP         0x000001dd
 #define MSR_IA32_LASTINTTOIP           0x000001de
+ 
+#define MSR_IA32_MTRR_PHYSBASE0     0x00000200
+#define MSR_IA32_MTRR_PHYSMASK0     0x00000201
+#define MSR_IA32_MTRR_PHYSBASE1     0x00000202
+#define MSR_IA32_MTRR_PHYSMASK1     0x00000203
+#define MSR_IA32_MTRR_PHYSBASE2     0x00000204
+#define MSR_IA32_MTRR_PHYSMASK2     0x00000205
+#define MSR_IA32_MTRR_PHYSBASE3     0x00000206
+#define MSR_IA32_MTRR_PHYSMASK3     0x00000207
+#define MSR_IA32_MTRR_PHYSBASE4     0x00000208
+#define MSR_IA32_MTRR_PHYSMASK4     0x00000209
+#define MSR_IA32_MTRR_PHYSBASE5     0x0000020a
+#define MSR_IA32_MTRR_PHYSMASK5     0x0000020b
+#define MSR_IA32_MTRR_PHYSBASE6     0x0000020c
+#define MSR_IA32_MTRR_PHYSMASK6     0x0000020d
+#define MSR_IA32_MTRR_PHYSBASE7     0x0000020e
+#define MSR_IA32_MTRR_PHYSMASK7     0x0000020f
+
+#define MSR_IA32_CR_PAT             0x00000277
 
 #define MSR_IA32_MC0_CTL               0x00000400
 #define MSR_IA32_MC0_STATUS            0x00000401
diff -r 42d8dadb5864 -r 3e7c86602c70 xen/include/asm-x86/mtrr.h
--- a/xen/include/asm-x86/mtrr.h        Mon Oct 22 13:04:32 2007 +0100
+++ b/xen/include/asm-x86/mtrr.h        Mon Oct 22 14:19:22 2007 +0100
@@ -10,6 +10,55 @@
 #define MTRR_TYPE_WRPROT     5
 #define MTRR_TYPE_WRBACK     6
 #define MTRR_NUM_TYPES       7
+#define MEMORY_NUM_TYPES     MTRR_NUM_TYPES
+
+#define MTRR_PHYSMASK_VALID_BIT  11
+#define MTRR_PHYSMASK_SHIFT      12
+
+#define MTRR_PHYSBASE_TYPE_MASK  0xff   /* lowest 8 bits */
+#define MTRR_PHYSBASE_SHIFT      12
+#define MTRR_VCNT            8
+
+#define NORMAL_CACHE_MODE          0
+#define NO_FILL_CACHE_MODE         2
+
+enum {
+    PAT_TYPE_UNCACHABLE=0,
+    PAT_TYPE_WRCOMB=1,
+    PAT_TYPE_RESERVED=2,
+    PAT_TYPE_WRTHROUGH=4,
+    PAT_TYPE_WRPROT=5,
+    PAT_TYPE_WRBACK=6,
+    PAT_TYPE_UC_MINUS=7,
+    PAT_TYPE_NUMS
+};
+
+#define INVALID_MEM_TYPE PAT_TYPE_NUMS
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+   an 8 bit field: */
+typedef u8 mtrr_type;
+
+struct mtrr_var_range {
+       u32 base_lo;
+       u32 base_hi;
+       u32 mask_lo;
+       u32 mask_hi;
+};
+
+#define NUM_FIXED_RANGES 88
+struct mtrr_state {
+       struct mtrr_var_range *var_ranges;
+       mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+       unsigned char enabled;
+       unsigned char have_fixed;
+       mtrr_type def_type;
+
+       u64       mtrr_cap;
+       /* Do the ranges in the variable MSRs overlap? (0 = no overlap) */
+       bool_t    overlapped;
+       bool_t    is_initialized;
+};
 
 extern void mtrr_save_fixed_ranges(void *);
 extern void mtrr_save_state(void);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
