[Xen-changelog] [xen-unstable] Merge

# HG changeset patch
# User Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
# Date 1278413680 -3600
# Node ID db6234d3eafbd2d7b1469d8b98a13d6ab0b89973
# Parent  ce278fdaced3ff898651657fda848c2b4daee648
# Parent  9d965ac1b0dbcb2f1fd4845e30753251d68d064f
Merge
---
 xen/arch/x86/cpu/amd.h                 |  103 -----------
 xen/arch/x86/hvm/vmx/vpmu.c            |  119 -------------
 tools/blktap2/drivers/tapdisk-vbd.c    |    2 
 xen/Rules.mk                           |    2 
 xen/arch/x86/Makefile                  |    2 
 xen/arch/x86/apic.c                    |  198 +++++++++++++++++++---
 xen/arch/x86/cpu/amd.c                 |   50 +++++
 xen/arch/x86/domctl.c                  |    9 -
 xen/arch/x86/genapic/x2apic.c          |   19 ++
 xen/arch/x86/hvm/mtrr.c                |    2 
 xen/arch/x86/hvm/svm/asid.c            |    4 
 xen/arch/x86/hvm/svm/svm.c             |    7 
 xen/arch/x86/hvm/vmx/vmcs.c            |    4 
 xen/arch/x86/hvm/vmx/vmx.c             |    5 
 xen/arch/x86/i8259.c                   |   20 ++
 xen/arch/x86/io_apic.c                 |  120 +++++++++++++
 xen/arch/x86/mm/hap/p2m-ept.c          |  297 ++++++++++++++++++---------------
 xen/arch/x86/setup.c                   |    9 -
 xen/common/memory.c                    |    2 
 xen/common/page_alloc.c                |    8 
 xen/common/trace.c                     |  216 +++++++++++++++---------
 xen/drivers/passthrough/vtd/dmar.c     |   20 +-
 xen/drivers/passthrough/vtd/dmar.h     |    1 
 xen/drivers/passthrough/vtd/extern.h   |    3 
 xen/drivers/passthrough/vtd/intremap.c |  125 ++++++++++++-
 xen/drivers/passthrough/vtd/iommu.c    |   54 ++----
 xen/drivers/passthrough/vtd/qinval.c   |   19 +-
 xen/drivers/passthrough/vtd/vtd.h      |    3 
 xen/include/asm-x86/amd.h              |  138 +++++++++++++++
 xen/include/asm-x86/apic.h             |    2 
 xen/include/asm-x86/debugger.h         |    2 
 xen/include/asm-x86/domain.h           |    2 
 xen/include/asm-x86/genapic.h          |    1 
 xen/include/asm-x86/io_apic.h          |    6 
 xen/include/asm-x86/irq.h              |    2 
 xen/include/asm-x86/msr-index.h        |    4 
 xen/include/asm-x86/mtrr.h             |    2 
 xen/include/public/io/ring.h           |   15 +
 xen/include/public/memory.h            |    3 
 xen/include/xen/iommu.h                |    2 
 xen/include/xen/mm.h                   |    2 
 xen/include/xen/trace.h                |   14 -
 42 files changed, 1073 insertions(+), 545 deletions(-)

diff -r ce278fdaced3 -r db6234d3eafb tools/blktap2/drivers/tapdisk-vbd.c
--- a/tools/blktap2/drivers/tapdisk-vbd.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/tools/blktap2/drivers/tapdisk-vbd.c       Tue Jul 06 11:54:40 2010 +0100
@@ -1684,7 +1684,7 @@ tapdisk_vbd_check_ring_message(td_vbd_t 
        if (!vbd->ring.sring)
                return -EINVAL;
 
-       switch (vbd->ring.sring->pad[0]) {
+       switch (vbd->ring.sring->private.tapif_user.msg) {
        case 0:
                return 0;
 
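
Note: the named field consulted above is supplied by this changeset's edit to
xen/include/public/io/ring.h (+15 lines in the diffstat, not shown in full
here). A sketch of the likely shape, with the pad sizes assumed rather than
confirmed by this diff:

    union {                      /* private to the front/backend pair */
        struct {
            uint8_t msg;         /* out-of-band message byte for tapdisk */
        } tapif_user;
        uint8_t pvt_pad[4];      /* assumed size */
    } private;

Previously tapdisk overloaded sring->pad[0] for the same purpose; the union
gives that byte a name without changing the ring layout.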
diff -r ce278fdaced3 -r db6234d3eafb xen/Rules.mk
--- a/xen/Rules.mk      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/Rules.mk      Tue Jul 06 11:54:40 2010 +0100
@@ -8,7 +8,6 @@ perfc_arrays  ?= n
 perfc_arrays  ?= n
 lock_profile  ?= n
 crash_debug   ?= n
-gdbsx         ?= n
 frame_pointer ?= n
 
 XEN_ROOT=$(BASEDIR)/..
@@ -53,7 +52,6 @@ CFLAGS-$(perfc_arrays)  += -DPERF_ARRAYS
 CFLAGS-$(perfc_arrays)  += -DPERF_ARRAYS
 CFLAGS-$(lock_profile)  += -DLOCK_PROFILE
 CFLAGS-$(frame_pointer) += -fno-omit-frame-pointer -DCONFIG_FRAME_POINTER
-CFLAGS-$(gdbsx)         += -DXEN_GDBSX_CONFIG
 
 ifneq ($(max_phys_cpus),)
 CFLAGS-y                += -DMAX_PHYS_CPUS=$(max_phys_cpus)
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/Makefile     Tue Jul 06 11:54:40 2010 +0100
@@ -13,6 +13,7 @@ obj-y += clear_page.o
 obj-y += clear_page.o
 obj-y += copy_page.o
 obj-y += compat.o
+obj-y += debug.o
 obj-y += delay.o
 obj-y += dmi_scan.o
 obj-y += domctl.o
@@ -57,7 +58,6 @@ obj-y += bzimage.o
 obj-y += bzimage.o
 
 obj-$(crash_debug) += gdbstub.o
-obj-$(gdbsx) += debug.o
 
 x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
 
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/apic.c       Tue Jul 06 11:54:40 2010 +0100
@@ -70,6 +70,9 @@ int x2apic_enabled __read_mostly = 0;
 int x2apic_enabled __read_mostly = 0;
 int directed_eoi_enabled __read_mostly = 0;
 
+/* Set if x2APIC mode was already enabled by the BIOS. */
+static int x2apic_preenabled;
+
 /*
  * The following vectors are part of the Linux architecture, there
  * is no hardware IRQ pin equivalent for them, they are triggered
@@ -487,6 +490,47 @@ static void apic_pm_activate(void)
 static void apic_pm_activate(void)
 {
     apic_pm_state.active = 1;
+}
+
+static void resume_x2apic(void)
+{
+    uint64_t msr_content;
+    struct IO_APIC_route_entry **ioapic_entries = NULL;
+
+    ASSERT(x2apic_enabled);
+
+    ioapic_entries = alloc_ioapic_entries();
+    if ( !ioapic_entries )
+    {
+        printk("Allocate ioapic_entries failed\n");
+        goto out;
+    }
+
+    if ( save_IO_APIC_setup(ioapic_entries) )
+    {
+        printk("Saving IO-APIC state failed\n");
+        goto out;
+    }
+
+    mask_8259A();
+    mask_IO_APIC_setup(ioapic_entries);
+
+    iommu_enable_IR();
+
+    rdmsrl(MSR_IA32_APICBASE, msr_content);
+    if ( !(msr_content & MSR_IA32_APICBASE_EXTD) )
+    {
+        msr_content |= MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD;
+        msr_content = (uint32_t)msr_content;
+        wrmsrl(MSR_IA32_APICBASE, msr_content);
+    }
+
+    restore_IO_APIC_setup(ioapic_entries);
+    unmask_8259A();
+
+out:
+    if ( ioapic_entries )
+        free_ioapic_entries(ioapic_entries);
 }
 
 void __devinit setup_local_APIC(void)
@@ -727,7 +771,7 @@ int lapic_resume(void)
             msr_content | MSR_IA32_APICBASE_ENABLE | mp_lapic_addr);
     }
     else
-        enable_x2apic();
+        resume_x2apic();
 
     apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
     apic_write(APIC_ID, apic_pm_state.apic_id);
@@ -894,35 +938,138 @@ no_apic:
     return -1;
 }
 
-void enable_x2apic(void)
+void check_x2apic_preenabled(void)
 {
     uint64_t msr_content;
 
-    if ( smp_processor_id() == 0 )
+    if ( !x2apic_is_available() )
+        return;
+
+    rdmsrl(MSR_IA32_APICBASE, msr_content);
+    if ( msr_content & MSR_IA32_APICBASE_EXTD )
     {
-        if ( !iommu_supports_eim() )
+        printk("x2APIC mode is already enabled by BIOS.\n");
+        x2apic_preenabled = 1;
+        x2apic_enabled = 1;
+    }
+}
+
+static void enable_bsp_x2apic(void)
+{
+    struct IO_APIC_route_entry **ioapic_entries = NULL;
+    const struct genapic *x2apic_genapic = NULL;
+
+    ASSERT(smp_processor_id() == 0);
+
+    if ( x2apic_preenabled )
+    {
+        /*
+         * Interrupt remapping should also have been enabled by the
+         * BIOS if it enabled x2APIC; anything else is a BIOS bug.
+         */
+        if ( !intremap_enabled() )
+            panic("Interrupt remapping is not enabled by BIOS while "
+                  "x2APIC is already enabled by BIOS!\n");
+    }
+
+    x2apic_genapic = apic_x2apic_probe();
+    if ( x2apic_genapic )
+        genapic = x2apic_genapic;
+    else
+    {
+        if ( x2apic_cmdline_disable() )
         {
-            printk("x2APIC would not be enabled without EIM.\n");
-            return;
+            if ( x2apic_preenabled )
+            {
+                /* Ignore x2apic=0, and set default x2apic mode */
+                genapic = &apic_x2apic_cluster;
+                printk("x2APIC: already enabled by BIOS, ignore x2apic=0.\n");
+            }
+            else
+            {
+                printk("Not enable x2APIC due to x2apic=0 is set.\n");
+                return;
+            }
         }
-
-        if ( apic_x2apic_phys.probe() )
-            genapic = &apic_x2apic_phys;
-        else if ( apic_x2apic_cluster.probe() )
-            genapic = &apic_x2apic_cluster;
         else
         {
-            printk("x2APIC would not be enabled due to x2apic=off.\n");
-            return;
+            if ( !iommu_enabled || !iommu_intremap || !iommu_qinval )
+                panic("Cannot enable x2APIC due to iommu or interrupt "
+                      "remapping or queued invalidation is disabled "
+                      "by command line!\n");
+            else
+            {
+                if ( x2apic_preenabled )
+                    panic("x2APIC: already enabled by BIOS, but "
+                          "iommu_supports_eim fails\n");
+                else
+                {
+                    printk("Not enable x2APIC due to "
+                           "iommu_supports_eim fails!\n");
+                    return;
+                }
+            }
         }
-
-        x2apic_enabled = 1;
-        printk("Switched to APIC driver %s.\n", genapic->name);
-    }
-    else
+    }
+
+    ioapic_entries = alloc_ioapic_entries();
+    if ( !ioapic_entries )
     {
-        BUG_ON(!x2apic_enabled); /* APs only enable x2apic when BSP did so. */
-    }
+        printk("Allocate ioapic_entries failed\n");
+        goto out;
+    }
+
+    if ( save_IO_APIC_setup(ioapic_entries) )
+    {
+        printk("Saving IO-APIC state failed\n");
+        goto out;
+    }
+
+    mask_8259A();
+    mask_IO_APIC_setup(ioapic_entries);
+
+    if ( iommu_enable_IR() )
+    {
+        printk("Would not enable x2APIC due to interrupt remapping "
+               "cannot be enabled.\n");
+        goto restore_out;
+    }
+
+    x2apic_enabled = 1;
+    printk("Switched to APIC driver %s.\n", genapic->name);
+
+    if ( !x2apic_preenabled )
+    {
+        uint64_t msr_content;
+        rdmsrl(MSR_IA32_APICBASE, msr_content);
+        if ( !(msr_content & MSR_IA32_APICBASE_EXTD) )
+        {
+            msr_content |= MSR_IA32_APICBASE_ENABLE |
+                           MSR_IA32_APICBASE_EXTD;
+            msr_content = (uint32_t)msr_content;
+            wrmsrl(MSR_IA32_APICBASE, msr_content);
+            printk("x2APIC mode enabled.\n");
+        }
+    }
+
+restore_out:
+    restore_IO_APIC_setup(ioapic_entries);
+    unmask_8259A();
+
+out:
+    if ( ioapic_entries )
+        free_ioapic_entries(ioapic_entries);
+}
+
+static void enable_ap_x2apic(void)
+{
+    uint64_t msr_content;
+
+    ASSERT(smp_processor_id() != 0);
+
+    /* APs only enable x2apic when BSP did so. */
+    BUG_ON(!x2apic_enabled);
 
     rdmsrl(MSR_IA32_APICBASE, msr_content);
     if ( !(msr_content & MSR_IA32_APICBASE_EXTD) )
@@ -930,10 +1077,15 @@ void enable_x2apic(void)
         msr_content |= MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD;
         msr_content = (uint32_t)msr_content;
         wrmsrl(MSR_IA32_APICBASE, msr_content);
-        printk("x2APIC mode enabled.\n");
-    }
+    }
+}
+
+void enable_x2apic(void)
+{
+    if ( smp_processor_id() == 0 )
+        enable_bsp_x2apic();
     else
-        printk("x2APIC mode enabled by BIOS.\n");
+        enable_ap_x2apic();
 }
 
 void __init init_apic_mappings(void)
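
Note: the BSP path above follows a fixed ordering; condensed for reference
(step names are the functions added by this patch):

    /* enable_bsp_x2apic(), in outline:
     * 1. apic_x2apic_probe() picks an x2APIC driver, honouring x2apic=0
     *    unless the BIOS already switched the CPU into x2APIC mode;
     * 2. save_IO_APIC_setup(), mask_8259A() and mask_IO_APIC_setup()
     *    silence every interrupt source while the mode changes;
     * 3. iommu_enable_IR() -- x2APIC requires interrupt remapping;
     * 4. MSR_IA32_APICBASE gets ENABLE (bit 11) and EXTD (bit 10) set;
     * 5. restore_IO_APIC_setup() and unmask_8259A() undo step 2.
     */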
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/cpu/amd.c    Tue Jul 06 11:54:40 2010 +0100
@@ -7,12 +7,12 @@
 #include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
+#include <asm/amd.h>
 #include <asm/hvm/support.h>
 #include <asm/setup.h> /* amd_init_cpu */
 #include <asm/acpi.h>
 
 #include "cpu.h"
-#include "amd.h"
 
 /*
  * Pre-canned values for overriding the CPUID features 
@@ -148,6 +148,54 @@ static void __devinit set_cpuidmask(cons
 }
 
 /*
+ * Check for the presence of an AMD erratum. Arguments are defined in amd.h 
+ * for each known erratum. Return 1 if erratum is found.
+ */
+int cpu_has_amd_erratum(const struct cpuinfo_x86 *cpu, int osvw, ...) 
+{
+       va_list ap;
+       u32 range;
+       u32 ms;
+       
+       if (cpu->x86_vendor != X86_VENDOR_AMD)
+               return 0;
+
+       va_start(ap, osvw);
+
+       if (osvw) {
+               u16 osvw_id = va_arg(ap, int);
+
+               if (cpu_has(cpu, X86_FEATURE_OSVW)) {
+                       u64 osvw_len;
+                       rdmsrl(MSR_AMD_OSVW_ID_LENGTH, osvw_len);
+
+                       if (osvw_id < osvw_len) {
+                               u64 osvw_bits;
+                               rdmsrl(MSR_AMD_OSVW_STATUS + (osvw_id >> 6), 
+                                      osvw_bits);
+
+                               va_end(ap);
+                               return (osvw_bits >> (osvw_id & 0x3f)) & 0x01;
+                       }
+               }
+       }
+
+       /* OSVW unavailable or ID unknown, match family-model-stepping range */
+       ms = (cpu->x86_model << 8) | cpu->x86_mask;
+       while ((range = va_arg(ap, int))) {
+               if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
+                   (ms >= AMD_MODEL_RANGE_START(range)) &&
+                   (ms <= AMD_MODEL_RANGE_END(range))) {
+                       va_end(ap);
+                       return 1;
+               }
+       }
+
+       va_end(ap);
+       return 0;
+}
+
+/*
  * amd_flush_filter={on,off}. Forcibly Enable or disable the TLB flush
  * filter on AMD 64-bit processors.
  */
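
Note: the erratum arguments consumed by the varargs above come from the new
xen/include/asm-x86/amd.h (+138 lines in the diffstat, not shown here). A
sketch of the expected encoding, mirroring the equivalent Linux macros (exact
values assumed): each range argument packs a family plus a model/stepping
window, and an erratum expands to "osvw flag[, osvw id], range..., 0":

    #define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end)          \
        (((f) << 24) | ((m_start) << 16) | ((s_start) << 12) |          \
         ((m_end) << 4) | (s_end))
    #define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
    #define AMD_MODEL_RANGE_START(range)  (((range) >> 12) & 0xfff)
    #define AMD_MODEL_RANGE_END(range)    ((range) & 0xfff)

    /* e.g. erratum 170: OSVW id 1, or family 0Fh up to model 67h/F: */
    #define AMD_ERRATUM_170 \
        1 /* osvw */, 1 /* osvw_id */, \
        AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x67, 0xf), 0

With this encoding the 12-bit range bounds line up with
ms = (model << 4) | stepping as computed in cpu_has_amd_erratum().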
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/cpu/amd.h
--- a/xen/arch/x86/cpu/amd.h    Fri Jul 02 18:04:54 2010 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,103 +0,0 @@
-/*
- * amd.h - AMD processor specific definitions
- */
-
-#ifndef __AMD_H__
-#define __AMD_H__
-
-#include <asm/cpufeature.h>
-
-/* CPUID masked for use by AMD-V Extended Migration */
-
-#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32)
-#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_))
-
-/* Family 0Fh, Revision C */
-#define AMD_FEATURES_K8_REV_C_ECX  0
-#define AMD_FEATURES_K8_REV_C_EDX (                                    \
-       __bit(X86_FEATURE_FPU)      | __bit(X86_FEATURE_VME)   |        \
-       __bit(X86_FEATURE_DE)       | __bit(X86_FEATURE_PSE)   |        \
-       __bit(X86_FEATURE_TSC)      | __bit(X86_FEATURE_MSR)   |        \
-       __bit(X86_FEATURE_PAE)      | __bit(X86_FEATURE_MCE)   |        \
-       __bit(X86_FEATURE_CX8)      | __bit(X86_FEATURE_APIC)  |        \
-       __bit(X86_FEATURE_SEP)      | __bit(X86_FEATURE_MTRR)  |        \
-       __bit(X86_FEATURE_PGE)      | __bit(X86_FEATURE_MCA)   |        \
-       __bit(X86_FEATURE_CMOV)     | __bit(X86_FEATURE_PAT)   |        \
-       __bit(X86_FEATURE_PSE36)    | __bit(X86_FEATURE_CLFLSH)|        \
-       __bit(X86_FEATURE_MMX)      | __bit(X86_FEATURE_FXSR)  |        \
-       __bit(X86_FEATURE_XMM)      | __bit(X86_FEATURE_XMM2))
-#define AMD_EXTFEATURES_K8_REV_C_ECX  0 
-#define AMD_EXTFEATURES_K8_REV_C_EDX  (                                       \
-       __bit(X86_FEATURE_FPU)      | __bit(X86_FEATURE_VME)   |        \
-       __bit(X86_FEATURE_DE)       | __bit(X86_FEATURE_PSE)   |        \
-       __bit(X86_FEATURE_TSC)      | __bit(X86_FEATURE_MSR)   |        \
-       __bit(X86_FEATURE_PAE)      | __bit(X86_FEATURE_MCE)   |        \
-       __bit(X86_FEATURE_CX8)      | __bit(X86_FEATURE_APIC)  |        \
-       __bit(X86_FEATURE_SYSCALL)  | __bit(X86_FEATURE_MTRR)  |        \
-       __bit(X86_FEATURE_PGE)      | __bit(X86_FEATURE_MCA)   |        \
-       __bit(X86_FEATURE_CMOV)     | __bit(X86_FEATURE_PAT)   |        \
-       __bit(X86_FEATURE_PSE36)    | __bit(X86_FEATURE_NX)    |        \
-       __bit(X86_FEATURE_MMXEXT)   | __bit(X86_FEATURE_MMX)   |        \
-       __bit(X86_FEATURE_FXSR)     | __bit(X86_FEATURE_LM)    |        \
-       __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW))
-
-/* Family 0Fh, Revision D */
-#define AMD_FEATURES_K8_REV_D_ECX         AMD_FEATURES_K8_REV_C_ECX
-#define AMD_FEATURES_K8_REV_D_EDX         AMD_FEATURES_K8_REV_C_EDX
-#define AMD_EXTFEATURES_K8_REV_D_ECX     (AMD_EXTFEATURES_K8_REV_C_ECX |\
-       __bit(X86_FEATURE_LAHF_LM))
-#define AMD_EXTFEATURES_K8_REV_D_EDX     (AMD_EXTFEATURES_K8_REV_C_EDX |\
-       __bit(X86_FEATURE_FFXSR))
-
-/* Family 0Fh, Revision E */
-#define AMD_FEATURES_K8_REV_E_ECX        (AMD_FEATURES_K8_REV_D_ECX |  \
-       __bit(X86_FEATURE_XMM3))
-#define AMD_FEATURES_K8_REV_E_EDX        (AMD_FEATURES_K8_REV_D_EDX |  \
-       __bit(X86_FEATURE_HT))
-#define AMD_EXTFEATURES_K8_REV_E_ECX     (AMD_EXTFEATURES_K8_REV_D_ECX |\
-       __bit(X86_FEATURE_CMP_LEGACY)) 
-#define AMD_EXTFEATURES_K8_REV_E_EDX      AMD_EXTFEATURES_K8_REV_D_EDX
-
-/* Family 0Fh, Revision F */
-#define AMD_FEATURES_K8_REV_F_ECX        (AMD_FEATURES_K8_REV_E_ECX |  \
-       __bit(X86_FEATURE_CX16))
-#define AMD_FEATURES_K8_REV_F_EDX         AMD_FEATURES_K8_REV_E_EDX
-#define AMD_EXTFEATURES_K8_REV_F_ECX     (AMD_EXTFEATURES_K8_REV_E_ECX |\
-       __bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) |     \
-       __bit(X86_FEATURE_ALTMOVCR))
-#define AMD_EXTFEATURES_K8_REV_F_EDX     (AMD_EXTFEATURES_K8_REV_E_EDX |\
-       __bit(X86_FEATURE_RDTSCP))
-
-/* Family 0Fh, Revision G */
-#define AMD_FEATURES_K8_REV_G_ECX         AMD_FEATURES_K8_REV_F_ECX
-#define AMD_FEATURES_K8_REV_G_EDX         AMD_FEATURES_K8_REV_F_EDX
-#define AMD_EXTFEATURES_K8_REV_G_ECX     (AMD_EXTFEATURES_K8_REV_F_ECX |\
-       __bit(X86_FEATURE_3DNOWPF))
-#define AMD_EXTFEATURES_K8_REV_G_EDX      AMD_EXTFEATURES_K8_REV_F_EDX
-
-/* Family 10h, Revision B */
-#define AMD_FEATURES_FAM10h_REV_B_ECX    (AMD_FEATURES_K8_REV_F_ECX |  \
-       __bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT))
-#define AMD_FEATURES_FAM10h_REV_B_EDX     AMD_FEATURES_K8_REV_F_EDX
-#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\
-       __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) |             \
-       __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) |      \
-       __bit(X86_FEATURE_IBS))
-#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\
-       __bit(X86_FEATURE_PAGE1GB))
-
-/* Family 10h, Revision C */
-#define AMD_FEATURES_FAM10h_REV_C_ECX     AMD_FEATURES_FAM10h_REV_B_ECX
-#define AMD_FEATURES_FAM10h_REV_C_EDX     AMD_FEATURES_FAM10h_REV_B_EDX
-#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\
-       __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT))
-#define AMD_EXTFEATURES_FAM10h_REV_C_EDX  AMD_EXTFEATURES_FAM10h_REV_B_EDX
-
-/* Family 11h, Revision B */
-#define AMD_FEATURES_FAM11h_REV_B_ECX     AMD_FEATURES_K8_REV_G_ECX
-#define AMD_FEATURES_FAM11h_REV_B_EDX     AMD_FEATURES_K8_REV_G_EDX
-#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\
-       __bit(X86_FEATURE_SKINIT))
-#define AMD_EXTFEATURES_FAM11h_REV_B_EDX  AMD_EXTFEATURES_K8_REV_G_EDX
-
-#endif /* __AMD_H__ */
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/domctl.c     Tue Jul 06 11:54:40 2010 +0100
@@ -34,7 +34,6 @@
 #include <public/mem_event.h>
 #include <asm/mem_sharing.h>
 
-#ifdef XEN_GDBSX_CONFIG                    
 #ifdef XEN_KDB_CONFIG
 #include "../kdb/include/kdbdefs.h"
 #include "../kdb/include/kdbproto.h"
@@ -43,8 +42,9 @@ typedef unsigned char kdbbyt_t;
 typedef unsigned char kdbbyt_t;
 extern int dbg_rw_mem(kdbva_t, kdbbyt_t *, int, domid_t, int, uint64_t);
 #endif
-static int 
-gdbsx_guest_mem_io(domid_t domid, struct xen_domctl_gdbsx_memio *iop)
+
+static int gdbsx_guest_mem_io(
+    domid_t domid, struct xen_domctl_gdbsx_memio *iop)
 {   
     ulong l_uva = (ulong)iop->uva;
     iop->remain = dbg_rw_mem(
@@ -52,7 +52,6 @@ gdbsx_guest_mem_io(domid_t domid, struct
         iop->gwr, iop->pgd3val);
     return (iop->remain ? -EFAULT : 0);
 }
-#endif  /* XEN_GDBSX_CONFIG */
 
 long arch_do_domctl(
     struct xen_domctl *domctl,
@@ -1309,7 +1308,6 @@ long arch_do_domctl(
     }
     break;
 
-#ifdef XEN_GDBSX_CONFIG
     case XEN_DOMCTL_gdbsx_guestmemio:
     {
         struct domain *d;
@@ -1418,7 +1416,6 @@ long arch_do_domctl(
         rcu_unlock_domain(d);
     }
     break;
-#endif /* XEN_GDBSX_CONFIG */
 
 #ifdef __x86_64__
     case XEN_DOMCTL_mem_event_op:
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/genapic/x2apic.c
--- a/xen/arch/x86/genapic/x2apic.c     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/genapic/x2apic.c     Tue Jul 06 11:54:40 2010 +0100
@@ -33,6 +33,11 @@ static int  x2apic_phys; /* By default w
 static int  x2apic_phys; /* By default we use logical cluster mode. */
 boolean_param("x2apic_phys", x2apic_phys);
 
+int x2apic_cmdline_disable(void)
+{
+    return (x2apic == 0);
+}
+
 static int probe_x2apic_phys(void)
 {
     return x2apic && x2apic_phys && x2apic_is_available() &&
@@ -54,6 +59,20 @@ const struct genapic apic_x2apic_cluster
     APIC_INIT("x2apic_cluster", probe_x2apic_cluster),
     GENAPIC_X2APIC_CLUSTER
 };
+
+const struct genapic *apic_x2apic_probe(void)
+{
+    if ( !x2apic || !x2apic_is_available() )
+        return NULL;
+
+    if ( !iommu_supports_eim() )
+        return NULL;
+
+    if ( x2apic_phys )
+        return &apic_x2apic_phys;
+    else
+        return &apic_x2apic_cluster;
+}
 
 void init_apic_ldr_x2apic_phys(void)
 {
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/mtrr.c
--- a/xen/arch/x86/hvm/mtrr.c   Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/mtrr.c   Tue Jul 06 11:54:40 2010 +0100
@@ -707,7 +707,7 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save
                           1, HVMSR_PER_VCPU);
 
 uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
-                           uint8_t *ipat, int direct_mmio)
+                           uint8_t *ipat, bool_t direct_mmio)
 {
     uint8_t gmtrr_mtype, hmtrr_mtype;
     uint32_t type;
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/svm/asid.c
--- a/xen/arch/x86/hvm/svm/asid.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/svm/asid.c       Tue Jul 06 11:54:40 2010 +0100
@@ -21,14 +21,14 @@
 #include <xen/lib.h>
 #include <xen/perfc.h>
 #include <asm/hvm/svm/asid.h>
+#include <asm/amd.h>
 
 void svm_asid_init(struct cpuinfo_x86 *c)
 {
     int nasids = 0;
 
     /* Check for erratum #170, and leave ASIDs disabled if it's present. */
-    if ( (c->x86 == 0x10) ||
-         ((c->x86 == 0xf) && (c->x86_model >= 0x68) && (c->x86_mask >= 1)) )
+    if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_170) )
         nasids = cpuid_ebx(0x8000000A);
 
     hvm_asid_init(nasids);
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Tue Jul 06 11:54:40 2010 +0100
@@ -34,6 +34,7 @@
 #include <asm/regs.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
+#include <asm/amd.h>
 #include <asm/types.h>
 #include <asm/debugreg.h>
 #include <asm/msr.h>
@@ -846,8 +847,8 @@ static void svm_init_erratum_383(struct 
 {
     uint64_t msr_content;
 
-    /* only family 10h is affected */
-    if ( c->x86 != 0x10 )
+    /* check whether CPU is affected */
+    if ( !cpu_has_amd_erratum(c, AMD_ERRATUM_383) )
         return;
 
     /* use safe methods to be compatible with nested virtualization */
@@ -1492,9 +1493,7 @@ asmlinkage void svm_vmexit_handler(struc
         if ( (inst_len = __get_instruction_length(v, INSTR_INT3)) == 0 )
             break;
         __update_guest_eip(regs, inst_len);
-#ifdef XEN_GDBSX_CONFIG
         current->arch.gdbsx_vcpu_event = TRAP_int3;
-#endif
         domain_pause_for_debugger();
         break;
 
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Tue Jul 06 11:54:40 2010 +0100
@@ -1064,8 +1064,10 @@ void vmx_do_resume(struct vcpu *v)
          *  1: flushing cache (wbinvd) when the guest is scheduled out if
          *     there is no wbinvd exit, or
          *  2: execute wbinvd on all dirty pCPUs when guest wbinvd exits.
+         * If the VT-d engine can force snooping, we don't need to do these.
          */
-        if ( has_arch_pdevs(v->domain) && !cpu_has_wbinvd_exiting )
+        if ( has_arch_pdevs(v->domain) && !iommu_snoop
+                && !cpu_has_wbinvd_exiting )
         {
             int cpu = v->arch.hvm_vmx.active_cpu;
             if ( cpu != -1 )
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Tue Jul 06 11:54:40 2010 +0100
@@ -2089,6 +2089,9 @@ static void vmx_wbinvd_intercept(void)
     if ( !has_arch_mmios(current->domain) )
         return;
 
+    if ( iommu_snoop )
+        return;
+
     if ( cpu_has_wbinvd_exiting )
         on_each_cpu(wbinvd_ipi, NULL, 1);
     else
@@ -2406,9 +2409,7 @@ asmlinkage void vmx_vmexit_handler(struc
                 goto exit_and_crash;
             inst_len = __get_instruction_length(); /* Safe: INT3 */
             __update_guest_eip(inst_len);
-#ifdef XEN_GDBSX_CONFIG
             current->arch.gdbsx_vcpu_event = TRAP_int3;
-#endif
             domain_pause_for_debugger();
             break;
         case TRAP_no_device:
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/hvm/vmx/vpmu.c
--- a/xen/arch/x86/hvm/vmx/vpmu.c       Fri Jul 02 18:04:54 2010 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,119 +0,0 @@
-/*
- * vpmu.c: PMU virtualization for HVM domain.
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Author: Haitao Shan <haitao.shan@xxxxxxxxx>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <asm/regs.h>
-#include <asm/types.h>
-#include <asm/msr.h>
-#include <asm/hvm/support.h>
-#include <asm/hvm/vmx/vmx.h>
-#include <asm/hvm/vmx/vmcs.h>
-#include <public/sched.h>
-#include <public/hvm/save.h>
-#include <asm/hvm/vmx/vpmu.h>
-
-static int __read_mostly opt_vpmu_enabled;
-boolean_param("vpmu", opt_vpmu_enabled);
-
-int vpmu_do_wrmsr(struct cpu_user_regs *regs)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
-    if ( vpmu->arch_vpmu_ops )
-        return vpmu->arch_vpmu_ops->do_wrmsr(regs);
-    return 0;
-}
-
-int vpmu_do_rdmsr(struct cpu_user_regs *regs)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
-    if ( vpmu->arch_vpmu_ops )
-        return vpmu->arch_vpmu_ops->do_rdmsr(regs);
-    return 0;
-}
-
-int vpmu_do_interrupt(struct cpu_user_regs *regs)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
-    if ( vpmu->arch_vpmu_ops )
-        return vpmu->arch_vpmu_ops->do_interrupt(regs);
-    return 0;
-}
-
-void vpmu_save(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( vpmu->arch_vpmu_ops )
-        vpmu->arch_vpmu_ops->arch_vpmu_save(v);
-}
-
-void vpmu_load(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( vpmu->arch_vpmu_ops )
-        vpmu->arch_vpmu_ops->arch_vpmu_load(v);
-}
-
-extern struct arch_vpmu_ops core2_vpmu_ops;
-void vpmu_initialise(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( !opt_vpmu_enabled )
-        return;
-
-    if ( vpmu->flags & VPMU_CONTEXT_ALLOCATED )
-        vpmu_destroy(v);
-
-    if ( current_cpu_data.x86 == 6 )
-    {
-        switch ( current_cpu_data.x86_model )
-        {
-        case 15:
-        case 23:
-        case 26:
-        case 29:
-            vpmu->arch_vpmu_ops = &core2_vpmu_ops;
-            break;
-        }
-    }
-
-    if ( vpmu->arch_vpmu_ops != NULL )
-    {
-        vpmu->flags = 0;
-        vpmu->context = NULL;
-        vpmu->arch_vpmu_ops->arch_vpmu_initialise(v);
-    }
-}
-
-void vpmu_destroy(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( vpmu->arch_vpmu_ops )
-        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
-}
-
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/i8259.c      Tue Jul 06 11:54:40 2010 +0100
@@ -173,6 +173,26 @@ int i8259A_irq_pending(unsigned int irq)
     spin_unlock_irqrestore(&i8259A_lock, flags);
 
     return ret;
+}
+
+void mask_8259A(void)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&i8259A_lock, flags);
+    outb(0xff, 0xA1);
+    outb(0xff, 0x21);
+    spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void unmask_8259A(void)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&i8259A_lock, flags);
+    outb(cached_A1, 0xA1);
+    outb(cached_21, 0x21);
+    spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
 /*
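
Note: ports 0x21 and 0xA1 are the data ports of the master and slave 8259A;
outside the init sequence they address the interrupt mask register (IMR), and
cached_21/cached_A1 are Xen's shadow copies, so unmask_8259A() restores
exactly the pre-mask state. A hypothetical sanity check built on the same
ports (not part of the patch):

    static int i8259A_fully_masked(void)
    {
        /* The IMR reads back through the same data ports. */
        return (inb(0x21) == 0xff) && (inb(0xA1) == 0xff);
    }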
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/io_apic.c    Tue Jul 06 11:54:40 2010 +0100
@@ -134,6 +134,126 @@ static void __init replace_pin_at_irq(un
             break;
         entry = irq_2_pin + entry->next;
     }
+}
+
+struct IO_APIC_route_entry **alloc_ioapic_entries(void)
+{
+    int apic;
+    struct IO_APIC_route_entry **ioapic_entries;
+
+    ioapic_entries = xmalloc_array(struct IO_APIC_route_entry *, nr_ioapics);
+    if (!ioapic_entries)
+        return 0;
+
+    for (apic = 0; apic < nr_ioapics; apic++) {
+        ioapic_entries[apic] =
+            xmalloc_array(struct IO_APIC_route_entry,
+                          nr_ioapic_registers[apic]);
+        if (!ioapic_entries[apic])
+            goto nomem;
+    }
+
+    return ioapic_entries;
+
+nomem:
+    while (--apic >= 0)
+        xfree(ioapic_entries[apic]);
+    xfree(ioapic_entries);
+
+    return 0;
+}
+
+/*
+ * Saves all the IO-APIC RTE's
+ */
+int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
+{
+    int apic, pin;
+
+    if (!ioapic_entries)
+        return -ENOMEM;
+
+    for (apic = 0; apic < nr_ioapics; apic++) {
+        if (!ioapic_entries[apic])
+            return -ENOMEM;
+
+        for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+            *(((int *)&ioapic_entries[apic][pin])+0) =
+                __io_apic_read(apic, 0x10+pin*2);
+            *(((int *)&ioapic_entries[apic][pin])+1) =
+                __io_apic_read(apic, 0x11+pin*2);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Mask all IO APIC entries.
+ */
+void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
+{
+    int apic, pin;
+
+    if (!ioapic_entries)
+        return;
+
+    for (apic = 0; apic < nr_ioapics; apic++) {
+        if (!ioapic_entries[apic])
+            break;
+
+        for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+            struct IO_APIC_route_entry entry;
+            unsigned long flags;
+
+            entry = ioapic_entries[apic][pin];
+            if (!entry.mask) {
+                entry.mask = 1;
+
+                spin_lock_irqsave(&ioapic_lock, flags);
+                __io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+                __io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+                spin_unlock_irqrestore(&ioapic_lock, flags);
+            }
+        }
+    }
+}
+
+/*
+ * Restore IO APIC entries which was saved in ioapic_entries.
+ */
+int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
+{
+    int apic, pin;
+    unsigned long flags;
+    struct IO_APIC_route_entry entry;
+
+    if (!ioapic_entries)
+        return -ENOMEM;
+
+    for (apic = 0; apic < nr_ioapics; apic++) {
+        if (!ioapic_entries[apic])
+            return -ENOMEM;
+
+        for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+            entry = ioapic_entries[apic][pin];
+            spin_lock_irqsave(&ioapic_lock, flags);
+            __io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+            __io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+            spin_unlock_irqrestore(&ioapic_lock, flags);
+        }
+    }
+
+    return 0;
+}
+
+void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
+{
+    int apic;
+
+    for (apic = 0; apic < nr_ioapics; apic++)
+        xfree(ioapic_entries[apic]);
+
+    xfree(ioapic_entries);
 }
 
 static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
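
Note: the save/mask/restore helpers above treat each 64-bit IO-APIC
redirection-table entry (RTE) as two 32-bit indirect registers: index
0x10 + 2*pin is the low word (which holds the mask bit), 0x11 + 2*pin the
high word; hence the int-pointer casts. The equivalent view as a sketch:

    union {
        struct IO_APIC_route_entry entry;  /* one 64-bit RTE */
        uint32_t w[2];  /* w[0] <-> reg 0x10+2*pin, w[1] <-> reg 0x11+2*pin */
    } rte;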
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Tue Jul 06 11:54:40 2010 +0100
@@ -118,6 +118,74 @@ static int ept_set_middle_entry(struct d
     return 1;
 }
 
+/* Free the EPT subtree behind an entry. */
+void ept_free_entry(struct domain *d, ept_entry_t *ept_entry, int level)
+{
+    /* End if the entry is a leaf entry. */
+    if ( level == 0 || !is_epte_present(ept_entry) ||
+         is_epte_superpage(ept_entry) )
+        return;
+
+    if ( level > 1 )
+    {
+        ept_entry_t *epte = map_domain_page(ept_entry->mfn);
+        for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+            ept_free_entry(d, epte + i, level - 1);
+        unmap_domain_page(epte);
+    }
+
+    d->arch.p2m->free_page(d, mfn_to_page(ept_entry->mfn));
+}
+
+static int ept_split_super_page(struct domain *d, ept_entry_t *ept_entry,
+                                int level, int target)
+{
+    ept_entry_t new_ept, *table;
+    uint64_t trunk;
+    int rv = 1;
+
+    /* End if the entry is a leaf entry or reaches the target level. */
+    if ( level == 0 || level == target )
+        return rv;
+
+    ASSERT(is_epte_superpage(ept_entry));
+
+    if ( !ept_set_middle_entry(d, &new_ept) )
+        return 0;
+
+    table = map_domain_page(new_ept.mfn);
+    trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
+
+    for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+    {
+        ept_entry_t *epte = table + i;
+
+        epte->emt = ept_entry->emt;
+        epte->ipat = ept_entry->ipat;
+        epte->sp = (level > 1) ? 1 : 0;
+        epte->avail1 = ept_entry->avail1;
+        epte->avail2 = 0;
+        epte->mfn = ept_entry->mfn + i * trunk;
+
+        ept_p2m_type_to_flags(epte, epte->avail1);
+
+        if ( (level - 1) == target )
+            continue;
+
+        ASSERT(is_epte_superpage(epte));
+
+        if ( !(rv = ept_split_super_page(d, epte, level - 1, target)) )
+            break;
+    }
+
+    unmap_domain_page(table);
+
+    /* Even on failure we should install the newly allocated ept page. */
+    *ept_entry = new_ept;
+
+    return rv;
+}
+
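
Note: a worked example of the split arithmetic above, assuming
EPT_TABLE_ORDER = 9 (512-entry tables, 4KB base pages). Splitting a 1GB
superpage (level 2) gives:

    trunk = 1UL << ((2 - 1) * 9) = 512   /* 4KB frames per child entry */

so the loop creates 512 level-1 children of 2MB each, child i mapping
mfn + i * 512; if target is 0 rather than 1, the recursion then splits each
2MB child again.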
 /* Take the currently mapped table, find the corresponding gfn entry,
  * and map the next table, if available.  If the entry is empty
  * and read_only is set, 
@@ -134,13 +202,18 @@ static int ept_set_middle_entry(struct d
  */
 static int ept_next_level(struct domain *d, bool_t read_only,
                           ept_entry_t **table, unsigned long *gfn_remainder,
-                          u32 shift)
-{
+                          int next_level)
+{
+    unsigned long mfn;
     ept_entry_t *ept_entry;
-    ept_entry_t *next;
-    u32 index;
+    u32 shift, index;
+
+    shift = next_level * EPT_TABLE_ORDER;
 
     index = *gfn_remainder >> shift;
+
+    /* the index must fall within the page */
+    ASSERT(index < EPT_PAGETABLE_ENTRIES);
 
     ept_entry = (*table) + index;
 
@@ -159,69 +232,12 @@ static int ept_next_level(struct domain 
     /* The only time sp would be set here is if we had hit a superpage */
     if ( is_epte_superpage(ept_entry) )
         return GUEST_TABLE_SUPER_PAGE;
-    else
-    {
-        *gfn_remainder &= (1UL << shift) - 1;
-        next = map_domain_page(ept_entry->mfn);
-        unmap_domain_page(*table);
-        *table = next;
-        return GUEST_TABLE_NORMAL_PAGE;
-    }
-}
-
-/* It's super page before and we should break down it now. */
-static int ept_split_large_page(struct domain *d,
-                                ept_entry_t **table, u32 *index,
-                                unsigned long gfn, int level)
-{
-    ept_entry_t *prev_table = *table;
-    ept_entry_t *split_table = NULL;
-    ept_entry_t *split_entry = NULL;
-    ept_entry_t *ept_entry = (*table) + (*index);
-    ept_entry_t temp_ept_entry;
-    unsigned long s_gfn, s_mfn;
-    unsigned long offset, trunk;
-    int i;
-
-    /* alloc new page for new ept middle level entry which is
-     * before a leaf super entry
-     */
-
-    if ( !ept_set_middle_entry(d, &temp_ept_entry) )
-        return 0;
-
-    /* split the super page to small next level pages */
-    split_table = map_domain_page(temp_ept_entry.mfn);
-    offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1);
-    trunk = (1UL << ((level-1) * EPT_TABLE_ORDER));
-
-    for ( i = 0; i < (1UL << EPT_TABLE_ORDER); i++ )
-    {
-        s_gfn = gfn - offset + i * trunk;
-        s_mfn = ept_entry->mfn + i * trunk;
-
-        split_entry = split_table + i;
-        split_entry->emt = ept_entry->emt;
-        split_entry->ipat = ept_entry->ipat;
-
-        split_entry->sp = (level > 1) ? 1 : 0;
-
-        split_entry->mfn = s_mfn;
-
-        split_entry->avail1 = ept_entry->avail1;
-        split_entry->avail2 = 0;
-        /* last step */
-        split_entry->r = split_entry->w = split_entry->x = 1;
-        ept_p2m_type_to_flags(split_entry, ept_entry->avail1);
-    }
-
-    *ept_entry = temp_ept_entry;
-    
-    *index = offset / trunk;
-    *table = split_table;
-    unmap_domain_page(prev_table);
-
-    return 1;
+
+    mfn = ept_entry->mfn;
+    unmap_domain_page(*table);
+    *table = map_domain_page(mfn);
+    *gfn_remainder &= (1UL << shift) - 1;
+    return GUEST_TABLE_NORMAL_PAGE;
 }
 
 /*
@@ -229,56 +245,64 @@ static int ept_split_large_page(struct d
  * by observing whether any gfn->mfn translations are modified.
  */
 static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
               unsigned int order, p2m_type_t p2mt)
 {
-    ept_entry_t *table = NULL;
+    ept_entry_t *table, *ept_entry;
     unsigned long gfn_remainder = gfn;
     unsigned long offset = 0;
-    ept_entry_t *ept_entry = NULL;
     u32 index;
-    int i;
+    int i, target = order / EPT_TABLE_ORDER;
     int rv = 0;
     int ret = 0;
-    int split_level = 0;
-    int walk_level = order / EPT_TABLE_ORDER;
-    int direct_mmio = (p2mt == p2m_mmio_direct);
+    bool_t direct_mmio = (p2mt == p2m_mmio_direct);
     uint8_t ipat = 0;
     int need_modify_vtd_table = 1;
     int needs_sync = 1;
 
-    if (  order != 0 )
-        if ( (gfn & ((1UL << order) - 1)) )
-            return 1;
+    /*
+     * The caller must ensure:
+     * 1. the gfn and mfn passed in are valid and aligned to the order boundary.
+     * 2. the gfn does not exceed the guest physical address width.
+     * 3. the order passed in is valid.
+     */
+    if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) ||
+         (gfn >> ((ept_get_wl(d) + 1) * EPT_TABLE_ORDER)) ||
+         (order % EPT_TABLE_ORDER) )
+        return 0;
+
+    ASSERT((target == 2 && hvm_hap_has_1gb(d)) ||
+           (target == 1 && hvm_hap_has_2mb(d)) ||
+           (target == 0));
 
     table = map_domain_page(ept_get_asr(d));
 
     ASSERT(table != NULL);
 
-    for ( i = ept_get_wl(d); i > walk_level; i-- )
-    {
-        ret = ept_next_level(d, 0, &table, &gfn_remainder, i * EPT_TABLE_ORDER);
+    for ( i = ept_get_wl(d); i > target; i-- )
+    {
+        ret = ept_next_level(d, 0, &table, &gfn_remainder, i);
         if ( !ret )
             goto out;
         else if ( ret != GUEST_TABLE_NORMAL_PAGE )
             break;
     }
 
-    /* If order == 0, we should only get POD if we have a POD superpage.
-     * If i > walk_level, we need to split the page; otherwise,
-     * just behave as normal. */
-    ASSERT(ret != GUEST_TABLE_POD_PAGE || i != walk_level);
-
-    index = gfn_remainder >> ( i ?  (i * EPT_TABLE_ORDER): order);
-    offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
-
-    split_level = i;
+    ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target);
+
+    index = gfn_remainder >> (i * EPT_TABLE_ORDER);
+    offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
 
     ept_entry = table + index;
 
-    if ( i == walk_level )
-    {
-        /* We reached the level we're looking for */
+    /*
+     * If we get here, we must be at a leaf ept entry
+     * with i == target or i > target.
+     */
+
+    if ( i == target )
+    {
+        /* We reached the target level. */
 
         /* No need to flush if the old entry wasn't valid */
         if ( !is_epte_present(ept_entry) )
@@ -291,15 +315,14 @@ ept_set_entry(struct domain *d, unsigned
                                                 direct_mmio);
             ept_entry->ipat = ipat;
             ept_entry->sp = order ? 1 : 0;
+            ept_entry->avail1 = p2mt;
+            ept_entry->avail2 = 0;
 
             if ( ept_entry->mfn == mfn_x(mfn) )
                 need_modify_vtd_table = 0;
             else
                 ept_entry->mfn = mfn_x(mfn);
 
-            ept_entry->avail1 = p2mt;
-            ept_entry->avail2 = 0;
-
             ept_p2m_type_to_flags(ept_entry, p2mt);
         }
         else
@@ -307,32 +330,51 @@ ept_set_entry(struct domain *d, unsigned
     }
     else
     {
-        int level;
-        ept_entry_t *split_ept_entry;
-
-        for ( level = split_level; level > walk_level ; level-- )
-        {
-            rv = ept_split_large_page(d, &table, &index, gfn, level);
-            if ( !rv )
-                goto out;
-        }
-
-        split_ept_entry = table + index;
-        split_ept_entry->avail1 = p2mt;
-        ept_p2m_type_to_flags(split_ept_entry, p2mt);
-        split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat,
-                                                  direct_mmio);
-        split_ept_entry->ipat = ipat;
-
-        if ( split_ept_entry->mfn == mfn_x(mfn) )
-            need_modify_vtd_table = 0;
-        else
-            split_ept_entry->mfn = mfn_x(mfn);
+        /* We need to split the original page. */
+        ept_entry_t split_ept_entry;
+
+        ASSERT(is_epte_superpage(ept_entry));
+
+        split_ept_entry = *ept_entry;
+
+        if ( !ept_split_super_page(d, &split_ept_entry, i, target) )
+        {
+            ept_free_entry(d, &split_ept_entry, i);
+            goto out;
+        }
+
+        /* now install the newly split ept sub-tree */
+        /* NB: the caller must ensure the domain is paused and no VT-d DMA is in flight. */
+        *ept_entry = split_ept_entry;
+
+        /* then move to the level we want to make real changes */
+        for ( ; i > target; i-- )
+            ept_next_level(d, 0, &table, &gfn_remainder, i);
+
+        ASSERT(i == target);
+
+        index = gfn_remainder >> (i * EPT_TABLE_ORDER);
+        offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
+
+        ept_entry = table + index;
+
+        ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
+        ept_entry->ipat = ipat;
+        ept_entry->sp = i ? 1 : 0;
+        ept_entry->avail1 = p2mt;
+        ept_entry->avail2 = 0;
+
+        if ( ept_entry->mfn == mfn_x(mfn) )
+             need_modify_vtd_table = 0;
+        else /* the caller should take care of the previous page */
+            ept_entry->mfn = mfn_x(mfn);
+
+        ept_p2m_type_to_flags(ept_entry, p2mt);
     }
 
     /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( mfn_valid(mfn_x(mfn))
-         && (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
+    if ( mfn_valid(mfn_x(mfn)) &&
+         (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
         d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
 
     /* Success */
@@ -354,11 +396,11 @@ out:
                 for ( i = 0; i < (1 << order); i++ )
                     iommu_map_page(
                         d, gfn - offset + i, mfn_x(mfn) - offset + i,
-                        IOMMUF_readable|IOMMUF_writable);
+                        IOMMUF_readable | IOMMUF_writable);
             }
             else if ( !order )
                 iommu_map_page(
-                    d, gfn, mfn_x(mfn), IOMMUF_readable|IOMMUF_writable);
+                    d, gfn, mfn_x(mfn), IOMMUF_readable | IOMMUF_writable);
         }
         else
         {
@@ -398,8 +440,7 @@ static mfn_t ept_get_entry(struct domain
     for ( i = ept_get_wl(d); i > 0; i-- )
     {
     retry:
-        ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                             i * EPT_TABLE_ORDER);
+        ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
         if ( !ret )
             goto out;
         else if ( ret == GUEST_TABLE_POD_PAGE )
@@ -486,8 +527,7 @@ static ept_entry_t ept_get_entry_content
 
     for ( i = ept_get_wl(d); i > 0; i-- )
     {
-        ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                             i * EPT_TABLE_ORDER);
+        ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
         if ( !ret || ret == GUEST_TABLE_POD_PAGE )
             goto out;
         else if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -559,7 +599,7 @@ static mfn_t ept_get_entry_current(unsig
     return ept_get_entry(current->domain, gfn, t, q);
 }
 
-/* 
+/*
  * To test if the new emt type is the same with old,
  * return 1 to not to reset ept entry.
  */
@@ -569,14 +609,14 @@ static int need_modify_ept_entry(struct 
 {
     uint8_t ipat;
     uint8_t emt;
-    int direct_mmio = (p2mt == p2m_mmio_direct);
+    bool_t direct_mmio = (p2mt == p2m_mmio_direct);
 
     emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
 
     if ( (emt == o_emt) && (ipat == o_ipat) )
         return 0;
 
-    return 1; 
+    return 1;
 }
 
 void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
@@ -710,8 +750,7 @@ static void ept_dump_p2m_table(unsigned 
 
             for ( i = ept_get_wl(d); i > 0; i-- )
             {
-                ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                                     i * EPT_TABLE_ORDER);
+                ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
                 if ( ret != GUEST_TABLE_NORMAL_PAGE )
                     break;
             }
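
Note: throughout the new ept_set_entry(), target = order / EPT_TABLE_ORDER
maps the caller's page order onto an EPT level; with EPT_TABLE_ORDER = 9:

    order  0 -> target 0   (4KB leaf)
    order  9 -> target 1   (2MB superpage, needs hvm_hap_has_2mb(d))
    order 18 -> target 2   (1GB superpage, needs hvm_hap_has_1gb(d))

which is exactly what the ASSERT near the top of the function encodes.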
diff -r ce278fdaced3 -r db6234d3eafb xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/arch/x86/setup.c      Tue Jul 06 11:54:40 2010 +0100
@@ -909,6 +909,9 @@ void __init __start_xen(unsigned long mb
 
     tboot_probe();
 
+    /* Check if x2APIC is already enabled in BIOS */
+    check_x2apic_preenabled();
+
     /* Unmap the first page of CPU0's stack. */
     memguard_guard_stack(cpu0_stack);
 
@@ -926,9 +929,6 @@ void __init __start_xen(unsigned long mb
     generic_apic_probe();
 
     acpi_boot_init();
-
-    if ( x2apic_is_available() )
-        enable_x2apic();
 
     init_cpu_to_node();
 
@@ -941,6 +941,9 @@ void __init __start_xen(unsigned long mb
 #endif
 
     init_apic_mappings();
+
+    if ( x2apic_is_available() )
+        enable_x2apic();
 
     init_IRQ();
 
diff -r ce278fdaced3 -r db6234d3eafb xen/common/memory.c
--- a/xen/common/memory.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/common/memory.c       Tue Jul 06 11:54:40 2010 +0100
@@ -545,6 +545,8 @@ long do_memory_op(unsigned long cmd, XEN
         }
 
         args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));
+        if (reservation.mem_flags & XENMEMF_exact_node_request)
+            args.memflags |= MEMF_exact_node;
 
         if ( op == XENMEM_populate_physmap
              && (reservation.mem_flags & XENMEMF_populate_on_demand) )
diff -r ce278fdaced3 -r db6234d3eafb xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/common/page_alloc.c   Tue Jul 06 11:54:40 2010 +0100
@@ -300,11 +300,15 @@ static struct page_info *alloc_heap_page
     unsigned int i, j, zone = 0;
     unsigned int num_nodes = num_online_nodes();
     unsigned long request = 1UL << order;
+    bool_t exact_node_request = !!(memflags & MEMF_exact_node);
     cpumask_t extra_cpus_mask, mask;
     struct page_info *pg;
 
     if ( node == NUMA_NO_NODE )
+    {
         node = cpu_to_node(smp_processor_id());
+        exact_node_request = 0;
+    }
 
     ASSERT(node >= 0);
     ASSERT(zone_lo <= zone_hi);
@@ -345,6 +349,9 @@ static struct page_info *alloc_heap_page
                     goto found;
         } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
 
+        if ( exact_node_request )
+            goto not_found;
+
         /* Pick next node, wrapping around if needed. */
         node = next_node(node, node_online_map);
         if (node == MAX_NUMNODES)
@@ -360,6 +367,7 @@ static struct page_info *alloc_heap_page
         return pg;
     }
 
+ not_found:
     /* No suitable memory blocks. Fail the request. */
     spin_unlock(&heap_lock);
     return NULL;
diff -r ce278fdaced3 -r db6234d3eafb xen/common/trace.c
--- a/xen/common/trace.c        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/common/trace.c        Tue Jul 06 11:54:40 2010 +0100
@@ -50,16 +50,15 @@ static struct t_info *t_info;
 static struct t_info *t_info;
 #define T_INFO_PAGES 2  /* Size fixed at 2 pages for now. */
 #define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
-/* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t */
-#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + 1) / sizeof(uint32_t))
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static int data_size;
+static u32 data_size;
+static u32 t_info_first_offset __read_mostly;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
-static int t_buf_highwater;
+static u32 t_buf_highwater;
 
 /* Number of records lost due to per-CPU trace buffer being full. */
 static DEFINE_PER_CPU(unsigned long, lost_records);
@@ -75,13 +74,37 @@ static cpumask_t tb_cpu_mask = CPU_MASK_
 /* which tracing events are enabled */
 static u32 tb_event_mask = TRC_ALL;
 
+/* Return the number of elements _type necessary to store at least _x bytes of data
+ * i.e., sizeof(_type) * ans >= _x. */
+#define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type))
+
+static void calc_tinfo_first_offset(void)
+{
+    int offset_in_bytes;
+    
+    offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
+
+    t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes);
+
+    gdprintk(XENLOG_INFO, "%s: NR_CPUs %d, offset_in_bytes %d, t_info_first_offset %u\n",
+           __func__, NR_CPUS, offset_in_bytes, (unsigned)t_info_first_offset);
+}
+
 /**
  * check_tbuf_size - check to make sure that the proposed size will fit
- * in the currently sized struct t_info.
- */
-static inline int check_tbuf_size(int size)
-{
-    return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE / sizeof(uint32_t));
+ * in the currently sized struct t_info and allows prod and cons to
+ * reach double the value without overflow.
+ */
+static int check_tbuf_size(u32 pages)
+{
+    struct t_buf dummy;
+    typeof(dummy.prod) size;
+    
+    size = ((typeof(dummy.prod))pages)  * PAGE_SIZE;
+    
+    return (size / PAGE_SIZE != pages)
+           || (size + size < size)
+           || (num_online_cpus() * pages + t_info_first_offset > T_INFO_SIZE / sizeof(uint32_t));
 }
 
 /**
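
Note: fit_to_type() is plain ceiling division. For example, with NR_CPUS = 64,
offsetof(struct t_info, mfn_offset[NR_CPUS]) is 2 + 64 * 2 = 130 bytes (one
int16_t tbuf_size plus 64 int16_t offsets), so

    t_info_first_offset = fit_to_type(uint32_t, 130) = (130 + 3) / 4 = 33

uint32_t slots, i.e. the first per-CPU MFN entry starts at byte 132, safely
past the 130-byte header.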
@@ -100,7 +123,7 @@ static int alloc_trace_bufs(void)
     unsigned long nr_pages;
     /* Start after a fixed-size array of NR_CPUS */
     uint32_t *t_info_mfn_list = (uint32_t *)t_info;
-    int offset = T_INFO_FIRST_OFFSET;
+    int offset = t_info_first_offset;
 
     BUG_ON(check_tbuf_size(opt_tbuf_size));
 
@@ -115,7 +138,7 @@ static int alloc_trace_bufs(void)
     }
 
     t_info->tbuf_size = opt_tbuf_size;
-    printk("tbuf_size %d\n", t_info->tbuf_size);
+    printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size);
 
     nr_pages = opt_tbuf_size;
     order = get_order_from_pages(nr_pages);
@@ -140,7 +163,7 @@ static int alloc_trace_bufs(void)
 
         spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
 
-        buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
+        per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
         buf->cons = buf->prod = 0;
         per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
 
@@ -172,7 +195,7 @@ static int alloc_trace_bufs(void)
             /* Write list first, then write per-cpu offset. */
             wmb();
             t_info->mfn_offset[cpu]=offset;
-            printk("p%d mfn %"PRIx32" offset %d\n",
+            printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
                    cpu, mfn, offset);
             offset+=i;
         }
@@ -191,6 +214,7 @@ out_dealloc:
         spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
         if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
         {
+            per_cpu(t_bufs, cpu) = NULL;
             ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
             free_xenheap_pages(rawbuf, order);
         }
@@ -293,6 +317,10 @@ void __init init_trace_bufs(void)
 void __init init_trace_bufs(void)
 {
     int i;
+
+    /* Calculate offset in u32 of first mfn */
+    calc_tinfo_first_offset();
+
     /* t_info size fixed at 2 pages for now.  That should be big enough / small enough
      * until it's worth making it dynamic. */
     t_info = alloc_xenheap_pages(1, 0);
@@ -405,19 +433,39 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
     return rc;
 }
 
-static inline int calc_rec_size(int cycles, int extra) 
-{
-    int rec_size;
-    rec_size = 4;
+static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra) 
+{
+    unsigned int rec_size = 4;
+
     if ( cycles )
         rec_size += 8;
     rec_size += extra;
     return rec_size;
 }
 
-static inline int calc_unconsumed_bytes(struct t_buf *buf)
-{
-    int x = buf->prod - buf->cons;
+static inline bool_t bogus(u32 prod, u32 cons)
+{
+    if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
+         unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
+    {
+        tb_init_done = 0;
+        printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n",
+               smp_processor_id(), prod, cons);
+        return 1;
+    }
+    return 0;
+}
+
+static inline u32 calc_unconsumed_bytes(const struct t_buf *buf)
+{
+    u32 prod = buf->prod, cons = buf->cons;
+    s32 x;
+
+    barrier(); /* must read buf->prod and buf->cons only once */
+    if ( bogus(prod, cons) )
+        return data_size;
+
+    x = prod - cons;
     if ( x < 0 )
         x += 2*data_size;
 
@@ -427,9 +475,16 @@ static inline int calc_unconsumed_bytes(
     return x;
 }
 
-static inline int calc_bytes_to_wrap(struct t_buf *buf)
-{
-    int x = data_size - buf->prod;
+static inline u32 calc_bytes_to_wrap(const struct t_buf *buf)
+{
+    u32 prod = buf->prod, cons = buf->cons;
+    s32 x;
+
+    barrier(); /* must read buf->prod and buf->cons only once */
+    if ( bogus(prod, cons) )
+        return 0;
+
+    x = data_size - prod;
     if ( x <= 0 )
         x += data_size;
 
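
Note: the bogus() checks and the signed arithmetic above rely on the trace
ring's convention that prod and cons advance through [0, 2*data_size) rather
than [0, data_size), so that a full buffer (prod - cons == data_size) remains
distinguishable from an empty one (prod == cons). A worked example with
data_size = 0x1000:

    prod = 0x1800, cons = 0x0900
    unconsumed bytes         = prod - cons = 0xf00
    offset of prod in t_data = 0x1800 - 0x1000 = 0x800

the latter being exactly the `if ( x >= data_size ) x -= data_size;` step in
next_record() below.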
@@ -439,54 +494,60 @@ static inline int calc_bytes_to_wrap(str
     return x;
 }
 
-static inline int calc_bytes_avail(struct t_buf *buf)
+static inline u32 calc_bytes_avail(const struct t_buf *buf)
 {
     return data_size - calc_unconsumed_bytes(buf);
 }
 
-static inline struct t_rec *
-next_record(struct t_buf *buf)
-{
-    int x = buf->prod;
+static inline struct t_rec *next_record(const struct t_buf *buf,
+                                        uint32_t *next)
+{
+    u32 x = buf->prod, cons = buf->cons;
+
+    barrier(); /* must read buf->prod and buf->cons only once */
+    *next = x;
+    if ( !tb_init_done || bogus(x, cons) )
+        return NULL;
+
     if ( x >= data_size )
         x -= data_size;
 
-    ASSERT(x >= 0);
     ASSERT(x < data_size);
 
     return (struct t_rec *)&this_cpu(t_data)[x];
 }
 
-static inline int __insert_record(struct t_buf *buf,
-                                  unsigned long event,
-                                  int extra,
-                                  int cycles,
-                                  int rec_size,
-                                  unsigned char *extra_data)
+static inline void __insert_record(struct t_buf *buf,
+                                   unsigned long event,
+                                   unsigned int extra,
+                                   bool_t cycles,
+                                   unsigned int rec_size,
+                                   const void *extra_data)
 {
     struct t_rec *rec;
     unsigned char *dst;
-    unsigned long extra_word = extra/sizeof(u32);
-    int local_rec_size = calc_rec_size(cycles, extra);
+    unsigned int extra_word = extra / sizeof(u32);
+    unsigned int local_rec_size = calc_rec_size(cycles, extra);
     uint32_t next;
 
     BUG_ON(local_rec_size != rec_size);
     BUG_ON(extra & 3);
 
+    rec = next_record(buf, &next);
+    if ( !rec )
+        return;
     /* Double-check once more that we have enough space.
      * Don't bugcheck here, in case the userland tool is doing
      * something stupid. */
-    if ( calc_bytes_avail(buf) < rec_size )
-    {
-        printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
-               __func__,
-               calc_bytes_avail(buf),
-               data_size, buf->prod, buf->cons, data_size, rec_size);
-        return 0;
-    }
-    rmb();
-
-    rec = next_record(buf);
+    if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
+    {
+        if ( printk_ratelimit() )
+            printk(XENLOG_WARNING
+                   "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
+                   __func__, data_size, next, buf->cons, rec_size);
+        return;
+    }
+
     rec->event = event;
     rec->extra_u32 = extra_word;
     dst = (unsigned char *)rec->u.nocycles.extra_u32;
@@ -503,21 +564,19 @@ static inline int __insert_record(struct
 
     wmb();
 
-    next = buf->prod + rec_size;
+    next += rec_size;
     if ( next >= 2*data_size )
         next -= 2*data_size;
-    ASSERT(next >= 0);
     ASSERT(next < 2*data_size);
     buf->prod = next;
-
-    return rec_size;
-}
-
-static inline int insert_wrap_record(struct t_buf *buf, int size)
-{
-    int space_left = calc_bytes_to_wrap(buf);
-    unsigned long extra_space = space_left - sizeof(u32);
-    int cycles = 0;
+}
+
+static inline void insert_wrap_record(struct t_buf *buf,
+                                      unsigned int size)
+{
+    u32 space_left = calc_bytes_to_wrap(buf);
+    unsigned int extra_space = space_left - sizeof(u32);
+    bool_t cycles = 0;
 
     BUG_ON(space_left > size);
 
@@ -529,17 +588,13 @@ static inline int insert_wrap_record(str
         ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
     }
 
-    return __insert_record(buf,
-                    TRC_TRACE_WRAP_BUFFER,
-                    extra_space,
-                    cycles,
-                    space_left,
-                    NULL);
+    __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
+                    space_left, NULL);
 }
 
 #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
 
-static inline int insert_lost_records(struct t_buf *buf)
+static inline void insert_lost_records(struct t_buf *buf)
 {
     struct {
         u32 lost_records;
@@ -554,12 +609,8 @@ static inline int insert_lost_records(st
 
     this_cpu(lost_records) = 0;
 
-    return __insert_record(buf,
-                           TRC_LOST_RECORDS,
-                           sizeof(ed),
-                           1 /* cycles */,
-                           LOST_REC_SIZE,
-                           (unsigned char *)&ed);
+    __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
+                    LOST_REC_SIZE, &ed);
 }
 
 /*
@@ -581,13 +632,15 @@ static DECLARE_TASKLET(trace_notify_dom0
  * failure, otherwise 0.  Failure occurs only if the trace buffers are not yet
  * initialised.
  */
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
+void __trace_var(u32 event, bool_t cycles, unsigned int extra,
+                 const void *extra_data)
 {
     struct t_buf *buf;
-    unsigned long flags, bytes_to_tail, bytes_to_wrap;
-    int rec_size, total_size;
-    int extra_word;
-    int started_below_highwater = 0;
+    unsigned long flags;
+    u32 bytes_to_tail, bytes_to_wrap;
+    unsigned int rec_size, total_size;
+    unsigned int extra_word;
+    bool_t started_below_highwater;
 
     if( !tb_init_done )
         return;
@@ -626,7 +679,11 @@ void __trace_var(u32 event, int cycles, 
     buf = this_cpu(t_bufs);
 
     if ( unlikely(!buf) )
+    {
+        /* Make gcc happy */
+        started_below_highwater = 0;
         goto unlock;
+    }
 
     started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
 
@@ -707,8 +764,9 @@ unlock:
     spin_unlock_irqrestore(&this_cpu(t_lock), flags);
 
     /* Notify trace buffer consumer that we've crossed the high water mark. */
-    if ( started_below_highwater &&
-         (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
+    if ( likely(buf!=NULL)
+         && started_below_highwater
+         && (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
         tasklet_schedule(&trace_notify_dom0_tasklet);
 }
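
The arithmetic that the new bogus()/calc_unconsumed_bytes() checks guard is
worth spelling out: the producer and consumer indices deliberately run modulo
2*data_size rather than data_size, so that prod == cons means the buffer is
empty while prod == cons + data_size means it is full.  A minimal user-space
sketch of that scheme (illustrative names, not hypervisor code):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t data_size = 4096;   /* bytes of payload in the ring */

    /* Mirrors calc_unconsumed_bytes(): indices live in [0, 2*data_size). */
    static uint32_t unconsumed(uint32_t prod, uint32_t cons)
    {
        int32_t x = prod - cons;        /* may go negative across the wrap */

        if ( x < 0 )
            x += 2 * data_size;
        return x;                       /* 0 == empty, data_size == full */
    }

    int main(void)
    {
        assert(unconsumed(0, 0) == 0);          /* empty */
        assert(unconsumed(4096, 0) == 4096);    /* full */
        assert(unconsumed(100, 8100) == 192);   /* producer wrapped past 2*size */
        return 0;
    }

Running the indices over twice the buffer size is what lets full and empty be
told apart without a separate element count.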
 
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/dmar.c        Tue Jul 06 11:54:40 2010 +0100
@@ -32,6 +32,7 @@
 #include "dmar.h"
 #include "iommu.h"
 #include "extern.h"
+#include "vtd.h"
 
 #undef PREFIX
 #define PREFIX VTDPREFIX "ACPI DMAR:"
@@ -378,7 +379,6 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
     struct acpi_table_drhd * drhd = (struct acpi_table_drhd *)header;
     void *dev_scope_start, *dev_scope_end;
     struct acpi_drhd_unit *dmaru;
-    void *addr;
     int ret;
     static int include_all = 0;
 
@@ -397,8 +397,9 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
         dprintk(VTDPREFIX, "  dmaru->address = %"PRIx64"\n",
                 dmaru->address);
 
-    addr = map_to_nocache_virt(0, drhd->address);
-    dmaru->ecap = dmar_readq(addr, DMAR_ECAP_REG);
+    ret = iommu_alloc(dmaru);
+    if ( ret )
+        goto out;
 
     dev_scope_start = (void *)(drhd + 1);
     dev_scope_end = ((void *)drhd) + header->length;
@@ -420,7 +421,7 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
     }
 
     if ( ret )
-        xfree(dmaru);
+        goto out;
     else if ( force_iommu || dmaru->include_all )
         acpi_register_drhd_unit(dmaru);
     else
@@ -451,14 +452,15 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
 
         if ( invalid_cnt )
         {
-            xfree(dmaru);
-
             if ( iommu_workaround_bios_bug &&
                  invalid_cnt == dmaru->scope.devices_cnt )
             {
                 dprintk(XENLOG_WARNING VTDPREFIX,
                     "  Workaround BIOS bug: ignore the DRHD due to all "
                     "devices under its scope are not PCI discoverable!\n");
+
+                iommu_free(dmaru);
+                xfree(dmaru);
             }
             else
             {
@@ -474,6 +476,12 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
             acpi_register_drhd_unit(dmaru);
     }
 
+out:
+    if ( ret )
+    {
+        iommu_free(dmaru);
+        xfree(dmaru);
+    }
     return ret;
 }
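
The dmar.c rework above replaces the scattered xfree(dmaru) calls with a
single exit path that releases both the iommu state and the dmaru itself.  A
stand-alone sketch of the same goto-out idiom, using hypothetical helpers
(alloc_unit/do_setup/register_unit stand in for iommu_alloc() and friends):

    #include <errno.h>
    #include <stdlib.h>

    struct unit { int dummy; };

    static struct unit *alloc_unit(void) { return malloc(sizeof(struct unit)); }
    static void free_unit(struct unit *u) { free(u); }
    static int do_setup(struct unit *u) { (void)u; return 0; }
    static void register_unit(struct unit *u) { (void)u; }

    static int parse_unit(void)
    {
        struct unit *u = alloc_unit();
        int ret;

        if ( !u )
            return -ENOMEM;

        ret = do_setup(u);
        if ( ret )
            goto out;

        register_unit(u);
        return 0;

     out:
        free_unit(u);          /* the one place every failure path cleans up */
        return ret;
    }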
 
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/dmar.h
--- a/xen/drivers/passthrough/vtd/dmar.h        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/dmar.h        Tue Jul 06 11:54:40 2010 +0100
@@ -50,7 +50,6 @@ struct acpi_drhd_unit {
     struct dmar_scope scope;            /* must be first member of struct */
     struct list_head list;
     u64    address;                     /* register base address of the unit */
-    u64    ecap;
     u8     include_all:1;
     struct iommu *iommu;
     struct list_head ioapic_list;
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/extern.h
--- a/xen/drivers/passthrough/vtd/extern.h      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/extern.h      Tue Jul 06 11:54:40 2010 +0100
@@ -33,7 +33,7 @@ extern struct keyhandler dump_iommu_info
 
 int enable_qinval(struct iommu *iommu);
 void disable_qinval(struct iommu *iommu);
-int enable_intremap(struct iommu *iommu);
+int enable_intremap(struct iommu *iommu, int eim);
 void disable_intremap(struct iommu *iommu);
 int queue_invalidate_context(struct iommu *iommu,
     u16 did, u16 source_id, u8 function_mask, u8 granu);
@@ -44,6 +44,7 @@ int invalidate_sync(struct iommu *iommu)
 int invalidate_sync(struct iommu *iommu);
 int iommu_flush_iec_global(struct iommu *iommu);
 int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx);
+void clear_fault_bits(struct iommu *iommu);
 struct iommu * ioapic_to_iommu(unsigned int apic_id);
 struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id);
 struct acpi_drhd_unit * iommu_to_drhd(struct iommu *iommu);
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/intremap.c    Tue Jul 06 11:54:40 2010 +0100
@@ -134,18 +134,26 @@ int iommu_supports_eim(void)
     if ( !iommu_enabled || !iommu_qinval || !iommu_intremap )
         return 0;
 
+    if ( list_empty(&acpi_drhd_units) )
+    {
+        dprintk(XENLOG_WARNING VTDPREFIX, "VT-d is not supported\n");
+        return 0;
+    }
+
     /* We MUST have a DRHD unit for each IOAPIC. */
     for ( apic = 0; apic < nr_ioapics; apic++ )
         if ( !ioapic_to_drhd(IO_APIC_ID(apic)) )
+    {
+            dprintk(XENLOG_WARNING VTDPREFIX,
+                    "There is no DRHD for IOAPIC 0x%x (id: 0x%x)!\n",
+                    apic, IO_APIC_ID(apic));
             return 0;
-
-    if ( list_empty(&acpi_drhd_units) )
-        return 0;
+    }
 
     for_each_drhd_unit ( drhd )
-        if ( !ecap_queued_inval(drhd->ecap) ||
-             !ecap_intr_remap(drhd->ecap) ||
-             !ecap_eim(drhd->ecap) )
+        if ( !ecap_queued_inval(drhd->iommu->ecap) ||
+             !ecap_intr_remap(drhd->iommu->ecap) ||
+             !ecap_eim(drhd->iommu->ecap) )
             return 0;
 
     return 1;
@@ -706,7 +714,7 @@ void msi_msg_write_remap_rte(
 }
 #endif
 
-int enable_intremap(struct iommu *iommu)
+int enable_intremap(struct iommu *iommu, int eim)
 {
     struct acpi_drhd_unit *drhd;
     struct ir_ctrl *ir_ctrl;
@@ -716,10 +724,25 @@ int enable_intremap(struct iommu *iommu)
     ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
 
     ir_ctrl = iommu_ir_ctrl(iommu);
+    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+
+    /* Return if already enabled by Xen */
+    if ( (sts & DMA_GSTS_IRES) && ir_ctrl->iremap_maddr )
+        return 0;
+
+    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+    if ( !(sts & DMA_GSTS_QIES) )
+    {
+        dprintk(XENLOG_ERR VTDPREFIX,
+                "Queued invalidation is not enabled; cannot enable "
+                "interrupt remapping\n");
+        return -EINVAL;
+    }
+
     if ( ir_ctrl->iremap_maddr == 0 )
     {
         drhd = iommu_to_drhd(iommu);
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR );
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR);
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
@@ -732,7 +755,7 @@ int enable_intremap(struct iommu *iommu)
 #ifdef CONFIG_X86
     /* set extended interrupt mode bit */
     ir_ctrl->iremap_maddr |=
-            x2apic_enabled ? (1 << IRTA_REG_EIME_SHIFT) : 0;
+            eim ? (1 << IRTA_REG_EIME_SHIFT) : 0;
 #endif
     spin_lock_irqsave(&iommu->register_lock, flags);
 
@@ -769,13 +792,95 @@ void disable_intremap(struct iommu *iomm
     u32 sts;
     unsigned long flags;
 
-    ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
+    if ( !ecap_intr_remap(iommu->ecap) )
+        return;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+    if ( !(sts & DMA_GSTS_IRES) )
+        goto out;
+
     dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_IRE));
 
     IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                   !(sts & DMA_GSTS_IRES), sts);
+out:
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
+
+/*
+ * This function is used to enable interrupt remapping when
+ * enabling x2APIC.
+ */
+int iommu_enable_IR(void)
+{
+    struct acpi_drhd_unit *drhd;
+    struct iommu *iommu;
+
+    if ( !iommu_supports_eim() )
+        return -1;
+
+    for_each_drhd_unit ( drhd )
+    {
+        struct qi_ctrl *qi_ctrl = NULL;
+
+        iommu = drhd->iommu;
+        qi_ctrl = iommu_qi_ctrl(iommu);
+
+        /* Clear previous faults */
+        clear_fault_bits(iommu);
+
+        /*
+         * Disable interrupt remapping and queued invalidation if
+         * already enabled by BIOS
+         */
+        disable_intremap(iommu);
+        disable_qinval(iommu);
+    }
+
+    /* Enable queue invalidation */
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
+        if ( enable_qinval(iommu) != 0 )
+        {
+            dprintk(XENLOG_INFO VTDPREFIX,
+                    "Failed to enable Queued Invalidation!\n");
+            return -1;
+        }
+    }
+
+    /* Enable interrupt remapping */
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
+        if ( enable_intremap(iommu, 1) )
+        {
+            dprintk(XENLOG_INFO VTDPREFIX,
+                    "Failed to enable Interrupt Remapping!\n");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Check if interrupt remapping is enabled or not
+ * return 1: enabled
+ * return 0: not enabled
+ */
+int intremap_enabled(void)
+{
+    struct acpi_drhd_unit *drhd;
+    u32 sts;
+
+    for_each_drhd_unit ( drhd )
+    {
+        sts = dmar_readl(drhd->iommu->reg, DMAR_GSTS_REG);
+        if ( !(sts & DMA_GSTS_IRES) )
+            return 0;
+    }
+
+    return 1;
+}
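
Both the new early-return in enable_intremap() and intremap_enabled() above
rest on the same pattern: read the VT-d global status register and test a
single enable bit.  A minimal sketch of that test (register offset and bit
positions as per the VT-d specification; the accessor is illustrative, not
Xen's dmar_readl()):

    #include <stdint.h>

    #define DMAR_GSTS_REG   0x1c          /* global status register offset */
    #define DMA_GSTS_QIES   (1u << 26)    /* queued invalidation enabled */
    #define DMA_GSTS_IRES   (1u << 25)    /* interrupt remapping enabled */

    /* regs points at the unit's memory-mapped register page. */
    static int status_bit_set(volatile uint32_t *regs, uint32_t bit)
    {
        return (regs[DMAR_GSTS_REG / 4] & bit) != 0;
    }

iommu_enable_IR() then sequences the two features deliberately: queued
invalidation is brought up first because interrupt-entry-cache flushes are
issued as queued-invalidation descriptors, so enabling remapping without a
live queue would have nothing to flush through.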
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Tue Jul 06 11:54:40 2010 +0100
@@ -144,14 +144,17 @@ struct iommu_flush *iommu_get_flush(stru
     return iommu ? &iommu->intel->flush : NULL;
 }
 
-static unsigned int clflush_size;
 static int iommus_incoherent;
 static void __iommu_flush_cache(void *addr, unsigned int size)
 {
     int i;
+    static unsigned int clflush_size = 0;
 
     if ( !iommus_incoherent )
         return;
+
+    if ( clflush_size == 0 )
+        clflush_size = get_cache_line_size();
 
     for ( i = 0; i < size; i += clflush_size )
         cacheline_flush((char *)addr + i);
@@ -1037,7 +1040,7 @@ static int iommu_set_interrupt(struct io
     return irq;
 }
 
-static int __init iommu_alloc(struct acpi_drhd_unit *drhd)
+int __init iommu_alloc(struct acpi_drhd_unit *drhd)
 {
     struct iommu *iommu;
     unsigned long sagaw, nr_dom;
@@ -1131,7 +1134,7 @@ static int __init iommu_alloc(struct acp
     return 0;
 }
 
-static void __init iommu_free(struct acpi_drhd_unit *drhd)
+void __init iommu_free(struct acpi_drhd_unit *drhd)
 {
     struct iommu *iommu = drhd->iommu;
 
@@ -1787,7 +1790,7 @@ static void setup_dom0_devices(struct do
     spin_unlock(&pcidevs_lock);
 }
 
-static void clear_fault_bits(struct iommu *iommu)
+void clear_fault_bits(struct iommu *iommu)
 {
     u64 val;
     unsigned long flags;
@@ -1831,24 +1834,20 @@ static int init_vtd_hw(void)
         spin_lock_irqsave(&iommu->register_lock, flags);
         dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
         spin_unlock_irqrestore(&iommu->register_lock, flags);
-
-        /* initialize flush functions */
-        flush = iommu_get_flush(iommu);
-        flush->context = flush_context_reg;
-        flush->iotlb = flush_iotlb_reg;
-    }
-
-    if ( iommu_qinval )
-    {
-        for_each_drhd_unit ( drhd )
-        {
-            iommu = drhd->iommu;
-            if ( enable_qinval(iommu) != 0 )
-            {
-                dprintk(XENLOG_INFO VTDPREFIX,
-                        "Failed to enable Queued Invalidation!\n");
-                break;
-            }
+    }
+
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
+        /*
+         * If queued invalidation is not enabled, use register-based
+         * invalidation.
+         */
+        if ( enable_qinval(iommu) != 0 )
+        {
+            flush = iommu_get_flush(iommu);
+            flush->context = flush_context_reg;
+            flush->iotlb = flush_iotlb_reg;
         }
     }
 
@@ -1874,9 +1873,9 @@ static int init_vtd_hw(void)
         for_each_drhd_unit ( drhd )
         {
             iommu = drhd->iommu;
-            if ( enable_intremap(iommu) != 0 )
+            if ( enable_intremap(iommu, 0) != 0 )
             {
-                dprintk(XENLOG_INFO VTDPREFIX,
+                dprintk(XENLOG_WARNING VTDPREFIX,
                         "Failed to enable Interrupt Remapping!\n");
                 break;
             }
@@ -1943,8 +1942,6 @@ int __init intel_vtd_setup(void)
 
     platform_quirks();
 
-    clflush_size = get_cache_line_size();
-
     irq_to_iommu = xmalloc_array(struct iommu*, nr_irqs);
     BUG_ON(!irq_to_iommu);
     memset(irq_to_iommu, 0, nr_irqs * sizeof(struct iommu*));
@@ -1958,9 +1955,6 @@ int __init intel_vtd_setup(void)
      */
     for_each_drhd_unit ( drhd )
     {
-        if ( iommu_alloc(drhd) != 0 )
-            goto error;
-
         iommu = drhd->iommu;
 
         if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) )
@@ -2000,8 +1994,6 @@ int __init intel_vtd_setup(void)
     return 0;
 
  error:
-    for_each_drhd_unit ( drhd )
-        iommu_free(drhd);
     iommu_enabled = 0;
     iommu_snoop = 0;
     iommu_passthrough = 0;
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/qinval.c      Tue Jul 06 11:54:40 2010 +0100
@@ -437,10 +437,16 @@ int enable_qinval(struct iommu *iommu)
     u32 sts;
     unsigned long flags;
 
+    if ( !ecap_queued_inval(iommu->ecap) || !iommu_qinval )
+        return -ENOENT;
+
     qi_ctrl = iommu_qi_ctrl(iommu);
     flush = iommu_get_flush(iommu);
 
-    ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
+    /* Return if already enabled by Xen */
+    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+    if ( (sts & DMA_GSTS_QIES) && qi_ctrl->qinval_maddr )
+        return 0;
 
     if ( qi_ctrl->qinval_maddr == 0 )
     {
@@ -488,14 +494,19 @@ void disable_qinval(struct iommu *iommu)
     u32 sts;
     unsigned long flags;
 
-    ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
+    if ( !ecap_queued_inval(iommu->ecap) )
+        return;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+    if ( !(sts & DMA_GSTS_QIES) )
+        goto out;
+
     dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_QIE));
 
     /* Make sure hardware complete it */
     IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                   !(sts & DMA_GSTS_QIES), sts);
-    spin_unlock_irqrestore(&iommu->register_lock, flags);
-}
+out:
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
diff -r ce278fdaced3 -r db6234d3eafb xen/drivers/passthrough/vtd/vtd.h
--- a/xen/drivers/passthrough/vtd/vtd.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/drivers/passthrough/vtd/vtd.h Tue Jul 06 11:54:40 2010 +0100
@@ -108,4 +108,7 @@ void iommu_flush_cache_entry(void *addr,
 void iommu_flush_cache_entry(void *addr, unsigned int size);
 void iommu_flush_cache_page(void *addr, unsigned long npages);
 
+int iommu_alloc(struct acpi_drhd_unit *drhd);
+void iommu_free(struct acpi_drhd_unit *drhd);
+
 #endif // _VTD_H_
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/amd.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/amd.h Tue Jul 06 11:54:40 2010 +0100
@@ -0,0 +1,138 @@
+/*
+ * amd.h - AMD processor specific definitions
+ */
+
+#ifndef __AMD_H__
+#define __AMD_H__
+
+#include <asm/cpufeature.h>
+
+/* CPUID masked for use by AMD-V Extended Migration */
+
+#define X86_FEATURE_BITPOS(_feature_) ((_feature_) % 32)
+#define __bit(_x_) (1U << X86_FEATURE_BITPOS(_x_))
+
+/* Family 0Fh, Revision C */
+#define AMD_FEATURES_K8_REV_C_ECX  0
+#define AMD_FEATURES_K8_REV_C_EDX (                                    \
+       __bit(X86_FEATURE_FPU)      | __bit(X86_FEATURE_VME)   |        \
+       __bit(X86_FEATURE_DE)       | __bit(X86_FEATURE_PSE)   |        \
+       __bit(X86_FEATURE_TSC)      | __bit(X86_FEATURE_MSR)   |        \
+       __bit(X86_FEATURE_PAE)      | __bit(X86_FEATURE_MCE)   |        \
+       __bit(X86_FEATURE_CX8)      | __bit(X86_FEATURE_APIC)  |        \
+       __bit(X86_FEATURE_SEP)      | __bit(X86_FEATURE_MTRR)  |        \
+       __bit(X86_FEATURE_PGE)      | __bit(X86_FEATURE_MCA)   |        \
+       __bit(X86_FEATURE_CMOV)     | __bit(X86_FEATURE_PAT)   |        \
+       __bit(X86_FEATURE_PSE36)    | __bit(X86_FEATURE_CLFLSH)|        \
+       __bit(X86_FEATURE_MMX)      | __bit(X86_FEATURE_FXSR)  |        \
+       __bit(X86_FEATURE_XMM)      | __bit(X86_FEATURE_XMM2))
+#define AMD_EXTFEATURES_K8_REV_C_ECX  0 
+#define AMD_EXTFEATURES_K8_REV_C_EDX  (                                        \
+       __bit(X86_FEATURE_FPU)      | __bit(X86_FEATURE_VME)   |        \
+       __bit(X86_FEATURE_DE)       | __bit(X86_FEATURE_PSE)   |        \
+       __bit(X86_FEATURE_TSC)      | __bit(X86_FEATURE_MSR)   |        \
+       __bit(X86_FEATURE_PAE)      | __bit(X86_FEATURE_MCE)   |        \
+       __bit(X86_FEATURE_CX8)      | __bit(X86_FEATURE_APIC)  |        \
+       __bit(X86_FEATURE_SYSCALL)  | __bit(X86_FEATURE_MTRR)  |        \
+       __bit(X86_FEATURE_PGE)      | __bit(X86_FEATURE_MCA)   |        \
+       __bit(X86_FEATURE_CMOV)     | __bit(X86_FEATURE_PAT)   |        \
+       __bit(X86_FEATURE_PSE36)    | __bit(X86_FEATURE_NX)    |        \
+       __bit(X86_FEATURE_MMXEXT)   | __bit(X86_FEATURE_MMX)   |        \
+       __bit(X86_FEATURE_FXSR)     | __bit(X86_FEATURE_LM)    |        \
+       __bit(X86_FEATURE_3DNOWEXT) | __bit(X86_FEATURE_3DNOW))
+
+/* Family 0Fh, Revision D */
+#define AMD_FEATURES_K8_REV_D_ECX         AMD_FEATURES_K8_REV_C_ECX
+#define AMD_FEATURES_K8_REV_D_EDX         AMD_FEATURES_K8_REV_C_EDX
+#define AMD_EXTFEATURES_K8_REV_D_ECX     (AMD_EXTFEATURES_K8_REV_C_ECX |\
+       __bit(X86_FEATURE_LAHF_LM))
+#define AMD_EXTFEATURES_K8_REV_D_EDX     (AMD_EXTFEATURES_K8_REV_C_EDX |\
+       __bit(X86_FEATURE_FFXSR))
+
+/* Family 0Fh, Revision E */
+#define AMD_FEATURES_K8_REV_E_ECX        (AMD_FEATURES_K8_REV_D_ECX |  \
+       __bit(X86_FEATURE_XMM3))
+#define AMD_FEATURES_K8_REV_E_EDX        (AMD_FEATURES_K8_REV_D_EDX |  \
+       __bit(X86_FEATURE_HT))
+#define AMD_EXTFEATURES_K8_REV_E_ECX     (AMD_EXTFEATURES_K8_REV_D_ECX |\
+       __bit(X86_FEATURE_CMP_LEGACY)) 
+#define AMD_EXTFEATURES_K8_REV_E_EDX      AMD_EXTFEATURES_K8_REV_D_EDX
+
+/* Family 0Fh, Revision F */
+#define AMD_FEATURES_K8_REV_F_ECX        (AMD_FEATURES_K8_REV_E_ECX |  \
+       __bit(X86_FEATURE_CX16))
+#define AMD_FEATURES_K8_REV_F_EDX         AMD_FEATURES_K8_REV_E_EDX
+#define AMD_EXTFEATURES_K8_REV_F_ECX     (AMD_EXTFEATURES_K8_REV_E_ECX |\
+       __bit(X86_FEATURE_SVME) | __bit(X86_FEATURE_EXTAPICSPACE) |     \
+       __bit(X86_FEATURE_ALTMOVCR))
+#define AMD_EXTFEATURES_K8_REV_F_EDX     (AMD_EXTFEATURES_K8_REV_E_EDX |\
+       __bit(X86_FEATURE_RDTSCP))
+
+/* Family 0Fh, Revision G */
+#define AMD_FEATURES_K8_REV_G_ECX         AMD_FEATURES_K8_REV_F_ECX
+#define AMD_FEATURES_K8_REV_G_EDX         AMD_FEATURES_K8_REV_F_EDX
+#define AMD_EXTFEATURES_K8_REV_G_ECX     (AMD_EXTFEATURES_K8_REV_F_ECX |\
+       __bit(X86_FEATURE_3DNOWPF))
+#define AMD_EXTFEATURES_K8_REV_G_EDX      AMD_EXTFEATURES_K8_REV_F_EDX
+
+/* Family 10h, Revision B */
+#define AMD_FEATURES_FAM10h_REV_B_ECX    (AMD_FEATURES_K8_REV_F_ECX |  \
+       __bit(X86_FEATURE_POPCNT) | __bit(X86_FEATURE_MWAIT))
+#define AMD_FEATURES_FAM10h_REV_B_EDX     AMD_FEATURES_K8_REV_F_EDX
+#define AMD_EXTFEATURES_FAM10h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_F_ECX |\
+       __bit(X86_FEATURE_ABM) | __bit(X86_FEATURE_SSE4A) |             \
+       __bit(X86_FEATURE_MISALIGNSSE) | __bit(X86_FEATURE_OSVW) |      \
+       __bit(X86_FEATURE_IBS))
+#define AMD_EXTFEATURES_FAM10h_REV_B_EDX (AMD_EXTFEATURES_K8_REV_F_EDX |\
+       __bit(X86_FEATURE_PAGE1GB))
+
+/* Family 10h, Revision C */
+#define AMD_FEATURES_FAM10h_REV_C_ECX     AMD_FEATURES_FAM10h_REV_B_ECX
+#define AMD_FEATURES_FAM10h_REV_C_EDX     AMD_FEATURES_FAM10h_REV_B_EDX
+#define AMD_EXTFEATURES_FAM10h_REV_C_ECX (AMD_EXTFEATURES_FAM10h_REV_B_ECX |\
+       __bit(X86_FEATURE_SKINIT) | __bit(X86_FEATURE_WDT))
+#define AMD_EXTFEATURES_FAM10h_REV_C_EDX  AMD_EXTFEATURES_FAM10h_REV_B_EDX
+
+/* Family 11h, Revision B */
+#define AMD_FEATURES_FAM11h_REV_B_ECX     AMD_FEATURES_K8_REV_G_ECX
+#define AMD_FEATURES_FAM11h_REV_B_EDX     AMD_FEATURES_K8_REV_G_EDX
+#define AMD_EXTFEATURES_FAM11h_REV_B_ECX (AMD_EXTFEATURES_K8_REV_G_ECX |\
+       __bit(X86_FEATURE_SKINIT))
+#define AMD_EXTFEATURES_FAM11h_REV_B_EDX  AMD_EXTFEATURES_K8_REV_G_EDX
+
+/* AMD errata checking
+ *
+ * Errata are defined using the AMD_LEGACY_ERRATUM() or AMD_OSVW_ERRATUM()
+ * macros. The latter is intended for newer errata that have an OSVW id
+ * assigned, which it takes as first argument. Both take a variable number
+ * of family-specific model-stepping ranges created by AMD_MODEL_RANGE().
+ *
+ * Example 1:
+ * #define AMD_ERRATUM_319                                              \
+ *   AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),      \
+ *                      AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),      \
+ *                      AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0))
+ * Example 2:
+ * #define AMD_ERRATUM_400                                              \
+ *   AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),    \
+ *                       AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf))
+ *   
+ */
+
+#define AMD_LEGACY_ERRATUM(...)         0 /* legacy */, __VA_ARGS__, 0
+#define AMD_OSVW_ERRATUM(osvw_id, ...)  1 /* osvw */, osvw_id, __VA_ARGS__, 0
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end)              \
+    ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range)   (((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range)    (((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range)      ((range) & 0xfff)
+
+#define AMD_ERRATUM_170                                                 \
+    AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x0, 0x0, 0x67, 0xf))
+
+#define AMD_ERRATUM_383                                                 \
+    AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf),    \
+                       AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0x1, 0x0))
+
+int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
+#endif /* __AMD_H__ */
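
To make the AMD_MODEL_RANGE() encoding above concrete: the family occupies
bits 24-31, and each (model, stepping) endpoint is packed into twelve bits as
(model << 4) | stepping.  A hedged sketch of the comparison that
cpu_has_amd_erratum() presumably performs against one such range (illustrative
code, not the hypervisor's implementation):

    #include <stdint.h>

    static int range_matches(uint32_t range, uint8_t family,
                             uint8_t model, uint8_t stepping)
    {
        uint16_t ms = (model << 4) | stepping;  /* same packing as the macro */

        if ( ((range >> 24) & 0xff) != family ) /* AMD_MODEL_RANGE_FAMILY */
            return 0;
        return ms >= ((range >> 12) & 0xfff) && /* AMD_MODEL_RANGE_START */
               ms <= (range & 0xfff);           /* AMD_MODEL_RANGE_END */
    }

For AMD_ERRATUM_170, for example, the single range matches family 0x0f from
model 0x0 stepping 0x0 up to model 0x67 stepping 0xf.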
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/apic.h
--- a/xen/include/asm-x86/apic.h        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/apic.h        Tue Jul 06 11:54:40 2010 +0100
@@ -25,6 +25,8 @@ extern int x2apic_enabled;
 extern int x2apic_enabled;
 extern int directed_eoi_enabled;
 
+extern void check_x2apic_preenabled(void);
+extern int x2apic_cmdline_disable(void);
 extern void enable_x2apic(void);
 
 static __inline int x2apic_is_available(void)
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/debugger.h
--- a/xen/include/asm-x86/debugger.h    Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/debugger.h    Tue Jul 06 11:54:40 2010 +0100
@@ -68,10 +68,8 @@ static inline int debugger_trap_entry(
     if ( guest_kernel_mode(v, regs) && v->domain->debugger_attached &&
          ((vector == TRAP_int3) || (vector == TRAP_debug)) )
     {
-#ifdef XEN_GDBSX_CONFIG
         if ( vector != TRAP_debug ) /* domain pause is good enough */
             current->arch.gdbsx_vcpu_event = vector;
-#endif
         domain_pause_for_debugger();
         return 1;
     }
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/domain.h      Tue Jul 06 11:54:40 2010 +0100
@@ -415,9 +415,7 @@ struct arch_vcpu
     struct mapcache_vcpu mapcache;
 #endif
 
-#if XEN_GDBSX_CONFIG
     uint32_t gdbsx_vcpu_event;
-#endif 
 
     /* A secondary copy of the vcpu time info. */
     XEN_GUEST_HANDLE(vcpu_time_info_t) time_info_guest;
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/genapic.h
--- a/xen/include/asm-x86/genapic.h     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/genapic.h     Tue Jul 06 11:54:40 2010 +0100
@@ -70,6 +70,7 @@ cpumask_t vector_allocation_domain_flat(
        .send_IPI_mask = send_IPI_mask_flat, \
        .send_IPI_self = send_IPI_self_flat
 
+const struct genapic *apic_x2apic_probe(void);
 void init_apic_ldr_x2apic_phys(void);
 void init_apic_ldr_x2apic_cluster(void);
 void clustered_apic_check_x2apic(void);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/io_apic.h
--- a/xen/include/asm-x86/io_apic.h     Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/io_apic.h     Tue Jul 06 11:54:40 2010 +0100
@@ -199,6 +199,12 @@ extern void ioapic_suspend(void);
 extern void ioapic_suspend(void);
 extern void ioapic_resume(void);
 
+extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
+extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
+extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
+
 #else  /* !CONFIG_X86_IO_APIC */
 static inline void init_ioapic_mappings(void) {}
 static inline void ioapic_suspend(void) {}
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/irq.h Tue Jul 06 11:54:40 2010 +0100
@@ -91,6 +91,8 @@ void disable_8259A_irq(unsigned int irq)
 void disable_8259A_irq(unsigned int irq);
 void enable_8259A_irq(unsigned int irq);
 int i8259A_irq_pending(unsigned int irq);
+void mask_8259A(void);
+void unmask_8259A(void);
 void init_8259A(int aeoi);
 int i8259A_suspend(void);
 int i8259A_resume(void);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h   Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/msr-index.h   Tue Jul 06 11:54:40 2010 +0100
@@ -251,6 +251,10 @@
 /* AMD Microcode MSRs */
 #define MSR_AMD_PATCHLEVEL             0x0000008b
 #define MSR_AMD_PATCHLOADER            0xc0010020
+
+/* AMD OS Visible Workaround MSRs */
+#define MSR_AMD_OSVW_ID_LENGTH          0xc0010140
+#define MSR_AMD_OSVW_STATUS             0xc0010141
 
 /* K6 MSRs */
 #define MSR_K6_EFER                    0xc0000080
diff -r ce278fdaced3 -r db6234d3eafb xen/include/asm-x86/mtrr.h
--- a/xen/include/asm-x86/mtrr.h        Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/asm-x86/mtrr.h        Tue Jul 06 11:54:40 2010 +0100
@@ -65,7 +65,7 @@ extern u32 get_pat_flags(struct vcpu *v,
 extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr,
                   paddr_t spaddr, uint8_t gmtrr_mtype);
 extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn,
-                                  mfn_t mfn, uint8_t *ipat, int direct_mmio);
+                                  mfn_t mfn, uint8_t *ipat, bool_t direct_mmio);
 extern void ept_change_entry_emt_with_range(
     struct domain *d, unsigned long start_gfn, unsigned long end_gfn);
 extern unsigned char pat_type_2_pte_flags(unsigned char pat_type);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/public/io/ring.h
--- a/xen/include/public/io/ring.h      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/public/io/ring.h      Tue Jul 06 11:54:40 2010 +0100
@@ -103,8 +103,16 @@ struct __name##_sring {                 
 struct __name##_sring {                                                 \
     RING_IDX req_prod, req_event;                                       \
     RING_IDX rsp_prod, rsp_event;                                       \
-    uint8_t  netfront_smartpoll_active;                                 \
-    uint8_t  pad[47];                                                   \
+    union {                                                             \
+        struct {                                                        \
+            uint8_t smartpoll_active;                                   \
+        } netif;                                                        \
+        struct {                                                        \
+            uint8_t msg;                                                \
+        } tapif_user;                                                   \
+        uint8_t pvt_pad[4];                                             \
+    } private;                                                          \
+    uint8_t __pad[44];                                                  \
     union __name##_sring_entry ring[1]; /* variable-length */           \
 };                                                                      \
                                                                         \
@@ -148,7 +156,8 @@ typedef struct __name##_back_ring __name
 #define SHARED_RING_INIT(_s) do {                                       \
     (_s)->req_prod  = (_s)->rsp_prod  = 0;                              \
     (_s)->req_event = (_s)->rsp_event = 1;                              \
-    (void)memset((_s)->pad, 0, sizeof((_s)->pad));                      \
+    (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \
+    (void)memset((_s)->__pad, 0, sizeof((_s)->__pad));                  \
 } while(0)
 
 #define FRONT_RING_INIT(_r, _s, __size) do {                            \
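
The union above renames the old pad bytes into per-backend fields without
moving anything: 16 bytes of ring indexes, a 4-byte private area, and 44 pad
bytes add up to the same 64-byte header as the previous 16 + 1 + 47 layout.
A compile-time check of that invariant (a sketch mirroring the macro-generated
struct, not the header itself):

    #include <stdint.h>

    struct sring_hdr {
        uint32_t req_prod, req_event, rsp_prod, rsp_event;
        union {
            struct { uint8_t smartpoll_active; } netif;
            struct { uint8_t msg; } tapif_user;
            uint8_t pvt_pad[4];
        } private_;                  /* "_" suffix: "private" clashes with C++ */
        uint8_t pad_[44];
    };

    /* Fails to compile if the header ever stops being 64 bytes. */
    typedef char sring_hdr_is_64_bytes[sizeof(struct sring_hdr) == 64 ? 1 : -1];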
diff -r ce278fdaced3 -r db6234d3eafb xen/include/public/memory.h
--- a/xen/include/public/memory.h       Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/public/memory.h       Tue Jul 06 11:54:40 2010 +0100
@@ -52,6 +52,9 @@
 #define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
 /* Flag to populate physmap with populate-on-demand entries */
 #define XENMEMF_populate_on_demand (1<<16)
+/* Flag to request allocation only from the node specified */
+#define XENMEMF_exact_node_request  (1<<17)
+#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
 #endif
 
 struct xen_memory_reservation {
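
The node flags above store the node number off by one, so an all-zero flags
word means "no node preference"; XENMEMF_exact_node() just adds bit 17 on
top.  A round-trip sketch using the encodings from this header (macros
restated locally so the snippet stands alone):

    #include <assert.h>

    #define XENMEMF_node(x)            (((x) + 1) << 8)
    #define XENMEMF_get_node(x)        ((((x) >> 8) - 1) & 0xffu)
    #define XENMEMF_exact_node_request (1 << 17)
    #define XENMEMF_exact_node(n) \
        (XENMEMF_node(n) | XENMEMF_exact_node_request)

    int main(void)
    {
        unsigned int flags = XENMEMF_exact_node(3);

        assert(XENMEMF_get_node(flags) == 3);        /* node survives */
        assert(flags & XENMEMF_exact_node_request);  /* strict placement bit */
        return 0;
    }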
diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/xen/iommu.h   Tue Jul 06 11:54:40 2010 +0100
@@ -58,6 +58,8 @@ struct iommu {
 
 int iommu_setup(void);
 int iommu_supports_eim(void);
+int iommu_enable_IR(void);
+int intremap_enabled(void);
 
 int iommu_add_device(struct pci_dev *pdev);
 int iommu_remove_device(struct pci_dev *pdev);
diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/xen/mm.h      Tue Jul 06 11:54:40 2010 +0100
@@ -82,6 +82,8 @@ int assign_pages(
 #define  MEMF_tmem        (1U<<_MEMF_tmem)
 #define _MEMF_no_dma      3
 #define  MEMF_no_dma      (1U<<_MEMF_no_dma)
+#define _MEMF_exact_node  4
+#define  MEMF_exact_node  (1U<<_MEMF_exact_node)
 #define _MEMF_node        8
 #define  MEMF_node(n)     ((((n)+1)&0xff)<<_MEMF_node)
 #define _MEMF_bits        24
diff -r ce278fdaced3 -r db6234d3eafb xen/include/xen/trace.h
--- a/xen/include/xen/trace.h   Fri Jul 02 18:04:54 2010 +0100
+++ b/xen/include/xen/trace.h   Tue Jul 06 11:54:40 2010 +0100
@@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op
 
 int trace_will_trace_event(u32 event);
 
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
+void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *);
 
 static inline void trace_var(u32 event, int cycles, int extra,
                                unsigned char *extra_data)
@@ -57,7 +57,7 @@ static inline void trace_var(u32 event, 
         {                                                       \
             u32 _d[1];                                          \
             _d[0] = d1;                                         \
-            __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -68,7 +68,7 @@ static inline void trace_var(u32 event, 
             u32 _d[2];                                          \
             _d[0] = d1;                                         \
             _d[1] = d2;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -80,7 +80,7 @@ static inline void trace_var(u32 event, 
             _d[0] = d1;                                         \
             _d[1] = d2;                                         \
             _d[2] = d3;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -93,7 +93,7 @@ static inline void trace_var(u32 event, 
             _d[1] = d2;                                         \
             _d[2] = d3;                                         \
             _d[3] = d4;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -107,7 +107,7 @@ static inline void trace_var(u32 event, 
             _d[2] = d3;                                         \
             _d[3] = d4;                                         \
             _d[4] = d5;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
 
@@ -122,7 +122,7 @@ static inline void trace_var(u32 event, 
             _d[3] = d4;                                         \
             _d[4] = d5;                                         \
             _d[5] = d6;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
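
The final set of hunks swaps sizeof(*_d)*N for sizeof(_d) in the TRACE_nD()
wrappers.  For a true array the two are identical, but sizeof(_d) is derived
from the declaration itself, so the size can no longer drift if the array
length changes.  A stand-alone two-assert demonstration:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t d[3];

        assert(sizeof(d) == sizeof(*d) * 3);  /* equivalent for an array... */
        assert(sizeof(d) == 12);              /* ...and counted only once */
        return 0;
    }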
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
