# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1212374139 -32400
# Node ID f1508348ffabedf2eab0b666a5b8e2c9515e52d7
# Parent  d2a239224cb23f5a7a8059e4167c50bd3abeb9df
# Parent  85fa199b4b7bd1e7511ce7fc2361bae808c27ba6
merge with xen-unstable.hg
---
 extras/mini-os/main-caml.c                         |   42 --
 tools/examples/xend-config-xenapi.sxp              |  196 ---------
 tools/examples/xm-config-xenapi.xml                |   41 --
 extras/mini-os/Config.mk                           |   13 
 extras/mini-os/Makefile                            |   35 -
 extras/mini-os/arch/ia64/minios-ia64.lds           |   17 
 extras/mini-os/arch/ia64/mm.c                      |    8 
 extras/mini-os/arch/x86/minios-x86_32.lds          |   18 
 extras/mini-os/arch/x86/minios-x86_64.lds          |   18 
 extras/mini-os/arch/x86/mm.c                       |   17 
 extras/mini-os/hypervisor.c                        |    4 
 extras/mini-os/include/lib.h                       |    1 
 extras/mini-os/include/mm.h                        |    3 
 extras/mini-os/include/posix/pthread.h             |   52 ++
 extras/mini-os/include/x86/arch_mm.h               |    5 
 extras/mini-os/lib/sys.c                           |   67 +--
 extras/mini-os/lib/xmalloc.c                       |    2 
 extras/mini-os/main.c                              |   12 
 extras/mini-os/mm.c                                |   38 -
 extras/mini-os/sched.c                             |    2 
 stubdom/Makefile                                   |   33 -
 stubdom/c/Makefile                                 |    7 
 stubdom/c/main.c                                   |    2 
 stubdom/caml/Makefile                              |   10 
 stubdom/caml/main-caml.c                           |   42 ++
 tools/examples/Makefile                            |    2 
 tools/examples/xend-config.sxp                     |   16 
 tools/firmware/hvmloader/util.c                    |   12 
 tools/firmware/rombios/rombios.c                   |   26 -
 tools/ioemu/Makefile.target                        |    7 
 tools/ioemu/hw/cirrus_vga.c                        |    2 
 tools/ioemu/vl.c                                   |    5 
 tools/ioemu/vl.h                                   |    2 
 tools/ioemu/xenstore.c                             |    2 
 tools/libxc/Makefile                               |    8 
 tools/libxc/xc_core.c                              |   18 
 tools/libxc/xc_domain.c                            |   31 +
 tools/libxc/xc_minios.c                            |   17 
 tools/libxc/xenctrl.h                              |    7 
 tools/libxc/xg_private.c                           |   16 
 tools/python/xen/lowlevel/xc/xc.c                  |  230 +++++++++--
 tools/python/xen/xend/XendAPI.py                   |    6 
 tools/python/xen/xend/XendDomain.py                |   31 -
 tools/python/xen/xend/XendDomainInfo.py            |   22 -
 tools/python/xen/xend/XendOptions.py               |   27 +
 tools/python/xen/xend/server/SrvDomain.py          |   13 
 tools/python/xen/xend/server/pciif.py              |   52 ++
 tools/python/xen/xend/server/relocate.py           |   24 -
 tools/python/xen/xm/migrate.py                     |   10 
 tools/xenstat/libxenstat/src/xenstat.c             |   18 
 unmodified_drivers/linux-2.6/platform-pci/evtchn.c |    2 
 xen/arch/x86/acpi/cpu_idle.c                       |    2 
 xen/arch/x86/acpi/power.c                          |   14 
 xen/arch/x86/cpu/amd.c                             |    8 
 xen/arch/x86/crash.c                               |    1 
 xen/arch/x86/domain.c                              |   30 -
 xen/arch/x86/domctl.c                              |   63 ++-
 xen/arch/x86/hvm/hpet.c                            |   18 
 xen/arch/x86/hvm/hvm.c                             |  158 ++++---
 xen/arch/x86/hvm/i8254.c                           |   26 -
 xen/arch/x86/hvm/pmtimer.c                         |    2 
 xen/arch/x86/hvm/svm/svm.c                         |    4 
 xen/arch/x86/hvm/vlapic.c                          |   24 -
 xen/arch/x86/hvm/vmx/vmx.c                         |    4 
 xen/arch/x86/hvm/vpt.c                             |   35 +
 xen/arch/x86/mm.c                                  |   14 
 xen/arch/x86/mm/hap/p2m-ept.c                      |    6 
 xen/arch/x86/mm/p2m.c                              |   21 -
 xen/arch/x86/mm/shadow/common.c                    |  119 +++---
 xen/arch/x86/msi.c                                 |    7 
 xen/arch/x86/setup.c                               |    8 
 xen/arch/x86/smpboot.c                             |    5 
 xen/arch/x86/tboot.c                               |   12 
 xen/arch/x86/x86_emulate/x86_emulate.c             |    8 
 xen/common/domain.c                                |    4 
 xen/common/grant_table.c                           |   57 ++
 xen/common/libelf/libelf-private.h                 |    2 
 xen/common/memory.c                                |   17 
 xen/drivers/passthrough/amd/pci_amd_iommu.c        |   11 
 xen/drivers/passthrough/iommu.c                    |  108 +++++
 xen/drivers/passthrough/vtd/dmar.c                 |   33 -
 xen/drivers/passthrough/vtd/dmar.h                 |    1 
 xen/drivers/passthrough/vtd/extern.h               |    3 
 xen/drivers/passthrough/vtd/intremap.c             |  318 ++++++++++++----
 xen/drivers/passthrough/vtd/iommu.c                |  416 ++++++++++-----------
 xen/drivers/passthrough/vtd/iommu.h                |    1 
 xen/drivers/passthrough/vtd/utils.c                |  178 ++++----
 xen/drivers/passthrough/vtd/vtd.h                  |   22 +
 xen/drivers/passthrough/vtd/x86/vtd.c              |  184 ---------
 xen/include/asm-x86/hvm/hvm.h                      |    6 
 xen/include/asm-x86/hvm/vcpu.h                     |    3 
 xen/include/asm-x86/hvm/vmx/vmx.h                  |    1 
 xen/include/asm-x86/hvm/vpt.h                      |    7 
 xen/include/asm-x86/tboot.h                        |   15 
 xen/include/public/domctl.h                        |   11 
 xen/include/xen/elfcore.h                          |    1 
 xen/include/xen/hvm/iommu.h                        |    4 
 xen/include/xen/iommu.h                            |   14 
 xen/include/xen/sched.h                            |    3 
 xen/include/xen/time.h                             |    1 
 100 files changed, 1874 insertions(+), 1427 deletions(-)
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/Config.mk
--- a/extras/mini-os/Config.mk  Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/Config.mk  Mon Jun 02 11:35:39 2008 +0900
@@ -41,10 +41,7 @@ extra_incl := $(foreach dir,$(EXTRA_INC)
 extra_incl := $(foreach dir,$(EXTRA_INC),-I$(MINI-OS_ROOT)/include/$(dir))
 
 DEF_CPPFLAGS += -I$(MINI-OS_ROOT)/include
-
-ifeq ($(stubdom),y)
-DEF_CPPFLAGS += -DCONFIG_STUBDOM
-endif
+DEF_CPPFLAGS += -D__MINIOS__
 
 ifeq ($(libc),y)
 DEF_CPPFLAGS += -DHAVE_LIBC
@@ -58,11 +55,3 @@ DEF_CPPFLAGS += -I$(LWIPDIR)/src/include
 DEF_CPPFLAGS += -I$(LWIPDIR)/src/include
 DEF_CPPFLAGS += -I$(LWIPDIR)/src/include/ipv4
 endif
-
-ifneq ($(QEMUDIR),)
-qemu=y
-endif
-
-ifneq ($(CAMLDIR),)
-caml=y
-endif
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/Makefile   Mon Jun 02 11:35:39 2008 +0900
@@ -73,44 +73,25 @@ OBJS += lwip.a
 OBJS += lwip.a
 endif
 
-OBJS := $(filter-out lwip%.o $(LWO), $(OBJS))
-
-ifeq ($(caml),y)
-CAMLLIB = $(shell ocamlc -where)
-APP_OBJS += main-caml.o
-APP_OBJS += $(CAMLDIR)/caml.o
-APP_OBJS += $(CAMLLIB)/libasmrun.a
-CFLAGS += -I$(CAMLLIB)
-APP_LDLIBS += -lm
-endif
-OBJS := $(filter-out main-caml.o, $(OBJS))
-
-ifeq ($(qemu),y)
-APP_OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a $(QEMUDIR)/i386-dm-stubdom/libqemu.a
-CFLAGS += -DCONFIG_QEMU
-endif
-
-ifneq ($(CDIR),)
-APP_OBJS += $(CDIR)/main.a
-APP_LDLIBS += 
-endif
+OBJS := $(filter-out main.o lwip%.o $(LWO), $(OBJS))
 
 ifeq ($(libc),y)
-LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -lxenctrl -lxenguest
+APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -whole-archive -lxenguest -lxenctrl -no-whole-archive
 APP_LDLIBS += -lpci
 APP_LDLIBS += -lz
+APP_LDLIBS += -lm
 LDLIBS += -lc
 endif
 
-ifneq ($(caml)-$(qemu)-$(CDIR)-$(lwip),---y)
+ifneq ($(APP_OBJS)-$(lwip),-y)
 OBJS := $(filter-out daytime.o, $(OBJS))
 endif
 
-app.o: $(APP_OBJS) app.lds
-       $(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined main -o $@
+$(TARGET)_app.o: $(APP_OBJS) app.lds
+       $(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined app_main -o $@
 
-$(TARGET): links $(OBJS) app.o arch_lib
-       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
+$(TARGET): links $(OBJS) $(TARGET)_app.o arch_lib
+       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(TARGET)_app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
        $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
        $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
        gzip -f -9 -c $@ >$@.gz
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/ia64/minios-ia64.lds
--- a/extras/mini-os/arch/ia64/minios-ia64.lds  Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/arch/ia64/minios-ia64.lds  Mon Jun 02 11:35:39 2008 +0900
@@ -52,6 +52,23 @@ SECTIONS
   .fini_array     : { *(.fini_array) }
   PROVIDE (__fini_array_end = .);
 
+  .ctors : {
+        __CTOR_LIST__ = .;
+        QUAD((__CTOR_END__ - __CTOR_LIST__) / 8 - 2)
+        *(SORT_BY_NAME(.ctors))
+       SORT_BY_NAME(CONSTRUCTORS)
+        QUAD(0)
+        __CTOR_END__ = .;
+        }
+
+  .dtors : {
+        __DTOR_LIST__ = .;
+        QUAD((__DTOR_END__ - __DTOR_LIST__) / 8 - 2)
+        *(SORT_BY_NAME(.dtors))
+        QUAD(0)
+        __DTOR_END__ = .;
+        }
+
  .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - (((5<<(61))+0x100000000) - (1 << 20)))
   { *(.IA_64.unwind_info) }
 
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/ia64/mm.c
--- a/extras/mini-os/arch/ia64/mm.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/arch/ia64/mm.c     Mon Jun 02 11:35:39 2008 +0900
@@ -131,6 +131,14 @@ arch_init_demand_mapping_area(unsigned l
 }
 
 /* Helper function used in gnttab.c. */
+void do_map_frames(unsigned long addr,
+        unsigned long *f, unsigned long n, unsigned long stride,
+       unsigned long increment, domid_t id, int may_fail, unsigned long prot)
+{
+       /* TODO */
+       ASSERT(0);
+}
+
 void*
 map_frames_ex(unsigned long* frames, unsigned long n, unsigned long stride,
        unsigned long increment, unsigned long alignment, domid_t id,
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/x86/minios-x86_32.lds
--- a/extras/mini-os/arch/x86/minios-x86_32.lds Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/arch/x86/minios-x86_32.lds Mon Jun 02 11:35:39 2008 +0900
@@ -28,9 +28,25 @@ SECTIONS
   .fini_array     : { *(.fini_array) }
   PROVIDE (__fini_array_end = .);
 
+  .ctors : {
+        __CTOR_LIST__ = .;
+        LONG((__CTOR_END__ - __CTOR_LIST__) / 4 - 2)
+        *(SORT_BY_NAME(.ctors))
+       SORT_BY_NAME(CONSTRUCTORS)
+        LONG(0)
+        __CTOR_END__ = .;
+        }
+
+  .dtors : {
+        __DTOR_LIST__ = .;
+        LONG((__DTOR_END__ - __DTOR_LIST__) / 4 - 2)
+        *(SORT_BY_NAME(.dtors))
+        LONG(0)
+        __DTOR_END__ = .;
+        }
+
   .data : {                    /* Data */
        *(.data)
-       CONSTRUCTORS
        }
 
   _edata = .;                  /* End of data section */
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/x86/minios-x86_64.lds
--- a/extras/mini-os/arch/x86/minios-x86_64.lds Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/arch/x86/minios-x86_64.lds Mon Jun 02 11:35:39 2008 +0900
@@ -28,9 +28,25 @@ SECTIONS
   .fini_array     : { *(.fini_array) }
   PROVIDE (__fini_array_end = .);
 
+  .ctors : {
+        __CTOR_LIST__ = .;
+        QUAD((__CTOR_END__ - __CTOR_LIST__) / 8 - 2)
+        *(SORT_BY_NAME(.ctors))
+       SORT_BY_NAME(CONSTRUCTORS)
+        QUAD(0)
+        __CTOR_END__ = .;
+        }
+
+  .dtors : {
+        __DTOR_LIST__ = .;
+        QUAD((__DTOR_END__ - __DTOR_LIST__) / 8 - 2)
+        *(SORT_BY_NAME(.dtors))
+        QUAD(0)
+        __DTOR_END__ = .;
+        }
+
   .data : {                    /* Data */
        *(.data)
-       CONSTRUCTORS
        }
 
   _edata = .;                  /* End of data section */
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/arch/x86/mm.c      Mon Jun 02 11:35:39 2008 +0900
@@ -59,11 +59,10 @@ void new_pt_frame(unsigned long *pt_pfn,
 {   
     pgentry_t *tab = (pgentry_t *)start_info.pt_base;
     unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); 
-    unsigned long prot_e, prot_t, pincmd;
+    unsigned long prot_e, prot_t;
     mmu_update_t mmu_updates[1];
-    struct mmuext_op pin_request;
     
-    prot_e = prot_t = pincmd = 0;
+    prot_e = prot_t = 0;
     DEBUG("Allocating new L%d pt frame for pt_pfn=%lx, "
            "prev_l_mfn=%lx, offset=%lx", 
            level, *pt_pfn, prev_l_mfn, offset);
@@ -77,18 +76,15 @@ void new_pt_frame(unsigned long *pt_pfn,
     case L1_FRAME:
          prot_e = L1_PROT;
          prot_t = L2_PROT;
-         pincmd = MMUEXT_PIN_L1_TABLE;
          break;
     case L2_FRAME:
          prot_e = L2_PROT;
          prot_t = L3_PROT;
-         pincmd = MMUEXT_PIN_L2_TABLE;
          break;
 #if defined(__x86_64__)
     case L3_FRAME:
          prot_e = L3_PROT;
          prot_t = L4_PROT;
-         pincmd = MMUEXT_PIN_L3_TABLE;
          break;
 #endif
     default:
@@ -113,15 +109,6 @@ void new_pt_frame(unsigned long *pt_pfn,
          do_exit();
     }
                         
-    /* Pin the page to provide correct protection */
-    pin_request.cmd = pincmd;
-    pin_request.arg1.mfn = pfn_to_mfn(*pt_pfn);
-    if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0)
-    {
-        printk("ERROR: pinning failed\n");
-        do_exit();
-    }
-
     /* Now fill the new page table page with entries.
        Update the page directory as well. */
     mmu_updates[0].ptr = ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/hypervisor.c
--- a/extras/mini-os/hypervisor.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/hypervisor.c       Mon Jun 02 11:35:39 2008 +0900
@@ -55,12 +55,12 @@ void do_hypervisor_callback(struct pt_re
     while ( l1 != 0 )
     {
         l1i = __ffs(l1);
-        l1 &= ~(1 << l1i);
+        l1 &= ~(1UL << l1i);
         
         while ( (l2 = active_evtchns(cpu, s, l1i)) != 0 )
         {
             l2i = __ffs(l2);
-            l2 &= ~(1 << l2i);
+            l2 &= ~(1UL << l2i);
 
             port = (l1i * (sizeof(unsigned long) * 8)) + l2i;
                        do_event(port, regs);
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/lib.h
--- a/extras/mini-os/include/lib.h      Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/include/lib.h      Mon Jun 02 11:35:39 2008 +0900
@@ -136,6 +136,7 @@ enum fd_type {
     FTYPE_CONSOLE,
     FTYPE_FILE,
     FTYPE_XENBUS,
+    FTYPE_XC,
     FTYPE_EVTCHN,
     FTYPE_SOCKET,
     FTYPE_TAP,
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h       Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/include/mm.h       Mon Jun 02 11:35:39 2008 +0900
@@ -67,6 +67,9 @@ void *map_frames_ex(unsigned long *f, un
 void *map_frames_ex(unsigned long *f, unsigned long n, unsigned long stride,
        unsigned long increment, unsigned long alignment, domid_t id,
        int may_fail, unsigned long prot);
+void do_map_frames(unsigned long addr,
+        unsigned long *f, unsigned long n, unsigned long stride,
+       unsigned long increment, domid_t id, int may_fail, unsigned long prot);
 #ifdef HAVE_LIBC
 extern unsigned long heap, brk, heap_mapped, heap_end;
 #endif
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/posix/pthread.h
--- a/extras/mini-os/include/posix/pthread.h    Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/include/posix/pthread.h    Mon Jun 02 11:35:39 2008 +0900
@@ -1,18 +1,56 @@
 #ifndef _POSIX_PTHREAD_H
 #define _POSIX_PTHREAD_H
 
+#include <stdlib.h>
+
 /* Let's be single-threaded for now.  */
 
-typedef void *pthread_key_t;
-typedef struct {} pthread_mutex_t, pthread_once_t;
+typedef struct {
+    void *ptr;
+} *pthread_key_t;
+static inline int pthread_key_create(pthread_key_t *key, void (*destr_function)(void*))
+{
+    *key = malloc(sizeof(**key));
+    (*key)->ptr = NULL;
+    return 0;
+}
+static inline int pthread_setspecific(pthread_key_t key, const void *pointer)
+{
+    key->ptr = (void*) pointer;
+    return 0;
+}
+static inline void *pthread_getspecific(pthread_key_t key)
+{
+    return key->ptr;
+}
+static inline int pthread_key_delete(pthread_key_t key)
+{
+    free(key);
+    return 0;
+}
+
+
+
+typedef struct {} pthread_mutex_t;
 #define PTHREAD_MUTEX_INITIALIZER {}
-#define PTHREAD_ONCE_INIT {}
 static inline int pthread_mutex_lock(pthread_mutex_t *mutex) { return 0; }
 static inline int pthread_mutex_unlock(pthread_mutex_t *mutex) { return 0; }
-static inline int pthread_key_create(pthread_key_t *key, void (*destr_function)(void*)) { *key = NULL; return 0; }
-static inline int pthread_setspecific(pthread_key_t *key, const void *pointer) { *key = (void*) pointer; return 0; }
-static inline void *pthread_getspecific(pthread_key_t *key) { return *key; }
-static inline int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) { init_routine(); return 0; }
+
+
+
+typedef struct {
+    int done;
+} pthread_once_t;
+#define PTHREAD_ONCE_INIT { 0 }
+
+static inline int pthread_once(pthread_once_t *once_control, void (*init_routine)(void))
+{
+    if (!once_control->done) {
+        once_control->done = 1;
+        init_routine();
+    }
+    return 0;
+}
 
 #define __thread
 
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/x86/arch_mm.h
--- a/extras/mini-os/include/x86/arch_mm.h      Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/include/x86/arch_mm.h      Mon Jun 02 11:35:39 2008 +0900
@@ -219,11 +219,6 @@ static __inline__ paddr_t machine_to_phy
 
 #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, L1_PROT)
 #define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 0, L1_PROT_RO)
-#ifndef __ASSEMBLY__
-void do_map_frames(unsigned long addr,
-        unsigned long *f, unsigned long n, unsigned long stride,
-       unsigned long increment, domid_t id, int may_fail, unsigned long prot);
-#endif
 #define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, 0, L1_PROT_RO)
 
 #endif /* _ARCH_MM_H_ */
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/lib/sys.c
--- a/extras/mini-os/lib/sys.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/lib/sys.c  Mon Jun 02 11:35:39 2008 +0900
@@ -81,6 +81,7 @@
 
 #define NOFILE 32
 extern int xc_evtchn_close(int fd);
+extern int xc_interface_close(int fd);
 
 pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER;
 struct file files[NOFILE] = {
@@ -259,10 +260,7 @@ int read(int fd, void *buf, size_t nbyte
            }
            return ret * sizeof(union xenfb_in_event);
         }
-       case FTYPE_NONE:
-       case FTYPE_XENBUS:
-       case FTYPE_EVTCHN:
-       case FTYPE_BLK:
+       default:
            break;
     }
     printk("read(%d): Bad descriptor\n", fd);
@@ -295,12 +293,7 @@ int write(int fd, const void *buf, size_
        case FTYPE_TAP:
            netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes);
            return nbytes;
-       case FTYPE_NONE:
-       case FTYPE_XENBUS:
-       case FTYPE_EVTCHN:
-       case FTYPE_BLK:
-       case FTYPE_KBD:
-       case FTYPE_FB:
+       default:
            break;
     }
     printk("write(%d): Bad descriptor\n", fd);
@@ -351,15 +344,7 @@ int fsync(int fd) {
            }
            return 0;
        }
-       case FTYPE_NONE:
-       case FTYPE_CONSOLE:
-       case FTYPE_SOCKET:
-       case FTYPE_XENBUS:
-       case FTYPE_EVTCHN:
-       case FTYPE_TAP:
-       case FTYPE_BLK:
-       case FTYPE_KBD:
-       case FTYPE_FB:
+       default:
            break;
     }
     printk("fsync(%d): Bad descriptor\n", fd);
@@ -391,6 +376,9 @@ int close(int fd)
            files[fd].type = FTYPE_NONE;
            return res;
        }
+       case FTYPE_XC:
+           xc_interface_close(fd);
+           return 0;
        case FTYPE_EVTCHN:
             xc_evtchn_close(fd);
             return 0;
@@ -495,13 +483,7 @@ int fstat(int fd, struct stat *buf)
            stat_from_fs(buf, &stat);
            return 0;
        }
-       case FTYPE_NONE:
-       case FTYPE_XENBUS:
-       case FTYPE_EVTCHN:
-       case FTYPE_TAP:
-       case FTYPE_BLK:
-       case FTYPE_KBD:
-       case FTYPE_FB:
+       default:
            break;
     }
 
@@ -522,15 +504,7 @@ int ftruncate(int fd, off_t length)
            }
            return 0;
        }
-       case FTYPE_NONE:
-       case FTYPE_CONSOLE:
-       case FTYPE_SOCKET:
-       case FTYPE_XENBUS:
-       case FTYPE_EVTCHN:
-       case FTYPE_TAP:
-       case FTYPE_BLK:
-       case FTYPE_KBD:
-       case FTYPE_FB:
+       default:
            break;
     }
 
@@ -636,9 +610,10 @@ static const char file_types[] = {
     [FTYPE_NONE]       = 'N',
     [FTYPE_CONSOLE]    = 'C',
     [FTYPE_FILE]       = 'F',
-    [FTYPE_XENBUS]     = 'X',
+    [FTYPE_XENBUS]     = 'S',
+    [FTYPE_XC]         = 'X',
     [FTYPE_EVTCHN]     = 'E',
-    [FTYPE_SOCKET]     = 'S',
+    [FTYPE_SOCKET]     = 's',
     [FTYPE_TAP]                = 'T',
     [FTYPE_BLK]                = 'B',
     [FTYPE_KBD]                = 'K',
@@ -722,7 +697,7 @@ static int select_poll(int nfds, fd_set 
     /* Then see others as well. */
     for (i = 0; i < nfds; i++) {
        switch(files[i].type) {
-       case FTYPE_NONE:
+       default:
            if (FD_ISSET(i, readfds) || FD_ISSET(i, writefds) || FD_ISSET(i, exceptfds))
                printk("bogus fd %d in select\n", i);
            /* Fallthrough.  */
@@ -1083,14 +1058,20 @@ int clock_gettime(clockid_t clk_id, stru
 
 void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset)
 {
+    unsigned long n = (length + PAGE_SIZE - 1) / PAGE_SIZE;
+
     ASSERT(!start);
-    length = (length + PAGE_SIZE - 1) & PAGE_MASK;
     ASSERT(prot == (PROT_READ|PROT_WRITE));
-    ASSERT(flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON));
-    ASSERT(fd == -1);
+    ASSERT((fd == -1 && (flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON)))
+        || (fd != -1 && flags == MAP_SHARED));
     ASSERT(offset == 0);
 
-    return map_zero(length / PAGE_SIZE, 1);
+    if (fd == -1)
+        return map_zero(n, 1);
+    else if (files[fd].type == FTYPE_XC) {
+        unsigned long zero = 0;
+        return map_frames_ex(&zero, n, 0, 0, 1, DOMID_SELF, 0, 0);
+    } else ASSERT(0);
 }
 #if defined(__x86_64__) || defined(__ia64__)
 __typeof__(mmap) mmap64 __attribute__((__alias__("mmap")));
@@ -1110,7 +1091,7 @@ int munmap(void *start, size_t length)
        call[i].args[0] = (unsigned long) &data[i];
        call[i].args[1] = 0;
        call[i].args[2] = 0;
-       call[i].args[3] = UVMF_INVLPG | UVMF_ALL;
+       call[i].args[3] = UVMF_INVLPG;
     }
 
     ret = HYPERVISOR_multicall(call, n);
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/lib/xmalloc.c
--- a/extras/mini-os/lib/xmalloc.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/lib/xmalloc.c      Mon Jun 02 11:35:39 2008 +0900
@@ -127,7 +127,7 @@ static void *xmalloc_whole_pages(size_t 
     if ( hdr == NULL )
         return NULL;
 
-    hdr->size = (1 << (pageorder + PAGE_SHIFT));
+    hdr->size = (1UL << (pageorder + PAGE_SHIFT));
     /* Debugging aid. */
     hdr->freelist.next = hdr->freelist.prev = NULL;
 
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/main-caml.c
--- a/extras/mini-os/main-caml.c        Mon Jun 02 11:35:02 2008 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-/*
- * Caml bootstrap
- *
- * Samuel Thibault <Samuel.Thibault@xxxxxxxxxxxxx>, January 2008
- */
-
-#include <stdio.h>
-#include <errno.h>
-
-#include <caml/mlvalues.h>
-#include <caml/callback.h>
-#include <unistd.h>
-
-/* Ugly binary compatibility with Linux */
-FILE *_stderr asm("stderr");
-int *__errno_location;
-/* Will probably break everything, probably need to fetch from glibc */
-void *__ctype_b_loc;
-
-int main(int argc, char *argv[], char *envp[])
-{
-    value *val;
-
-    /* Get current thread's value */
-    _stderr = stderr;
-    __errno_location = &errno;
-
-    printf("starting caml\n");
-
-    /* Wait before things might hang up */
-    sleep(1);
-
-    caml_startup(argv);
-    val = caml_named_value("main");
-    if (!val) {
-        printf("Couldn't find Caml main");
-        return 1;
-    }
-    caml_callback(*val, Val_int(0));
-    printf("callback returned\n");
-    return 0;
-}
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/main.c
--- a/extras/mini-os/main.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/main.c     Mon Jun 02 11:35:39 2008 +0900
@@ -4,7 +4,6 @@
  * Samuel Thibault <Samuel.Thibault@xxxxxxxxxxxxx>, October 2007
  */
 
-#ifdef HAVE_LIBC
 #include <os.h>
 #include <sched.h>
 #include <console.h>
@@ -19,8 +18,8 @@ extern int main(int argc, char *argv[], 
 extern int main(int argc, char *argv[], char *envp[]);
 extern void __libc_init_array(void);
 extern void __libc_fini_array(void);
-
-struct thread *main_thread;
+extern unsigned long __CTOR_LIST__[];
+extern unsigned long __DTOR_LIST__[];
 
 #if 0
 #include <stdio.h>
@@ -147,6 +146,8 @@ static void call_main(void *p)
 
     __libc_init_array();
     environ = envp;
+    for (i = 1; i <= __CTOR_LIST__[0]; i++)
+        ((void((*)(void)))__CTOR_LIST__[i]) ();
     tzset();
 
     exit(main(argc, argv, envp));
@@ -154,6 +155,10 @@ static void call_main(void *p)
 
 void _exit(int ret)
 {
+    int i;
+
+    for (i = 1; i <= __DTOR_LIST__[0]; i++)
+        ((void((*)(void)))__DTOR_LIST__[i]) ();
     close_all_files();
     __libc_fini_array();
     printk("main returned %d\n", ret);
@@ -172,4 +177,3 @@ int app_main(start_info_t *si)
     main_thread = create_thread("main", call_main, si);
     return 0;
 }
-#endif
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/mm.c
--- a/extras/mini-os/mm.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/mm.c       Mon Jun 02 11:35:39 2008 +0900
@@ -58,7 +58,7 @@ static unsigned long *alloc_bitmap;
 #define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
 
 #define allocated_in_map(_pn) \
-(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))
+(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1UL<<((_pn)&(PAGES_PER_MAPWORD-1))))
 
 /*
  * Hint regarding bitwise arithmetic in map_{alloc,free}:
@@ -80,13 +80,13 @@ static void map_alloc(unsigned long firs
 
     if ( curr_idx == end_idx )
     {
-        alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
+        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
     }
     else 
     {
-        alloc_bitmap[curr_idx] |= -(1<<start_off);
-        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
-        alloc_bitmap[curr_idx] |= (1<<end_off)-1;
+        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
+        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
+        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
     }
 }
 
@@ -102,13 +102,13 @@ static void map_free(unsigned long first
 
     if ( curr_idx == end_idx )
     {
-        alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
+        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
     }
     else 
     {
-        alloc_bitmap[curr_idx] &= (1<<start_off)-1;
+        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
         while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
-        alloc_bitmap[curr_idx] &= -(1<<end_off);
+        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
     }
 }
 
@@ -178,7 +178,7 @@ USED static void print_chunks(void *star
         head = free_head[order];
         while(!FREELIST_EMPTY(head))
         {
-            for(count = 0; count < 1<< head->level; count++)
+            for(count = 0; count < 1UL<< head->level; count++)
             {
                 if(count + virt_to_pfn(head) - pfn_start < 1000)
                     chunks[count + virt_to_pfn(head) - pfn_start] = current;
@@ -235,13 +235,13 @@ static void init_page_allocator(unsigned
          * Next chunk is limited by alignment of min, but also
          * must not be bigger than remaining range.
          */
-        for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ )
-            if ( min & (1<<i) ) break;
+        for ( i = PAGE_SHIFT; (1UL<<(i+1)) <= range; i++ )
+            if ( min & (1UL<<i) ) break;
 
 
         ch = (chunk_head_t *)min;
-        min   += (1<<i);
-        range -= (1<<i);
+        min   += (1UL<<i);
+        range -= (1UL<<i);
         ct = (chunk_tail_t *)min-1;
         i -= PAGE_SHIFT;
         ch->level       = i;
@@ -280,8 +280,8 @@ unsigned long alloc_pages(int order)
     {
         /* Split into two equal parts. */
         i--;
-        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT)));
-        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1;
+        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1UL<<(i+PAGE_SHIFT)));
+        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1UL<<(i+PAGE_SHIFT)))-1;
 
         /* Create new header for spare chunk. */
         spare_ch->level = i;
@@ -294,7 +294,7 @@ unsigned long alloc_pages(int order)
         free_head[i] = spare_ch;
     }
     
-    map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1<<order);
+    map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1UL<<order);
 
     return((unsigned long)alloc_ch);
 
@@ -312,16 +312,16 @@ void free_pages(void *pointer, int order
     unsigned long mask;
     
     /* First free the chunk */
-    map_free(virt_to_pfn(pointer), 1 << order);
+    map_free(virt_to_pfn(pointer), 1UL << order);
     
     /* Create free chunk */
     freed_ch = (chunk_head_t *)pointer;
-    freed_ct = (chunk_tail_t *)((char *)pointer + (1<<(order + PAGE_SHIFT)))-1;
+    freed_ct = (chunk_tail_t *)((char *)pointer + (1UL<<(order + PAGE_SHIFT)))-1;
     
     /* Now, possibly we can conseal chunks together */
     while(order < FREELIST_SIZE)
     {
-        mask = 1 << (order + PAGE_SHIFT);
+        mask = 1UL << (order + PAGE_SHIFT);
         if((unsigned long)freed_ch & mask) 
         {
             to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/sched.c
--- a/extras/mini-os/sched.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/sched.c    Mon Jun 02 11:35:39 2008 +0900
@@ -57,6 +57,8 @@ struct thread *idle_thread = NULL;
 struct thread *idle_thread = NULL;
 LIST_HEAD(exited_threads);
 static int threads_started;
+
+struct thread *main_thread;
 
 void inline print_runqueue(void)
 {
diff -r d2a239224cb2 -r f1508348ffab stubdom/Makefile
--- a/stubdom/Makefile  Mon Jun 02 11:35:02 2008 +0900
+++ b/stubdom/Makefile  Mon Jun 02 11:35:39 2008 +0900
@@ -37,7 +37,7 @@ export PATH:=$(CROSS_PREFIX)/bin:$(PATH)
 export PATH:=$(CROSS_PREFIX)/bin:$(PATH)
 
 .PHONY: all
-all: qemu-stubdom
+all: ioemu-stubdom c-stubdom
 
 ################
 # Cross-binutils
@@ -174,6 +174,7 @@ mk-symlinks:
          ([ ! -h config-host.h ] || rm -f config-host.h) && \
          ([ ! -h config-host.mak ] || rm -f config-host.mak) )
        [ -h mini-os ] || ln -sf ../extras/mini-os .
+       [ -h mini-os/include/xen ] || ln -sf ../../../xen/include/public mini-os/include/xen
 
 #######
 # libxc
@@ -198,40 +199,41 @@ ioemu: cross-zlib cross-libpci mk-symlin
 ######
 
 .PHONY: caml
-caml:
-       $(MAKE) -C $@
+caml: mk-symlinks
+       $(MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-cvs 
 
 ###
 # C
 ###
 
 .PHONY: c
-c:
-       $(MAKE) -C $@
+c: mk-symlinks
+       $(MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-cvs 
 
 ########
 # minios
 ########
 
-.PHONY: qemu-stubdom
-qemu-stubdom: mk-symlinks lwip-cvs libxc ioemu
-       $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs QEMUDIR=$(CURDIR)/ioemu
-
+.PHONY: ioemu-stubdom
+ioemu-stubdom: lwip-cvs libxc ioemu
+       $(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs APP_OBJS="$(CURDIR)/ioemu/i386-dm-stubdom/qemu.a $(CURDIR)/ioemu/i386-dm-stubdom/libqemu.a"
+
+CAMLLIB = $(shell ocamlc -where)
 .PHONY: caml-stubdom
-caml-stubdom: mk-symlinks lwip-cvs libxc cross-libpci caml
-       $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs CAMLDIR=$(CURDIR)/caml
+caml-stubdom: lwip-cvs libxc caml
+       $(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs APP_OBJS="$(CURDIR)/caml/main-c.o $(CURDIR)/caml/main-caml.o $(CURDIR)/caml/caml.o $(CAMLLIB)/libasmrun.a"
 
 .PHONY: c-stubdom
-c-stubdom: mk-symlinks lwip-cvs libxc cross-libpci c
-       $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs CDIR=$(CURDIR)/c
+c-stubdom: lwip-cvs libxc c
+       $(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs APP_OBJS=$(CURDIR)/c/main.a
 
 #########
 # install
 #########
 
-install: mini-os/mini-os.gz
+install: mini-os/ioemu-stubdom.gz
        $(INSTALL_PROG) stubdom-dm "$(DESTDIR)/usr/lib/xen/bin"
-       $(INSTALL_PROG) mini-os/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/stubdom.gz"
+       $(INSTALL_PROG) $< "$(DESTDIR)/usr/lib/xen/boot/stubdom.gz"
 
 #######
 # clean
@@ -242,6 +244,7 @@ clean:
 clean:
        -$(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs clean
        $(MAKE) -C caml clean
+       $(MAKE) -C c clean
        rm -fr libxc ioemu mini-os include
 
 # clean the cross-compilation result
diff -r d2a239224cb2 -r f1508348ffab stubdom/c/Makefile
--- a/stubdom/c/Makefile        Mon Jun 02 11:35:02 2008 +0900
+++ b/stubdom/c/Makefile        Mon Jun 02 11:35:39 2008 +0900
@@ -2,7 +2,12 @@ XEN_ROOT = ../..
 
 include $(XEN_ROOT)/Config.mk
 
-main.a: main.o
+all: main.a
+
+main-c.c:
+       ln -sf $(XEN_ROOT)/extras/mini-os/main.c $@
+
+main.a: main-c.o main.o 
        $(AR) cr $@ $^
 
 clean:
diff -r d2a239224cb2 -r f1508348ffab stubdom/c/main.c
--- a/stubdom/c/main.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/stubdom/c/main.c  Mon Jun 02 11:35:39 2008 +0900
@@ -1,4 +1,6 @@
 #include <stdio.h>
+#include <unistd.h>
+
 int main(void) {
         sleep(2);
         printf("Hello, world!\n");
diff -r d2a239224cb2 -r f1508348ffab stubdom/caml/Makefile
--- a/stubdom/caml/Makefile     Mon Jun 02 11:35:02 2008 +0900
+++ b/stubdom/caml/Makefile     Mon Jun 02 11:35:39 2008 +0900
@@ -1,12 +1,20 @@ XEN_ROOT = ../..
 XEN_ROOT = ../..
 
 include $(XEN_ROOT)/Config.mk
+
+CAMLLIB = $(shell ocamlc -where)
+DEF_CPPFLAGS += -I$(CAMLLIB)
 
 OCAMLFIND=ocamlfind
 OCAMLOPT=ocamlopt
 
 OBJS := hello.cmx
 LIBS := 
+
+all: main-c.o main-caml.o caml.o
+
+main-c.c:
+       ln -sf $(XEN_ROOT)/extras/mini-os/main.c $@
 
 %.cmx: %.ml
        $(OCAMLFIND) $(OCAMLOPT) -c $< -o $@
@@ -15,4 +23,4 @@ caml.o: $(OBJS)
        $(OCAMLFIND) $(OCAMLOPT) $(LIBS) $^ -output-obj -o $@
 
 clean:
-       rm -f *.o *.cmx *.cmi
+       rm -f *.a *.o *.cmx *.cmi
diff -r d2a239224cb2 -r f1508348ffab stubdom/caml/main-caml.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/stubdom/caml/main-caml.c  Mon Jun 02 11:35:39 2008 +0900
@@ -0,0 +1,42 @@
+/*
+ * Caml bootstrap
+ *
+ * Samuel Thibault <Samuel.Thibault@xxxxxxxxxxxxx>, January 2008
+ */
+
+#include <stdio.h>
+#include <errno.h>
+
+#include <caml/mlvalues.h>
+#include <caml/callback.h>
+#include <unistd.h>
+
+/* Ugly binary compatibility with Linux */
+FILE *_stderr asm("stderr");
+int *__errno_location;
+/* Will probably break everything, probably need to fetch from glibc */
+void *__ctype_b_loc;
+
+int main(int argc, char *argv[], char *envp[]) /* returns 0 on success, 1 if no Caml "main" is found */
+{
+    value *val; /* result of the by-name lookup below */
+
+    /* Point the Linux-compatibility symbols declared above at this environment's stderr and errno */
+    _stderr = stderr;
+    __errno_location = &errno;
+
+    printf("starting caml\n");
+
+    /* Wait before things might hang up */
+    sleep(1);
+
+    caml_startup(argv); /* start the Caml runtime, handing it our argv */
+    val = caml_named_value("main"); /* NULL when nothing is registered under "main" */
+    if (!val) {
+        printf("Couldn't find Caml main\n"); /* newline-terminated like the other messages here */
+        return 1;
+    }
+    caml_callback(*val, Val_int(0)); /* call it with the single argument Val_int(0) */
+    printf("callback returned\n");
+    return 0;
+}
diff -r d2a239224cb2 -r f1508348ffab tools/examples/Makefile
--- a/tools/examples/Makefile   Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/examples/Makefile   Mon Jun 02 11:35:39 2008 +0900
@@ -9,9 +9,7 @@ XENDOMAINS_SYSCONFIG = init.d/sysconfig.
 # Xen configuration dir and configs to go there.
 XEN_CONFIG_DIR = /etc/xen
 XEN_CONFIGS = xend-config.sxp
-XEN_CONFIGS += xend-config-xenapi.sxp
 XEN_CONFIGS += xm-config.xml
-XEN_CONFIGS += xm-config-xenapi.xml
 XEN_CONFIGS += xmexample1 
 XEN_CONFIGS += xmexample2
 XEN_CONFIGS += xmexample.hvm
diff -r d2a239224cb2 -r f1508348ffab tools/examples/xend-config-xenapi.sxp
--- a/tools/examples/xend-config-xenapi.sxp     Mon Jun 02 11:35:02 2008 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,196 +0,0 @@
-# -*- sh -*-
-
-#
-# Xend configuration file.
-#
-
-# This example configuration is appropriate for an installation that 
-# utilizes a bridged network configuration. Access to xend via http
-# is disabled.  
-
-# Commented out entries show the default for that entry, unless otherwise
-# specified.
-
-#(logfile /var/log/xen/xend.log)
-#(loglevel DEBUG)
-
-
-# The Xen-API server configuration.  (Please note that this server is
-# available as an UNSUPPORTED PREVIEW in Xen 3.0.4, and should not be relied
-# upon).
-#
-# This value configures the ports, interfaces, and access controls for the
-# Xen-API server.  Each entry in the list starts with either unix, a port
-# number, or an address:port pair.  If this is "unix", then a UDP socket is
-# opened, and this entry applies to that.  If it is a port, then Xend will
-# listen on all interfaces on that TCP port, and if it is an address:port
-# pair, then Xend will listen on the specified port, using the interface with
-# the specified address.
-#
-# The subsequent string configures the user-based access control for the
-# listener in question.  This can be one of "none" or "pam", indicating either
-# that users should be allowed access unconditionally, or that the local
-# Pluggable Authentication Modules configuration should be used.  If this
-# string is missing or empty, then "pam" is used.
-#
-# The final string gives the host-based access control for that listener. If
-# this is missing or empty, then all connections are accepted.  Otherwise,
-# this should be a space-separated sequence of regular expressions; any host
-# with a fully-qualified domain name or an IP address that matches one of
-# these regular expressions will be accepted.
-#
-# Example: listen on TCP port 9363 on all interfaces, accepting connections
-# only from machines in example.com or localhost, and allow access through
-# the unix domain socket unconditionally:
-#
-   (xen-api-server ((9363 none)))
-#                    (unix none)))
-#
-# Optionally, the TCP Xen-API server can use SSL by specifying the private
-# key and certificate location:
-#
-#                    (9367 pam '' /etc/xen/xen-api.key /etc/xen/xen-api.crt)
-#
-# Default:
-#   (xen-api-server ((unix)))
-
-
-#(xend-http-server no)
-#(xend-unix-server no)
-#(xend-tcp-xmlrpc-server no)
-#(xend-unix-xmlrpc-server yes)
-#(xend-relocation-server no)
-(xend-relocation-server yes)
-
-#(xend-unix-path /var/lib/xend/xend-socket)
-
-
-# Address and port xend should use for the legacy TCP XMLRPC interface, 
-# if xend-tcp-xmlrpc-server is set.
-#(xend-tcp-xmlrpc-server-address 'localhost')
-#(xend-tcp-xmlrpc-server-port 8006)
-
-# SSL key and certificate to use for the legacy TCP XMLRPC interface.
-# Setting these will mean that this port serves only SSL connections as
-# opposed to plaintext ones.
-#(xend-tcp-xmlrpc-server-ssl-key-file  /etc/xen/xmlrpc.key)
-#(xend-tcp-xmlrpc-server-ssl-cert-file /etc/xen/xmlrpc.crt)
-
-
-# Port xend should use for the HTTP interface, if xend-http-server is set.
-#(xend-port            8000)
-
-# Port xend should use for the relocation interface, if xend-relocation-server
-# is set.
-#(xend-relocation-port 8002)
-
-# Address xend should listen on for HTTP connections, if xend-http-server is
-# set.
-# Specifying 'localhost' prevents remote connections.
-# Specifying the empty string '' (the default) allows all connections.
-#(xend-address '')
-#(xend-address localhost)
-
-# Address xend should listen on for relocation-socket connections, if
-# xend-relocation-server is set.
-# Meaning and default as for xend-address above.
-#(xend-relocation-address '')
-
-# The hosts allowed to talk to the relocation port.  If this is empty (the
-# default), then all connections are allowed (assuming that the connection
-# arrives on a port and interface on which we are listening; see
-# xend-relocation-port and xend-relocation-address above).  Otherwise, this
-# should be a space-separated sequence of regular expressions.  Any host with
-# a fully-qualified domain name or an IP address that matches one of these
-# regular expressions will be accepted.
-#
-# For example:
-#  (xend-relocation-hosts-allow '^localhost$ ^.*\\.example\\.org$')
-#
-#(xend-relocation-hosts-allow '')
-(xend-relocation-hosts-allow '^localhost$ ^localhost\\.localdomain$')
-
-# The limit (in kilobytes) on the size of the console buffer
-#(console-limit 1024)
-
-##
-# To bridge network traffic, like this:
-#
-# dom0: ----------------- bridge -> real eth0 -> the network
-#                            |
-# domU: fake eth0 -> vifN.0 -+
-#
-# use
-#
-# (network-script network-bridge)
-#
-# Your default ethernet device is used as the outgoing interface, by default. 
-# To use a different one (e.g. eth1) use
-#
-# (network-script 'network-bridge netdev=eth1')
-#
-# The bridge is named xenbr0, by default.  To rename the bridge, use
-#
-# (network-script 'network-bridge bridge=<name>')
-#
-# It is possible to use the network-bridge script in more complicated
-# scenarios, such as having two outgoing interfaces, with two bridges, and
-# two fake interfaces per guest domain.  To do things like this, write
-# yourself a wrapper script, and call network-bridge from it, as appropriate.
-#
-(network-script network-bridge)
-
-# The script used to control virtual interfaces.  This can be overridden on a
-# per-vif basis when creating a domain or a configuring a new vif.  The
-# vif-bridge script is designed for use with the network-bridge script, or
-# similar configurations.
-#
-# If you have overridden the bridge name using
-# (network-script 'network-bridge bridge=<name>') then you may wish to do the
-# same here.  The bridge name can also be set when creating a domain or
-# configuring a new vif, but a value specified here would act as a default.
-#
-# If you are using only one bridge, the vif-bridge script will discover that,
-# so there is no need to specify it explicitly.
-#
-(vif-script vif-bridge)
-
-
-## Use the following if network traffic is routed, as an alternative to the
-# settings for bridged networking given above.
-#(network-script network-route)
-#(vif-script     vif-route)
-
-
-## Use the following if network traffic is routed with NAT, as an alternative
-# to the settings for bridged networking given above.
-#(network-script network-nat)
-#(vif-script     vif-nat)
-
-# dom0-min-mem is the lowest permissible memory level (in MB) for dom0.
-# This is a minimum both for auto-ballooning (as enabled by
-# enable-dom0-ballooning below) and for xm mem-set when applied to dom0.
-(dom0-min-mem 196)
-
-# Whether to enable auto-ballooning of dom0 to allow domUs to be created.
-# If enable-dom0-ballooning = no, dom0 will never balloon out.
-(enable-dom0-ballooning yes)
-
-# In SMP system, dom0 will use dom0-cpus # of CPUS
-# If dom0-cpus = 0, dom0 will take all cpus available
-(dom0-cpus 0)
-
-# Whether to enable core-dumps when domains crash.
-#(enable-dump no)
-
-# The tool used for initiating virtual TPM migration
-#(external-migration-tool '')
-
-# The interface for VNC servers to listen on. Defaults
-# to 127.0.0.1  To restore old 'listen everywhere' behaviour
-# set this to 0.0.0.0
-#(vnc-listen '127.0.0.1')
-
-# The default password for VNC console on HVM domain.
-# Empty string is no authentication.
-(vncpasswd '')
diff -r d2a239224cb2 -r f1508348ffab tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp    Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/examples/xend-config.sxp    Mon Jun 02 11:35:39 2008 +0900
@@ -59,6 +59,7 @@
 #(xend-unix-xmlrpc-server yes)
 #(xend-relocation-server no)
 (xend-relocation-server yes)
+#(xend-relocation-ssl-server no)
 
 #(xend-unix-path /var/lib/xend/xend-socket)
 
@@ -82,14 +83,17 @@
 # is set.
 #(xend-relocation-port 8002)
 
-# Whether to use tls when relocating.
-#(xend-relocation-tls no)
-
-# SSL key and certificate to use for the relocation interface.
-# Setting these will mean that this port serves only SSL connections as
-# opposed to plaintext ones.
+# Port xend should use for the ssl relocation interface, if
+# xend-relocation-ssl-server is set.
+#(xend-relocation-ssl-port 8003)
+
+# SSL key and certificate to use for the ssl relocation interface, if
+# xend-relocation-ssl-server is set.
 #(xend-relocation-server-ssl-key-file  /etc/xen/xmlrpc.key)
 #(xend-relocation-server-ssl-cert-file  /etc/xen/xmlrpc.crt)
+
+# Whether to use ssl as default when relocating.
+#(xend-relocation-ssl no)
 
 # Address xend should listen on for HTTP connections, if xend-http-server is
 # set.
diff -r d2a239224cb2 -r f1508348ffab tools/examples/xm-config-xenapi.xml
--- a/tools/examples/xm-config-xenapi.xml       Mon Jun 02 11:35:02 2008 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-<!--
-
-Copyright (C) 2006 XenSource Inc.
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of version 2.1 of the GNU Lesser General Public
-License as published by the Free Software Foundation.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
--->
-
-<!--
-
-This is a configuration file for xm; it should be placed in
-/etc/xen/xm-config.xml.  If this file is missing, then xm will fall back to
-the normal behaviour that's in Xen 3.0.4 and below.  The settings here are
-most useful for experimenting with the Xen-API preview in Xen 3.0.4.
-
--->
-
-<xm>
-  <!-- The server element describes how to talk to Xend.  The type may be 
-       Xen-API or LegacyXMLRPC (the default).  The URI is that of the
-       server; you might try http://server:9363/ or
-       httpu:///var/run/xend/xen-api.sock for the Xen-API, or
-       httpu:///var/run/xend/xmlrpc.sock for the legacy server.
-
-       The username and password attributes will be used to log in if Xen-API
-       is being used.
-    -->
-  <server type='Xen-API'
-          uri='http://localhost:9363/'
-          username='me'
-          password='mypassword' />
-</xm>
diff -r d2a239224cb2 -r f1508348ffab tools/firmware/hvmloader/util.c
--- a/tools/firmware/hvmloader/util.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/firmware/hvmloader/util.c   Mon Jun 02 11:35:39 2008 +0900
@@ -609,7 +609,7 @@ uint16_t get_cpu_mhz(void)
 uint16_t get_cpu_mhz(void)
 {
     struct xen_add_to_physmap xatp;
-    struct shared_info *shared_info = (struct shared_info *)0xa0000;
+    struct shared_info *shared_info = (struct shared_info *)0xfffff000;
     struct vcpu_time_info *info = &shared_info->vcpu_info[0].time;
     uint64_t cpu_khz;
     uint32_t tsc_to_nsec_mul, version;
@@ -619,7 +619,7 @@ uint16_t get_cpu_mhz(void)
     if ( cpu_mhz != 0 )
         return cpu_mhz;
 
-    /* Map shared-info page to 0xa0000 (i.e., overlap VGA hole). */
+    /* Map shared-info page. */
     xatp.domid = DOMID_SELF;
     xatp.space = XENMAPSPACE_shared_info;
     xatp.idx   = 0;
@@ -643,14 +643,6 @@ uint16_t get_cpu_mhz(void)
         cpu_khz = cpu_khz << -tsc_shift;
     else
         cpu_khz = cpu_khz >> tsc_shift;
-
-    /* Get the VGA MMIO hole back by remapping shared info to scratch. */
-    xatp.domid = DOMID_SELF;
-    xatp.space = XENMAPSPACE_shared_info;
-    xatp.idx   = 0;
-    xatp.gpfn  = 0xfffff; /* scratch pfn */
-    if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
-        BUG();
 
     cpu_mhz = (uint16_t)(((uint32_t)cpu_khz + 500) / 1000);
     return cpu_mhz;
diff -r d2a239224cb2 -r f1508348ffab tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/firmware/rombios/rombios.c  Mon Jun 02 11:35:39 2008 +0900
@@ -2225,26 +2225,12 @@ void interactive_bootkey()
     Bit16u i;
     Bit8u scan = 0;
 
-    bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\nPress F10 to select boot device.\n");
-    for (i = 3; i > 0; i--)
-    {
-        scan = wait(WAIT_HZ, 0);
-        switch (scan) {
-        case 0x3D:
-        case 0x3E:
-        case 0x3F:
-        case 0x58:
-            break;
-        case 0x44:
-            scan = bootmenu(inb_cmos(0x3d) & 0x0f);
-            break;
-        default:
-            scan = 0;
-            break;
-        }
-        if (scan != 0)
-            break;
-    }
+    bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO,
+                "\n\nPress F10 to select boot device.\n");
+
+    scan = wait(1, 0);
+    if (scan == 0x44)
+        scan = bootmenu(inb_cmos(0x3d) & 0x0f);
 
     /* set the default based on the keypress or menu */
     switch(scan) {
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/Makefile.target
--- a/tools/ioemu/Makefile.target       Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/ioemu/Makefile.target       Mon Jun 02 11:35:39 2008 +0900
@@ -358,6 +358,13 @@ endif
 endif
 
 ifdef CONFIG_STUBDOM
+VL_OBJS+=main-qemu.o
+CFLAGS += -DCONFIG_QEMU
+main-qemu.c:
+       ln -s $(XEN_ROOT)/extras/mini-os/main.c $@
+endif
+
+ifdef CONFIG_STUBDOM
 #CONFIG_PASSTHROUGH=1
 else
   ifeq (,$(wildcard /usr/include/pci))
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/ioemu/hw/cirrus_vga.c       Mon Jun 02 11:35:39 2008 +0900
@@ -281,8 +281,6 @@ typedef struct PCICirrusVGAState {
 
 static uint8_t rop_to_index[256];
     
-void *shared_vram;
-
 /***************************************
  *
  *  prototypes.
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/ioemu/vl.c  Mon Jun 02 11:35:39 2008 +0900
@@ -7807,8 +7807,9 @@ int main(int argc, char **argv)
                 bdrv_set_type_hint(fd_table[i], BDRV_TYPE_FLOPPY);
             }
             if (fd_filename[i] != '\0') {
-                if (bdrv_open(fd_table[i], fd_filename[i],
-                              snapshot ? BDRV_O_SNAPSHOT : 0) < 0) {
+                if (bdrv_open2(fd_table[i], fd_filename[i],
+                               snapshot ? BDRV_O_SNAPSHOT : 0,
+                               &bdrv_raw) < 0) {
                     fprintf(stderr, "qemu: could not open floppy disk image '%s'\n",
                             fd_filename[i]);
                     exit(1);
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/ioemu/vl.h  Mon Jun 02 11:35:39 2008 +0900
@@ -153,8 +153,6 @@ int unset_mm_mapping(int xc_handle, uint
                      unsigned int address_bits, unsigned long *extent_start);
 int set_mm_mapping(int xc_handle, uint32_t domid, unsigned long nr_pages,
                    unsigned int address_bits, unsigned long *extent_start);
-
-extern void *shared_vram;
 
 extern FILE *logfile;
 
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/xenstore.c
--- a/tools/ioemu/xenstore.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/ioemu/xenstore.c    Mon Jun 02 11:35:39 2008 +0900
@@ -112,7 +112,7 @@ void xenstore_parse_domain_config(int hv
 
     e = xs_directory(xsh, XBT_NULL, buf, &num);
     if (e == NULL)
-        goto out;
+        num = 0;
 
     for (i = 0; i < num; i++) {
         /* read the backend path */
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/Makefile
--- a/tools/libxc/Makefile      Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/Makefile      Mon Jun 02 11:35:39 2008 +0900
@@ -5,11 +5,9 @@ MINOR    = 0
 MINOR    = 0
 
 CTRL_SRCS-y       :=
-ifneq ($(stubdom),y)
 CTRL_SRCS-y       += xc_core.c
 CTRL_SRCS-$(CONFIG_X86) += xc_core_x86.c
 CTRL_SRCS-$(CONFIG_IA64) += xc_core_ia64.c
-endif
 CTRL_SRCS-y       += xc_domain.c
 CTRL_SRCS-y       += xc_evtchn.c
 CTRL_SRCS-y       += xc_misc.c
@@ -21,9 +19,7 @@ CTRL_SRCS-y       += xc_csched.c
 CTRL_SRCS-y       += xc_csched.c
 CTRL_SRCS-y       += xc_tbuf.c
 CTRL_SRCS-y       += xc_pm.c
-ifneq ($(stubdom),y)
 CTRL_SRCS-y       += xc_resume.c
-endif
 CTRL_SRCS-$(CONFIG_X86) += xc_pagetab.c
 CTRL_SRCS-$(CONFIG_Linux) += xc_linux.c
 CTRL_SRCS-$(CONFIG_SunOS) += xc_solaris.c
@@ -33,15 +29,12 @@ CTRL_SRCS-$(CONFIG_MiniOS) += xc_minios.
 
 GUEST_SRCS-y :=
 GUEST_SRCS-y += xg_private.c
-ifneq ($(stubdom),y)
 GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
 GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
-endif
 
 VPATH = ../../xen/common/libelf
 CFLAGS += -I../../xen/common/libelf
 
-ifneq ($(stubdom),y)
 GUEST_SRCS-y += libelf-tools.c libelf-loader.c
 GUEST_SRCS-y += libelf-dominfo.c libelf-relocate.c
 
@@ -55,7 +48,6 @@ GUEST_SRCS-$(CONFIG_X86)     += xc_dom_x
 GUEST_SRCS-$(CONFIG_X86)     += xc_dom_x86.c
 GUEST_SRCS-$(CONFIG_X86)     += xc_cpuid_x86.c
 GUEST_SRCS-$(CONFIG_IA64)    += xc_dom_ia64.c
-endif
 
 -include $(XEN_TARGET_ARCH)/Makefile
 
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/xc_core.c     Mon Jun 02 11:35:39 2008 +0900
@@ -64,7 +64,7 @@
 /* string table */
 struct xc_core_strtab {
     char       *strings;
-    uint16_t    current;
+    uint16_t    length;
     uint16_t    max;
 };
 
@@ -89,7 +89,7 @@ xc_core_strtab_init(void)
 
     /* index 0 represents none */
     strtab->strings[0] = '\0';
-    strtab->current = 1;
+    strtab->length = 1;
 
     return strtab;
 }
@@ -107,14 +107,14 @@ xc_core_strtab_get(struct xc_core_strtab
     uint16_t ret = 0;
     uint16_t len = strlen(name) + 1;
 
-    if ( strtab->current > UINT16_MAX - len )
+    if ( strtab->length > UINT16_MAX - len )
     {
         PERROR("too long string table");
         errno = E2BIG;
         return ret;
     }
     
-    if ( strtab->current + len > strtab->max )
+    if ( strtab->length + len > strtab->max )
     {
         char *tmp;
         if ( strtab->max > UINT16_MAX / 2 )
@@ -135,9 +135,9 @@ xc_core_strtab_get(struct xc_core_strtab
         strtab->max *= 2;
     }
 
-    ret = strtab->current;
-    strcpy(strtab->strings + strtab->current, name);
-    strtab->current += len;
+    ret = strtab->length;
+    strcpy(strtab->strings + strtab->length, name);
+    strtab->length += len;
     return ret;
 }
 
@@ -669,7 +669,7 @@ xc_domain_dumpcore_via_callback(int xc_h
     offset += filesz;
 
     /* fixing up section header string table section header */
-    filesz = strtab->current;
+    filesz = strtab->length;
     sheaders->shdrs[strtab_idx].sh_offset = offset;
     sheaders->shdrs[strtab_idx].sh_size = filesz;
 
@@ -829,7 +829,7 @@ copy_done:
         goto out;
 
     /* elf section header string table: .shstrtab */
-    sts = dump_rtn(args, strtab->strings, strtab->current);
+    sts = dump_rtn(args, strtab->strings, strtab->length);
     if ( sts != 0 )
         goto out;
 
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/xc_domain.c   Mon Jun 02 11:35:39 2008 +0900
@@ -767,6 +767,37 @@ int xc_assign_device(
     return do_domctl(xc_handle, &domctl);
 }
 
+int xc_get_device_group(
+    int xc_handle,          /* passed through to do_domctl() */
+    uint32_t domid,         /* target domain; cast to domid_t for the request */
+    uint32_t machine_bdf,   /* copied into the request unchanged */
+    uint32_t max_sdevs,     /* capacity of sdev_array, in entries */
+    uint32_t *num_sdevs,    /* out: num_sdevs field of the request after the call */
+    uint32_t *sdev_array)   /* buffer exposed to the domctl through a guest handle */
+{
+    int rc;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_get_device_group;
+    domctl.domain = (domid_t)domid;
+
+    domctl.u.get_device_group.machine_bdf = machine_bdf;
+    domctl.u.get_device_group.max_sdevs = max_sdevs;
+
+    set_xen_guest_handle(domctl.u.get_device_group.sdev_array, sdev_array);
+
+    if ( lock_pages(sdev_array, max_sdevs * sizeof(*sdev_array)) != 0 ) /* lock the whole buffer around the call */
+    {
+        PERROR("Could not lock memory for xc_get_device_group\n");
+        return -ENOMEM; /* early exit: *num_sdevs is left untouched */
+    }
+    rc = do_domctl(xc_handle, &domctl);
+    unlock_pages(sdev_array, max_sdevs * sizeof(*sdev_array));
+
+    *num_sdevs = domctl.u.get_device_group.num_sdevs; /* stored whether or not do_domctl() succeeded */
+    return rc; /* do_domctl()'s result, returned as-is */
+}
+
 int xc_test_assign_device(
     int xc_handle,
     uint32_t domid,
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xc_minios.c
--- a/tools/libxc/xc_minios.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/xc_minios.c   Mon Jun 02 11:35:39 2008 +0900
@@ -35,11 +35,12 @@ extern struct wait_queue_head event_queu
 
 int xc_interface_open(void)
 {
-    return 0;
+    return alloc_fd(FTYPE_XC);
 }
 
 int xc_interface_close(int xc_handle)
 {
+    files[xc_handle].type = FTYPE_NONE;
     return 0;
 }
 
@@ -79,8 +80,12 @@ int xc_map_foreign_ranges(int xc_handle,
 int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
                           privcmd_mmap_entry_t *entries, int nr)
 {
-    printf("xc_map_foreign_ranges, TODO\n");
-    do_exit();
+    int i;
+    for (i = 0; i < nr; i++) {
+       unsigned long mfn = entries[i].mfn;
+        do_map_frames(entries[i].va, &mfn, entries[i].npages, 0, 1, dom, 0, L1_PROT);
+    }
+    return 0;
 }
 
 int do_xen_hypercall(int xc_handle, privcmd_hypercall_t *hypercall)
@@ -294,6 +299,12 @@ int xc_evtchn_unmask(int xce_handle, evt
     return 0;
 }
 
+/* Calls fsync(fd) when flush is non-zero; this implementation performs no cache discard. */
+void discard_file_cache(int fd, int flush)
+{
+    if (flush)
+        fsync(fd); /* return value is ignored */
+}
 /*
  * Local variables:
  * mode: C
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/xenctrl.h     Mon Jun 02 11:35:39 2008 +0900
@@ -955,6 +955,13 @@ int xc_assign_device(int xc_handle,
                      uint32_t domid,
                      uint32_t machine_bdf);
 
+int xc_get_device_group(int xc_handle,
+                     uint32_t domid,
+                     uint32_t machine_bdf,
+                     uint32_t max_sdevs,
+                     uint32_t *num_sdevs,
+                     uint32_t *sdev_array);
+
 int xc_test_assign_device(int xc_handle,
                           uint32_t domid,
                           uint32_t machine_bdf);
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/xg_private.c  Mon Jun 02 11:35:39 2008 +0900
@@ -11,22 +11,6 @@
 #include <malloc.h>
 
 #include "xg_private.h"
-
-int lock_pages(void *addr, size_t len)
-{
-    int e = 0;
-#ifndef __sun__
-    e = mlock(addr, len);
-#endif
-    return (e);
-}
-
-void unlock_pages(void *addr, size_t len)
-{
-#ifndef __sun__
-    safe_munlock(addr, len);
-#endif
-}
 
 char *xc_read_image(const char *filename, unsigned long *size)
 {
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/lowlevel/xc/xc.c Mon Jun 02 11:35:39 2008 +0900
@@ -106,7 +106,7 @@ static PyObject *pyxc_domain_create(XcOb
     static char *kwd_list[] = { "domid", "ssidref", "handle", "flags", "target", NULL };
 
     if ( !PyArg_ParseTupleAndKeywords(args, kwds, "|iiOii", kwd_list,
-                                     &dom, &ssidref, &pyhandle, &flags, &target))
+                                      &dom, &ssidref, &pyhandle, &flags, &target))
         return NULL;
     if ( pyhandle != NULL )
     {
@@ -434,44 +434,44 @@ static PyObject *pyxc_linux_build(XcObje
     dom->vhpt_size_log2 = vhpt;
 
     if ( xc_dom_linux_build(self->xc_handle, dom, domid, mem_mb, image,
-                           ramdisk, flags, store_evtchn, &store_mfn,
-                           console_evtchn, &console_mfn) != 0 ) {
-       goto out;
+                            ramdisk, flags, store_evtchn, &store_mfn,
+                            console_evtchn, &console_mfn) != 0 ) {
+        goto out;
     }
 
     if ( !(elfnote_dict = PyDict_New()) )
-       goto out;
+        goto out;
     
     for ( i = 0; i < ARRAY_SIZE(dom->parms.elf_notes); i++ )
     {
-       switch ( dom->parms.elf_notes[i].type )
+        switch ( dom->parms.elf_notes[i].type )
         {
-       case XEN_ENT_NONE:
-           continue;
-       case XEN_ENT_LONG:
-           elfnote = Py_BuildValue("k", dom->parms.elf_notes[i].data.num);
-           break;
-       case XEN_ENT_STR:
-           elfnote = Py_BuildValue("s", dom->parms.elf_notes[i].data.str);
-           break;
-       }
-       PyDict_SetItemString(elfnote_dict,
-                            dom->parms.elf_notes[i].name,
-                            elfnote);
-       Py_DECREF(elfnote);
+        case XEN_ENT_NONE:
+            continue;
+        case XEN_ENT_LONG:
+            elfnote = Py_BuildValue("k", dom->parms.elf_notes[i].data.num);
+            break;
+        case XEN_ENT_STR:
+            elfnote = Py_BuildValue("s", dom->parms.elf_notes[i].data.str);
+            break;
+        }
+        PyDict_SetItemString(elfnote_dict,
+                             dom->parms.elf_notes[i].name,
+                             elfnote);
+        Py_DECREF(elfnote);
     }
 
     ret = Py_BuildValue("{s:i,s:i,s:N}",
-                       "store_mfn", store_mfn,
-                       "console_mfn", console_mfn,
-                       "notes", elfnote_dict);
+                        "store_mfn", store_mfn,
+                        "console_mfn", console_mfn,
+                        "notes", elfnote_dict);
 
     if ( dom->arch_hooks->native_protocol )
     {
-       PyObject *native_protocol =
-           Py_BuildValue("s", dom->arch_hooks->native_protocol);
-       PyDict_SetItemString(ret, "native_protocol", native_protocol);
-       Py_DECREF(native_protocol);
+        PyObject *native_protocol =
+            Py_BuildValue("s", dom->arch_hooks->native_protocol);
+        PyDict_SetItemString(ret, "native_protocol", native_protocol);
+        Py_DECREF(native_protocol);
     }
 
     xc_dom_release(dom);
@@ -556,7 +556,7 @@ static PyObject *pyxc_test_assign_device
 {
     uint32_t dom;
     char *pci_str;
-    uint32_t bdf = 0;
+    int32_t bdf = 0;
     int seg, bus, dev, func;
 
     static char *kwd_list[] = { "domid", "pci", NULL };
@@ -571,12 +571,141 @@ static PyObject *pyxc_test_assign_device
         bdf |= (func & 0x7) << 8;
 
         if ( xc_test_assign_device(self->xc_handle, dom, bdf) != 0 )
+        {
+            if (errno == ENOSYS)
+                bdf = -1;
             break;
-
+        }
         bdf = 0;
     }
 
     return Py_BuildValue("i", bdf);
+}
+
+/* assign_device(domid, pci): assign each device listed in 'pci' to 'domid'.
+ * Returns 0 if every call succeeds, else the packed bdf of the first device
+ * that failed, or -1 when that failure reported ENOSYS. */
+static PyObject *pyxc_assign_device(XcObject *self,
+                                    PyObject *args,
+                                    PyObject *kwds)
+{
+    static char *kwd_list[] = { "domid", "pci", NULL };
+    int seg, bus, dev, func;
+    int32_t result = 0;
+    char *pci_str;
+    uint32_t dom;
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list,
+                                      &dom, &pci_str) )
+        return NULL;
+
+    while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) )
+    {
+        int32_t bdf = ((bus & 0xff) << 16) | ((dev & 0x1f) << 11) |
+                      ((func & 0x7) << 8);
+        if ( xc_assign_device(self->xc_handle, dom, bdf) != 0 )
+        {
+            /* Read errno before anything else can overwrite it. */
+            result = (errno == ENOSYS) ? -1 : bdf;
+            break;
+        }
+    }
+
+    return Py_BuildValue("i", result);
+}
+
+/* deassign_device(domid, pci): deassign each device listed in 'pci' from
+ * 'domid'.  Returns 0 if every call succeeds, else the packed bdf of the
+ * first device that failed, or -1 when that failure reported ENOSYS. */
+static PyObject *pyxc_deassign_device(XcObject *self,
+                                      PyObject *args,
+                                      PyObject *kwds)
+{
+    static char *kwd_list[] = { "domid", "pci", NULL };
+    int seg, bus, dev, func;
+    int32_t result = 0;
+    char *pci_str;
+    uint32_t dom;
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list,
+                                      &dom, &pci_str) )
+        return NULL;
+
+    while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) )
+    {
+        int32_t bdf = ((bus & 0xff) << 16) | ((dev & 0x1f) << 11) |
+                      ((func & 0x7) << 8);
+        if ( xc_deassign_device(self->xc_handle, dom, bdf) != 0 )
+        {
+            /* Read errno before anything else can overwrite it. */
+            result = (errno == ENOSYS) ? -1 : bdf;
+            break;
+        }
+    }
+
+    return Py_BuildValue("i", result);
+}
+
+/*
+ * get_device_group(domid, seg, bus, dev, func): return the sibling devices of
+ * a PCI device as a string of "bb:dd.f," entries ("" when there are none).
+ */
+static PyObject *pyxc_get_device_group(XcObject *self,
+                                         PyObject *args)
+{
+    uint32_t bdf = 0;
+    uint32_t max_sdevs, num_sdevs;
+    int domid, seg, bus, dev, func, rc, i; /* "i" stores a full int in each */
+    PyObject *Pystr;
+    char *group_str;
+    char dev_str[9];            /* "bb:dd.f," plus the terminating NUL */
+    uint32_t *sdev_array;
+
+    if ( !PyArg_ParseTuple(args, "iiiii", &domid, &seg, &bus, &dev, &func) )
+        return NULL;
+
+    /* Maximum number of sibling devices accepted per group. */
+    max_sdevs = 1024;
+    if ( (sdev_array = calloc(max_sdevs, sizeof(*sdev_array))) == NULL )
+        return PyErr_NoMemory();
+
+    bdf |= (bus & 0xff) << 16;
+    bdf |= (dev & 0x1f) << 11;
+    bdf |= (func & 0x7) << 8;
+
+    rc = xc_get_device_group(self->xc_handle,
+        domid, bdf, max_sdevs, &num_sdevs, sdev_array);
+    if ( rc < 0 )
+    {
+        free(sdev_array);
+        return pyxc_error_to_exception();
+    }
+    if ( !num_sdevs )
+    {
+        free(sdev_array);
+        return Py_BuildValue("s", "");
+    }
+
+    /* Zeroed buffer: strcat() below needs an initial empty string. */
+    if ( (group_str = calloc(num_sdevs, sizeof(dev_str))) == NULL )
+    {
+        free(sdev_array);       /* do not leak the sibling array on failure */
+        return PyErr_NoMemory();
+    }
+
+    for ( i = 0; i < num_sdevs; i++ )
+    {
+        bus = (sdev_array[i] >> 16) & 0xff;
+        dev = (sdev_array[i] >> 11) & 0x1f;
+        func = (sdev_array[i] >> 8) & 0x7;
+        snprintf(dev_str, sizeof(dev_str), "%02x:%02x.%x,", bus, dev, func);
+        strcat(group_str, dev_str);
+    }
+
+    Pystr = Py_BuildValue("s", group_str);
+    free(sdev_array);
+    free(group_str);
+    return Pystr;
 }
 
 #ifdef __ia64__
@@ -729,8 +858,8 @@ static PyObject *pyxc_hvm_build(XcObject
     int memsize, vcpus = 1, acpi = 0, apic = 1;
 
     static char *kwd_list[] = { "domid",
-                               "memsize", "image", "vcpus", "acpi",
-                               "apic", NULL };
+                                "memsize", "image", "vcpus", "acpi",
+                                "apic", NULL };
     if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list,
                                       &dom, &memsize,
                                       &image, &vcpus, &acpi, &apic) )
@@ -782,8 +911,8 @@ static PyObject *pyxc_evtchn_alloc_unbou
 }
 
 static PyObject *pyxc_evtchn_reset(XcObject *self,
-                                  PyObject *args,
-                                  PyObject *kwds)
+                                   PyObject *args,
+                                   PyObject *kwds)
 {
     uint32_t dom;
 
@@ -947,11 +1076,11 @@ static PyObject *pyxc_physinfo(XcObject 
 
     for ( i = 0; i < info.nr_nodes; i++ )
     {
-       xc_availheap(self->xc_handle, 0, 0, i, &free_heap);
-       PyList_Append(node_to_memory_obj,
-           PyInt_FromLong(free_heap / 1024));
-    }
-       
+        xc_availheap(self->xc_handle, 0, 0, i, &free_heap);
+        PyList_Append(node_to_memory_obj,
+                      PyInt_FromLong(free_heap / 1024));
+    }
+
     PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
     PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj);
  
@@ -1517,6 +1646,17 @@ static PyMethodDef pyxc_methods[] = {
       " value   [long]:     Value of param.\n"
       "Returns: [int] 0 on success.\n" },
 
+    { "get_device_group",
+      (PyCFunction)pyxc_get_device_group,
+      METH_VARARGS, "\n"
+      "get sibling devices infomation.\n"
+      " dom     [int]:      Domain to assign device to.\n"
+      " seg     [int]:      PCI segment.\n"
+      " bus     [int]:      PCI bus.\n"
+      " dev     [int]:      PCI dev.\n"
+      " func    [int]:      PCI func.\n"
+      "Returns: [string]:   Sibling devices \n" },
+
      { "test_assign_device",
        (PyCFunction)pyxc_test_assign_device,
        METH_VARARGS | METH_KEYWORDS, "\n"
@@ -1524,6 +1664,22 @@ static PyMethodDef pyxc_methods[] = {
        " dom     [int]:      Identifier of domain to build into.\n"
        " pci_str [str]:      PCI devices.\n"
        "Returns: [int] 0 on success, or device bdf that can't be assigned.\n" 
},
+
+     { "assign_device",
+       (PyCFunction)pyxc_assign_device,
+       METH_VARARGS | METH_KEYWORDS, "\n"
+       "Assign device to IOMMU domain.\n"
+       " dom     [int]:      Domain to assign device to.\n"
+       " pci_str [str]:      PCI devices.\n"
+       "Returns: [int] 0 on success, or device bdf that can't be assigned.\n" 
},
+
+     { "deassign_device",
+       (PyCFunction)pyxc_deassign_device,
+       METH_VARARGS | METH_KEYWORDS, "\n"
+       "Deassign device from IOMMU domain.\n"
+       " dom     [int]:      Domain to deassign device from.\n"
+       " pci_str [str]:      PCI devices.\n"
+       "Returns: [int] 0 on success, or device bdf that can't be 
deassigned.\n" },
   
     { "sched_id_get",
       (PyCFunction)pyxc_sched_id_get,
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/XendAPI.py  Mon Jun 02 11:35:39 2008 +0900
@@ -1759,12 +1759,12 @@ class XendAPI(object):
         xendom = XendDomain.instance()
         xeninfo = xendom.get_vm_by_uuid(vm_ref)
 
-        resource = other_config.get("resource", 0)
         port = other_config.get("port", 0)
-        node = other_config.get("node", 0)
+        node = other_config.get("node", -1)
+        ssl = other_config.get("ssl", None)
         
         xendom.domain_migrate(xeninfo.getDomid(), destination_url,
-                              bool(live), resource, port, node)
+                              bool(live), port, node, ssl)
         return xen_api_success_void()
 
     def VM_save(self, _, vm_ref, dest, checkpoint):
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/XendDomain.py       Mon Jun 02 11:35:39 2008 +0900
@@ -43,8 +43,8 @@ from xen.xend.XendConstants import DOM_S
 from xen.xend.XendConstants import DOM_STATE_HALTED, DOM_STATE_PAUSED
 from xen.xend.XendConstants import DOM_STATE_RUNNING, DOM_STATE_SUSPENDED
 from xen.xend.XendConstants import DOM_STATE_SHUTDOWN, DOM_STATE_UNKNOWN
-from xen.xend.XendConstants import DOM_STATE_CRASHED
-from xen.xend.XendConstants import TRIGGER_TYPE
+from xen.xend.XendConstants import DOM_STATE_CRASHED, HVM_PARAM_ACPI_S_STATE
+from xen.xend.XendConstants import TRIGGER_TYPE, TRIGGER_S3RESUME
 from xen.xend.XendDevices import XendDevices
 from xen.xend.XendAPIConstants import *
 
@@ -1258,22 +1258,24 @@ class XendDomain:
 
         return val       
 
-    def domain_migrate(self, domid, dst, live=False, port=0, node=-1):
+    def domain_migrate(self, domid, dst, live=False, port=0, node=-1, 
ssl=None):
         """Start domain migration.
         
         @param domid: Domain ID or Name
         @type domid: int or string.
         @param dst: Destination IP address
         @type dst: string
-        @keyword port: relocation port on destination
-        @type port: int        
         @keyword live: Live migration
         @type live: bool
+        @keyword port: relocation port on destination
+        @type port: int
+        @keyword node: use node number for target
+        @type node: int
+        @keyword ssl: use ssl connection
+        @type ssl: bool
         @rtype: None
-        @keyword node: use node number for target
-        @rtype: int 
         @raise XendError: Failed to migrate
-        @raise XendInvalidDomain: Domain is not valid        
+        @raise XendInvalidDomain: Domain is not valid
         """
 
         dominfo = self.domain_lookup_nr(domid)
@@ -1294,13 +1296,14 @@ class XendDomain:
             """ Make sure there's memory free for enabling shadow mode """
             dominfo.checkLiveMigrateMemory()
 
-        if port == 0:
-            port = xoptions.get_xend_relocation_port()
-
-        tls = xoptions.get_xend_relocation_tls()
-        if tls:
+        if ssl is None:
+            ssl = xoptions.get_xend_relocation_ssl()
+
+        if ssl:
             from OpenSSL import SSL
             from xen.web import connection
+            if port == 0:
+                port = xoptions.get_xend_relocation_ssl_port()
             try:
                 ctx = SSL.Context(SSL.SSLv23_METHOD)
                 sock = SSL.Connection(ctx,
@@ -1328,6 +1331,8 @@ class XendDomain:
             os.close(p2cread)
             os.close(p2cwrite)
         else:
+            if port == 0:
+                port = xoptions.get_xend_relocation_port()
             try:
                 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                 # When connecting to our ssl enabled relocation server using a
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py   Mon Jun 02 11:35:39 2008 +0900
@@ -2091,28 +2091,28 @@ class XendDomainInfo:
                         xc.vcpu_setaffinity(self.domid, v, 
self.info['cpus'][v])
             else:
                 def find_relaxed_node(node_list):
-                    import sys 
+                    import sys
+                    nr_nodes = info['nr_nodes']
                     if node_list is None:
-                        node_list = range(0, info['nr_nodes'])
+                        node_list = range(0, nr_nodes)
                     nodeload = [0]
-                    nodeload = nodeload * info['nr_nodes']
+                    nodeload = nodeload * nr_nodes
                     from xen.xend import XendDomain
                     doms = XendDomain.instance().list('all')
-                    for dom in doms:
+                    for dom in filter (lambda d: d.domid != self.domid, doms):
                         cpuinfo = dom.getVCPUInfo()
                         for vcpu in sxp.children(cpuinfo, 'vcpu'):
-                            def vinfo(n, t):
-                                return t(sxp.child_value(vcpu, n))
-                            cpumap = vinfo('cpumap', list)
-                            for i in node_list:
+                            if sxp.child_value(vcpu, 'online') == 0: continue
+                            cpumap = list(sxp.child_value(vcpu,'cpumap'))
+                            for i in range(0, nr_nodes):
                                 node_cpumask = info['node_to_cpu'][i]
                                 for j in node_cpumask:
                                     if j in cpumap:
                                         nodeload[i] += 1
                                         break
-                    for i in node_list:
-                        if len(info['node_to_cpu'][i]) > 0:
-                            nodeload[i] = int(nodeload[i] / 
len(info['node_to_cpu'][i]))
+                    for i in range(0, nr_nodes):
+                        if len(info['node_to_cpu'][i]) > 0 and i in node_list:
+                            nodeload[i] = int(nodeload[i] * 16 / 
len(info['node_to_cpu'][i]))
                         else:
                             nodeload[i] = sys.maxint
                     index = nodeload.index( min(nodeload) )    
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendOptions.py
--- a/tools/python/xen/xend/XendOptions.py      Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/XendOptions.py      Mon Jun 02 11:35:39 2008 +0900
@@ -72,6 +72,9 @@ class XendOptions:
     """Default for the flag indicating whether xend should run a relocation 
server."""
     xend_relocation_server_default = 'no'
 
+    """Default for the flag indicating whether xend should run a ssl 
relocation server."""
+    xend_relocation_ssl_server_default = 'no'
+
     """Default interface address the xend relocation server listens at. """
     xend_relocation_address_default = ''
 
@@ -80,6 +83,9 @@ class XendOptions:
 
     """Default port xend serves relocation at. """
     xend_relocation_port_default = 8002
+
+    """Default port xend serves ssl relocation at. """
+    xend_relocation_ssl_port_default = 8003
 
     xend_relocation_hosts_allow_default = ''
 
@@ -192,6 +198,12 @@ class XendOptions:
         return self.get_config_bool("xend-relocation-server",
                                     self.xend_relocation_server_default)
 
+    def get_xend_relocation_ssl_server(self):
+        """Get the flag indicating whether xend should run a ssl relocation 
server.
+        """
+        return self.get_config_bool("xend-relocation-ssl-server",
+                                    self.xend_relocation_ssl_server_default)
+
     def get_xend_relocation_server_ssl_key_file(self):
         return self.get_config_string("xend-relocation-server-ssl-key-file")
 
@@ -209,10 +221,17 @@ class XendOptions:
         return self.get_config_int('xend-relocation-port',
                                    self.xend_relocation_port_default)
 
-    def get_xend_relocation_tls(self):
-        """Whether to use tls when relocating.
-        """
-        return self.get_config_bool('xend-relocation-tls', 'no')
+    def get_xend_relocation_ssl_port(self):
+        """Get the port xend listens at for ssl connection to its relocation
+        server.
+        """
+        return self.get_config_int('xend-relocation-ssl-port',
+                                   self.xend_relocation_ssl_port_default)
+
+    def get_xend_relocation_ssl(self):
+        """Whether to use ssl when relocating.
+        """
+        return self.get_config_bool('xend-relocation-ssl', 'no')
 
     def get_xend_relocation_hosts_allow(self):
         return self.get_config_string("xend-relocation-hosts-allow",
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/server/SrvDomain.py Mon Jun 02 11:35:39 2008 +0900
@@ -115,7 +115,9 @@ class SrvDomain(SrvDir):
                     [['dom',         'int'],
                      ['destination', 'str'],
                      ['live',        'int'],
-                     ['port',        'int']])
+                     ['port',        'int'],
+                     ['node',        'int'],
+                     ['ssl',         'int']])
         return fn(req.args, {'dom': self.dom.domid})
 
     def op_pincpu(self, _, req):
@@ -215,6 +217,11 @@ class SrvDomain(SrvDir):
 
     def op_vcpuinfo(self, _1, req):
         return self.call(self.dom.getVCPUInfo, [], req)
+
+
+    def op_reset(self, _, req):  # presumably dispatched for op=reset -- confirm
+        self.acceptCommand(req)  # called before the reset itself is issued
+        return self.xd.domain_reset(self.dom.getName())  # domain passed by name
 
 
     def render_POST(self, req):
@@ -257,6 +264,10 @@ class SrvDomain(SrvDir):
         req.write('</form>')
 
         req.write('<form method="post" action="%s">' % url)
+        req.write('<input type="submit" name="op" value="reset">')
+        req.write('</form>')
+
+        req.write('<form method="post" action="%s">' % url)
         req.write('<input type="submit" name="op" value="shutdown">')
         req.write('<input type="radio" name="reason" value="poweroff" 
checked>Poweroff')
         req.write('<input type="radio" name="reason" value="halt">Halt')
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/server/pciif.py     Mon Jun 02 11:35:39 2008 +0900
@@ -226,6 +226,39 @@ class PciController(DevController):
 
         return sxpr    
 
+    def CheckSiblingDevices(self, domid, dev):
+        """ Check if all sibling devices of dev are owned by pciback
+        """
+        if not self.vm.info.is_hvm():
+            return
+
+        group_str = xc.get_device_group(domid, dev.domain, dev.bus, dev.slot, 
dev.func)
+        if group_str == "":
+            return
+
+        #group string format xx:xx.x,xx:xx.x,
+        devstr_len = group_str.find(',')
+        for i in range(0, len(group_str), devstr_len + 1):
+            (bus, slotfunc) = group_str[i:i + devstr_len].split(':')
+            (slot, func) = slotfunc.split('.')
+            b = parse_hex(bus)
+            d = parse_hex(slot)
+            f = parse_hex(func)
+            try:
+                sdev = PciDevice(dev.domain, b, d, f)
+            except Exception, e:
+                #no dom0 drivers bound to sdev
+                continue
+
+            if sdev.driver!='pciback':
+                raise VmError(("pci: PCI Backend does not own\n "+ \
+                    "sibling device %s of device %s\n"+ \
+                    "See the pciback.hide kernel "+ \
+                    "command-line parameter or\n"+ \
+                    "bind your slot/device to the PCI backend using sysfs" \
+                    )%(sdev.name, dev.name))
+        return
+
     def setupOneDevice(self, domain, bus, slot, func):
         """ Attach I/O resources for device to frontend domain
         """
@@ -245,8 +278,19 @@ class PciController(DevController):
                     "bind your slot/device to the PCI backend using sysfs" \
                     )%(dev.name))
 
+        self.CheckSiblingDevices(fe_domid, dev)
+
         PCIQuirk(dev.vendor, dev.device, dev.subvendor, dev.subdevice, domain, 
                 bus, slot, func)
+
+        if not self.vm.info.is_hvm():
+            # Setup IOMMU device assignment
+            pci_str = "0x%x, 0x%x, 0x%x, 0x%x" % (domain, bus, slot, func)
+            bdf = xc.assign_device(fe_domid, pci_str)
+            if bdf > 0:
+                raise VmError("Failed to assign device to IOMMU (%x:%x.%x)"
+                              % (bus, slot, func))
+            log.debug("pci: assign device %x:%x.%x" % (bus, slot, func))
 
         for (start, size) in dev.ioports:
             log.debug('pci: enabling ioport 0x%x/0x%x'%(start,size))
@@ -329,6 +373,14 @@ class PciController(DevController):
                     "command-line parameter or\n"+ \
                     "bind your slot/device to the PCI backend using sysfs" \
                     )%(dev.name))
+
+        if not self.vm.info.is_hvm():
+            pci_str = "0x%x, 0x%x, 0x%x, 0x%x" % (domain, bus, slot, func)
+            bdf = xc.deassign_device(fe_domid, pci_str)
+            if bdf > 0:
+                raise VmError("Failed to deassign device from IOMMU (%x:%x.%x)"
+                              % (bus, slot, func))
+            log.debug("pci: deassign device %x:%x.%x" % (bus, slot, func))
 
         for (start, size) in dev.ioports:
             log.debug('pci: disabling ioport 0x%x/0x%x'%(start,size))
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/server/relocate.py  Mon Jun 02 11:35:39 2008 +0900
@@ -142,16 +142,22 @@ def listenRelocation():
     if xoptions.get_xend_unix_server():
         path = '/var/lib/xend/relocation-socket'
         unix.UnixListener(path, RelocationProtocol)
+
+    interface = xoptions.get_xend_relocation_address()
+
+    hosts_allow = xoptions.get_xend_relocation_hosts_allow()
+    if hosts_allow == '':
+        hosts_allow = None
+    else:
+        hosts_allow = map(re.compile, hosts_allow.split(" "))
+
     if xoptions.get_xend_relocation_server():
         port = xoptions.get_xend_relocation_port()
-        interface = xoptions.get_xend_relocation_address()
+        tcp.TCPListener(RelocationProtocol, port, interface = interface,
+                        hosts_allow = hosts_allow)
 
-        hosts_allow = xoptions.get_xend_relocation_hosts_allow()
-        if hosts_allow == '':
-            hosts_allow = None
-        else:
-            hosts_allow = map(re.compile, hosts_allow.split(" "))
-
+    if xoptions.get_xend_relocation_ssl_server():
+        port = xoptions.get_xend_relocation_ssl_port()
         ssl_key_file = xoptions.get_xend_relocation_server_ssl_key_file()
         ssl_cert_file = xoptions.get_xend_relocation_server_ssl_cert_file()
 
@@ -161,5 +167,5 @@ def listenRelocation():
                                ssl_key_file = ssl_key_file,
                                ssl_cert_file = ssl_cert_file)
         else:
-            tcp.TCPListener(RelocationProtocol, port, interface = interface,
-                            hosts_allow = hosts_allow)
+            raise XendError("ssl_key_file or ssl_cert_file for ssl relocation 
server is missing.")
+
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xm/migrate.py
--- a/tools/python/xen/xm/migrate.py    Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xm/migrate.py    Mon Jun 02 11:35:39 2008 +0900
@@ -47,6 +47,10 @@ gopts.opt('node', short='n', val='nodenu
           fn=set_int, default=-1,
           use="Use specified NUMA node on target.")
 
+gopts.opt('ssl', short='s',
+          fn=set_true, default=None,
+          use="Use ssl connection for migration.")
+
 def help():
     return str(gopts)
     
@@ -65,11 +69,13 @@ def main(argv):
         vm_ref = get_single_vm(dom)
         other_config = {
             "port":     opts.vals.port,
-            "node":     opts.vals.node
+            "node":     opts.vals.node,
+            "ssl":      opts.vals.ssl
             }
         server.xenapi.VM.migrate(vm_ref, dst, bool(opts.vals.live),
                                  other_config)
     else:
         server.xend.domain.migrate(dom, dst, opts.vals.live,
                                    opts.vals.port,
-                                   opts.vals.node)
+                                   opts.vals.node,
+                                   opts.vals.ssl)
diff -r d2a239224cb2 -r f1508348ffab tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/xenstat/libxenstat/src/xenstat.c    Mon Jun 02 11:35:39 2008 +0900
@@ -655,12 +655,20 @@ unsigned long long xenstat_vbd_wr_reqs(x
 
 static char *xenstat_get_domain_name(xenstat_handle *handle, unsigned int 
domain_id)
 {
-       char path[80];
-
-       snprintf(path, sizeof(path),"/local/domain/%i/name", domain_id);
-       
+       char path[80], *vmpath;
+
+       snprintf(path, sizeof(path),"/local/domain/%i/vm", domain_id);
+
+       vmpath = xs_read(handle->xshandle, XBT_NULL, path, NULL);
+
+       if (vmpath == NULL)
+               return NULL;
+
+       snprintf(path, sizeof(path),"%s/name", vmpath);
+       free(vmpath);
+
        return xs_read(handle->xshandle, XBT_NULL, path, NULL);
-}      
+}
 
 /* Remove specified entry from list of domains */
 static void xenstat_prune_domain(xenstat_node *node, unsigned int entry)
diff -r d2a239224cb2 -r f1508348ffab 
unmodified_drivers/linux-2.6/platform-pci/evtchn.c
--- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c        Mon Jun 02 
11:35:02 2008 +0900
+++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c        Mon Jun 02 
11:35:39 2008 +0900
@@ -284,7 +284,7 @@ static irqreturn_t evtchn_interrupt(int 
 
 #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
        /* Clear master flag /before/ clearing selector flag. */
-       rmb();
+       wmb();
 #endif
        l1 = xchg(&v->evtchn_pending_sel, 0);
 
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/acpi/cpu_idle.c      Mon Jun 02 11:35:39 2008 +0900
@@ -173,6 +173,8 @@ static inline u32 ticks_elapsed(u32 t1, 
 {
     if ( t2 >= t1 )
         return (t2 - t1);
+    else if ( !(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) )
+        return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
     else
         return ((0xFFFFFFFF - t1) + t2);
 }
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/acpi/power.c Mon Jun 02 11:35:39 2008 +0900
@@ -238,9 +238,17 @@ static void tboot_sleep(u8 sleep_state)
 static void tboot_sleep(u8 sleep_state)
 {
    uint32_t shutdown_type;
-   
-   *((struct acpi_sleep_info *)(unsigned long)g_tboot_shared->acpi_sinfo) =
-       acpi_sinfo;
+
+   g_tboot_shared->acpi_sinfo.pm1a_cnt =
+                           (uint16_t)acpi_sinfo.pm1a_cnt_blk.address;
+   g_tboot_shared->acpi_sinfo.pm1b_cnt =
+                           (uint16_t)acpi_sinfo.pm1b_cnt_blk.address;
+   g_tboot_shared->acpi_sinfo.pm1a_evt =
+                           (uint16_t)acpi_sinfo.pm1a_evt_blk.address;
+   g_tboot_shared->acpi_sinfo.pm1b_evt =
+                           (uint16_t)acpi_sinfo.pm1b_evt_blk.address;
+   g_tboot_shared->acpi_sinfo.pm1a_cnt_val = acpi_sinfo.pm1a_cnt_val;
+   g_tboot_shared->acpi_sinfo.pm1b_cnt_val = acpi_sinfo.pm1b_cnt_val;
 
    switch ( sleep_state )
    {
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/cpu/amd.c    Mon Jun 02 11:35:39 2008 +0900
@@ -74,9 +74,11 @@ static void disable_c1_ramping(void)
 static void disable_c1_ramping(void) 
 {
        u8 pmm7;
-       int node;
-
-       for (node=0; node < NR_CPUS; node++) {
+       int node, nr_nodes;
+
+       /* Read the number of nodes from the first Northbridge. */
+       nr_nodes = ((pci_conf_read32(0, 0x18, 0x0, 0x60)>>4)&0x07)+1;
+       for (node = 0; node < nr_nodes; node++) {
                /* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */
                pmm7 = pci_conf_read8(0, 0x18+node, 0x3, 0x87);
                /* Invalid read means we've updated every Northbridge. */
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/crash.c
--- a/xen/arch/x86/crash.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/crash.c      Mon Jun 02 11:35:39 2008 +0900
@@ -102,6 +102,7 @@ void machine_crash_shutdown(void)
     hvm_cpu_down();
 
     info = kexec_crash_save_info();
+    info->xen_phys_start = xen_phys_start;
     info->dom0_pfn_to_mfn_frame_list_list =
         arch_get_pfn_to_mfn_frame_list_list(dom0);
 }
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/domain.c     Mon Jun 02 11:35:39 2008 +0900
@@ -59,8 +59,6 @@ static void default_idle(void);
 static void default_idle(void);
 void (*pm_idle) (void) = default_idle;
 
-static void unmap_vcpu_info(struct vcpu *v);
-
 static void paravirt_ctxt_switch_from(struct vcpu *v);
 static void paravirt_ctxt_switch_to(struct vcpu *v);
 
@@ -432,8 +430,6 @@ void vcpu_destroy(struct vcpu *v)
 {
     if ( is_pv_32on64_vcpu(v) )
         release_compat_l4(v);
-
-    unmap_vcpu_info(v);
 
     if ( is_hvm_vcpu(v) )
         hvm_vcpu_destroy(v);
@@ -825,8 +821,15 @@ int arch_set_info_guest(
 
 void arch_vcpu_reset(struct vcpu *v)
 {
-    destroy_gdt(v);
-    vcpu_destroy_pagetables(v);
+    if ( !is_hvm_vcpu(v) )
+    {
+        destroy_gdt(v);
+        vcpu_destroy_pagetables(v);
+    }
+    else
+    {
+        vcpu_end_shutdown_deferral(v);
+    }
 }
 
 /* 
@@ -1857,16 +1860,19 @@ int domain_relinquish_resources(struct d
         /* Tear down paging-assistance stuff. */
         paging_teardown(d);
 
-        /* Drop the in-use references to page-table bases. */
         for_each_vcpu ( d, v )
+        {
+            /* Drop the in-use references to page-table bases. */
             vcpu_destroy_pagetables(v);
 
-        /*
-         * Relinquish GDT mappings. No need for explicit unmapping of the LDT
-         * as it automatically gets squashed when the guest's mappings go away.
-         */
-        for_each_vcpu(d, v)
+            /*
+             * Relinquish GDT mappings. No need for explicit unmapping of the
+             * LDT as it automatically gets squashed with the guest mappings.
+             */
             destroy_gdt(v);
+
+            unmap_vcpu_info(v);
+        }
 
         d->arch.relmem = RELMEM_xen_l4;
         /* fallthrough */
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/domctl.c     Mon Jun 02 11:35:39 2008 +0900
@@ -526,14 +526,54 @@ long arch_do_domctl(
     }
     break;
 
+    case XEN_DOMCTL_get_device_group:
+    {
+        struct domain *d;
+        u32 max_sdevs;
+        u8 bus, devfn;
+        XEN_GUEST_HANDLE_64(uint32) sdevs;
+        int num_sdevs;
+
+        ret = -ENOSYS;
+        if ( !iommu_enabled )
+            break;
+
+        ret = -EINVAL;
+        if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
+            break;
+
+        bus = (domctl->u.get_device_group.machine_bdf >> 16) & 0xff;
+        devfn = (domctl->u.get_device_group.machine_bdf >> 8) & 0xff;
+        max_sdevs = domctl->u.get_device_group.max_sdevs;
+        sdevs = domctl->u.get_device_group.sdev_array;
+
+        num_sdevs = iommu_get_device_group(d, bus, devfn, sdevs, max_sdevs);
+        if ( num_sdevs < 0 )
+        {
+            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
+            ret = -EFAULT;
+            domctl->u.get_device_group.num_sdevs = 0;
+        }
+        else
+        {
+            ret = 0;
+            domctl->u.get_device_group.num_sdevs = num_sdevs;
+        }
+        if ( copy_to_guest(u_domctl, domctl, 1) )
+            ret = -EFAULT;
+        rcu_unlock_domain(d);
+    }
+    break;
+
     case XEN_DOMCTL_test_assign_device:
     {
         u8 bus, devfn;
 
-        ret = -EINVAL;
+        ret = -ENOSYS;
         if ( !iommu_enabled )
             break;
 
+        ret = -EINVAL;
         bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
         devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
 
@@ -553,10 +593,11 @@ long arch_do_domctl(
         struct domain *d;
         u8 bus, devfn;
 
-        ret = -EINVAL;
+        ret = -ENOSYS;
         if ( !iommu_enabled )
             break;
 
+        ret = -EINVAL;
         if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) )
         {
             gdprintk(XENLOG_ERR,
@@ -565,6 +606,12 @@ long arch_do_domctl(
         }
         bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
         devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
+
+        if ( !iommu_pv_enabled && !is_hvm_domain(d) )
+        {
+            ret = -ENOSYS;
+            break;
+        }
 
         if ( device_assigned(bus, devfn) )
         {
@@ -576,7 +623,7 @@ long arch_do_domctl(
 
         ret = assign_device(d, bus, devfn);
         gdprintk(XENLOG_INFO, "XEN_DOMCTL_assign_device: bdf = %x:%x:%x\n",
-            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
         put_domain(d);
     }
     break;
@@ -586,10 +633,11 @@ long arch_do_domctl(
         struct domain *d;
         u8 bus, devfn;
 
-        ret = -EINVAL;
+        ret = -ENOSYS;
         if ( !iommu_enabled )
             break;
 
+        ret = -EINVAL;
         if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) )
         {
             gdprintk(XENLOG_ERR,
@@ -599,9 +647,16 @@ long arch_do_domctl(
         bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
         devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
 
+        if ( !iommu_pv_enabled && !is_hvm_domain(d) )
+        {
+            ret = -ENOSYS;
+            break;
+        }
+
         if ( !device_assigned(bus, devfn) )
             break;
 
+        ret = 0;
         deassign_device(d, bus, devfn);
         gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/hpet.c   Mon Jun 02 11:35:39 2008 +0900
@@ -29,9 +29,9 @@
 #define S_TO_NS  1000000000ULL           /* 1s  = 10^9  ns */
 #define S_TO_FS  1000000000000000ULL     /* 1s  = 10^15 fs */
 
-/* Frequency_of_TSC / frequency_of_HPET = 32 */
-#define TSC_PER_HPET_TICK 32
-#define guest_time_hpet(v) (hvm_get_guest_time(v) / TSC_PER_HPET_TICK)
+/* Frequency_of_Xen_systeme_time / frequency_of_HPET = 16 */
+#define STIME_PER_HPET_TICK 16
+#define guest_time_hpet(v) (hvm_get_guest_time(v) / STIME_PER_HPET_TICK)
 
 #define HPET_ID         0x000
 #define HPET_PERIOD     0x004
@@ -192,7 +192,7 @@ static void hpet_stop_timer(HPETState *h
 
 /* the number of HPET tick that stands for
  * 1/(2^10) second, namely, 0.9765625 milliseconds */
-#define  HPET_TINY_TIME_SPAN  ((h->tsc_freq >> 10) / TSC_PER_HPET_TICK)
+#define  HPET_TINY_TIME_SPAN  ((h->stime_freq >> 10) / STIME_PER_HPET_TICK)
 
 static void hpet_set_timer(HPETState *h, unsigned int tn)
 {
@@ -558,17 +558,17 @@ void hpet_init(struct vcpu *v)
     spin_lock_init(&h->lock);
 
     h->vcpu = v;
-    h->tsc_freq = ticks_per_sec(v);
-
-    h->hpet_to_ns_scale = ((S_TO_NS * TSC_PER_HPET_TICK) << 10) / h->tsc_freq;
+    h->stime_freq = S_TO_NS;
+
+    h->hpet_to_ns_scale = ((S_TO_NS * STIME_PER_HPET_TICK) << 10) / h->stime_freq;
     h->hpet_to_ns_limit = ~0ULL / h->hpet_to_ns_scale;
 
     /* 64-bit main counter; 3 timers supported; LegacyReplacementRoute. */
     h->hpet.capability = 0x8086A201ULL;
 
     /* This is the number of femptoseconds per HPET tick. */
-    /* Here we define HPET's frequency to be 1/32 of the TSC's */
-    h->hpet.capability |= ((S_TO_FS*TSC_PER_HPET_TICK/h->tsc_freq) << 32);
+    /* Here we define HPET's frequency to be 1/16 of Xen system time */
+    h->hpet.capability |= ((S_TO_FS*STIME_PER_HPET_TICK/h->stime_freq) << 32);
 
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
     {
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/hvm.c    Mon Jun 02 11:35:39 2008 +0900
@@ -296,6 +296,8 @@ int hvm_domain_initialise(struct domain 
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
     spin_lock_init(&d->arch.hvm_domain.uc_lock);
 
+    hvm_init_guest_time(d);
+
     d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
 
     hvm_init_cacheattr_region_list(d);
@@ -661,7 +663,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
         hpet_init(v);
  
         /* Init guest TSC to start from zero. */
-        hvm_set_guest_time(v, 0);
+        hvm_set_guest_tsc(v, 0);
 
         /* Can start up without SIPI-SIPI or setvcpucontext domctl. */
         v->is_initialised = 1;
@@ -1098,16 +1100,17 @@ int hvm_virtual_to_linear_addr(
     return 0;
 }
 
-static void *hvm_map(unsigned long va, int size)
+static void *hvm_map_entry(unsigned long va)
 {
     unsigned long gfn, mfn;
     p2m_type_t p2mt;
     uint32_t pfec;
 
-    if ( ((va & ~PAGE_MASK) + size) > PAGE_SIZE )
-    {
-        hvm_inject_exception(TRAP_page_fault, PFEC_write_access,
-                             (va + PAGE_SIZE - 1) & PAGE_MASK);
+    if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE )
+    {
+        gdprintk(XENLOG_ERR, "Descriptor table entry "
+                 "straddles page boundary\n");
+        domain_crash(current->domain);
         return NULL;
     }
 
@@ -1119,7 +1122,8 @@ static void *hvm_map(unsigned long va, i
     mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
     if ( !p2m_is_ram(p2mt) )
     {
-        hvm_inject_exception(TRAP_page_fault, pfec, va);
+        gdprintk(XENLOG_ERR, "Failed to look up descriptor table entry\n");
+        domain_crash(current->domain);
         return NULL;
     }
 
@@ -1130,7 +1134,7 @@ static void *hvm_map(unsigned long va, i
     return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK);
 }
 
-static void hvm_unmap(void *p)
+static void hvm_unmap_entry(void *p)
 {
     if ( p )
         unmap_domain_page(p);
@@ -1166,7 +1170,7 @@ static int hvm_load_segment_selector(
     if ( ((sel & 0xfff8) + 7) > desctab.limit )
         goto fail;
 
-    pdesc = hvm_map(desctab.base + (sel & 0xfff8), 8);
+    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8));
     if ( pdesc == NULL )
         goto hvm_map_fail;
 
@@ -1226,7 +1230,7 @@ static int hvm_load_segment_selector(
     desc.b |= 0x100;
 
  skip_accessed_flag:
-    hvm_unmap(pdesc);
+    hvm_unmap_entry(pdesc);
 
     segr.base = (((desc.b <<  0) & 0xff000000u) |
                  ((desc.b << 16) & 0x00ff0000u) |
@@ -1242,7 +1246,7 @@ static int hvm_load_segment_selector(
     return 0;
 
  unmap_and_fail:
-    hvm_unmap(pdesc);
+    hvm_unmap_entry(pdesc);
  fail:
     hvm_inject_exception(fault_type, sel & 0xfffc, 0);
  hvm_map_fail:
@@ -1258,7 +1262,7 @@ void hvm_task_switch(
     struct segment_register gdt, tr, prev_tr, segr;
     struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
     unsigned long eflags;
-    int exn_raised;
+    int exn_raised, rc;
     struct {
         u16 back_link,__blh;
         u32 esp0;
@@ -1270,7 +1274,7 @@ void hvm_task_switch(
         u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
         u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
         u16 trace, iomap;
-    } *ptss, tss;
+    } tss = { 0 };
 
     hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
     hvm_get_segment_register(v, x86_seg_tr, &prev_tr);
@@ -1283,11 +1287,11 @@ void hvm_task_switch(
         goto out;
     }
 
-    optss_desc = hvm_map(gdt.base + (prev_tr.sel & 0xfff8), 8);
+    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8));
     if ( optss_desc == NULL )
         goto out;
 
-    nptss_desc = hvm_map(gdt.base + (tss_sel & 0xfff8), 8);
+    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8));
     if ( nptss_desc == NULL )
         goto out;
 
@@ -1322,84 +1326,89 @@ void hvm_task_switch(
         goto out;
     }
 
-    ptss = hvm_map(prev_tr.base, sizeof(tss));
-    if ( ptss == NULL )
+    rc = hvm_copy_from_guest_virt(
+        &tss, prev_tr.base, sizeof(tss), PFEC_page_present);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
         goto out;
 
     eflags = regs->eflags;
     if ( taskswitch_reason == TSW_iret )
         eflags &= ~X86_EFLAGS_NT;
 
-    ptss->cr3    = v->arch.hvm_vcpu.guest_cr[3];
-    ptss->eip    = regs->eip;
-    ptss->eflags = eflags;
-    ptss->eax    = regs->eax;
-    ptss->ecx    = regs->ecx;
-    ptss->edx    = regs->edx;
-    ptss->ebx    = regs->ebx;
-    ptss->esp    = regs->esp;
-    ptss->ebp    = regs->ebp;
-    ptss->esi    = regs->esi;
-    ptss->edi    = regs->edi;
+    tss.cr3    = v->arch.hvm_vcpu.guest_cr[3];
+    tss.eip    = regs->eip;
+    tss.eflags = eflags;
+    tss.eax    = regs->eax;
+    tss.ecx    = regs->ecx;
+    tss.edx    = regs->edx;
+    tss.ebx    = regs->ebx;
+    tss.esp    = regs->esp;
+    tss.ebp    = regs->ebp;
+    tss.esi    = regs->esi;
+    tss.edi    = regs->edi;
 
     hvm_get_segment_register(v, x86_seg_es, &segr);
-    ptss->es = segr.sel;
+    tss.es = segr.sel;
     hvm_get_segment_register(v, x86_seg_cs, &segr);
-    ptss->cs = segr.sel;
+    tss.cs = segr.sel;
     hvm_get_segment_register(v, x86_seg_ss, &segr);
-    ptss->ss = segr.sel;
+    tss.ss = segr.sel;
     hvm_get_segment_register(v, x86_seg_ds, &segr);
-    ptss->ds = segr.sel;
+    tss.ds = segr.sel;
     hvm_get_segment_register(v, x86_seg_fs, &segr);
-    ptss->fs = segr.sel;
+    tss.fs = segr.sel;
     hvm_get_segment_register(v, x86_seg_gs, &segr);
-    ptss->gs = segr.sel;
+    tss.gs = segr.sel;
     hvm_get_segment_register(v, x86_seg_ldtr, &segr);
-    ptss->ldt = segr.sel;
-
-    hvm_unmap(ptss);
-
-    ptss = hvm_map(tr.base, sizeof(tss));
-    if ( ptss == NULL )
+    tss.ldt = segr.sel;
+
+    rc = hvm_copy_to_guest_virt(
+        prev_tr.base, &tss, sizeof(tss), PFEC_page_present);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
         goto out;
 
-    if ( hvm_set_cr3(ptss->cr3) )
-    {
-        hvm_unmap(ptss);
+    rc = hvm_copy_from_guest_virt(
+        &tss, tr.base, sizeof(tss), PFEC_page_present);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
         goto out;
-    }
-
-    regs->eip    = ptss->eip;
-    regs->eflags = ptss->eflags | 2;
-    regs->eax    = ptss->eax;
-    regs->ecx    = ptss->ecx;
-    regs->edx    = ptss->edx;
-    regs->ebx    = ptss->ebx;
-    regs->esp    = ptss->esp;
-    regs->ebp    = ptss->ebp;
-    regs->esi    = ptss->esi;
-    regs->edi    = ptss->edi;
+
+    if ( hvm_set_cr3(tss.cr3) )
+        goto out;
+
+    regs->eip    = tss.eip;
+    regs->eflags = tss.eflags | 2;
+    regs->eax    = tss.eax;
+    regs->ecx    = tss.ecx;
+    regs->edx    = tss.edx;
+    regs->ebx    = tss.ebx;
+    regs->esp    = tss.esp;
+    regs->ebp    = tss.ebp;
+    regs->esi    = tss.esi;
+    regs->edi    = tss.edi;
 
     if ( (taskswitch_reason == TSW_call_or_int) )
     {
         regs->eflags |= X86_EFLAGS_NT;
-        ptss->back_link = prev_tr.sel;
+        tss.back_link = prev_tr.sel;
     }
 
     exn_raised = 0;
-    if ( hvm_load_segment_selector(v, x86_seg_es, ptss->es) ||
-         hvm_load_segment_selector(v, x86_seg_cs, ptss->cs) ||
-         hvm_load_segment_selector(v, x86_seg_ss, ptss->ss) ||
-         hvm_load_segment_selector(v, x86_seg_ds, ptss->ds) ||
-         hvm_load_segment_selector(v, x86_seg_fs, ptss->fs) ||
-         hvm_load_segment_selector(v, x86_seg_gs, ptss->gs) ||
-         hvm_load_segment_selector(v, x86_seg_ldtr, ptss->ldt) )
+    if ( hvm_load_segment_selector(v, x86_seg_es, tss.es) ||
+         hvm_load_segment_selector(v, x86_seg_cs, tss.cs) ||
+         hvm_load_segment_selector(v, x86_seg_ss, tss.ss) ||
+         hvm_load_segment_selector(v, x86_seg_ds, tss.ds) ||
+         hvm_load_segment_selector(v, x86_seg_fs, tss.fs) ||
+         hvm_load_segment_selector(v, x86_seg_gs, tss.gs) ||
+         hvm_load_segment_selector(v, x86_seg_ldtr, tss.ldt) )
         exn_raised = 1;
 
-    if ( (ptss->trace & 1) && !exn_raised )
+    rc = hvm_copy_to_guest_virt(
+        tr.base, &tss, sizeof(tss), PFEC_page_present);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        exn_raised = 1;
+
+    if ( (tss.trace & 1) && !exn_raised )
         hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
-
-    hvm_unmap(ptss);
 
     tr.attr.fields.type = 0xb; /* busy 32-bit tss */
     hvm_set_segment_register(v, x86_seg_tr, &tr);
@@ -1428,8 +1437,8 @@ void hvm_task_switch(
     }
 
  out:
-    hvm_unmap(optss_desc);
-    hvm_unmap(nptss_desc);
+    hvm_unmap_entry(optss_desc);
+    hvm_unmap_entry(nptss_desc);
 }
 
 #define HVMCOPY_from_guest (0u<<0)
@@ -1632,7 +1641,7 @@ int hvm_msr_read_intercept(struct cpu_us
     switch ( ecx )
     {
     case MSR_IA32_TSC:
-        msr_content = hvm_get_guest_time(v);
+        msr_content = hvm_get_guest_tsc(v);
         break;
 
     case MSR_IA32_APICBASE:
@@ -1725,7 +1734,7 @@ int hvm_msr_write_intercept(struct cpu_u
     switch ( ecx )
     {
      case MSR_IA32_TSC:
-        hvm_set_guest_time(v, msr_content);
+        hvm_set_guest_tsc(v, msr_content);
         pt_reset(v);
         break;
 
@@ -2071,6 +2080,13 @@ void hvm_vcpu_reset_state(struct vcpu *v
     if ( v->is_initialised )
         goto out;
 
+    if ( !paging_mode_hap(d) )
+    {
+        if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
+            put_page(pagetable_get_page(v->arch.guest_table));
+        v->arch.guest_table = pagetable_null();
+    }
+
     ctxt = &v->arch.guest_context;
     memset(ctxt, 0, sizeof(*ctxt));
     ctxt->flags = VGCF_online;
@@ -2122,6 +2138,8 @@ void hvm_vcpu_reset_state(struct vcpu *v
     v->arch.hvm_vcpu.cache_tsc_offset =
         v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
     hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+
+    paging_update_paging_modes(v);
 
     v->arch.flags |= TF_kernel_mode;
     v->is_initialised = 1;
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/i8254.c  Mon Jun 02 11:35:39 2008 +0900
@@ -31,6 +31,7 @@
 #include <xen/lib.h>
 #include <xen/errno.h>
 #include <xen/sched.h>
+#include <asm/time.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/support.h>
@@ -52,6 +53,9 @@ static int handle_pit_io(
     int dir, uint32_t port, uint32_t bytes, uint32_t *val);
 static int handle_speaker_io(
     int dir, uint32_t port, uint32_t bytes, uint32_t *val);
+
+#define get_guest_time(v) \
+   (is_hvm_vcpu(v) ? hvm_get_guest_time(v) : (u64)get_s_time())
 
 /* Compute with 96 bit intermediate result: (a*b)/c */
 static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
@@ -86,8 +90,8 @@ static int pit_get_count(PITState *pit, 
 
     ASSERT(spin_is_locked(&pit->lock));
 
-    d = muldiv64(hvm_get_guest_time(v) - pit->count_load_time[channel],
-                 PIT_FREQ, ticks_per_sec(v));
+    d = muldiv64(get_guest_time(v) - pit->count_load_time[channel],
+                 PIT_FREQ, SYSTEM_TIME_HZ);
 
     switch ( c->mode )
     {
@@ -117,8 +121,8 @@ static int pit_get_out(PITState *pit, in
 
     ASSERT(spin_is_locked(&pit->lock));
 
-    d = muldiv64(hvm_get_guest_time(v) - pit->count_load_time[channel], 
-                 PIT_FREQ, ticks_per_sec(v));
+    d = muldiv64(get_guest_time(v) - pit->count_load_time[channel], 
+                 PIT_FREQ, SYSTEM_TIME_HZ);
 
     switch ( s->mode )
     {
@@ -164,7 +168,7 @@ static void pit_set_gate(PITState *pit, 
     case 3:
         /* Restart counting on rising edge. */
         if ( s->gate < val )
-            pit->count_load_time[channel] = hvm_get_guest_time(v);
+            pit->count_load_time[channel] = get_guest_time(v);
         break;
     }
 
@@ -180,7 +184,7 @@ static void pit_time_fired(struct vcpu *
 static void pit_time_fired(struct vcpu *v, void *priv)
 {
     uint64_t *count_load_time = priv;
-    *count_load_time = hvm_get_guest_time(v);
+    *count_load_time = get_guest_time(v);
 }
 
 static void pit_load_count(PITState *pit, int channel, int val)
@@ -195,11 +199,11 @@ static void pit_load_count(PITState *pit
         val = 0x10000;
 
     if ( v == NULL )
-        rdtscll(pit->count_load_time[channel]);
-    else
-        pit->count_load_time[channel] = hvm_get_guest_time(v);
+        pit->count_load_time[channel] = 0;
+    else
+        pit->count_load_time[channel] = get_guest_time(v);
     s->count = val;
-    period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ);
+    period = DIV_ROUND(val * SYSTEM_TIME_HZ, PIT_FREQ);
 
     if ( (v == NULL) || !is_hvm_vcpu(v) || (channel != 0) )
         return;
@@ -435,7 +439,7 @@ static int pit_load(struct domain *d, hv
      * time jitter here, but the wall-clock will have jumped massively, so 
      * we hope the guest can handle it.
      */
-    pit->pt0.last_plt_gtime = hvm_get_guest_time(d->vcpu[0]);
+    pit->pt0.last_plt_gtime = get_guest_time(d->vcpu[0]);
     for ( i = 0; i < 3; i++ )
         pit_load_count(pit, i, pit->hw.channels[i].count);
 
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/pmtimer.c
--- a/xen/arch/x86/hvm/pmtimer.c        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/pmtimer.c        Mon Jun 02 11:35:39 2008 +0900
@@ -257,7 +257,7 @@ void pmtimer_init(struct vcpu *v)
 
     spin_lock_init(&s->lock);
 
-    s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / ticks_per_sec(v);
+    s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / SYSTEM_TIME_HZ;
     s->vcpu = v;
 
     /* Intercept port I/O (need two handlers because PM1a_CNT is between
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Jun 02 11:35:39 2008 +0900
@@ -299,7 +299,7 @@ static void svm_save_cpu_state(struct vc
     data->msr_efer         = v->arch.hvm_vcpu.guest_efer;
     data->msr_flags        = -1ULL;
 
-    data->tsc = hvm_get_guest_time(v);
+    data->tsc = hvm_get_guest_tsc(v);
 }
 
 
@@ -315,7 +315,7 @@ static void svm_load_cpu_state(struct vc
     v->arch.hvm_vcpu.guest_efer = data->msr_efer;
     svm_update_guest_efer(v);
 
-    hvm_set_guest_time(v, data->tsc);
+    hvm_set_guest_tsc(v, data->tsc);
 }
 
 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/vlapic.c Mon Jun 02 11:35:39 2008 +0900
@@ -22,18 +22,19 @@
 #include <xen/types.h>
 #include <xen/mm.h>
 #include <xen/xmalloc.h>
+#include <xen/domain.h>
 #include <xen/domain_page.h>
-#include <asm/page.h>
 #include <xen/event.h>
 #include <xen/trace.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/numa.h>
+#include <asm/current.h>
+#include <asm/page.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/support.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <asm/current.h>
 #include <asm/hvm/vmx/vmx.h>
-#include <xen/numa.h>
 #include <public/hvm/ioreq.h>
 #include <public/hvm/params.h>
 
@@ -259,6 +260,7 @@ static void vlapic_init_action(unsigned 
 {
     struct vcpu *v = (struct vcpu *)_vcpu;
     struct domain *d = v->domain;
+    bool_t fpu_initialised;
 
     /* If the VCPU is not on its way down we have nothing to do. */
     if ( !test_bit(_VPF_down, &v->pause_flags) )
@@ -270,15 +272,12 @@ static void vlapic_init_action(unsigned 
         return;
     }
 
+    /* Reset necessary VCPU state. This does not include FPU state. */
     domain_lock(d);
-
-    /* Paranoia makes us re-assert VPF_down under the domain lock. */
-    set_bit(_VPF_down, &v->pause_flags);
-    v->is_initialised = 0;
-    clear_bit(_VPF_blocked, &v->pause_flags);
-
+    fpu_initialised = v->fpu_initialised;
+    vcpu_reset(v);
+    v->fpu_initialised = fpu_initialised;
     vlapic_reset(vcpu_vlapic(v));
-
     domain_unlock(d);
 
     vcpu_unpause(v);
@@ -474,7 +473,6 @@ static uint32_t vlapic_get_tmcct(struct 
     uint64_t counter_passed;
 
     counter_passed = ((hvm_get_guest_time(v) - vlapic->timer_last_update)
-                      * 1000000000ULL / ticks_per_sec(v)
                       / APIC_BUS_CYCLE_NS / vlapic->hw.timer_divisor);
     tmcct = tmict - counter_passed;
 
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Jun 02 11:35:39 2008 +0900
@@ -607,7 +607,7 @@ static void vmx_save_cpu_state(struct vc
     data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
 #endif
 
-    data->tsc = hvm_get_guest_time(v);
+    data->tsc = hvm_get_guest_tsc(v);
 }
 
 static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
@@ -625,7 +625,7 @@ static void vmx_load_cpu_state(struct vc
     v->arch.hvm_vmx.shadow_gs = data->shadow_gs;
 #endif
 
-    hvm_set_guest_time(v, data->tsc);
+    hvm_set_guest_tsc(v, data->tsc);
 }
 
 
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/vpt.c    Mon Jun 02 11:35:39 2008 +0900
@@ -25,6 +25,39 @@
 #define mode_is(d, name) \
     ((d)->arch.hvm_domain.params[HVM_PARAM_TIMER_MODE] == HVMPTM_##name)
 
+void hvm_init_guest_time(struct domain *d)
+{
+    struct pl_time *pl = &d->arch.hvm_domain.pl_time;
+
+    spin_lock_init(&pl->pl_time_lock);
+    pl->stime_offset = -(u64)get_s_time();
+    pl->last_guest_time = 0;
+}
+
+u64 hvm_get_guest_time(struct vcpu *v)
+{
+    struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time;
+    u64 now;
+
+    /* Called from device models shared with PV guests. Be careful. */
+    ASSERT(is_hvm_vcpu(v));
+
+    spin_lock(&pl->pl_time_lock);
+    now = get_s_time() + pl->stime_offset;
+    if ( (int64_t)(now - pl->last_guest_time) >= 0 )
+        pl->last_guest_time = now;
+    else
+        now = pl->last_guest_time;
+    spin_unlock(&pl->pl_time_lock);
+
+    return now + v->arch.hvm_vcpu.stime_offset;
+}
+
+void hvm_set_guest_time(struct vcpu *v, u64 guest_time)
+{
+    v->arch.hvm_vcpu.stime_offset += guest_time - hvm_get_guest_time(v);
+}
+
 static int pt_irq_vector(struct periodic_time *pt, enum hvm_intsrc src)
 {
     struct vcpu *v = pt->vcpu;
@@ -348,7 +381,7 @@ void create_periodic_time(
     pt->vcpu = v;
     pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
     pt->irq = irq;
-    pt->period_cycles = (u64)period * cpu_khz / 1000000L;
+    pt->period_cycles = (u64)period;
     pt->one_shot = one_shot;
     pt->scheduled = NOW() + period;
     /*
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/mm.c Mon Jun 02 11:35:39 2008 +0900
@@ -1939,6 +1939,20 @@ int get_page_type(struct page_info *page
     }
     while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
 
+    if ( unlikely((x & PGT_type_mask) != type) )
+    {
+        /* Special pages should not be accessible from devices. */
+        struct domain *d = page_get_owner(page);
+        if ( d && unlikely(need_iommu(d)) )
+        {
+            if ( (x & PGT_type_mask) == PGT_writable_page )
+                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
+            else if ( type == PGT_writable_page )
+                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
+                               page_to_mfn(page));
+        }
+    }
+
     if ( unlikely(!(nx & PGT_validated)) )
     {
         /* Try to validate page type; drop the new reference on failure. */
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Mon Jun 02 11:35:39 2008 +0900
@@ -266,12 +266,6 @@ out:
                 iommu_unmap_page(d, gfn);
         }
     }
-
-#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
-    /* If p2m table is shared with vtd page-table. */
-    if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
-        iommu_flush(d, gfn, (u64*)ept_entry);
-#endif
 
     return rv;
 }
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/mm/p2m.c     Mon Jun 02 11:35:39 2008 +0900
@@ -325,7 +325,7 @@ p2m_set_entry(struct domain *d, unsigned
     if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
         d->arch.p2m->max_mapped_pfn = gfn;
 
-    if ( iommu_enabled && is_hvm_domain(d) )
+    if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) )
     {
         if ( p2mt == p2m_ram_rw )
             for ( i = 0; i < (1UL << page_order); i++ )
@@ -868,7 +868,12 @@ p2m_remove_page(struct domain *d, unsign
     unsigned long i;
 
     if ( !paging_mode_translate(d) )
+    {
+        if ( need_iommu(d) )
+            for ( i = 0; i < (1 << page_order); i++ )
+                iommu_unmap_page(d, mfn + i);
         return;
+    }
 
     P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
@@ -899,7 +904,19 @@ guest_physmap_add_entry(struct domain *d
     int rc = 0;
 
     if ( !paging_mode_translate(d) )
-        return -EINVAL;
+    {
+        if ( need_iommu(d) && t == p2m_ram_rw )
+        {
+            for ( i = 0; i < (1 << page_order); i++ )
+                if ( (rc = iommu_map_page(d, mfn + i, mfn + i)) != 0 )
+                {
+                    while ( i-- > 0 )
+                        iommu_unmap_page(d, mfn + i);
+                    return rc;
+                }
+        }
+        return 0;
+    }
 
 #if CONFIG_PAGING_LEVELS == 3
     /*
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Jun 02 11:35:39 2008 +0900
@@ -2799,8 +2799,11 @@ int shadow_track_dirty_vram(struct domai
     if ( !d->dirty_vram )
     {
         /* Just recount from start. */
-        for ( i = begin_pfn; i < end_pfn; i++ )
-            flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, i, &t));
+        for ( i = begin_pfn; i < end_pfn; i++ ) {
+            mfn_t mfn = gfn_to_mfn(d, i, &t);
+            if (mfn_x(mfn) != INVALID_MFN)
+                flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
+        }
 
         gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
 
@@ -2840,61 +2843,70 @@ int shadow_track_dirty_vram(struct domai
         /* Iterate over VRAM to track dirty bits. */
         for ( i = 0; i < nr; i++ ) {
             mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
-            struct page_info *page = mfn_to_page(mfn);
-            u32 count_info = page->u.inuse.type_info & PGT_count_mask;
+            struct page_info *page;
+            u32 count_info;
             int dirty = 0;
             paddr_t sl1ma = d->dirty_vram->sl1ma[i];
 
-            switch (count_info)
+            if (mfn_x(mfn) == INVALID_MFN)
             {
-            case 0:
-                /* No guest reference, nothing to track. */
-                break;
-            case 1:
-                /* One guest reference. */
-                if ( sl1ma == INVALID_PADDR )
+                dirty = 1;
+            }
+            else
+            {
+                page = mfn_to_page(mfn);
+                count_info = page->u.inuse.type_info & PGT_count_mask;
+                switch (count_info)
                 {
-                    /* We don't know which sl1e points to this, too bad. */
+                case 0:
+                    /* No guest reference, nothing to track. */
+                    break;
+                case 1:
+                    /* One guest reference. */
+                    if ( sl1ma == INVALID_PADDR )
+                    {
+                        /* We don't know which sl1e points to this, too bad. */
+                        dirty = 1;
+                        /* TODO: Heuristics for finding the single mapping of
+                         * this gmfn */
+                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
+                    }
+                    else
+                    {
+                        /* Hopefully the most common case: only one mapping,
+                         * whose dirty bit we can use. */
+                        l1_pgentry_t *sl1e;
+#ifdef __i386__
+                        void *sl1p = map_sl1p;
+                        unsigned long sl1mfn = paddr_to_pfn(sl1ma);
+
+                        if ( sl1mfn != map_mfn ) {
+                            if ( map_sl1p )
+                                sh_unmap_domain_page(map_sl1p);
+                            map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
+                            map_mfn = sl1mfn;
+                        }
+                        sl1e = sl1p + (sl1ma & ~PAGE_MASK);
+#else
+                        sl1e = maddr_to_virt(sl1ma);
+#endif
+
+                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+                        {
+                            dirty = 1;
+                            /* Note: this is atomic, so we may clear a
+                             * _PAGE_ACCESSED set by another processor. */
+                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                            flush_tlb = 1;
+                        }
+                    }
+                    break;
+                default:
+                    /* More than one guest reference,
+                     * we don't afford tracking that. */
                     dirty = 1;
-                    /* TODO: Heuristics for finding the single mapping of
-                     * this gmfn */
-                    flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, begin_pfn + i, &t));
+                    break;
                 }
-                else
-                {
-                    /* Hopefully the most common case: only one mapping,
-                     * whose dirty bit we can use. */
-                    l1_pgentry_t *sl1e;
-#ifdef __i386__
-                    void *sl1p = map_sl1p;
-                    unsigned long sl1mfn = paddr_to_pfn(sl1ma);
-
-                    if ( sl1mfn != map_mfn ) {
-                        if ( map_sl1p )
-                            sh_unmap_domain_page(map_sl1p);
-                        map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
-                        map_mfn = sl1mfn;
-                    }
-                    sl1e = sl1p + (sl1ma & ~PAGE_MASK);
-#else
-                    sl1e = maddr_to_virt(sl1ma);
-#endif
-
-                    if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                    {
-                        dirty = 1;
-                        /* Note: this is atomic, so we may clear a
-                         * _PAGE_ACCESSED set by another processor. */
-                        l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                        flush_tlb = 1;
-                    }
-                }
-                break;
-            default:
-                /* More than one guest reference,
-                 * we don't afford tracking that. */
-                dirty = 1;
-                break;
             }
 
             if ( dirty )
@@ -2916,8 +2928,11 @@ int shadow_track_dirty_vram(struct domai
             {
                 /* was clean for more than two seconds, try to disable guest
                  * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ )
-                    flush_tlb |= sh_remove_write_access(d->vcpu[0], gfn_to_mfn(d, i, &t), 1, 0);
+                for ( i = begin_pfn; i < end_pfn; i++ ) {
+                    mfn_t mfn = gfn_to_mfn(d, i, &t);
+                    if (mfn_x(mfn) != INVALID_MFN)
+                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
+                }
                 d->dirty_vram->last_dirty = -1;
             }
             rc = 0;
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/msi.c        Mon Jun 02 11:35:39 2008 +0900
@@ -25,6 +25,7 @@
 #include <mach_apic.h>
 #include <io_ports.h>
 #include <public/physdev.h>
+#include <xen/iommu.h>
 
 extern int msi_irq_enable;
 
@@ -156,6 +157,9 @@ void read_msi_msg(unsigned int irq, stru
     default:
         BUG();
     }
+
+    if ( vtd_enabled )
+        msi_msg_read_remap_rte(entry, msg);
 }
 
 static int set_vector_msi(struct msi_desc *entry)
@@ -201,6 +205,9 @@ void write_msi_msg(unsigned int irq, str
 void write_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
     struct msi_desc *entry = irq_desc[irq].msi_desc;
+
+    if ( vtd_enabled )
+        msi_msg_write_remap_rte(entry, msg);
 
     switch ( entry->msi_attrib.type )
     {
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/setup.c      Mon Jun 02 11:35:39 2008 +0900
@@ -1100,6 +1100,14 @@ void arch_get_xen_caps(xen_capabilities_
 #endif
 }
 
+int xen_in_range(paddr_t start, paddr_t end)
+{
+    start = max_t(paddr_t, start, xenheap_phys_start);
+    end = min_t(paddr_t, end, xenheap_phys_end);
+ 
+    return start < end; 
+}
+
 /*
  * Local variables:
  * mode: C
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/smpboot.c    Mon Jun 02 11:35:39 2008 +0900
@@ -1391,6 +1391,11 @@ void enable_nonboot_cpus(void)
                panic("Not enough cpus");
        }
        cpus_clear(frozen_cpus);
+
+       /*
+        * Cleanup possible dangling ends after sleep...
+        */
+       smpboot_restore_warm_reset_vector();
 }
 #else /* ... !CONFIG_HOTPLUG_CPU */
 int __cpu_disable(void)
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/tboot.c
--- a/xen/arch/x86/tboot.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/tboot.c      Mon Jun 02 11:35:39 2008 +0900
@@ -96,6 +96,18 @@ int tboot_in_measured_env(void)
     return (g_tboot_shared != NULL);
 }
 
+int tboot_in_range(paddr_t start, paddr_t end)
+{
+    if ( g_tboot_shared == NULL || g_tboot_shared->version < 0x02 )
+        return 0;
+
+    start = max_t(paddr_t, start, g_tboot_shared->tboot_base);
+    end = min_t(paddr_t, end, 
+                g_tboot_shared->tboot_base + g_tboot_shared->tboot_size);
+ 
+    return start < end; 
+}
+
 /*
  * Local variables:
  * mode: C
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/x86_emulate/x86_emulate.c
--- a/xen/arch/x86/x86_emulate/x86_emulate.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c    Mon Jun 02 11:35:39 2008 +0900
@@ -2105,12 +2105,14 @@ x86_emulate(
         break;
     }
 
+    /* Inject #DB if single-step tracing was enabled at instruction start. */
+    if ( (ctxt->regs->eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
+         (ops->inject_hw_exception != NULL) )
+        rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
+
     /* Commit shadow register state. */
     _regs.eflags &= ~EFLG_RF;
     *ctxt->regs = _regs;
-    if ( (_regs.eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
-         (ops->inject_hw_exception != NULL) )
-        rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
 
  done:
     return rc;
diff -r d2a239224cb2 -r f1508348ffab xen/common/domain.c
--- a/xen/common/domain.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/common/domain.c       Mon Jun 02 11:35:39 2008 +0900
@@ -637,7 +637,7 @@ void vcpu_reset(struct vcpu *v)
 {
     struct domain *d = v->domain;
 
-    domain_pause(d);
+    vcpu_pause(v);
     domain_lock(d);
 
     arch_vcpu_reset(v);
@@ -653,7 +653,7 @@ void vcpu_reset(struct vcpu *v)
     clear_bit(_VPF_blocked, &v->pause_flags);
 
     domain_unlock(v->domain);
-    domain_unpause(d);
+    vcpu_unpause(v);
 }
 
 
diff -r d2a239224cb2 -r f1508348ffab xen/common/grant_table.c
--- a/xen/common/grant_table.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/common/grant_table.c  Mon Jun 02 11:35:39 2008 +0900
@@ -32,6 +32,8 @@
 #include <xen/trace.h>
 #include <xen/guest_access.h>
 #include <xen/domain_page.h>
+#include <xen/iommu.h>
+#include <xen/paging.h>
 #include <xsm/xsm.h>
 
 #ifndef max_nr_grant_frames
@@ -196,8 +198,9 @@ __gnttab_map_grant_ref(
     struct domain *ld, *rd;
     struct vcpu   *led;
     int            handle;
-    unsigned long  frame = 0;
+    unsigned long  frame = 0, nr_gets = 0;
     int            rc = GNTST_okay;
+    u32            old_pin;
     unsigned int   cache_flags;
     struct active_grant_entry *act;
     struct grant_mapping *mt;
@@ -318,6 +321,7 @@ __gnttab_map_grant_ref(
         }
     }
 
+    old_pin = act->pin;
     if ( op->flags & GNTMAP_device_map )
         act->pin += (op->flags & GNTMAP_readonly) ?
             GNTPIN_devr_inc : GNTPIN_devw_inc;
@@ -361,20 +365,17 @@ __gnttab_map_grant_ref(
             rc = GNTST_general_error;
             goto undo_out;
         }
-        
+
+        nr_gets++;
         if ( op->flags & GNTMAP_host_map )
         {
             rc = create_grant_host_mapping(op->host_addr, frame, op->flags, 0);
             if ( rc != GNTST_okay )
-            {
-                if ( gnttab_host_mapping_get_page_type(op, ld, rd) )
-                    put_page_type(mfn_to_page(frame));
-                put_page(mfn_to_page(frame));
                 goto undo_out;
-            }
 
             if ( op->flags & GNTMAP_device_map )
             {
+                nr_gets++;
                 (void)get_page(mfn_to_page(frame), rd);
                 if ( !(op->flags & GNTMAP_readonly) )
                     get_page_type(mfn_to_page(frame), PGT_writable_page);
@@ -382,6 +383,17 @@ __gnttab_map_grant_ref(
         }
     }
 
+    if ( need_iommu(ld) &&
+         !(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
+         (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
+    {
+        if ( iommu_map_page(ld, mfn_to_gmfn(ld, frame), frame) )
+        {
+            rc = GNTST_general_error;
+            goto undo_out;
+        }
+    }
+
     TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom);
 
     mt = &maptrack_entry(ld->grant_table, handle);
@@ -397,6 +409,19 @@ __gnttab_map_grant_ref(
     return;
 
  undo_out:
+    if ( nr_gets > 1 )
+    {
+        if ( !(op->flags & GNTMAP_readonly) )
+            put_page_type(mfn_to_page(frame));
+        put_page(mfn_to_page(frame));
+    }
+    if ( nr_gets > 0 )
+    {
+        if ( gnttab_host_mapping_get_page_type(op, ld, rd) )
+            put_page_type(mfn_to_page(frame));
+        put_page(mfn_to_page(frame));
+    }
+
     spin_lock(&rd->grant_table->lock);
 
     act = &active_entry(rd->grant_table, op->ref);
@@ -451,6 +476,7 @@ __gnttab_unmap_common(
     struct active_grant_entry *act;
     grant_entry_t   *sha;
     s16              rc = 0;
+    u32              old_pin;
 
     ld = current->domain;
 
@@ -497,6 +523,7 @@ __gnttab_unmap_common(
 
     act = &active_entry(rd->grant_table, op->map->ref);
     sha = &shared_entry(rd->grant_table, op->map->ref);
+    old_pin = act->pin;
 
     if ( op->frame == 0 )
     {
@@ -532,6 +559,17 @@ __gnttab_unmap_common(
             act->pin -= GNTPIN_hstr_inc;
         else
             act->pin -= GNTPIN_hstw_inc;
+    }
+
+    if ( need_iommu(ld) &&
+         (old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
+         !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
+    {
+        if ( iommu_unmap_page(ld, mfn_to_gmfn(ld, op->frame)) )
+        {
+            rc = GNTST_general_error;
+            goto unmap_out;
+        }
     }
 
     /* If just unmapped a writable mapping, mark as dirtied */
@@ -1073,6 +1111,11 @@ gnttab_transfer(
             gop.status = GNTST_bad_page;
             goto copyback;
         }
+
+#ifndef __ia64__ /* IA64 implicitly replaces the old page in steal_page(). */
+        guest_physmap_remove_page(d, gop.mfn, mfn, 0);
+#endif
+        flush_tlb_mask(d->domain_dirty_cpumask);
 
         /* Find the target domain. */
         if ( unlikely((e = rcu_lock_domain_by_id(gop.domid)) == NULL) )
diff -r d2a239224cb2 -r f1508348ffab xen/common/libelf/libelf-private.h
--- a/xen/common/libelf/libelf-private.h        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/common/libelf/libelf-private.h        Mon Jun 02 11:35:39 2008 +0900
@@ -43,7 +43,7 @@
 #define bswap_16(x) swap16(x)
 #define bswap_32(x) swap32(x)
 #define bswap_64(x) swap64(x)
-#elif defined(__linux__) || defined(__Linux__)
+#elif defined(__linux__) || defined(__Linux__) || defined(__MINIOS__)
 #include <byteswap.h>
 #else
 #error Unsupported OS
diff -r d2a239224cb2 -r f1508348ffab xen/common/memory.c
--- a/xen/common/memory.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/common/memory.c       Mon Jun 02 11:35:39 2008 +0900
@@ -124,12 +124,9 @@ static void populate_physmap(struct memo
         }
 
         mfn = page_to_mfn(page);
-
-        if ( unlikely(paging_mode_translate(d)) )
-        {
-            guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
-        }
-        else
+        guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+
+        if ( !paging_mode_translate(d) )
         {
             for ( j = 0; j < (1 << a->extent_order); j++ )
                 set_gpfn_from_mfn(mfn + j, gpfn + j);
@@ -436,11 +433,9 @@ static long memory_exchange(XEN_GUEST_HA
                 &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);
 
             mfn = page_to_mfn(page);
-            if ( unlikely(paging_mode_translate(d)) )
-            {
-                guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
-            }
-            else
+            guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
+
+            if ( !paging_mode_translate(d) )
             {
                 for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                     set_gpfn_from_mfn(mfn + k, gpfn + k);
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Mon Jun 02 11:35:39 2008 +0900
@@ -635,6 +635,16 @@ static void amd_iommu_return_device(
     reassign_device(s, t, bus, devfn);
 }
 
+static int amd_iommu_group_id(u8 bus, u8 devfn)
+{
+    int rt;
+    int bdf = (bus << 8) | devfn;
+    rt = ( bdf < ivrs_bdf_entries ) ?
+        ivrs_mappings[bdf].dte_requestor_id :
+        bdf;
+    return rt;
+}
+
 struct iommu_ops amd_iommu_ops = {
     .init = amd_iommu_domain_init,
     .assign_device  = amd_iommu_assign_device,
@@ -642,4 +652,5 @@ struct iommu_ops amd_iommu_ops = {
     .map_page = amd_iommu_map_page,
     .unmap_page = amd_iommu_unmap_page,
     .reassign_device = amd_iommu_return_device,
+    .get_device_group_id = amd_iommu_group_id,
 };
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/iommu.c   Mon Jun 02 11:35:39 2008 +0900
@@ -15,14 +15,20 @@
 
 #include <xen/sched.h>
 #include <xen/iommu.h>
+#include <xen/paging.h>
+#include <xen/guest_access.h>
 
 extern struct iommu_ops intel_iommu_ops;
 extern struct iommu_ops amd_iommu_ops;
+static int iommu_populate_page_table(struct domain *d);
 int intel_vtd_setup(void);
 int amd_iov_detect(void);
 
 int iommu_enabled = 1;
 boolean_param("iommu", iommu_enabled);
+
+int iommu_pv_enabled = 0;
+boolean_param("iommu_pv", iommu_pv_enabled);
 
 int iommu_domain_init(struct domain *domain)
 {
@@ -54,11 +60,46 @@ int assign_device(struct domain *d, u8 b
 int assign_device(struct domain *d, u8 bus, u8 devfn)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
-
-    if ( !iommu_enabled || !hd->platform_ops )
-        return 0;
-
-    return hd->platform_ops->assign_device(d, bus, devfn);
+    int rc;
+
+    if ( !iommu_enabled || !hd->platform_ops )
+        return 0;
+
+    if ( (rc = hd->platform_ops->assign_device(d, bus, devfn)) )
+        return rc;
+
+    if ( has_iommu_pdevs(d) && !need_iommu(d) )
+    {
+        d->need_iommu = 1;
+        return iommu_populate_page_table(d);
+    }
+    return 0;
+}
+
+static int iommu_populate_page_table(struct domain *d)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct page_info *page;
+    int rc;
+
+    spin_lock(&d->page_alloc_lock);
+
+    list_for_each_entry ( page, &d->page_list, list )
+    {
+        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
+        {
+            rc = hd->platform_ops->map_page(
+                d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page));
+            if (rc)
+            {
+                spin_unlock(&d->page_alloc_lock);
+                hd->platform_ops->teardown(d);
+                return rc;
+            }
+        }
+    }
+    spin_unlock(&d->page_alloc_lock);
+    return 0;
 }
 
 void iommu_domain_destroy(struct domain *d)
@@ -137,7 +178,13 @@ void deassign_device(struct domain *d, u
     if ( !iommu_enabled || !hd->platform_ops )
         return;
 
-    return hd->platform_ops->reassign_device(d, dom0, bus, devfn);
+    hd->platform_ops->reassign_device(d, dom0, bus, devfn);
+
+    if ( !has_iommu_pdevs(d) && need_iommu(d) )
+    {
+        d->need_iommu = 0;
+        hd->platform_ops->teardown(d);
+    }
 }
 
 static int iommu_setup(void)
@@ -160,7 +207,56 @@ static int iommu_setup(void)
     iommu_enabled = (rc == 0);
 
  out:
+    if ( !iommu_enabled || !vtd_enabled )
+        iommu_pv_enabled = 0;
     printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
+    if (iommu_enabled)
+        printk("I/O virtualisation for PV guests %sabled\n",
+               iommu_pv_enabled ? "en" : "dis");
     return rc;
 }
 __initcall(iommu_setup);
+
+int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
+    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct pci_dev *pdev;
+    int group_id, sdev_id;
+    u32 bdf;
+    int i = 0;
+    struct iommu_ops *ops = hd->platform_ops;
+
+    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
+        return 0;
+
+    group_id = ops->get_device_group_id(bus, devfn);
+
+    list_for_each_entry(pdev,
+        &(dom0->arch.hvm_domain.hvm_iommu.pdev_list), list)
+    {
+        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
+            continue;
+
+        sdev_id = ops->get_device_group_id(pdev->bus, pdev->devfn);
+        if ( (sdev_id == group_id) && (i < max_sdevs) )
+        {
+            bdf = 0;
+            bdf |= (pdev->bus & 0xff) << 16;
+            bdf |= (pdev->devfn & 0xff) << 8;
+            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
+                return -1;
+            i++;
+        }
+    }
+
+    return i;
+}
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/dmar.c        Mon Jun 02 11:35:39 2008 +0900
@@ -147,39 +147,6 @@ struct acpi_drhd_unit * acpi_find_matche
     return NULL;
 }
 
-struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev)
-{
-    struct acpi_rmrr_unit *rmrr;
-
-    list_for_each_entry ( rmrr, &acpi_rmrr_units, list )
-        if ( acpi_pci_device_match(rmrr->devices,
-                                   rmrr->devices_cnt, dev) )
-            return rmrr;
-
-    return NULL;
-}
-
-struct acpi_atsr_unit * acpi_find_matched_atsr_unit(struct pci_dev *dev)
-{
-    struct acpi_atsr_unit *atsru;
-    struct acpi_atsr_unit *all_ports_atsru;
-
-    all_ports_atsru = NULL;
-    list_for_each_entry ( atsru, &acpi_atsr_units, list )
-    {
-        if ( atsru->all_ports )
-            all_ports_atsru = atsru;
-        if ( acpi_pci_device_match(atsru->devices,
-                                   atsru->devices_cnt, dev) )
-            return atsru;
-    }
-
-    if ( all_ports_atsru )
-        return all_ports_atsru;;
-
-    return NULL;
-}
-
 static int scope_device_count(void *start, void *end)
 {
     struct acpi_dev_scope *scope;
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/dmar.h
--- a/xen/drivers/passthrough/vtd/dmar.h        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/dmar.h        Mon Jun 02 11:35:39 2008 +0900
@@ -86,7 +86,6 @@ struct acpi_atsr_unit {
     }
 
 struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev);
-struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev);
 
 #define DMAR_TYPE 1
 #define RMRR_TYPE 2
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/extern.h
--- a/xen/drivers/passthrough/vtd/extern.h      Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/extern.h      Mon Jun 02 11:35:39 2008 +0900
@@ -27,8 +27,7 @@ extern struct ir_ctrl *ir_ctrl;
 extern struct ir_ctrl *ir_ctrl;
 
 void print_iommu_regs(struct acpi_drhd_unit *drhd);
-void print_vtd_entries(struct domain *d, struct iommu *iommu,
-                       int bus, int devfn, unsigned long gmfn);
+void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn);
 void pdev_flr(u8 bus, u8 devfn);
 
 int qinval_setup(struct iommu *iommu);
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/intremap.c    Mon Jun 02 11:35:39 2008 +0900
@@ -48,14 +48,14 @@ static void remap_entry_to_ioapic_rte(
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct IO_APIC_route_remap_entry *remap_rte;
-    unsigned int index;
+    int index = 0;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    if ( ir_ctrl == NULL )
+    if ( ir_ctrl == NULL || ir_ctrl->iremap_index < 0 )
     {
         dprintk(XENLOG_ERR VTDPREFIX,
-                "remap_entry_to_ioapic_rte: ir_ctl == NULL");
+                "remap_entry_to_ioapic_rte: ir_ctl is not ready\n");
         return;
     }
 
@@ -63,11 +63,8 @@ static void remap_entry_to_ioapic_rte(
     index = (remap_rte->index_15 << 15) + remap_rte->index_0_14;
 
     if ( index > ir_ctrl->iremap_index )
-    {
-        dprintk(XENLOG_ERR VTDPREFIX,
-            "Index is larger than remap table entry size. Error!\n");
-        return;
-    }
+        panic("%s: index (%d) is larger than remap table entry size (%d)!\n",
+              __func__, index, ir_ctrl->iremap_index);
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
@@ -81,79 +78,90 @@ static void remap_entry_to_ioapic_rte(
     old_rte->trigger = iremap_entry->lo.tm;
     old_rte->__reserved_2 = 0;
     old_rte->dest.logical.__reserved_1 = 0;
-    old_rte->dest.logical.logical_dest = iremap_entry->lo.dst;
+    old_rte->dest.logical.logical_dest = iremap_entry->lo.dst >> 8;
 
     unmap_vtd_domain_page(iremap_entries);
     spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
 }
 
 static void ioapic_rte_to_remap_entry(struct iommu *iommu,
-    int apic_id, struct IO_APIC_route_entry *old_rte)
+    int apic_id, struct IO_APIC_route_entry *old_rte,
+    unsigned int rte_upper, unsigned int value)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
+    struct iremap_entry new_ire;
     struct IO_APIC_route_remap_entry *remap_rte;
-    unsigned int index;
+    struct IO_APIC_route_entry new_rte;
+    int index;
     unsigned long flags;
-    int ret = 0;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
     remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
-    index = ir_ctrl->iremap_index;
+
+    if ( remap_rte->format == 0 )
+    {
+        ir_ctrl->iremap_index++;
+        index = ir_ctrl->iremap_index;
+    }
+    else
+        index = (remap_rte->index_15 << 15) | remap_rte->index_0_14;
+
     if ( index > IREMAP_ENTRY_NR - 1 )
-    {
-        dprintk(XENLOG_ERR VTDPREFIX,
-               "The interrupt number is more than 256!\n");
-        goto out;
-    }
+        panic("ioapic_rte_to_remap_entry: intremap index is more than 256!\n");
 
     iremap_entries =
         (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
     iremap_entry = &iremap_entries[index];
 
-    if ( *(u64 *)iremap_entry != 0 )
-        dprintk(XENLOG_WARNING VTDPREFIX,
-               "Interrupt remapping entry is in use already!\n");
-    iremap_entry->lo.fpd = 0;
-    iremap_entry->lo.dm = old_rte->dest_mode;
-    iremap_entry->lo.rh = 0;
-    iremap_entry->lo.tm = old_rte->trigger;
-    iremap_entry->lo.dlm = old_rte->delivery_mode;
-    iremap_entry->lo.avail = 0;
-    iremap_entry->lo.res_1 = 0;
-    iremap_entry->lo.vector = old_rte->vector;
-    iremap_entry->lo.res_2 = 0;
-    iremap_entry->lo.dst = (old_rte->dest.logical.logical_dest << 8);
-    iremap_entry->hi.sid = apicid_to_bdf(apic_id);
-    iremap_entry->hi.sq = 0;    /* comparing all 16-bit of SID */
-    iremap_entry->hi.svt = 1;   /* turn on requestor ID verification SID/SQ */
-    iremap_entry->hi.res_1 = 0;
-    iremap_entry->lo.p = 1;    /* finally, set present bit */
-    ir_ctrl->iremap_index++;
+    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
+
+    if ( rte_upper )
+        new_ire.lo.dst = (value >> 24) << 8;
+    else
+    {
+        *(((u32 *)&new_rte) + 0) = value;
+        new_ire.lo.fpd = 0;
+        new_ire.lo.dm = new_rte.dest_mode;
+        new_ire.lo.rh = 0;
+        new_ire.lo.tm = new_rte.trigger;
+        new_ire.lo.dlm = new_rte.delivery_mode;
+        new_ire.lo.avail = 0;
+        new_ire.lo.res_1 = 0;
+        new_ire.lo.vector = new_rte.vector;
+        new_ire.lo.res_2 = 0;
+        new_ire.hi.sid = apicid_to_bdf(apic_id);
+
+        new_ire.hi.sq = 0;    /* comparing all 16-bit of SID */
+        new_ire.hi.svt = 1;   /* requestor ID verification SID/SQ */
+        new_ire.hi.res_1 = 0;
+        new_ire.lo.p = 1;     /* finally, set present bit */
+
+        /* now construct new ioapic rte entry */
+        remap_rte->vector = new_rte.vector;
+        remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
+        remap_rte->index_15 = index & 0x8000;
+        remap_rte->index_0_14 = index & 0x7fff;
+
+        remap_rte->delivery_status = new_rte.delivery_status;
+        remap_rte->polarity = new_rte.polarity;
+        remap_rte->irr = new_rte.irr;
+        remap_rte->trigger = new_rte.trigger;
+        remap_rte->mask = new_rte.mask;
+        remap_rte->reserved = 0;
+        remap_rte->format = 1;    /* indicate remap format */
+    }
+
+    memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
+    iommu_flush_iec_index(iommu, 0, index);
+    invalidate_sync(iommu);
 
     unmap_vtd_domain_page(iremap_entries);
-    iommu_flush_iec_index(iommu, 0, index);
-    ret = invalidate_sync(iommu);
-
-    /* now construct new ioapic rte entry */
-    remap_rte->vector = old_rte->vector;
-    remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
-    remap_rte->index_15 = index & 0x8000;
-    remap_rte->index_0_14 = index & 0x7fff;
-    remap_rte->delivery_status = old_rte->delivery_status;
-    remap_rte->polarity = old_rte->polarity;
-    remap_rte->irr = old_rte->irr;
-    remap_rte->trigger = old_rte->trigger;
-    remap_rte->mask = 1;
-    remap_rte->reserved = 0;
-    remap_rte->format = 1;    /* indicate remap format */
-out:
     spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
     return;
 }
 
-unsigned int
-io_apic_read_remap_rte(
+unsigned int io_apic_read_remap_rte(
     unsigned int apic, unsigned int reg)
 {
     struct IO_APIC_route_entry old_rte = { 0 };
@@ -198,15 +206,15 @@ io_apic_read_remap_rte(
     }
 }
 
-void
-io_apic_write_remap_rte(
+void io_apic_write_remap_rte(
     unsigned int apic, unsigned int reg, unsigned int value)
 {
     struct IO_APIC_route_entry old_rte = { 0 };
     struct IO_APIC_route_remap_entry *remap_rte;
-    int rte_upper = (reg & 1) ? 1 : 0;
+    unsigned int rte_upper = (reg & 1) ? 1 : 0;
     struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    int saved_mask;
 
     if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
     {
@@ -225,21 +233,192 @@ io_apic_write_remap_rte(
     *(((u32 *)&old_rte) + 1) = *(IO_APIC_BASE(apic)+4);
 
     remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
-    if ( remap_rte->mask || (remap_rte->format == 0) )
-    {
-        *IO_APIC_BASE(apic) = rte_upper ? ++reg : reg;
-        *(IO_APIC_BASE(apic)+4) = value;
-        return;
-    }
-
-    *(((u32 *)&old_rte) + rte_upper) = value;
-    ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid, &old_rte);
+
+    /* mask the interrupt while we change the intremap table */
+    saved_mask = remap_rte->mask;
+    remap_rte->mask = 1;
+    *IO_APIC_BASE(apic) = reg;
+    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
+    remap_rte->mask = saved_mask;
+
+    ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid,
+                              &old_rte, rte_upper, value);
 
     /* write new entry to ioapic */
     *IO_APIC_BASE(apic) = reg;
-    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
+    *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+0);
     *IO_APIC_BASE(apic) = reg + 1;
-    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+1);
+    *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+1);
+}
+
+static void remap_entry_to_msi_msg(
+    struct iommu *iommu, struct msi_msg *msg)
+{
+    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
+    struct msi_msg_remap_entry *remap_rte;
+    int index;
+    unsigned long flags;
+    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+
+    if ( ir_ctrl == NULL )
+    {
+        dprintk(XENLOG_ERR VTDPREFIX,
+                "remap_entry_to_msi_msg: ir_ctl == NULL");
+        return;
+    }
+
+    remap_rte = (struct msi_msg_remap_entry *) msg;
+    index = (remap_rte->address_lo.index_15 << 15) |
+            remap_rte->address_lo.index_0_14;
+
+    if ( index > ir_ctrl->iremap_index )
+        panic("%s: index (%d) is larger than remap table entry size (%d)\n",
+              __func__, index, ir_ctrl->iremap_index);
+
+    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+
+    iremap_entries =
+        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
+    iremap_entry = &iremap_entries[index];
+
+    msg->address_hi = MSI_ADDR_BASE_HI;
+    msg->address_lo =
+        MSI_ADDR_BASE_LO |
+        ((iremap_entry->lo.dm == 0) ?
+            MSI_ADDR_DESTMODE_PHYS:
+            MSI_ADDR_DESTMODE_LOGIC) |
+        ((iremap_entry->lo.dlm != dest_LowestPrio) ?
+            MSI_ADDR_REDIRECTION_CPU:
+            MSI_ADDR_REDIRECTION_LOWPRI) |
+        iremap_entry->lo.dst >> 8;
+
+    msg->data =
+        MSI_DATA_TRIGGER_EDGE |
+        MSI_DATA_LEVEL_ASSERT |
+        ((iremap_entry->lo.dlm != dest_LowestPrio) ?
+            MSI_DATA_DELIVERY_FIXED:
+            MSI_DATA_DELIVERY_LOWPRI) |
+        iremap_entry->lo.vector;
+
+    unmap_vtd_domain_page(iremap_entries);
+    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+}
+
+static void msi_msg_to_remap_entry(
+    struct iommu *iommu, struct pci_dev *pdev, struct msi_msg *msg)
+{
+    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
+    struct iremap_entry new_ire;
+    struct msi_msg_remap_entry *remap_rte;
+    unsigned int index;
+    unsigned long flags;
+    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    int i = 0;
+
+    remap_rte = (struct msi_msg_remap_entry *) msg;
+    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+
+    iremap_entries =
+        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
+
+    /* If the entry for a PCI device has been there, use the old entry,
+     * Or, assign a new entry for it.
+     */
+    for ( i = 0; i <= ir_ctrl->iremap_index; i++ )
+    {
+        iremap_entry = &iremap_entries[i];
+        if ( iremap_entry->hi.sid ==
+             ((pdev->bus << 8) | pdev->devfn) )
+           break;
+    }
+
+    if ( i > ir_ctrl->iremap_index )
+    {
+       ir_ctrl->iremap_index++;
+        index = ir_ctrl->iremap_index;
+    }
+    else
+        index = i;
+
+    if ( index > IREMAP_ENTRY_NR - 1 )
+        panic("msi_msg_to_remap_entry: intremap index is more than 256!\n");
+
+    iremap_entry = &iremap_entries[index];
+    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
+
+    /* Set interrupt remapping table entry */
+    new_ire.lo.fpd = 0;
+    new_ire.lo.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
+    new_ire.lo.rh = 0;
+    new_ire.lo.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    new_ire.lo.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
+    new_ire.lo.avail = 0;
+    new_ire.lo.res_1 = 0;
+    new_ire.lo.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
+                        MSI_DATA_VECTOR_MASK;
+    new_ire.lo.res_2 = 0;
+    new_ire.lo.dst = ((msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT)
+                      & 0xff) << 8;
+
+    new_ire.hi.sid = (pdev->bus << 8) | pdev->devfn;
+    new_ire.hi.sq = 0;
+    new_ire.hi.svt = 1;
+    new_ire.hi.res_1 = 0;
+    new_ire.lo.p = 1;    /* finally, set present bit */
+
+    /* now construct new MSI/MSI-X rte entry */
+    remap_rte->address_lo.dontcare = 0;
+    remap_rte->address_lo.index_15 = index & 0x8000;
+    remap_rte->address_lo.index_0_14 = index & 0x7fff;
+    remap_rte->address_lo.SHV = 1;
+    remap_rte->address_lo.format = 1;
+
+    remap_rte->address_hi = 0;
+    remap_rte->data = 0;
+
+    memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
+    iommu_flush_iec_index(iommu, 0, index);
+    invalidate_sync(iommu);
+
+    unmap_vtd_domain_page(iremap_entries);
+    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+    return;
+}
+
+void msi_msg_read_remap_rte(
+    struct msi_desc *msi_desc, struct msi_msg *msg)
+{
+    struct pci_dev *pdev = msi_desc->dev;
+    struct acpi_drhd_unit *drhd = NULL;
+    struct iommu *iommu = NULL;
+    struct ir_ctrl *ir_ctrl;
+
+    drhd = acpi_find_matched_drhd_unit(pdev);
+    iommu = drhd->iommu;
+
+    ir_ctrl = iommu_ir_ctrl(iommu);
+    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
+        return;
+
+    remap_entry_to_msi_msg(iommu, msg);
+}
+
+void msi_msg_write_remap_rte(
+    struct msi_desc *msi_desc, struct msi_msg *msg)
+{
+    struct pci_dev *pdev = msi_desc->dev;
+    struct acpi_drhd_unit *drhd = NULL;
+    struct iommu *iommu = NULL;
+    struct ir_ctrl *ir_ctrl;
+
+    drhd = acpi_find_matched_drhd_unit(msi_desc->dev);
+    iommu = drhd->iommu;
+
+    ir_ctrl = iommu_ir_ctrl(iommu);
+    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
+        return;
+
+    msi_msg_to_remap_entry(iommu, pdev, msg);
 }
 
 int intremap_setup(struct iommu *iommu)
@@ -260,6 +439,7 @@ int intremap_setup(struct iommu *iommu)
                     "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
             return -ENODEV;
         }
+        ir_ctrl->iremap_index = -1;
     }
 
 #if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT)
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c       Mon Jun 02 11:35:39 2008 +0900
@@ -112,28 +112,27 @@ struct iommu_flush *iommu_get_flush(stru
     return iommu ? &iommu->intel->flush : NULL;
 }
 
-unsigned int clflush_size;
-void clflush_cache_range(void *adr, int size)
+static unsigned int clflush_size;
+static int iommus_incoherent;
+static void __iommu_flush_cache(void *addr, int size)
 {
     int i;
+
+    if ( !iommus_incoherent )
+        return;
+
     for ( i = 0; i < size; i += clflush_size )
-        clflush(adr + i);
-}
-
-static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
-{
-    if ( !ecap_coherent(iommu->ecap) )
-        clflush_cache_range(addr, size);
-}
-
-void iommu_flush_cache_entry(struct iommu *iommu, void *addr)
-{
-    __iommu_flush_cache(iommu, addr, 8);
-}
-
-void iommu_flush_cache_page(struct iommu *iommu, void *addr)
-{
-    __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K);
+        clflush((char *)addr + i);
+}
+
+void iommu_flush_cache_entry(void *addr)
+{
+    __iommu_flush_cache(addr, 8);
+}
+
+void iommu_flush_cache_page(void *addr)
+{
+    __iommu_flush_cache(addr, PAGE_SIZE_4K);
 }
 
 int nr_iommus;
@@ -157,7 +156,7 @@ static u64 bus_to_context_maddr(struct i
         }
         set_root_value(*root, maddr);
         set_root_present(*root);
-        iommu_flush_cache_entry(iommu, root);
+        iommu_flush_cache_entry(root);
     }
     maddr = (u64) get_context_addr(*root);
     unmap_vtd_domain_page(root_entries);
@@ -191,30 +190,22 @@ static int device_context_mapped(struct 
     return ret;
 }
 
-static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr)
+static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
     struct dma_pte *parent, *pte = NULL;
     int level = agaw_to_level(hd->agaw);
     int offset;
     unsigned long flags;
-    u64 pte_maddr = 0;
+    u64 pte_maddr = 0, maddr;
     u64 *vaddr = NULL;
-
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
 
     addr &= (((u64)1) << addr_width) - 1;
     spin_lock_irqsave(&hd->mapping_lock, flags);
     if ( hd->pgd_maddr == 0 )
-    {
-        hd->pgd_maddr = alloc_pgtable_maddr();
-        if ( hd->pgd_maddr == 0 )
-            return 0;
-    }
+        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr()) == 0) )
+            goto out;
 
     parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
     while ( level > 1 )
@@ -224,7 +215,9 @@ static u64 addr_to_dma_page_maddr(struct
 
         if ( dma_pte_addr(*pte) == 0 )
         {
-            u64 maddr = alloc_pgtable_maddr();
+            if ( !alloc )
+                break;
+            maddr = alloc_pgtable_maddr();
             dma_set_pte_addr(*pte, maddr);
             vaddr = map_vtd_domain_page(maddr);
             if ( !vaddr )
@@ -236,7 +229,7 @@ static u64 addr_to_dma_page_maddr(struct
              */
             dma_set_pte_readable(*pte);
             dma_set_pte_writable(*pte);
-            iommu_flush_cache_entry(iommu, pte);
+            iommu_flush_cache_entry(pte);
         }
         else
         {
@@ -259,43 +252,9 @@ static u64 addr_to_dma_page_maddr(struct
     }
 
     unmap_vtd_domain_page(parent);
+ out:
     spin_unlock_irqrestore(&hd->mapping_lock, flags);
     return pte_maddr;
-}
-
-/* return address's page at specific level */
-static u64 dma_addr_level_page_maddr(
-    struct domain *domain, u64 addr, int level)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct dma_pte *parent, *pte = NULL;
-    int total = agaw_to_level(hd->agaw);
-    int offset;
-    u64 pg_maddr = hd->pgd_maddr;
-
-    if ( pg_maddr == 0 )
-        return 0;
-
-    parent = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
-    while ( level <= total )
-    {
-        offset = address_level_offset(addr, total);
-        pte = &parent[offset];
-        if ( dma_pte_addr(*pte) == 0 )
-            break;
-
-        pg_maddr = pte->val & PAGE_MASK_4K;
-        unmap_vtd_domain_page(parent);
-
-        if ( level == total )
-            return pg_maddr;
-
-        parent = map_vtd_domain_page(pte->val);
-        total--;
-    }
-
-    unmap_vtd_domain_page(parent);
-    return 0;
 }
 
 static void iommu_flush_write_buffer(struct iommu *iommu)
@@ -485,9 +444,12 @@ static int flush_iotlb_reg(void *_iommu,
     /* check IOTLB invalidation granularity */
     if ( DMA_TLB_IAIG(val) == 0 )
         printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
+
+#ifdef VTD_DEBUG
     if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
         printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
                (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
+#endif
     /* flush context entry will implictly flush write buffer */
     return 0;
 }
@@ -572,34 +534,36 @@ void iommu_flush_all(void)
 /* clear one page's page table */
 static void dma_pte_clear_one(struct domain *domain, u64 addr)
 {
+    struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     struct dma_pte *page = NULL, *pte = NULL;
     u64 pg_maddr;
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-
     /* get last level pte */
-    pg_maddr = dma_addr_level_page_maddr(domain, addr, 1);
+    pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
     if ( pg_maddr == 0 )
         return;
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
     pte = page + address_level_offset(addr, 1);
-    if ( pte )
-    {
-        dma_clear_pte(*pte);
-        iommu_flush_cache_entry(drhd->iommu, pte);
-
-        for_each_drhd_unit ( drhd )
-        {
-            iommu = drhd->iommu;
-            if ( cap_caching_mode(iommu->cap) )
-                iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
-                                      addr, 1, 0);
-            else if (cap_rwbf(iommu->cap))
-                iommu_flush_write_buffer(iommu);
-        }
-    }
+
+    if ( !dma_pte_present(*pte) )
+    {
+        unmap_vtd_domain_page(page);
+        return;
+    }
+
+    dma_clear_pte(*pte); 
+    iommu_flush_cache_entry(pte);
+
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
+        if ( test_bit(iommu->index, &hd->iommu_bitmap) )
+            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
+                                  addr, 1, 0);
+    }
+
     unmap_vtd_domain_page(page);
 }
 
@@ -626,7 +590,6 @@ static void iommu_free_next_pagetable(u6
 static void iommu_free_next_pagetable(u64 pt_maddr, unsigned long index,
                                       int level)
 {
-    struct acpi_drhd_unit *drhd;
     unsigned long next_index;
     struct dma_pte *pt_vaddr, *pde;
     int next_level;
@@ -636,50 +599,38 @@ static void iommu_free_next_pagetable(u6
 
     pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
     pde = &pt_vaddr[index];
-    if ( dma_pte_addr(*pde) != 0 )
-    {
-        next_level = level - 1;
-        if ( next_level > 1 )
-        {
-            next_index = 0;
-            do
-            {
-                iommu_free_next_pagetable(pde->val,
-                                          next_index, next_level);
-                next_index++;
-            } while ( next_index < PTE_NUM );
-        }
-
-        dma_clear_pte(*pde);
-        drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-        iommu_flush_cache_entry(drhd->iommu, pde);
-        free_pgtable_maddr(pde->val);
-        unmap_vtd_domain_page(pt_vaddr);
-    }
-    else
-        unmap_vtd_domain_page(pt_vaddr);
+    if ( dma_pte_addr(*pde) == 0 )
+        goto out;
+
+    next_level = level - 1;
+    if ( next_level > 1 )
+    {
+        for ( next_index = 0; next_index < PTE_NUM; next_index++ )
+            iommu_free_next_pagetable(pde->val, next_index, next_level);
+    }
+
+    dma_clear_pte(*pde);
+    iommu_flush_cache_entry(pde);
+    free_pgtable_maddr(pde->val);
+
+ out:
+    unmap_vtd_domain_page(pt_vaddr);
 }
 
 /* free all VT-d page tables when shut down or destroy domain. */
 static void iommu_free_pagetable(struct domain *domain)
 {
-    unsigned long index;
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    int total_level = agaw_to_level(hd->agaw);
-
-    if ( hd->pgd_maddr != 0 )
-    {
-        index = 0;
-        do
-        {
-            iommu_free_next_pagetable(hd->pgd_maddr,
-                                      index, total_level + 1);
-            index++;
-        } while ( index < PTE_NUM );
-
-        free_pgtable_maddr(hd->pgd_maddr);
-        hd->pgd_maddr = 0;
-    }
+    int i, total_level = agaw_to_level(hd->agaw);
+
+    if ( hd->pgd_maddr == 0 )
+        return;
+
+    for ( i = 0; i < PTE_NUM; i++ )
+        iommu_free_next_pagetable(hd->pgd_maddr, i, total_level + 1);
+
+    free_pgtable_maddr(hd->pgd_maddr);
+    hd->pgd_maddr = 0;
 }
 
 static int iommu_set_root_entry(struct iommu *iommu)
@@ -777,16 +728,17 @@ int iommu_disable_translation(struct iom
 
 static struct iommu *vector_to_iommu[NR_VECTORS];
 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
-                                   u8 fault_reason, u16 source_id, u32 addr)
+                                   u8 fault_reason, u16 source_id, u64 addr)
 {
     dprintk(XENLOG_WARNING VTDPREFIX,
-            "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n",
+            "iommu_fault:%s: %x:%x.%x addr %"PRIx64" REASON %x "
+            "iommu->reg = %p\n",
             (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
             PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
             fault_reason, iommu->reg);
 
     if ( fault_reason < 0x20 )
-        print_vtd_entries(current->domain, iommu, (source_id >> 8),
+        print_vtd_entries(iommu, (source_id >> 8),
                           (source_id & 0xff), (addr >> PAGE_SHIFT));
 
     return 0;
@@ -844,7 +796,8 @@ static void iommu_page_fault(int vector,
     {
         u8 fault_reason;
         u16 source_id;
-        u32 guest_addr, data;
+        u32 data;
+        u64 guest_addr;
         int type;
 
         /* highest 32 bits */
@@ -998,6 +951,8 @@ static int iommu_alloc(struct acpi_drhd_
 static int iommu_alloc(struct acpi_drhd_unit *drhd)
 {
     struct iommu *iommu;
+    unsigned long sagaw;
+    int agaw;
 
     if ( nr_iommus > MAX_IOMMUS )
     {
@@ -1020,10 +975,27 @@ static int iommu_alloc(struct acpi_drhd_
 
     set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
     iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
-    nr_iommus++;
+    iommu->index = nr_iommus++;
 
     iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
     iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
+
+    /* Calculate number of pagetable levels: between 2 and 4. */
+    sagaw = cap_sagaw(iommu->cap);
+    for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
+        if ( test_bit(agaw, &sagaw) )
+            break;
+    if ( agaw < 0 )
+    {
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                 "IOMMU: unsupported sagaw %lx\n", sagaw);
+        xfree(iommu);
+        return -ENODEV;
+    }
+    iommu->nr_pt_levels = agaw_to_level(agaw);
+
+    if ( !ecap_coherent(iommu->ecap) )
+        iommus_incoherent = 1;
 
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
@@ -1066,9 +1038,7 @@ static int intel_iommu_domain_init(struc
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
     struct iommu *iommu = NULL;
-    int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
-    int i, adjust_width, agaw;
-    unsigned long sagaw;
+    u64 i;
     struct acpi_drhd_unit *drhd;
 
     INIT_LIST_HEAD(&hd->pdev_list);
@@ -1076,28 +1046,25 @@ static int intel_iommu_domain_init(struc
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
 
-    /* Calculate AGAW. */
-    if ( guest_width > cap_mgaw(iommu->cap) )
-        guest_width = cap_mgaw(iommu->cap);
-    adjust_width = guestwidth_to_adjustwidth(guest_width);
-    agaw = width_to_agaw(adjust_width);
-    /* FIXME: hardware doesn't support it, choose a bigger one? */
-    sagaw = cap_sagaw(iommu->cap);
-    if ( !test_bit(agaw, &sagaw) )
-    {
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "IOMMU: hardware doesn't support the agaw\n");
-        agaw = find_next_bit(&sagaw, 5, agaw);
-        if ( agaw >= 5 )
-            return -ENODEV;
-    }
-    hd->agaw = agaw;
+    hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
 
     if ( d->domain_id == 0 )
     {
-        /* Set up 1:1 page table for dom0. */
+        extern int xen_in_range(paddr_t start, paddr_t end);
+        extern int tboot_in_range(paddr_t start, paddr_t end);
+
+        /* 
+         * Set up 1:1 page table for dom0 except the critical segments
+         * like Xen and tboot.
+         */
         for ( i = 0; i < max_page; i++ )
+        {
+            if ( xen_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) ||
+                 tboot_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) )
+                continue;
+
             iommu_map_page(d, i, i);
+        }
 
         setup_dom0_devices(d);
         setup_dom0_rmrr(d);
@@ -1123,7 +1090,8 @@ static int domain_context_mapping_one(
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct context_entry *context, *context_entries;
     unsigned long flags;
-    u64 maddr;
+    u64 maddr, pgd_maddr;
+    int agaw;
 
     maddr = bus_to_context_maddr(iommu, bus);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
@@ -1136,38 +1104,64 @@ static int domain_context_mapping_one(
     }
 
     spin_lock_irqsave(&iommu->lock, flags);
+
+#ifdef CONTEXT_PASSTHRU
+    if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
+        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+    else
+    {
+#endif
+        /* Ensure we have pagetables allocated down to leaf PTE. */
+        if ( hd->pgd_maddr == 0 )
+        {
+            addr_to_dma_page_maddr(domain, 0, 1);
+            if ( hd->pgd_maddr == 0 )
+            {
+            nomem:
+                unmap_vtd_domain_page(context_entries);
+                spin_unlock_irqrestore(&iommu->lock, flags);
+                return -ENOMEM;
+            }
+        }
+
+        /* Skip top levels of page tables for 2- and 3-level DRHDs. */
+        pgd_maddr = hd->pgd_maddr;
+        for ( agaw = level_to_agaw(4);
+              agaw != level_to_agaw(iommu->nr_pt_levels);
+              agaw-- )
+        {
+            struct dma_pte *p = map_vtd_domain_page(pgd_maddr);
+            pgd_maddr = dma_pte_addr(*p);
+            unmap_vtd_domain_page(p);
+            if ( pgd_maddr == 0 )
+                goto nomem;
+        }
+
+        context_set_address_root(*context, pgd_maddr);
+        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+#ifdef CONTEXT_PASSTHRU
+    }
+#endif
+
     /*
      * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
      * be 1 based as required by intel's iommu hw.
      */
     context_set_domain_id(context, domain);
-    context_set_address_width(*context, hd->agaw);
-
-    if ( ecap_pass_thru(iommu->ecap) )
-        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
-#ifdef CONTEXT_PASSTHRU
-    else
-    {
-#endif
-        ASSERT(hd->pgd_maddr != 0);
-        context_set_address_root(*context, hd->pgd_maddr);
-        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
-#ifdef CONTEXT_PASSTHRU
-    }
-#endif
-
+    context_set_address_width(*context, agaw);
     context_set_fault_enable(*context);
     context_set_present(*context);
-    iommu_flush_cache_entry(iommu, context);
+    iommu_flush_cache_entry(context);
 
     unmap_vtd_domain_page(context_entries);
 
-    if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
-                                    (((u16)bus) << 8) | devfn,
-                                    DMA_CCMD_MASK_NOBIT, 1) )
+    /* Context entry was previously non-present (with domid 0). */
+    iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
+                               DMA_CCMD_MASK_NOBIT, 1);
+    if ( iommu_flush_iotlb_dsi(iommu, 0, 1) )
         iommu_flush_write_buffer(iommu);
-    else
-        iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
+
+    set_bit(iommu->index, &hd->iommu_bitmap);
     spin_unlock_irqrestore(&iommu->lock, flags);
 
     return 0;
@@ -1314,7 +1308,7 @@ static int domain_context_unmap_one(
     spin_lock_irqsave(&iommu->lock, flags);
     context_clear_present(*context);
     context_clear_entry(*context);
-    iommu_flush_cache_entry(iommu, context);
+    iommu_flush_cache_entry(context);
     iommu_flush_context_global(iommu, 0);
     iommu_flush_iotlb_global(iommu, 0);
     unmap_vtd_domain_page(context_entries);
@@ -1395,11 +1389,12 @@ void reassign_device_ownership(
 {
     struct hvm_iommu *source_hd = domain_hvm_iommu(source);
     struct hvm_iommu *target_hd = domain_hvm_iommu(target);
-    struct pci_dev *pdev;
+    struct pci_dev *pdev, *pdev2;
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     int status;
     unsigned long flags;
+    int found = 0;
 
     pdev_flr(bus, devfn);
 
@@ -1420,6 +1415,18 @@ void reassign_device_ownership(
     list_move(&pdev->list, &target_hd->pdev_list);
     spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
     spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
+
+    for_each_pdev ( source, pdev2 )
+    {
+        drhd = acpi_find_matched_drhd_unit(pdev2);
+        if ( drhd->iommu == iommu )
+        {
+            found = 1;
+            break;
+        }
+    }
+    if ( !found )
+        clear_bit(iommu->index, &source_hd->iommu_bitmap);
 
     status = domain_context_mapping(target, iommu, pdev);
     if ( status != 0 )
@@ -1477,13 +1484,12 @@ int intel_iommu_map_page(
 int intel_iommu_map_page(
     struct domain *d, unsigned long gfn, unsigned long mfn)
 {
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     struct dma_pte *page = NULL, *pte = NULL;
     u64 pg_maddr;
-
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
+    int pte_present;
 
 #ifdef CONTEXT_PASSTHRU
     /* do nothing if dom0 and iommu supports pass thru */
@@ -1491,23 +1497,27 @@ int intel_iommu_map_page(
         return 0;
 #endif
 
-    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K);
+    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
     if ( pg_maddr == 0 )
         return -ENOMEM;
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
     pte = page + (gfn & LEVEL_MASK);
+    pte_present = dma_pte_present(*pte);
     dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
-    iommu_flush_cache_entry(iommu, pte);
+    iommu_flush_cache_entry(pte);
     unmap_vtd_domain_page(page);
 
     for_each_drhd_unit ( drhd )
     {
         iommu = drhd->iommu;
-        if ( cap_caching_mode(iommu->cap) )
-            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
-                                  (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
-        else if ( cap_rwbf(iommu->cap) )
+
+        if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
+            continue;
+
+        if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
+                                   (paddr_t)gfn << PAGE_SHIFT_4K, 1,
+                                   !pte_present) )
             iommu_flush_write_buffer(iommu);
     }
 
@@ -1536,6 +1546,7 @@ int iommu_page_mapping(struct domain *do
 int iommu_page_mapping(struct domain *domain, paddr_t iova,
                        paddr_t hpa, size_t size, int prot)
 {
+    struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     u64 start_pfn, end_pfn;
@@ -1543,24 +1554,23 @@ int iommu_page_mapping(struct domain *do
     int index;
     u64 pg_maddr;
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
     if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
         return -EINVAL;
+
     iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
     start_pfn = hpa >> PAGE_SHIFT_4K;
     end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
     index = 0;
     while ( start_pfn < end_pfn )
     {
-        pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K * index);
+        pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K*index, 1);
         if ( pg_maddr == 0 )
             return -ENOMEM;
         page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
         pte = page + (start_pfn & LEVEL_MASK);
         dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
-        iommu_flush_cache_entry(iommu, pte);
+        iommu_flush_cache_entry(pte);
         unmap_vtd_domain_page(page);
         start_pfn++;
         index++;
@@ -1569,10 +1579,12 @@ int iommu_page_mapping(struct domain *do
     for_each_drhd_unit ( drhd )
     {
         iommu = drhd->iommu;
-        if ( cap_caching_mode(iommu->cap) )
-            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
-                                  iova, index, 0);
-        else if ( cap_rwbf(iommu->cap) )
+
+        if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
+            continue;
+
+        if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
+                                   iova, index, 1) )
             iommu_flush_write_buffer(iommu);
     }
 
@@ -1584,25 +1596,6 @@ int iommu_page_unmapping(struct domain *
     dma_pte_clear_range(domain, addr, addr + size);
 
     return 0;
-}
-
-void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry)
-{
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu = NULL;
-    struct dma_pte *pte = (struct dma_pte *) p2m_entry;
-
-    for_each_drhd_unit ( drhd )
-    {
-        iommu = drhd->iommu;
-        if ( cap_caching_mode(iommu->cap) )
-            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
-                                  (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
-        else if ( cap_rwbf(iommu->cap) )
-            iommu_flush_write_buffer(iommu);
-    }
-
-    iommu_flush_cache_entry(iommu, pte);
 }
 
 static int iommu_prepare_rmrr_dev(
@@ -1916,6 +1909,7 @@ struct iommu_ops intel_iommu_ops = {
     .map_page = intel_iommu_map_page,
     .unmap_page = intel_iommu_unmap_page,
     .reassign_device = reassign_device_ownership,
+    .get_device_group_id = NULL,
 };
 
 /*
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.h       Mon Jun 02 11:35:39 2008 +0900
@@ -236,6 +236,7 @@ struct context_entry {
 #define LEVEL_STRIDE       (9)
 #define LEVEL_MASK         ((1 << LEVEL_STRIDE) - 1)
 #define PTE_NUM            (1 << LEVEL_STRIDE)
+#define level_to_agaw(val) ((val) - 2)
 #define agaw_to_level(val) ((val) + 2)
 #define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
 #define width_to_agaw(w)   ((w - 30)/LEVEL_STRIDE)
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/utils.c
--- a/xen/drivers/passthrough/vtd/utils.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/utils.c       Mon Jun 02 11:35:39 2008 +0900
@@ -213,109 +213,97 @@ u32 get_level_index(unsigned long gmfn, 
     return gmfn & LEVEL_MASK;
 }
 
-void print_vtd_entries(
-    struct domain *d,
-    struct iommu *iommu,
-    int bus, int devfn,
-    unsigned long gmfn)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(d);
-    struct acpi_drhd_unit *drhd;
+void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn)
+{
     struct context_entry *ctxt_entry;
     struct root_entry *root_entry;
     struct dma_pte pte;
     u64 *l;
-    u32 l_index;
-    u32 i = 0;
-    int level = agaw_to_level(hd->agaw);
-
-    printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x gmfn = %lx\n",
-           d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);
-
-    if ( hd->pgd_maddr == 0 )
-    {
-        printk("    hd->pgd_maddr == 0\n");
-        return;
-    }
-    printk("    hd->pgd_maddr = %"PRIx64"\n", hd->pgd_maddr);
-
-    for_each_drhd_unit ( drhd )
-    {
-        printk("---- print_vtd_entries %d ----\n", i++);
-
-        if ( iommu->root_maddr == 0 )
-        {
-            printk("    iommu->root_maddr = 0\n");
-            continue;
-        }
-
-        root_entry =
-            (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+    u32 l_index, level;
+
+    printk("print_vtd_entries: iommu = %p bdf = %x:%x:%x gmfn = %"PRIx64"\n",
+           iommu, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);
+
+    if ( iommu->root_maddr == 0 )
+    {
+        printk("    iommu->root_maddr = 0\n");
+        return;
+    }
+
+    root_entry = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
  
-        printk("    root_entry = %p\n", root_entry);
-        printk("    root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
-        if ( !root_present(root_entry[bus]) )
-        {
-            unmap_vtd_domain_page(root_entry);
-            printk("    root_entry[%x] not present\n", bus);
-            continue;
-        }
-
-        ctxt_entry =
-            (struct context_entry *)map_vtd_domain_page(root_entry[bus].val);
-        if ( ctxt_entry == NULL )
-        {
-            unmap_vtd_domain_page(root_entry);
-            printk("    ctxt_entry == NULL\n");
-            continue;
-        }
-
-        printk("    context = %p\n", ctxt_entry);
-        printk("    context[%x] = %"PRIx64" %"PRIx64"\n",
-               devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
-        if ( !context_present(ctxt_entry[devfn]) )
+    printk("    root_entry = %p\n", root_entry);
+    printk("    root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
+    if ( !root_present(root_entry[bus]) )
+    {
+        unmap_vtd_domain_page(root_entry);
+        printk("    root_entry[%x] not present\n", bus);
+        return;
+    }
+
+    ctxt_entry =
+        (struct context_entry *)map_vtd_domain_page(root_entry[bus].val);
+    if ( ctxt_entry == NULL )
+    {
+        unmap_vtd_domain_page(root_entry);
+        printk("    ctxt_entry == NULL\n");
+        return;
+    }
+
+    printk("    context = %p\n", ctxt_entry);
+    printk("    context[%x] = %"PRIx64"_%"PRIx64"\n",
+           devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
+    if ( !context_present(ctxt_entry[devfn]) )
+    {
+        unmap_vtd_domain_page(ctxt_entry);
+        unmap_vtd_domain_page(root_entry);
+        printk("    ctxt_entry[%x] not present\n", devfn);
+        return;
+    }
+
+    level = agaw_to_level(context_address_width(ctxt_entry[devfn]));
+    if ( level != VTD_PAGE_TABLE_LEVEL_3 && level != VTD_PAGE_TABLE_LEVEL_4 )
+    {
+        unmap_vtd_domain_page(ctxt_entry);
+        unmap_vtd_domain_page(root_entry);
+        printk("Unsupported VTD page table level (%d)!\n", level);
+        return; /* both pages are unmapped: must not walk ctxt_entry below */
+    }
+
+    l = maddr_to_virt(ctxt_entry[devfn].lo);
+    do
+    {
+        l = (u64*)(((unsigned long)l >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
+        printk("    l%d = %p\n", level, l);
+        if ( l == NULL )
         {
             unmap_vtd_domain_page(ctxt_entry);
             unmap_vtd_domain_page(root_entry);
-            printk("    ctxt_entry[%x] not present\n", devfn);
-            continue;
-        }
-
-        if ( level != VTD_PAGE_TABLE_LEVEL_3 &&
-             level != VTD_PAGE_TABLE_LEVEL_4)
+            printk("    l%d == NULL\n", level);
+            break;
+        }
+        l_index = get_level_index(gmfn, level);
+        printk("    l%d_index = %x\n", level, l_index);
+        printk("    l%d[%x] = %"PRIx64"\n", level, l_index, l[l_index]);
+
+        pte.val = l[l_index];
+        if ( !dma_pte_present(pte) )
         {
             unmap_vtd_domain_page(ctxt_entry);
             unmap_vtd_domain_page(root_entry);
-            printk("Unsupported VTD page table level (%d)!\n", level);
-            continue;
-        }
-
-        l = maddr_to_virt(ctxt_entry[devfn].lo);
-        do
-        {
-            l = (u64*)(((unsigned long)l >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
-            printk("    l%d = %p\n", level, l);
-            if ( l == NULL )
-            {
-                unmap_vtd_domain_page(ctxt_entry);
-                unmap_vtd_domain_page(root_entry);
-                printk("    l%d == NULL\n", level);
-                break;
-            }
-            l_index = get_level_index(gmfn, level);
-            printk("    l%d_index = %x\n", level, l_index);
-            printk("    l%d[%x] = %"PRIx64"\n", level, l_index, l[l_index]);
-
-            pte.val = l[l_index];
-            if ( !dma_pte_present(pte) )
-            {
-                unmap_vtd_domain_page(ctxt_entry);
-                unmap_vtd_domain_page(root_entry);
-                printk("    l%d[%x] not present\n", level, l_index);
-                break;
-            }
-
-            l = maddr_to_virt(l[l_index]);
-        } while ( --level );
-    }
-}
+            printk("    l%d[%x] not present\n", level, l_index);
+            break;
+        }
+
+        l = maddr_to_virt(l[l_index]);
+    } while ( --level );
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/vtd.h
--- a/xen/drivers/passthrough/vtd/vtd.h Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/vtd.h Mon Jun 02 11:35:39 2008 +0900
@@ -42,13 +42,31 @@ struct IO_APIC_route_remap_entry {
     };
 };
 
+struct msi_msg_remap_entry { /* MSI message: address_lo, address_hi, data */
+    union {
+        u32 val;                /* all of address_lo as one 32-bit value */
+        struct {
+            u32 dontcare:2,
+                index_15:1,     /* 1 bit wide: keeps only bit 0 of a value */
+                SHV:1,          /* NOTE(review): SubHandle Valid? confirm */
+                format:1,       /* NOTE(review): 1 = remappable? confirm */
+                index_0_14:15,  /* 15 bits wide */
+                addr_id_val:12; /* Interrupt address identifier value,
+                                   must be 0FEEh */
+        };
+    } address_lo;   /* low 32 bits of msi message address */
+
+    u32        address_hi;     /* high 32 bits of msi message address */
+    u32        data;           /* msi message data */
+};
+
 unsigned int get_clflush_size(void);
 u64 alloc_pgtable_maddr(void);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
 void unmap_vtd_domain_page(void *va);
 
-void iommu_flush_cache_entry(struct iommu *iommu, void *addr);
-void iommu_flush_cache_page(struct iommu *iommu, void *addr);
+void iommu_flush_cache_entry(void *addr);
+void iommu_flush_cache_page(void *addr);
 
 #endif // _VTD_H_
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c     Mon Jun 02 11:35:39 2008 +0900
@@ -41,8 +41,6 @@ u64 alloc_pgtable_maddr(void)
 {
     struct page_info *pg;
     u64 *vaddr;
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
 
     pg = alloc_domheap_page(NULL, 0);
     vaddr = map_domain_page(page_to_mfn(pg));
@@ -50,9 +48,7 @@ u64 alloc_pgtable_maddr(void)
         return 0;
     memset(vaddr, 0, PAGE_SIZE);
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
-    iommu_flush_cache_page(iommu, vaddr);
+    iommu_flush_cache_page(vaddr);
     unmap_domain_page(vaddr);
 
     return page_to_maddr(pg);
@@ -123,181 +119,3 @@ void hvm_dpci_isairq_eoi(struct domain *
         }
     }
 }
-
-void iommu_set_pgd(struct domain *d)
-{
-    struct hvm_iommu *hd  = domain_hvm_iommu(d);
-    unsigned long p2m_table;
-
-    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
-
-    if ( paging_mode_hap(d) )
-    {
-        int level = agaw_to_level(hd->agaw);
-        struct dma_pte *dpte = NULL;
-        mfn_t pgd_mfn;
-
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:
-            dpte = map_domain_page(p2m_table);
-            if ( !dma_pte_present(*dpte) )
-            {
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "iommu_set_pgd: second level wasn't there\n");
-                unmap_domain_page(dpte);
-                return;
-            }
-            pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
-            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
-            unmap_domain_page(dpte);
-            break;
-        case VTD_PAGE_TABLE_LEVEL_4:
-            pgd_mfn = _mfn(p2m_table);
-            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-    }
-    else
-    {
-#if CONFIG_PAGING_LEVELS == 3
-        struct dma_pte *pte = NULL, *pgd_vaddr = NULL, *pmd_vaddr = NULL;
-        int i;
-        u64 pmd_maddr;
-        unsigned long flags;
-        l3_pgentry_t *l3e;
-        int level = agaw_to_level(hd->agaw);
-
-        spin_lock_irqsave(&hd->mapping_lock, flags);
-        hd->pgd_maddr = alloc_pgtable_maddr();
-        if ( hd->pgd_maddr == 0 )
-        {
-            spin_unlock_irqrestore(&hd->mapping_lock, flags);
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "Allocate pgd memory failed!\n");
-            return;
-        }
-
-        pgd_vaddr = map_vtd_domain_page(hd->pgd_maddr);
-        l3e = map_domain_page(p2m_table);
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
-            /* We only support 8 entries for the PAE L3 p2m table */
-            for ( i = 0; i < 8 ; i++ )
-            {
-                /* Don't create new L2 entry, use ones from p2m table */
-                pgd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
-            }
-            break;
-
-        case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
-            /* We allocate one more page for the top vtd page table. */
-            pmd_maddr = alloc_pgtable_maddr();
-            if ( pmd_maddr == 0 )
-            {
-                unmap_vtd_domain_page(pgd_vaddr);
-                unmap_domain_page(l3e);
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "Allocate pmd memory failed!\n");
-                return;
-            }
-
-            pte = &pgd_vaddr[0];
-            dma_set_pte_addr(*pte, pmd_maddr);
-            dma_set_pte_readable(*pte);
-            dma_set_pte_writable(*pte);
-
-            pmd_vaddr = map_vtd_domain_page(pmd_maddr);
-            for ( i = 0; i < 8; i++ )
-            {
-                /* Don't create new L2 entry, use ones from p2m table */
-                pmd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
-            }
-
-            unmap_vtd_domain_page(pmd_vaddr);
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-
-        unmap_vtd_domain_page(pgd_vaddr);
-        unmap_domain_page(l3e);
-        spin_unlock_irqrestore(&hd->mapping_lock, flags);
-
-#elif CONFIG_PAGING_LEVELS == 4
-        mfn_t pgd_mfn;
-        l3_pgentry_t *l3e;
-        int level = agaw_to_level(hd->agaw);
-
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:
-            l3e = map_domain_page(p2m_table);
-            if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
-            {
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "iommu_set_pgd: second level wasn't there\n");
-                unmap_domain_page(l3e);
-                return;
-            }
-
-            pgd_mfn = _mfn(l3e_get_pfn(*l3e));
-            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
-            unmap_domain_page(l3e);
-            break;
-        case VTD_PAGE_TABLE_LEVEL_4:
-            pgd_mfn = _mfn(p2m_table);
-            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-#endif
-    }
-}
-
-void iommu_free_pgd(struct domain *d)
-{
-#if CONFIG_PAGING_LEVELS == 3
-    struct hvm_iommu *hd  = domain_hvm_iommu(d);
-    int level = agaw_to_level(hd->agaw);
-    struct dma_pte *pgd_vaddr = NULL;
-
-    switch ( level )
-    {
-    case VTD_PAGE_TABLE_LEVEL_3:
-        if ( hd->pgd_maddr != 0 )
-        {
-            free_pgtable_maddr(hd->pgd_maddr);
-            hd->pgd_maddr = 0;
-        }
-        break;
-    case VTD_PAGE_TABLE_LEVEL_4:
-        if ( hd->pgd_maddr != 0 )
-        {
-            pgd_vaddr = (struct dma_pte*)map_vtd_domain_page(hd->pgd_maddr);
-            if ( pgd_vaddr[0].val != 0 )
-                free_pgtable_maddr(pgd_vaddr[0].val);
-            unmap_vtd_domain_page(pgd_vaddr);
-            free_pgtable_maddr(hd->pgd_maddr);
-            hd->pgd_maddr = 0;
-        }
-        break;
-    default:
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "Unsupported p2m table sharing level!\n");
-        break;
-    }
-#endif
-}
-
diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/asm-x86/hvm/hvm.h     Mon Jun 02 11:35:39 2008 +0900
@@ -147,8 +147,10 @@ void hvm_send_assist_req(struct vcpu *v)
 
 void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc);
 u64 hvm_get_guest_tsc(struct vcpu *v);
-#define hvm_set_guest_time(vcpu, gtime) hvm_set_guest_tsc(vcpu, gtime)
-#define hvm_get_guest_time(vcpu)        hvm_get_guest_tsc(vcpu)
+
+void hvm_init_guest_time(struct domain *d);
+void hvm_set_guest_time(struct vcpu *v, u64 guest_time);
+u64 hvm_get_guest_time(struct vcpu *v);
 
 #define hvm_paging_enabled(v) \
     (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG))
diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/asm-x86/hvm/vcpu.h    Mon Jun 02 11:35:39 2008 +0900
@@ -68,6 +68,9 @@ struct hvm_vcpu {
     struct mtrr_state   mtrr;
     u64                 pat_cr;
 
+    /* In mode delay_for_missed_ticks, VCPUs have differing guest times. */
+    int64_t             stime_offset;
+
     /* Which cache mode is this VCPU in (CR0:CD/NW)? */
     u8                  cache_mode;
 
diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jun 02 11:35:39 2008 +0900
@@ -49,7 +49,6 @@ void vmx_asm_do_vmentry(void);
 void vmx_asm_do_vmentry(void);
 void vmx_intr_assist(void);
 void vmx_do_resume(struct vcpu *);
-void set_guest_time(struct vcpu *v, u64 gtime);
 void vmx_vlapic_msr_changed(struct vcpu *v);
 void vmx_realmode(struct cpu_user_regs *regs);
 
diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/asm-x86/hvm/vpt.h     Mon Jun 02 11:35:39 2008 +0900
@@ -57,7 +57,7 @@ typedef struct HPETState {
 typedef struct HPETState {
     struct hpet_registers hpet;
     struct vcpu *vcpu;
-    uint64_t tsc_freq;
+    uint64_t stime_freq;
     uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
     uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns      */
     uint64_t mc_offset;
@@ -137,6 +137,11 @@ struct pl_time {    /* platform time */
     struct RTCState  vrtc;
     struct HPETState vhpet;
     struct PMTState  vpmt;
+    /* guest_time = Xen sys time + stime_offset */
+    int64_t stime_offset;
+    /* Ensures monotonicity in appropriate timer modes. */
+    uint64_t last_guest_time;
+    spinlock_t pl_time_lock;
 };
 
 #define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency)
diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/tboot.h
--- a/xen/include/asm-x86/tboot.h       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/asm-x86/tboot.h       Mon Jun 02 11:35:39 2008 +0900
@@ -46,7 +46,15 @@ typedef struct __attribute__ ((__packed_
 } uuid_t;
 
 /* used to communicate between tboot and the launched kernel (i.e. Xen) */
-#define MAX_TB_ACPI_SINFO_SIZE   64
+
+typedef struct __attribute__ ((__packed__)) {
+    uint16_t pm1a_cnt;
+    uint16_t pm1b_cnt;
+    uint16_t pm1a_evt;
+    uint16_t pm1b_evt;
+    uint16_t pm1a_cnt_val;
+    uint16_t pm1b_cnt_val;
+} tboot_acpi_sleep_info;
 
 typedef struct __attribute__ ((__packed__)) {
     /* version 0x01+ fields: */
@@ -58,8 +66,9 @@ typedef struct __attribute__ ((__packed_
     uint32_t  shutdown_type;     /* type of shutdown (TB_SHUTDOWN_*) */
     uint32_t  s3_tb_wakeup_entry;/* entry point for tboot s3 wake up */
     uint32_t  s3_k_wakeup_entry; /* entry point for xen s3 wake up */
-    uint8_t   acpi_sinfo[MAX_TB_ACPI_SINFO_SIZE];
-                                 /* where kernel put acpi sleep info in Sx */
+    tboot_acpi_sleep_info
+              acpi_sinfo;        /* where kernel put acpi sleep info in Sx */
+    uint8_t   reserved[52];      /* this pad is for compat with old field */
     /* version 0x02+ fields: */
     uint32_t  tboot_base;        /* starting addr for tboot */
     uint32_t  tboot_size;        /* size of tboot */
diff -r d2a239224cb2 -r f1508348ffab xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/public/domctl.h       Mon Jun 02 11:35:39 2008 +0900
@@ -448,6 +448,16 @@ typedef struct xen_domctl_assign_device 
 typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
 
+/* Retrieve sibling devices information of machine_bdf */
+#define XEN_DOMCTL_get_device_group 50
+struct xen_domctl_get_device_group {
+    uint32_t  machine_bdf;      /* IN */
+    uint32_t  max_sdevs;        /* IN */
+    uint32_t  num_sdevs;        /* OUT */
+    XEN_GUEST_HANDLE_64(uint32)  sdev_array;   /* OUT */
+};
+typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
 
 /* Pass-through interrupts: bind real irq -> hvm devfn. */
 #define XEN_DOMCTL_bind_pt_irq       38
@@ -619,6 +629,7 @@ struct xen_domctl {
         struct xen_domctl_hvmcontext        hvmcontext;
         struct xen_domctl_address_size      address_size;
         struct xen_domctl_sendtrigger       sendtrigger;
+        struct xen_domctl_get_device_group  get_device_group;
         struct xen_domctl_assign_device     assign_device;
         struct xen_domctl_bind_pt_irq       bind_pt_irq;
         struct xen_domctl_memory_mapping    memory_mapping;
diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/elfcore.h
--- a/xen/include/xen/elfcore.h Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/xen/elfcore.h Mon Jun 02 11:35:39 2008 +0900
@@ -66,6 +66,7 @@ typedef struct {
     unsigned long xen_compile_time;
     unsigned long tainted;
 #if defined(__i386__) || defined(__x86_64__)
+    unsigned long xen_phys_start;
     unsigned long dom0_pfn_to_mfn_frame_list_list;
 #endif
 #if defined(__ia64__)
diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/hvm/iommu.h
--- a/xen/include/xen/hvm/iommu.h       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/xen/hvm/iommu.h       Mon Jun 02 11:35:39 2008 +0900
@@ -43,6 +43,7 @@ struct hvm_iommu {
     int agaw;     /* adjusted guest address width, 0 is level 2 30-bit */
     struct list_head g2m_ioport_list;  /* guest to machine ioport mapping */
     domid_t iommu_domid;           /* domain id stored in iommu */
+    u64 iommu_bitmap;              /* bitmap of iommu(s) that the domain uses */
 
     /* amd iommu support */
     int domain_id;
@@ -54,4 +55,7 @@ struct hvm_iommu {
     struct iommu_ops *platform_ops;
 };
 
+#define has_iommu_pdevs(domain) \
+    (!list_empty(&(domain->arch.hvm_domain.hvm_iommu.pdev_list)))
+
 #endif /* __ASM_X86_HVM_IOMMU_H__ */
diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/xen/iommu.h   Mon Jun 02 11:35:39 2008 +0900
@@ -29,6 +29,7 @@
 
 extern int vtd_enabled;
 extern int iommu_enabled;
+extern int iommu_pv_enabled;
 
 #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
 #define domain_vmx_iommu(d)     (&d->arch.hvm_domain.hvm_iommu.vmx_iommu)
@@ -43,7 +44,9 @@ struct iommu {
 struct iommu {
     struct list_head list;
     void __iomem *reg; /* Pointer to hardware regs, virtual addr */
+    u32        index;         /* Sequence number of iommu */
     u32        gcmd;          /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+    u32 nr_pt_levels;
     u64        cap;
     u64        ecap;
     spinlock_t lock; /* protect context, domain ids */
@@ -58,14 +61,13 @@ int device_assigned(u8 bus, u8 devfn);
 int device_assigned(u8 bus, u8 devfn);
 int assign_device(struct domain *d, u8 bus, u8 devfn);
 void deassign_device(struct domain *d, u8 bus, u8 devfn);
+int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
+    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
 void reassign_device_ownership(struct domain *source,
                                struct domain *target,
                                u8 bus, u8 devfn);
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
-void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry);
-void iommu_set_pgd(struct domain *d);
-void iommu_free_pgd(struct domain *d);
 void iommu_domain_teardown(struct domain *d);
 int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
 int dpci_ioport_intercept(ioreq_t *p);
@@ -76,6 +78,11 @@ unsigned int io_apic_read_remap_rte(unsi
 unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg);
 void io_apic_write_remap_rte(unsigned int apic,
                              unsigned int reg, unsigned int value);
+
+struct msi_desc;
+struct msi_msg;
+void msi_msg_read_remap_rte(struct msi_desc *msi_desc, struct msi_msg *msg);
+void msi_msg_write_remap_rte(struct msi_desc *msi_desc, struct msi_msg *msg);
 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu);
 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu);
 struct iommu_flush *iommu_get_flush(struct iommu *iommu);
@@ -94,6 +101,7 @@ struct iommu_ops {
     int (*unmap_page)(struct domain *d, unsigned long gfn);
     void (*reassign_device)(struct domain *s, struct domain *t,
                             u8 bus, u8 devfn);
+    int (*get_device_group_id)(u8 bus, u8 devfn);
 };
 
 #endif /* _IOMMU_H_ */
diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/xen/sched.h   Mon Jun 02 11:35:39 2008 +0900
@@ -186,6 +186,8 @@ struct domain
 
     /* Is this an HVM guest? */
     bool_t           is_hvm;
+    /* Does this guest need iommu mappings? */
+    bool_t           need_iommu;
     /* Is this guest fully privileged (aka dom0)? */
     bool_t           is_privileged;
     /* Which guest this guest has privileges on */
@@ -515,6 +517,7 @@ static inline void vcpu_unblock(struct v
 
 #define is_hvm_domain(d) ((d)->is_hvm)
 #define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
+#define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)
 
 extern enum cpufreq_controller {
     FREQCTL_none, FREQCTL_dom0_kernel
diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/time.h
--- a/xen/include/xen/time.h    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/xen/time.h    Mon Jun 02 11:35:39 2008 +0900
@@ -47,6 +47,7 @@ struct tm {
 };
 struct tm gmtime(unsigned long t);
 
+#define SYSTEM_TIME_HZ  1000000000ULL
 #define NOW()           ((s_time_t)get_s_time())
 #define SECONDS(_s)     ((s_time_t)((_s)  * 1000000000ULL))
 #define MILLISECS(_ms)  ((s_time_t)((_ms) * 1000000ULL))
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
 |