WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 02 Jun 2008 04:40:11 -0700
Delivery-date: Mon, 02 Jun 2008 04:40:39 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1212374139 -32400
# Node ID f1508348ffabedf2eab0b666a5b8e2c9515e52d7
# Parent  d2a239224cb23f5a7a8059e4167c50bd3abeb9df
# Parent  85fa199b4b7bd1e7511ce7fc2361bae808c27ba6
merge with xen-unstable.hg
---
 extras/mini-os/main-caml.c                         |   42 --
 tools/examples/xend-config-xenapi.sxp              |  196 ---------
 tools/examples/xm-config-xenapi.xml                |   41 --
 extras/mini-os/Config.mk                           |   13 
 extras/mini-os/Makefile                            |   35 -
 extras/mini-os/arch/ia64/minios-ia64.lds           |   17 
 extras/mini-os/arch/ia64/mm.c                      |    8 
 extras/mini-os/arch/x86/minios-x86_32.lds          |   18 
 extras/mini-os/arch/x86/minios-x86_64.lds          |   18 
 extras/mini-os/arch/x86/mm.c                       |   17 
 extras/mini-os/hypervisor.c                        |    4 
 extras/mini-os/include/lib.h                       |    1 
 extras/mini-os/include/mm.h                        |    3 
 extras/mini-os/include/posix/pthread.h             |   52 ++
 extras/mini-os/include/x86/arch_mm.h               |    5 
 extras/mini-os/lib/sys.c                           |   67 +--
 extras/mini-os/lib/xmalloc.c                       |    2 
 extras/mini-os/main.c                              |   12 
 extras/mini-os/mm.c                                |   38 -
 extras/mini-os/sched.c                             |    2 
 stubdom/Makefile                                   |   33 -
 stubdom/c/Makefile                                 |    7 
 stubdom/c/main.c                                   |    2 
 stubdom/caml/Makefile                              |   10 
 stubdom/caml/main-caml.c                           |   42 ++
 tools/examples/Makefile                            |    2 
 tools/examples/xend-config.sxp                     |   16 
 tools/firmware/hvmloader/util.c                    |   12 
 tools/firmware/rombios/rombios.c                   |   26 -
 tools/ioemu/Makefile.target                        |    7 
 tools/ioemu/hw/cirrus_vga.c                        |    2 
 tools/ioemu/vl.c                                   |    5 
 tools/ioemu/vl.h                                   |    2 
 tools/ioemu/xenstore.c                             |    2 
 tools/libxc/Makefile                               |    8 
 tools/libxc/xc_core.c                              |   18 
 tools/libxc/xc_domain.c                            |   31 +
 tools/libxc/xc_minios.c                            |   17 
 tools/libxc/xenctrl.h                              |    7 
 tools/libxc/xg_private.c                           |   16 
 tools/python/xen/lowlevel/xc/xc.c                  |  230 +++++++++--
 tools/python/xen/xend/XendAPI.py                   |    6 
 tools/python/xen/xend/XendDomain.py                |   31 -
 tools/python/xen/xend/XendDomainInfo.py            |   22 -
 tools/python/xen/xend/XendOptions.py               |   27 +
 tools/python/xen/xend/server/SrvDomain.py          |   13 
 tools/python/xen/xend/server/pciif.py              |   52 ++
 tools/python/xen/xend/server/relocate.py           |   24 -
 tools/python/xen/xm/migrate.py                     |   10 
 tools/xenstat/libxenstat/src/xenstat.c             |   18 
 unmodified_drivers/linux-2.6/platform-pci/evtchn.c |    2 
 xen/arch/x86/acpi/cpu_idle.c                       |    2 
 xen/arch/x86/acpi/power.c                          |   14 
 xen/arch/x86/cpu/amd.c                             |    8 
 xen/arch/x86/crash.c                               |    1 
 xen/arch/x86/domain.c                              |   30 -
 xen/arch/x86/domctl.c                              |   63 ++-
 xen/arch/x86/hvm/hpet.c                            |   18 
 xen/arch/x86/hvm/hvm.c                             |  158 ++++---
 xen/arch/x86/hvm/i8254.c                           |   26 -
 xen/arch/x86/hvm/pmtimer.c                         |    2 
 xen/arch/x86/hvm/svm/svm.c                         |    4 
 xen/arch/x86/hvm/vlapic.c                          |   24 -
 xen/arch/x86/hvm/vmx/vmx.c                         |    4 
 xen/arch/x86/hvm/vpt.c                             |   35 +
 xen/arch/x86/mm.c                                  |   14 
 xen/arch/x86/mm/hap/p2m-ept.c                      |    6 
 xen/arch/x86/mm/p2m.c                              |   21 -
 xen/arch/x86/mm/shadow/common.c                    |  119 +++---
 xen/arch/x86/msi.c                                 |    7 
 xen/arch/x86/setup.c                               |    8 
 xen/arch/x86/smpboot.c                             |    5 
 xen/arch/x86/tboot.c                               |   12 
 xen/arch/x86/x86_emulate/x86_emulate.c             |    8 
 xen/common/domain.c                                |    4 
 xen/common/grant_table.c                           |   57 ++
 xen/common/libelf/libelf-private.h                 |    2 
 xen/common/memory.c                                |   17 
 xen/drivers/passthrough/amd/pci_amd_iommu.c        |   11 
 xen/drivers/passthrough/iommu.c                    |  108 +++++
 xen/drivers/passthrough/vtd/dmar.c                 |   33 -
 xen/drivers/passthrough/vtd/dmar.h                 |    1 
 xen/drivers/passthrough/vtd/extern.h               |    3 
 xen/drivers/passthrough/vtd/intremap.c             |  318 ++++++++++++----
 xen/drivers/passthrough/vtd/iommu.c                |  416 ++++++++++-----------
 xen/drivers/passthrough/vtd/iommu.h                |    1 
 xen/drivers/passthrough/vtd/utils.c                |  178 ++++----
 xen/drivers/passthrough/vtd/vtd.h                  |   22 +
 xen/drivers/passthrough/vtd/x86/vtd.c              |  184 ---------
 xen/include/asm-x86/hvm/hvm.h                      |    6 
 xen/include/asm-x86/hvm/vcpu.h                     |    3 
 xen/include/asm-x86/hvm/vmx/vmx.h                  |    1 
 xen/include/asm-x86/hvm/vpt.h                      |    7 
 xen/include/asm-x86/tboot.h                        |   15 
 xen/include/public/domctl.h                        |   11 
 xen/include/xen/elfcore.h                          |    1 
 xen/include/xen/hvm/iommu.h                        |    4 
 xen/include/xen/iommu.h                            |   14 
 xen/include/xen/sched.h                            |    3 
 xen/include/xen/time.h                             |    1 
 100 files changed, 1874 insertions(+), 1427 deletions(-)

diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/Config.mk
--- a/extras/mini-os/Config.mk  Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/Config.mk  Mon Jun 02 11:35:39 2008 +0900
@@ -41,10 +41,7 @@ extra_incl := $(foreach dir,$(EXTRA_INC)
 extra_incl := $(foreach dir,$(EXTRA_INC),-I$(MINI-OS_ROOT)/include/$(dir))
 
 DEF_CPPFLAGS += -I$(MINI-OS_ROOT)/include
-
-ifeq ($(stubdom),y)
-DEF_CPPFLAGS += -DCONFIG_STUBDOM
-endif
+DEF_CPPFLAGS += -D__MINIOS__
 
 ifeq ($(libc),y)
 DEF_CPPFLAGS += -DHAVE_LIBC
@@ -58,11 +55,3 @@ DEF_CPPFLAGS += -I$(LWIPDIR)/src/include
 DEF_CPPFLAGS += -I$(LWIPDIR)/src/include
 DEF_CPPFLAGS += -I$(LWIPDIR)/src/include/ipv4
 endif
-
-ifneq ($(QEMUDIR),)
-qemu=y
-endif
-
-ifneq ($(CAMLDIR),)
-caml=y
-endif
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/Makefile   Mon Jun 02 11:35:39 2008 +0900
@@ -73,44 +73,25 @@ OBJS += lwip.a
 OBJS += lwip.a
 endif
 
-OBJS := $(filter-out lwip%.o $(LWO), $(OBJS))
-
-ifeq ($(caml),y)
-CAMLLIB = $(shell ocamlc -where)
-APP_OBJS += main-caml.o
-APP_OBJS += $(CAMLDIR)/caml.o
-APP_OBJS += $(CAMLLIB)/libasmrun.a
-CFLAGS += -I$(CAMLLIB)
-APP_LDLIBS += -lm
-endif
-OBJS := $(filter-out main-caml.o, $(OBJS))
-
-ifeq ($(qemu),y)
-APP_OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a 
$(QEMUDIR)/i386-dm-stubdom/libqemu.a
-CFLAGS += -DCONFIG_QEMU
-endif
-
-ifneq ($(CDIR),)
-APP_OBJS += $(CDIR)/main.a
-APP_LDLIBS += 
-endif
+OBJS := $(filter-out main.o lwip%.o $(LWO), $(OBJS))
 
 ifeq ($(libc),y)
-LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -lxenctrl -lxenguest
+APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -whole-archive -lxenguest -lxenctrl 
-no-whole-archive
 APP_LDLIBS += -lpci
 APP_LDLIBS += -lz
+APP_LDLIBS += -lm
 LDLIBS += -lc
 endif
 
-ifneq ($(caml)-$(qemu)-$(CDIR)-$(lwip),---y)
+ifneq ($(APP_OBJS)-$(lwip),-y)
 OBJS := $(filter-out daytime.o, $(OBJS))
 endif
 
-app.o: $(APP_OBJS) app.lds
-       $(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined main -o $@
+$(TARGET)_app.o: $(APP_OBJS) app.lds
+       $(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined app_main -o $@
 
-$(TARGET): links $(OBJS) app.o arch_lib
-       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o 
$@.o
+$(TARGET): links $(OBJS) $(TARGET)_app.o arch_lib
+       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(TARGET)_app.o $(OBJS) $(LDARCHLIB) 
$(LDLIBS) -o $@.o
        $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
        $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
        gzip -f -9 -c $@ >$@.gz
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/ia64/minios-ia64.lds
--- a/extras/mini-os/arch/ia64/minios-ia64.lds  Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/arch/ia64/minios-ia64.lds  Mon Jun 02 11:35:39 2008 +0900
@@ -52,6 +52,23 @@ SECTIONS
   .fini_array     : { *(.fini_array) }
   PROVIDE (__fini_array_end = .);
 
+  .ctors : {
+        __CTOR_LIST__ = .;
+        QUAD((__CTOR_END__ - __CTOR_LIST__) / 8 - 2)
+        *(SORT_BY_NAME(.ctors))
+       SORT_BY_NAME(CONSTRUCTORS)
+        QUAD(0)
+        __CTOR_END__ = .;
+        }
+
+  .dtors : {
+        __DTOR_LIST__ = .;
+        QUAD((__DTOR_END__ - __DTOR_LIST__) / 8 - 2)
+        *(SORT_BY_NAME(.dtors))
+        QUAD(0)
+        __DTOR_END__ = .;
+        }
+
   .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - (((5<<(61))+0x100000000) 
- (1 << 20)))
   { *(.IA_64.unwind_info) }
 
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/ia64/mm.c
--- a/extras/mini-os/arch/ia64/mm.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/arch/ia64/mm.c     Mon Jun 02 11:35:39 2008 +0900
@@ -131,6 +131,14 @@ arch_init_demand_mapping_area(unsigned l
 }
 
 /* Helper function used in gnttab.c. */
+void do_map_frames(unsigned long addr,
+        unsigned long *f, unsigned long n, unsigned long stride,
+       unsigned long increment, domid_t id, int may_fail, unsigned long prot)
+{
+       /* TODO */
+       ASSERT(0);
+}
+
 void*
 map_frames_ex(unsigned long* frames, unsigned long n, unsigned long stride,
        unsigned long increment, unsigned long alignment, domid_t id,
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/x86/minios-x86_32.lds
--- a/extras/mini-os/arch/x86/minios-x86_32.lds Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/arch/x86/minios-x86_32.lds Mon Jun 02 11:35:39 2008 +0900
@@ -28,9 +28,25 @@ SECTIONS
   .fini_array     : { *(.fini_array) }
   PROVIDE (__fini_array_end = .);
 
+  .ctors : {
+        __CTOR_LIST__ = .;
+        LONG((__CTOR_END__ - __CTOR_LIST__) / 4 - 2)
+        *(SORT_BY_NAME(.ctors))
+       SORT_BY_NAME(CONSTRUCTORS)
+        LONG(0)
+        __CTOR_END__ = .;
+        }
+
+  .dtors : {
+        __DTOR_LIST__ = .;
+        LONG((__DTOR_END__ - __DTOR_LIST__) / 4 - 2)
+        *(SORT_BY_NAME(.dtors))
+        LONG(0)
+        __DTOR_END__ = .;
+        }
+
   .data : {                    /* Data */
        *(.data)
-       CONSTRUCTORS
        }
 
   _edata = .;                  /* End of data section */
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/x86/minios-x86_64.lds
--- a/extras/mini-os/arch/x86/minios-x86_64.lds Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/arch/x86/minios-x86_64.lds Mon Jun 02 11:35:39 2008 +0900
@@ -28,9 +28,25 @@ SECTIONS
   .fini_array     : { *(.fini_array) }
   PROVIDE (__fini_array_end = .);
 
+  .ctors : {
+        __CTOR_LIST__ = .;
+        QUAD((__CTOR_END__ - __CTOR_LIST__) / 8 - 2)
+        *(SORT_BY_NAME(.ctors))
+       SORT_BY_NAME(CONSTRUCTORS)
+        QUAD(0)
+        __CTOR_END__ = .;
+        }
+
+  .dtors : {
+        __DTOR_LIST__ = .;
+        QUAD((__DTOR_END__ - __DTOR_LIST__) / 8 - 2)
+        *(SORT_BY_NAME(.dtors))
+        QUAD(0)
+        __DTOR_END__ = .;
+        }
+
   .data : {                    /* Data */
        *(.data)
-       CONSTRUCTORS
        }
 
   _edata = .;                  /* End of data section */
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/arch/x86/mm.c      Mon Jun 02 11:35:39 2008 +0900
@@ -59,11 +59,10 @@ void new_pt_frame(unsigned long *pt_pfn,
 {   
     pgentry_t *tab = (pgentry_t *)start_info.pt_base;
     unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); 
-    unsigned long prot_e, prot_t, pincmd;
+    unsigned long prot_e, prot_t;
     mmu_update_t mmu_updates[1];
-    struct mmuext_op pin_request;
     
-    prot_e = prot_t = pincmd = 0;
+    prot_e = prot_t = 0;
     DEBUG("Allocating new L%d pt frame for pt_pfn=%lx, "
            "prev_l_mfn=%lx, offset=%lx", 
            level, *pt_pfn, prev_l_mfn, offset);
@@ -77,18 +76,15 @@ void new_pt_frame(unsigned long *pt_pfn,
     case L1_FRAME:
          prot_e = L1_PROT;
          prot_t = L2_PROT;
-         pincmd = MMUEXT_PIN_L1_TABLE;
          break;
     case L2_FRAME:
          prot_e = L2_PROT;
          prot_t = L3_PROT;
-         pincmd = MMUEXT_PIN_L2_TABLE;
          break;
 #if defined(__x86_64__)
     case L3_FRAME:
          prot_e = L3_PROT;
          prot_t = L4_PROT;
-         pincmd = MMUEXT_PIN_L3_TABLE;
          break;
 #endif
     default:
@@ -113,15 +109,6 @@ void new_pt_frame(unsigned long *pt_pfn,
          do_exit();
     }
                         
-    /* Pin the page to provide correct protection */
-    pin_request.cmd = pincmd;
-    pin_request.arg1.mfn = pfn_to_mfn(*pt_pfn);
-    if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0)
-    {
-        printk("ERROR: pinning failed\n");
-        do_exit();
-    }
-
     /* Now fill the new page table page with entries.
        Update the page directory as well. */
     mmu_updates[0].ptr = ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + 
sizeof(pgentry_t) * offset;
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/hypervisor.c
--- a/extras/mini-os/hypervisor.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/hypervisor.c       Mon Jun 02 11:35:39 2008 +0900
@@ -55,12 +55,12 @@ void do_hypervisor_callback(struct pt_re
     while ( l1 != 0 )
     {
         l1i = __ffs(l1);
-        l1 &= ~(1 << l1i);
+        l1 &= ~(1UL << l1i);
         
         while ( (l2 = active_evtchns(cpu, s, l1i)) != 0 )
         {
             l2i = __ffs(l2);
-            l2 &= ~(1 << l2i);
+            l2 &= ~(1UL << l2i);
 
             port = (l1i * (sizeof(unsigned long) * 8)) + l2i;
                        do_event(port, regs);
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/lib.h
--- a/extras/mini-os/include/lib.h      Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/include/lib.h      Mon Jun 02 11:35:39 2008 +0900
@@ -136,6 +136,7 @@ enum fd_type {
     FTYPE_CONSOLE,
     FTYPE_FILE,
     FTYPE_XENBUS,
+    FTYPE_XC,
     FTYPE_EVTCHN,
     FTYPE_SOCKET,
     FTYPE_TAP,
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h       Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/include/mm.h       Mon Jun 02 11:35:39 2008 +0900
@@ -67,6 +67,9 @@ void *map_frames_ex(unsigned long *f, un
 void *map_frames_ex(unsigned long *f, unsigned long n, unsigned long stride,
        unsigned long increment, unsigned long alignment, domid_t id,
        int may_fail, unsigned long prot);
+void do_map_frames(unsigned long addr,
+        unsigned long *f, unsigned long n, unsigned long stride,
+       unsigned long increment, domid_t id, int may_fail, unsigned long prot);
 #ifdef HAVE_LIBC
 extern unsigned long heap, brk, heap_mapped, heap_end;
 #endif
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/posix/pthread.h
--- a/extras/mini-os/include/posix/pthread.h    Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/include/posix/pthread.h    Mon Jun 02 11:35:39 2008 +0900
@@ -1,18 +1,56 @@
 #ifndef _POSIX_PTHREAD_H
 #define _POSIX_PTHREAD_H
 
+#include <stdlib.h>
+
 /* Let's be single-threaded for now.  */
 
-typedef void *pthread_key_t;
-typedef struct {} pthread_mutex_t, pthread_once_t;
+typedef struct {
+    void *ptr;
+} *pthread_key_t;
+static inline int pthread_key_create(pthread_key_t *key, void 
(*destr_function)(void*))
+{
+    *key = malloc(sizeof(**key));
+    (*key)->ptr = NULL;
+    return 0;
+}
+static inline int pthread_setspecific(pthread_key_t key, const void *pointer)
+{
+    key->ptr = (void*) pointer;
+    return 0;
+}
+static inline void *pthread_getspecific(pthread_key_t key)
+{
+    return key->ptr;
+}
+static inline int pthread_key_delete(pthread_key_t key)
+{
+    free(key);
+    return 0;
+}
+
+
+
+typedef struct {} pthread_mutex_t;
 #define PTHREAD_MUTEX_INITIALIZER {}
-#define PTHREAD_ONCE_INIT {}
 static inline int pthread_mutex_lock(pthread_mutex_t *mutex) { return 0; }
 static inline int pthread_mutex_unlock(pthread_mutex_t *mutex) { return 0; }
-static inline int pthread_key_create(pthread_key_t *key, void 
(*destr_function)(void*)) { *key = NULL; return 0; }
-static inline int pthread_setspecific(pthread_key_t *key, const void *pointer) 
{ *key = (void*) pointer; return 0; }
-static inline void *pthread_getspecific(pthread_key_t *key) { return *key; }
-static inline int pthread_once(pthread_once_t *once_control, void 
(*init_routine)(void)) { init_routine(); return 0; }
+
+
+
+typedef struct {
+    int done;
+} pthread_once_t;
+#define PTHREAD_ONCE_INIT { 0 }
+
+static inline int pthread_once(pthread_once_t *once_control, void 
(*init_routine)(void))
+{
+    if (!once_control->done) {
+        once_control->done = 1;
+        init_routine();
+    }
+    return 0;
+}
 
 #define __thread
 
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/include/x86/arch_mm.h
--- a/extras/mini-os/include/x86/arch_mm.h      Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/include/x86/arch_mm.h      Mon Jun 02 11:35:39 2008 +0900
@@ -219,11 +219,6 @@ static __inline__ paddr_t machine_to_phy
 
 #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, L1_PROT)
 #define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 0, 
L1_PROT_RO)
-#ifndef __ASSEMBLY__
-void do_map_frames(unsigned long addr,
-        unsigned long *f, unsigned long n, unsigned long stride,
-       unsigned long increment, domid_t id, int may_fail, unsigned long prot);
-#endif
 #define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, 
DOMID_SELF, 0, L1_PROT_RO)
 
 #endif /* _ARCH_MM_H_ */
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/lib/sys.c
--- a/extras/mini-os/lib/sys.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/lib/sys.c  Mon Jun 02 11:35:39 2008 +0900
@@ -81,6 +81,7 @@
 
 #define NOFILE 32
 extern int xc_evtchn_close(int fd);
+extern int xc_interface_close(int fd);
 
 pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER;
 struct file files[NOFILE] = {
@@ -259,10 +260,7 @@ int read(int fd, void *buf, size_t nbyte
            }
            return ret * sizeof(union xenfb_in_event);
         }
-       case FTYPE_NONE:
-       case FTYPE_XENBUS:
-       case FTYPE_EVTCHN:
-       case FTYPE_BLK:
+       default:
            break;
     }
     printk("read(%d): Bad descriptor\n", fd);
@@ -295,12 +293,7 @@ int write(int fd, const void *buf, size_
        case FTYPE_TAP:
            netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes);
            return nbytes;
-       case FTYPE_NONE:
-       case FTYPE_XENBUS:
-       case FTYPE_EVTCHN:
-       case FTYPE_BLK:
-       case FTYPE_KBD:
-       case FTYPE_FB:
+       default:
            break;
     }
     printk("write(%d): Bad descriptor\n", fd);
@@ -351,15 +344,7 @@ int fsync(int fd) {
            }
            return 0;
        }
-       case FTYPE_NONE:
-       case FTYPE_CONSOLE:
-       case FTYPE_SOCKET:
-       case FTYPE_XENBUS:
-       case FTYPE_EVTCHN:
-       case FTYPE_TAP:
-       case FTYPE_BLK:
-       case FTYPE_KBD:
-       case FTYPE_FB:
+       default:
            break;
     }
     printk("fsync(%d): Bad descriptor\n", fd);
@@ -391,6 +376,9 @@ int close(int fd)
            files[fd].type = FTYPE_NONE;
            return res;
        }
+       case FTYPE_XC:
+           xc_interface_close(fd);
+           return 0;
        case FTYPE_EVTCHN:
             xc_evtchn_close(fd);
             return 0;
@@ -495,13 +483,7 @@ int fstat(int fd, struct stat *buf)
            stat_from_fs(buf, &stat);
            return 0;
        }
-       case FTYPE_NONE:
-       case FTYPE_XENBUS:
-       case FTYPE_EVTCHN:
-       case FTYPE_TAP:
-       case FTYPE_BLK:
-       case FTYPE_KBD:
-       case FTYPE_FB:
+       default:
            break;
     }
 
@@ -522,15 +504,7 @@ int ftruncate(int fd, off_t length)
            }
            return 0;
        }
-       case FTYPE_NONE:
-       case FTYPE_CONSOLE:
-       case FTYPE_SOCKET:
-       case FTYPE_XENBUS:
-       case FTYPE_EVTCHN:
-       case FTYPE_TAP:
-       case FTYPE_BLK:
-       case FTYPE_KBD:
-       case FTYPE_FB:
+       default:
            break;
     }
 
@@ -636,9 +610,10 @@ static const char file_types[] = {
     [FTYPE_NONE]       = 'N',
     [FTYPE_CONSOLE]    = 'C',
     [FTYPE_FILE]       = 'F',
-    [FTYPE_XENBUS]     = 'X',
+    [FTYPE_XENBUS]     = 'S',
+    [FTYPE_XC]         = 'X',
     [FTYPE_EVTCHN]     = 'E',
-    [FTYPE_SOCKET]     = 'S',
+    [FTYPE_SOCKET]     = 's',
     [FTYPE_TAP]                = 'T',
     [FTYPE_BLK]                = 'B',
     [FTYPE_KBD]                = 'K',
@@ -722,7 +697,7 @@ static int select_poll(int nfds, fd_set 
     /* Then see others as well. */
     for (i = 0; i < nfds; i++) {
        switch(files[i].type) {
-       case FTYPE_NONE:
+       default:
            if (FD_ISSET(i, readfds) || FD_ISSET(i, writefds) || FD_ISSET(i, 
exceptfds))
                printk("bogus fd %d in select\n", i);
            /* Fallthrough.  */
@@ -1083,14 +1058,20 @@ int clock_gettime(clockid_t clk_id, stru
 
 void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t 
offset)
 {
+    unsigned long n = (length + PAGE_SIZE - 1) / PAGE_SIZE;
+
     ASSERT(!start);
-    length = (length + PAGE_SIZE - 1) & PAGE_MASK;
     ASSERT(prot == (PROT_READ|PROT_WRITE));
-    ASSERT(flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON));
-    ASSERT(fd == -1);
+    ASSERT((fd == -1 && (flags == (MAP_SHARED|MAP_ANON) || flags == 
(MAP_PRIVATE|MAP_ANON)))
+        || (fd != -1 && flags == MAP_SHARED));
     ASSERT(offset == 0);
 
-    return map_zero(length / PAGE_SIZE, 1);
+    if (fd == -1)
+        return map_zero(n, 1);
+    else if (files[fd].type == FTYPE_XC) {
+        unsigned long zero = 0;
+        return map_frames_ex(&zero, n, 0, 0, 1, DOMID_SELF, 0, 0);
+    } else ASSERT(0);
 }
 #if defined(__x86_64__) || defined(__ia64__)
 __typeof__(mmap) mmap64 __attribute__((__alias__("mmap")));
@@ -1110,7 +1091,7 @@ int munmap(void *start, size_t length)
        call[i].args[0] = (unsigned long) &data[i];
        call[i].args[1] = 0;
        call[i].args[2] = 0;
-       call[i].args[3] = UVMF_INVLPG | UVMF_ALL;
+       call[i].args[3] = UVMF_INVLPG;
     }
 
     ret = HYPERVISOR_multicall(call, n);
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/lib/xmalloc.c
--- a/extras/mini-os/lib/xmalloc.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/lib/xmalloc.c      Mon Jun 02 11:35:39 2008 +0900
@@ -127,7 +127,7 @@ static void *xmalloc_whole_pages(size_t 
     if ( hdr == NULL )
         return NULL;
 
-    hdr->size = (1 << (pageorder + PAGE_SHIFT));
+    hdr->size = (1UL << (pageorder + PAGE_SHIFT));
     /* Debugging aid. */
     hdr->freelist.next = hdr->freelist.prev = NULL;
 
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/main-caml.c
--- a/extras/mini-os/main-caml.c        Mon Jun 02 11:35:02 2008 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-/*
- * Caml bootstrap
- *
- * Samuel Thibault <Samuel.Thibault@xxxxxxxxxxxxx>, January 2008
- */
-
-#include <stdio.h>
-#include <errno.h>
-
-#include <caml/mlvalues.h>
-#include <caml/callback.h>
-#include <unistd.h>
-
-/* Ugly binary compatibility with Linux */
-FILE *_stderr asm("stderr");
-int *__errno_location;
-/* Will probably break everything, probably need to fetch from glibc */
-void *__ctype_b_loc;
-
-int main(int argc, char *argv[], char *envp[])
-{
-    value *val;
-
-    /* Get current thread's value */
-    _stderr = stderr;
-    __errno_location = &errno;
-
-    printf("starting caml\n");
-
-    /* Wait before things might hang up */
-    sleep(1);
-
-    caml_startup(argv);
-    val = caml_named_value("main");
-    if (!val) {
-        printf("Couldn't find Caml main");
-        return 1;
-    }
-    caml_callback(*val, Val_int(0));
-    printf("callback returned\n");
-    return 0;
-}
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/main.c
--- a/extras/mini-os/main.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/main.c     Mon Jun 02 11:35:39 2008 +0900
@@ -4,7 +4,6 @@
  * Samuel Thibault <Samuel.Thibault@xxxxxxxxxxxxx>, October 2007
  */
 
-#ifdef HAVE_LIBC
 #include <os.h>
 #include <sched.h>
 #include <console.h>
@@ -19,8 +18,8 @@ extern int main(int argc, char *argv[], 
 extern int main(int argc, char *argv[], char *envp[]);
 extern void __libc_init_array(void);
 extern void __libc_fini_array(void);
-
-struct thread *main_thread;
+extern unsigned long __CTOR_LIST__[];
+extern unsigned long __DTOR_LIST__[];
 
 #if 0
 #include <stdio.h>
@@ -147,6 +146,8 @@ static void call_main(void *p)
 
     __libc_init_array();
     environ = envp;
+    for (i = 1; i <= __CTOR_LIST__[0]; i++)
+        ((void((*)(void)))__CTOR_LIST__[i]) ();
     tzset();
 
     exit(main(argc, argv, envp));
@@ -154,6 +155,10 @@ static void call_main(void *p)
 
 void _exit(int ret)
 {
+    int i;
+
+    for (i = 1; i <= __DTOR_LIST__[0]; i++)
+        ((void((*)(void)))__DTOR_LIST__[i]) ();
     close_all_files();
     __libc_fini_array();
     printk("main returned %d\n", ret);
@@ -172,4 +177,3 @@ int app_main(start_info_t *si)
     main_thread = create_thread("main", call_main, si);
     return 0;
 }
-#endif
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/mm.c
--- a/extras/mini-os/mm.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/mm.c       Mon Jun 02 11:35:39 2008 +0900
@@ -58,7 +58,7 @@ static unsigned long *alloc_bitmap;
 #define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
 
 #define allocated_in_map(_pn) \
-(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))
+(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1UL<<((_pn)&(PAGES_PER_MAPWORD-1))))
 
 /*
  * Hint regarding bitwise arithmetic in map_{alloc,free}:
@@ -80,13 +80,13 @@ static void map_alloc(unsigned long firs
 
     if ( curr_idx == end_idx )
     {
-        alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
+        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
     }
     else 
     {
-        alloc_bitmap[curr_idx] |= -(1<<start_off);
-        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
-        alloc_bitmap[curr_idx] |= (1<<end_off)-1;
+        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
+        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
+        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
     }
 }
 
@@ -102,13 +102,13 @@ static void map_free(unsigned long first
 
     if ( curr_idx == end_idx )
     {
-        alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
+        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
     }
     else 
     {
-        alloc_bitmap[curr_idx] &= (1<<start_off)-1;
+        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
         while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
-        alloc_bitmap[curr_idx] &= -(1<<end_off);
+        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
     }
 }
 
@@ -178,7 +178,7 @@ USED static void print_chunks(void *star
         head = free_head[order];
         while(!FREELIST_EMPTY(head))
         {
-            for(count = 0; count < 1<< head->level; count++)
+            for(count = 0; count < 1UL<< head->level; count++)
             {
                 if(count + virt_to_pfn(head) - pfn_start < 1000)
                     chunks[count + virt_to_pfn(head) - pfn_start] = current;
@@ -235,13 +235,13 @@ static void init_page_allocator(unsigned
          * Next chunk is limited by alignment of min, but also
          * must not be bigger than remaining range.
          */
-        for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ )
-            if ( min & (1<<i) ) break;
+        for ( i = PAGE_SHIFT; (1UL<<(i+1)) <= range; i++ )
+            if ( min & (1UL<<i) ) break;
 
 
         ch = (chunk_head_t *)min;
-        min   += (1<<i);
-        range -= (1<<i);
+        min   += (1UL<<i);
+        range -= (1UL<<i);
         ct = (chunk_tail_t *)min-1;
         i -= PAGE_SHIFT;
         ch->level       = i;
@@ -280,8 +280,8 @@ unsigned long alloc_pages(int order)
     {
         /* Split into two equal parts. */
         i--;
-        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT)));
-        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1;
+        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1UL<<(i+PAGE_SHIFT)));
+        spare_ct = (chunk_tail_t *)((char *)spare_ch + 
(1UL<<(i+PAGE_SHIFT)))-1;
 
         /* Create new header for spare chunk. */
         spare_ch->level = i;
@@ -294,7 +294,7 @@ unsigned long alloc_pages(int order)
         free_head[i] = spare_ch;
     }
     
-    map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1<<order);
+    map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1UL<<order);
 
     return((unsigned long)alloc_ch);
 
@@ -312,16 +312,16 @@ void free_pages(void *pointer, int order
     unsigned long mask;
     
     /* First free the chunk */
-    map_free(virt_to_pfn(pointer), 1 << order);
+    map_free(virt_to_pfn(pointer), 1UL << order);
     
     /* Create free chunk */
     freed_ch = (chunk_head_t *)pointer;
-    freed_ct = (chunk_tail_t *)((char *)pointer + (1<<(order + PAGE_SHIFT)))-1;
+    freed_ct = (chunk_tail_t *)((char *)pointer + (1UL<<(order + 
PAGE_SHIFT)))-1;
     
     /* Now, possibly we can conseal chunks together */
     while(order < FREELIST_SIZE)
     {
-        mask = 1 << (order + PAGE_SHIFT);
+        mask = 1UL << (order + PAGE_SHIFT);
         if((unsigned long)freed_ch & mask) 
         {
             to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
diff -r d2a239224cb2 -r f1508348ffab extras/mini-os/sched.c
--- a/extras/mini-os/sched.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/extras/mini-os/sched.c    Mon Jun 02 11:35:39 2008 +0900
@@ -57,6 +57,8 @@ struct thread *idle_thread = NULL;
 struct thread *idle_thread = NULL;
 LIST_HEAD(exited_threads);
 static int threads_started;
+
+struct thread *main_thread;
 
 void inline print_runqueue(void)
 {
diff -r d2a239224cb2 -r f1508348ffab stubdom/Makefile
--- a/stubdom/Makefile  Mon Jun 02 11:35:02 2008 +0900
+++ b/stubdom/Makefile  Mon Jun 02 11:35:39 2008 +0900
@@ -37,7 +37,7 @@ export PATH:=$(CROSS_PREFIX)/bin:$(PATH)
 export PATH:=$(CROSS_PREFIX)/bin:$(PATH)
 
 .PHONY: all
-all: qemu-stubdom
+all: ioemu-stubdom c-stubdom
 
 ################
 # Cross-binutils
@@ -174,6 +174,7 @@ mk-symlinks:
          ([ ! -h config-host.h ] || rm -f config-host.h) && \
          ([ ! -h config-host.mak ] || rm -f config-host.mak) )
        [ -h mini-os ] || ln -sf ../extras/mini-os .
+       [ -h mini-os/include/xen ] || ln -sf ../../../xen/include/public 
mini-os/include/xen
 
 #######
 # libxc
@@ -198,40 +199,41 @@ ioemu: cross-zlib cross-libpci mk-symlin
 ######
 
 .PHONY: caml
-caml:
-       $(MAKE) -C $@
+caml: mk-symlinks
+       $(MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-cvs 
 
 ###
 # C
 ###
 
 .PHONY: c
-c:
-       $(MAKE) -C $@
+c: mk-symlinks
+       $(MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-cvs 
 
 ########
 # minios
 ########
 
-.PHONY: qemu-stubdom
-qemu-stubdom: mk-symlinks lwip-cvs libxc ioemu
-       $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs QEMUDIR=$(CURDIR)/ioemu
-
+.PHONY: ioemu-stubdom
+ioemu-stubdom: lwip-cvs libxc ioemu
+       $(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs 
APP_OBJS="$(CURDIR)/ioemu/i386-dm-stubdom/qemu.a 
$(CURDIR)/ioemu/i386-dm-stubdom/libqemu.a"
+
+CAMLLIB = $(shell ocamlc -where)
 .PHONY: caml-stubdom
-caml-stubdom: mk-symlinks lwip-cvs libxc cross-libpci caml
-       $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs CAMLDIR=$(CURDIR)/caml
+caml-stubdom: lwip-cvs libxc caml
+       $(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs 
APP_OBJS="$(CURDIR)/caml/main-c.o $(CURDIR)/caml/main-caml.o 
$(CURDIR)/caml/caml.o $(CAMLLIB)/libasmrun.a"
 
 .PHONY: c-stubdom
-c-stubdom: mk-symlinks lwip-cvs libxc cross-libpci c
-       $(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs CDIR=$(CURDIR)/c
+c-stubdom: lwip-cvs libxc c
+       $(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs 
APP_OBJS=$(CURDIR)/c/main.a
 
 #########
 # install
 #########
 
-install: mini-os/mini-os.gz
+install: mini-os/ioemu-stubdom.gz
        $(INSTALL_PROG) stubdom-dm "$(DESTDIR)/usr/lib/xen/bin"
-       $(INSTALL_PROG) mini-os/mini-os.gz 
"$(DESTDIR)/usr/lib/xen/boot/stubdom.gz"
+       $(INSTALL_PROG) $< "$(DESTDIR)/usr/lib/xen/boot/stubdom.gz"
 
 #######
 # clean
@@ -242,6 +244,7 @@ clean:
 clean:
        -$(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs clean
        $(MAKE) -C caml clean
+       $(MAKE) -C c clean
        rm -fr libxc ioemu mini-os include
 
 # clean the cross-compilation result
diff -r d2a239224cb2 -r f1508348ffab stubdom/c/Makefile
--- a/stubdom/c/Makefile        Mon Jun 02 11:35:02 2008 +0900
+++ b/stubdom/c/Makefile        Mon Jun 02 11:35:39 2008 +0900
@@ -2,7 +2,12 @@ XEN_ROOT = ../..
 
 include $(XEN_ROOT)/Config.mk
 
-main.a: main.o
+all: main.a
+
+main-c.c:
+       ln -sf $(XEN_ROOT)/extras/mini-os/main.c $@
+
+main.a: main-c.o main.o 
        $(AR) cr $@ $^
 
 clean:
diff -r d2a239224cb2 -r f1508348ffab stubdom/c/main.c
--- a/stubdom/c/main.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/stubdom/c/main.c  Mon Jun 02 11:35:39 2008 +0900
@@ -1,4 +1,6 @@
 #include <stdio.h>
+#include <unistd.h>
+
 int main(void) {
         sleep(2);
         printf("Hello, world!\n");
diff -r d2a239224cb2 -r f1508348ffab stubdom/caml/Makefile
--- a/stubdom/caml/Makefile     Mon Jun 02 11:35:02 2008 +0900
+++ b/stubdom/caml/Makefile     Mon Jun 02 11:35:39 2008 +0900
@@ -1,12 +1,20 @@ XEN_ROOT = ../..
 XEN_ROOT = ../..
 
 include $(XEN_ROOT)/Config.mk
+
+CAMLLIB = $(shell ocamlc -where)
+DEF_CPPFLAGS += -I$(CAMLLIB)
 
 OCAMLFIND=ocamlfind
 OCAMLOPT=ocamlopt
 
 OBJS := hello.cmx
 LIBS := 
+
+all: main-c.o main-caml.o caml.o
+
+main-c.c:
+       ln -sf $(XEN_ROOT)/extras/mini-os/main.c $@
 
 %.cmx: %.ml
        $(OCAMLFIND) $(OCAMLOPT) -c $< -o $@
@@ -15,4 +23,4 @@ caml.o: $(OBJS)
        $(OCAMLFIND) $(OCAMLOPT) $(LIBS) $^ -output-obj -o $@
 
 clean:
-       rm -f *.o *.cmx *.cmi
+       rm -f *.a *.o *.cmx *.cmi
diff -r d2a239224cb2 -r f1508348ffab stubdom/caml/main-caml.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/stubdom/caml/main-caml.c  Mon Jun 02 11:35:39 2008 +0900
@@ -0,0 +1,42 @@
+/*
+ * Caml bootstrap
+ *
+ * Samuel Thibault <Samuel.Thibault@xxxxxxxxxxxxx>, January 2008
+ */
+
+#include <stdio.h>
+#include <errno.h>
+
+#include <caml/mlvalues.h>
+#include <caml/callback.h>
+#include <unistd.h>
+
+/* Ugly binary compatibility with Linux */
+FILE *_stderr asm("stderr");
+int *__errno_location;
+/* Will probably break everything, probably need to fetch from glibc */
+void *__ctype_b_loc;
+
+int main(int argc, char *argv[], char *envp[])
+{
+    value *val;
+
+    /* Get current thread's value */
+    _stderr = stderr;
+    __errno_location = &errno;
+
+    printf("starting caml\n");
+
+    /* Wait before things might hang up */
+    sleep(1);
+
+    caml_startup(argv);
+    val = caml_named_value("main");
+    if (!val) {
+        printf("Couldn't find Caml main");
+        return 1;
+    }
+    caml_callback(*val, Val_int(0));
+    printf("callback returned\n");
+    return 0;
+}
diff -r d2a239224cb2 -r f1508348ffab tools/examples/Makefile
--- a/tools/examples/Makefile   Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/examples/Makefile   Mon Jun 02 11:35:39 2008 +0900
@@ -9,9 +9,7 @@ XENDOMAINS_SYSCONFIG = init.d/sysconfig.
 # Xen configuration dir and configs to go there.
 XEN_CONFIG_DIR = /etc/xen
 XEN_CONFIGS = xend-config.sxp
-XEN_CONFIGS += xend-config-xenapi.sxp
 XEN_CONFIGS += xm-config.xml
-XEN_CONFIGS += xm-config-xenapi.xml
 XEN_CONFIGS += xmexample1 
 XEN_CONFIGS += xmexample2
 XEN_CONFIGS += xmexample.hvm
diff -r d2a239224cb2 -r f1508348ffab tools/examples/xend-config-xenapi.sxp
--- a/tools/examples/xend-config-xenapi.sxp     Mon Jun 02 11:35:02 2008 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,196 +0,0 @@
-# -*- sh -*-
-
-#
-# Xend configuration file.
-#
-
-# This example configuration is appropriate for an installation that 
-# utilizes a bridged network configuration. Access to xend via http
-# is disabled.  
-
-# Commented out entries show the default for that entry, unless otherwise
-# specified.
-
-#(logfile /var/log/xen/xend.log)
-#(loglevel DEBUG)
-
-
-# The Xen-API server configuration.  (Please note that this server is
-# available as an UNSUPPORTED PREVIEW in Xen 3.0.4, and should not be relied
-# upon).
-#
-# This value configures the ports, interfaces, and access controls for the
-# Xen-API server.  Each entry in the list starts with either unix, a port
-# number, or an address:port pair.  If this is "unix", then a UDP socket is
-# opened, and this entry applies to that.  If it is a port, then Xend will
-# listen on all interfaces on that TCP port, and if it is an address:port
-# pair, then Xend will listen on the specified port, using the interface with
-# the specified address.
-#
-# The subsequent string configures the user-based access control for the
-# listener in question.  This can be one of "none" or "pam", indicating either
-# that users should be allowed access unconditionally, or that the local
-# Pluggable Authentication Modules configuration should be used.  If this
-# string is missing or empty, then "pam" is used.
-#
-# The final string gives the host-based access control for that listener. If
-# this is missing or empty, then all connections are accepted.  Otherwise,
-# this should be a space-separated sequence of regular expressions; any host
-# with a fully-qualified domain name or an IP address that matches one of
-# these regular expressions will be accepted.
-#
-# Example: listen on TCP port 9363 on all interfaces, accepting connections
-# only from machines in example.com or localhost, and allow access through
-# the unix domain socket unconditionally:
-#
-   (xen-api-server ((9363 none)))
-#                    (unix none)))
-#
-# Optionally, the TCP Xen-API server can use SSL by specifying the private
-# key and certificate location:
-#
-#                    (9367 pam '' /etc/xen/xen-api.key /etc/xen/xen-api.crt)
-#
-# Default:
-#   (xen-api-server ((unix)))
-
-
-#(xend-http-server no)
-#(xend-unix-server no)
-#(xend-tcp-xmlrpc-server no)
-#(xend-unix-xmlrpc-server yes)
-#(xend-relocation-server no)
-(xend-relocation-server yes)
-
-#(xend-unix-path /var/lib/xend/xend-socket)
-
-
-# Address and port xend should use for the legacy TCP XMLRPC interface, 
-# if xend-tcp-xmlrpc-server is set.
-#(xend-tcp-xmlrpc-server-address 'localhost')
-#(xend-tcp-xmlrpc-server-port 8006)
-
-# SSL key and certificate to use for the legacy TCP XMLRPC interface.
-# Setting these will mean that this port serves only SSL connections as
-# opposed to plaintext ones.
-#(xend-tcp-xmlrpc-server-ssl-key-file  /etc/xen/xmlrpc.key)
-#(xend-tcp-xmlrpc-server-ssl-cert-file /etc/xen/xmlrpc.crt)
-
-
-# Port xend should use for the HTTP interface, if xend-http-server is set.
-#(xend-port            8000)
-
-# Port xend should use for the relocation interface, if xend-relocation-server
-# is set.
-#(xend-relocation-port 8002)
-
-# Address xend should listen on for HTTP connections, if xend-http-server is
-# set.
-# Specifying 'localhost' prevents remote connections.
-# Specifying the empty string '' (the default) allows all connections.
-#(xend-address '')
-#(xend-address localhost)
-
-# Address xend should listen on for relocation-socket connections, if
-# xend-relocation-server is set.
-# Meaning and default as for xend-address above.
-#(xend-relocation-address '')
-
-# The hosts allowed to talk to the relocation port.  If this is empty (the
-# default), then all connections are allowed (assuming that the connection
-# arrives on a port and interface on which we are listening; see
-# xend-relocation-port and xend-relocation-address above).  Otherwise, this
-# should be a space-separated sequence of regular expressions.  Any host with
-# a fully-qualified domain name or an IP address that matches one of these
-# regular expressions will be accepted.
-#
-# For example:
-#  (xend-relocation-hosts-allow '^localhost$ ^.*\\.example\\.org$')
-#
-#(xend-relocation-hosts-allow '')
-(xend-relocation-hosts-allow '^localhost$ ^localhost\\.localdomain$')
-
-# The limit (in kilobytes) on the size of the console buffer
-#(console-limit 1024)
-
-##
-# To bridge network traffic, like this:
-#
-# dom0: ----------------- bridge -> real eth0 -> the network
-#                            |
-# domU: fake eth0 -> vifN.0 -+
-#
-# use
-#
-# (network-script network-bridge)
-#
-# Your default ethernet device is used as the outgoing interface, by default. 
-# To use a different one (e.g. eth1) use
-#
-# (network-script 'network-bridge netdev=eth1')
-#
-# The bridge is named xenbr0, by default.  To rename the bridge, use
-#
-# (network-script 'network-bridge bridge=<name>')
-#
-# It is possible to use the network-bridge script in more complicated
-# scenarios, such as having two outgoing interfaces, with two bridges, and
-# two fake interfaces per guest domain.  To do things like this, write
-# yourself a wrapper script, and call network-bridge from it, as appropriate.
-#
-(network-script network-bridge)
-
-# The script used to control virtual interfaces.  This can be overridden on a
-# per-vif basis when creating a domain or a configuring a new vif.  The
-# vif-bridge script is designed for use with the network-bridge script, or
-# similar configurations.
-#
-# If you have overridden the bridge name using
-# (network-script 'network-bridge bridge=<name>') then you may wish to do the
-# same here.  The bridge name can also be set when creating a domain or
-# configuring a new vif, but a value specified here would act as a default.
-#
-# If you are using only one bridge, the vif-bridge script will discover that,
-# so there is no need to specify it explicitly.
-#
-(vif-script vif-bridge)
-
-
-## Use the following if network traffic is routed, as an alternative to the
-# settings for bridged networking given above.
-#(network-script network-route)
-#(vif-script     vif-route)
-
-
-## Use the following if network traffic is routed with NAT, as an alternative
-# to the settings for bridged networking given above.
-#(network-script network-nat)
-#(vif-script     vif-nat)
-
-# dom0-min-mem is the lowest permissible memory level (in MB) for dom0.
-# This is a minimum both for auto-ballooning (as enabled by
-# enable-dom0-ballooning below) and for xm mem-set when applied to dom0.
-(dom0-min-mem 196)
-
-# Whether to enable auto-ballooning of dom0 to allow domUs to be created.
-# If enable-dom0-ballooning = no, dom0 will never balloon out.
-(enable-dom0-ballooning yes)
-
-# In SMP system, dom0 will use dom0-cpus # of CPUS
-# If dom0-cpus = 0, dom0 will take all cpus available
-(dom0-cpus 0)
-
-# Whether to enable core-dumps when domains crash.
-#(enable-dump no)
-
-# The tool used for initiating virtual TPM migration
-#(external-migration-tool '')
-
-# The interface for VNC servers to listen on. Defaults
-# to 127.0.0.1  To restore old 'listen everywhere' behaviour
-# set this to 0.0.0.0
-#(vnc-listen '127.0.0.1')
-
-# The default password for VNC console on HVM domain.
-# Empty string is no authentication.
-(vncpasswd '')
diff -r d2a239224cb2 -r f1508348ffab tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp    Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/examples/xend-config.sxp    Mon Jun 02 11:35:39 2008 +0900
@@ -59,6 +59,7 @@
 #(xend-unix-xmlrpc-server yes)
 #(xend-relocation-server no)
 (xend-relocation-server yes)
+#(xend-relocation-ssl-server no)
 
 #(xend-unix-path /var/lib/xend/xend-socket)
 
@@ -82,14 +83,17 @@
 # is set.
 #(xend-relocation-port 8002)
 
-# Whether to use tls when relocating.
-#(xend-relocation-tls no)
-
-# SSL key and certificate to use for the relocation interface.
-# Setting these will mean that this port serves only SSL connections as
-# opposed to plaintext ones.
+# Port xend should use for the ssl relocation interface, if
+# xend-relocation-ssl-server is set.
+#(xend-relocation-ssl-port 8003)
+
+# SSL key and certificate to use for the ssl relocation interface, if
+# xend-relocation-ssl-server is set.
 #(xend-relocation-server-ssl-key-file  /etc/xen/xmlrpc.key)
 #(xend-relocation-server-ssl-cert-file  /etc/xen/xmlrpc.crt)
+
+# Whether to use ssl as default when relocating.
+#(xend-relocation-ssl no)
 
 # Address xend should listen on for HTTP connections, if xend-http-server is
 # set.
diff -r d2a239224cb2 -r f1508348ffab tools/examples/xm-config-xenapi.xml
--- a/tools/examples/xm-config-xenapi.xml       Mon Jun 02 11:35:02 2008 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-<!--
-
-Copyright (C) 2006 XenSource Inc.
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of version 2.1 of the GNU Lesser General Public
-License as published by the Free Software Foundation.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
--->
-
-<!--
-
-This is a configuration file for xm; it should be placed in
-/etc/xen/xm-config.xml.  If this file is missing, then xm will fall back to
-the normal behaviour that's in Xen 3.0.4 and below.  The settings here are
-most useful for experimenting with the Xen-API preview in Xen 3.0.4.
-
--->
-
-<xm>
-  <!-- The server element describes how to talk to Xend.  The type may be 
-       Xen-API or LegacyXMLRPC (the default).  The URI is that of the
-       server; you might try http://server:9363/ or
-       httpu:///var/run/xend/xen-api.sock for the Xen-API, or
-       httpu:///var/run/xend/xmlrpc.sock for the legacy server.
-
-       The username and password attributes will be used to log in if Xen-API
-       is being used.
-    -->
-  <server type='Xen-API'
-          uri='http://localhost:9363/'
-          username='me'
-          password='mypassword' />
-</xm>
diff -r d2a239224cb2 -r f1508348ffab tools/firmware/hvmloader/util.c
--- a/tools/firmware/hvmloader/util.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/firmware/hvmloader/util.c   Mon Jun 02 11:35:39 2008 +0900
@@ -609,7 +609,7 @@ uint16_t get_cpu_mhz(void)
 uint16_t get_cpu_mhz(void)
 {
     struct xen_add_to_physmap xatp;
-    struct shared_info *shared_info = (struct shared_info *)0xa0000;
+    struct shared_info *shared_info = (struct shared_info *)0xfffff000;
     struct vcpu_time_info *info = &shared_info->vcpu_info[0].time;
     uint64_t cpu_khz;
     uint32_t tsc_to_nsec_mul, version;
@@ -619,7 +619,7 @@ uint16_t get_cpu_mhz(void)
     if ( cpu_mhz != 0 )
         return cpu_mhz;
 
-    /* Map shared-info page to 0xa0000 (i.e., overlap VGA hole). */
+    /* Map shared-info page. */
     xatp.domid = DOMID_SELF;
     xatp.space = XENMAPSPACE_shared_info;
     xatp.idx   = 0;
@@ -643,14 +643,6 @@ uint16_t get_cpu_mhz(void)
         cpu_khz = cpu_khz << -tsc_shift;
     else
         cpu_khz = cpu_khz >> tsc_shift;
-
-    /* Get the VGA MMIO hole back by remapping shared info to scratch. */
-    xatp.domid = DOMID_SELF;
-    xatp.space = XENMAPSPACE_shared_info;
-    xatp.idx   = 0;
-    xatp.gpfn  = 0xfffff; /* scratch pfn */
-    if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
-        BUG();
 
     cpu_mhz = (uint16_t)(((uint32_t)cpu_khz + 500) / 1000);
     return cpu_mhz;
diff -r d2a239224cb2 -r f1508348ffab tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/firmware/rombios/rombios.c  Mon Jun 02 11:35:39 2008 +0900
@@ -2225,26 +2225,12 @@ void interactive_bootkey()
     Bit16u i;
     Bit8u scan = 0;
 
-    bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\nPress F10 to 
select boot device.\n");
-    for (i = 3; i > 0; i--)
-    {
-        scan = wait(WAIT_HZ, 0);
-        switch (scan) {
-        case 0x3D:
-        case 0x3E:
-        case 0x3F:
-        case 0x58:
-            break;
-        case 0x44:
-            scan = bootmenu(inb_cmos(0x3d) & 0x0f);
-            break;
-        default:
-            scan = 0;
-            break;
-        }
-        if (scan != 0)
-            break;
-    }
+    bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO,
+                "\n\nPress F10 to select boot device.\n");
+
+    scan = wait(1, 0);
+    if (scan == 0x44)
+        scan = bootmenu(inb_cmos(0x3d) & 0x0f);
 
     /* set the default based on the keypress or menu */
     switch(scan) {
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/Makefile.target
--- a/tools/ioemu/Makefile.target       Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/ioemu/Makefile.target       Mon Jun 02 11:35:39 2008 +0900
@@ -358,6 +358,13 @@ endif
 endif
 
 ifdef CONFIG_STUBDOM
+VL_OBJS+=main-qemu.o
+CFLAGS += -DCONFIG_QEMU
+main-qemu.c:
+       ln -s $(XEN_ROOT)/extras/mini-os/main.c $@
+endif
+
+ifdef CONFIG_STUBDOM
 #CONFIG_PASSTHROUGH=1
 else
   ifeq (,$(wildcard /usr/include/pci))
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/ioemu/hw/cirrus_vga.c       Mon Jun 02 11:35:39 2008 +0900
@@ -281,8 +281,6 @@ typedef struct PCICirrusVGAState {
 
 static uint8_t rop_to_index[256];
     
-void *shared_vram;
-
 /***************************************
  *
  *  prototypes.
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/ioemu/vl.c  Mon Jun 02 11:35:39 2008 +0900
@@ -7807,8 +7807,9 @@ int main(int argc, char **argv)
                 bdrv_set_type_hint(fd_table[i], BDRV_TYPE_FLOPPY);
             }
             if (fd_filename[i] != '\0') {
-                if (bdrv_open(fd_table[i], fd_filename[i],
-                              snapshot ? BDRV_O_SNAPSHOT : 0) < 0) {
+                if (bdrv_open2(fd_table[i], fd_filename[i],
+                               snapshot ? BDRV_O_SNAPSHOT : 0,
+                               &bdrv_raw) < 0) {
                     fprintf(stderr, "qemu: could not open floppy disk image 
'%s'\n",
                             fd_filename[i]);
                     exit(1);
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/ioemu/vl.h  Mon Jun 02 11:35:39 2008 +0900
@@ -153,8 +153,6 @@ int unset_mm_mapping(int xc_handle, uint
                      unsigned int address_bits, unsigned long *extent_start);
 int set_mm_mapping(int xc_handle, uint32_t domid, unsigned long nr_pages,
                    unsigned int address_bits, unsigned long *extent_start);
-
-extern void *shared_vram;
 
 extern FILE *logfile;
 
diff -r d2a239224cb2 -r f1508348ffab tools/ioemu/xenstore.c
--- a/tools/ioemu/xenstore.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/ioemu/xenstore.c    Mon Jun 02 11:35:39 2008 +0900
@@ -112,7 +112,7 @@ void xenstore_parse_domain_config(int hv
 
     e = xs_directory(xsh, XBT_NULL, buf, &num);
     if (e == NULL)
-        goto out;
+        num = 0;
 
     for (i = 0; i < num; i++) {
         /* read the backend path */
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/Makefile
--- a/tools/libxc/Makefile      Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/Makefile      Mon Jun 02 11:35:39 2008 +0900
@@ -5,11 +5,9 @@ MINOR    = 0
 MINOR    = 0
 
 CTRL_SRCS-y       :=
-ifneq ($(stubdom),y)
 CTRL_SRCS-y       += xc_core.c
 CTRL_SRCS-$(CONFIG_X86) += xc_core_x86.c
 CTRL_SRCS-$(CONFIG_IA64) += xc_core_ia64.c
-endif
 CTRL_SRCS-y       += xc_domain.c
 CTRL_SRCS-y       += xc_evtchn.c
 CTRL_SRCS-y       += xc_misc.c
@@ -21,9 +19,7 @@ CTRL_SRCS-y       += xc_csched.c
 CTRL_SRCS-y       += xc_csched.c
 CTRL_SRCS-y       += xc_tbuf.c
 CTRL_SRCS-y       += xc_pm.c
-ifneq ($(stubdom),y)
 CTRL_SRCS-y       += xc_resume.c
-endif
 CTRL_SRCS-$(CONFIG_X86) += xc_pagetab.c
 CTRL_SRCS-$(CONFIG_Linux) += xc_linux.c
 CTRL_SRCS-$(CONFIG_SunOS) += xc_solaris.c
@@ -33,15 +29,12 @@ CTRL_SRCS-$(CONFIG_MiniOS) += xc_minios.
 
 GUEST_SRCS-y :=
 GUEST_SRCS-y += xg_private.c
-ifneq ($(stubdom),y)
 GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
 GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
-endif
 
 VPATH = ../../xen/common/libelf
 CFLAGS += -I../../xen/common/libelf
 
-ifneq ($(stubdom),y)
 GUEST_SRCS-y += libelf-tools.c libelf-loader.c
 GUEST_SRCS-y += libelf-dominfo.c libelf-relocate.c
 
@@ -55,7 +48,6 @@ GUEST_SRCS-$(CONFIG_X86)     += xc_dom_x
 GUEST_SRCS-$(CONFIG_X86)     += xc_dom_x86.c
 GUEST_SRCS-$(CONFIG_X86)     += xc_cpuid_x86.c
 GUEST_SRCS-$(CONFIG_IA64)    += xc_dom_ia64.c
-endif
 
 -include $(XEN_TARGET_ARCH)/Makefile
 
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/xc_core.c     Mon Jun 02 11:35:39 2008 +0900
@@ -64,7 +64,7 @@
 /* string table */
 struct xc_core_strtab {
     char       *strings;
-    uint16_t    current;
+    uint16_t    length;
     uint16_t    max;
 };
 
@@ -89,7 +89,7 @@ xc_core_strtab_init(void)
 
     /* index 0 represents none */
     strtab->strings[0] = '\0';
-    strtab->current = 1;
+    strtab->length = 1;
 
     return strtab;
 }
@@ -107,14 +107,14 @@ xc_core_strtab_get(struct xc_core_strtab
     uint16_t ret = 0;
     uint16_t len = strlen(name) + 1;
 
-    if ( strtab->current > UINT16_MAX - len )
+    if ( strtab->length > UINT16_MAX - len )
     {
         PERROR("too long string table");
         errno = E2BIG;
         return ret;
     }
     
-    if ( strtab->current + len > strtab->max )
+    if ( strtab->length + len > strtab->max )
     {
         char *tmp;
         if ( strtab->max > UINT16_MAX / 2 )
@@ -135,9 +135,9 @@ xc_core_strtab_get(struct xc_core_strtab
         strtab->max *= 2;
     }
 
-    ret = strtab->current;
-    strcpy(strtab->strings + strtab->current, name);
-    strtab->current += len;
+    ret = strtab->length;
+    strcpy(strtab->strings + strtab->length, name);
+    strtab->length += len;
     return ret;
 }
 
@@ -669,7 +669,7 @@ xc_domain_dumpcore_via_callback(int xc_h
     offset += filesz;
 
     /* fixing up section header string table section header */
-    filesz = strtab->current;
+    filesz = strtab->length;
     sheaders->shdrs[strtab_idx].sh_offset = offset;
     sheaders->shdrs[strtab_idx].sh_size = filesz;
 
@@ -829,7 +829,7 @@ copy_done:
         goto out;
 
     /* elf section header string table: .shstrtab */
-    sts = dump_rtn(args, strtab->strings, strtab->current);
+    sts = dump_rtn(args, strtab->strings, strtab->length);
     if ( sts != 0 )
         goto out;
 
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/xc_domain.c   Mon Jun 02 11:35:39 2008 +0900
@@ -767,6 +767,37 @@ int xc_assign_device(
     return do_domctl(xc_handle, &domctl);
 }
 
+int xc_get_device_group(
+    int xc_handle,
+    uint32_t domid,
+    uint32_t machine_bdf,
+    uint32_t max_sdevs,
+    uint32_t *num_sdevs,
+    uint32_t *sdev_array)
+{
+    int rc;
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_get_device_group;
+    domctl.domain = (domid_t)domid;
+
+    domctl.u.get_device_group.machine_bdf = machine_bdf;
+    domctl.u.get_device_group.max_sdevs = max_sdevs;
+
+    set_xen_guest_handle(domctl.u.get_device_group.sdev_array, sdev_array);
+
+    if ( lock_pages(sdev_array, max_sdevs * sizeof(*sdev_array)) != 0 )
+    {
+        PERROR("Could not lock memory for xc_get_device_group\n");
+        return -ENOMEM;
+    }
+    rc = do_domctl(xc_handle, &domctl);
+    unlock_pages(sdev_array, max_sdevs * sizeof(*sdev_array));
+
+    *num_sdevs = domctl.u.get_device_group.num_sdevs;
+    return rc;
+}
+
 int xc_test_assign_device(
     int xc_handle,
     uint32_t domid,
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xc_minios.c
--- a/tools/libxc/xc_minios.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/xc_minios.c   Mon Jun 02 11:35:39 2008 +0900
@@ -35,11 +35,12 @@ extern struct wait_queue_head event_queu
 
 int xc_interface_open(void)
 {
-    return 0;
+    return alloc_fd(FTYPE_XC);
 }
 
 int xc_interface_close(int xc_handle)
 {
+    files[xc_handle].type = FTYPE_NONE;
     return 0;
 }
 
@@ -79,8 +80,12 @@ int xc_map_foreign_ranges(int xc_handle,
 int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
                           privcmd_mmap_entry_t *entries, int nr)
 {
-    printf("xc_map_foreign_ranges, TODO\n");
-    do_exit();
+    int i;
+    for (i = 0; i < nr; i++) {
+       unsigned long mfn = entries[i].mfn;
+        do_map_frames(entries[i].va, &mfn, entries[i].npages, 0, 1, dom, 0, 
L1_PROT);
+    }
+    return 0;
 }
 
 int do_xen_hypercall(int xc_handle, privcmd_hypercall_t *hypercall)
@@ -294,6 +299,12 @@ int xc_evtchn_unmask(int xce_handle, evt
     return 0;
 }
 
+/* Optionally flush file to disk and discard page cache */
+void discard_file_cache(int fd, int flush)
+{
+    if (flush)
+        fsync(fd);
+}
 /*
  * Local variables:
  * mode: C
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/xenctrl.h     Mon Jun 02 11:35:39 2008 +0900
@@ -955,6 +955,13 @@ int xc_assign_device(int xc_handle,
                      uint32_t domid,
                      uint32_t machine_bdf);
 
+int xc_get_device_group(int xc_handle,
+                     uint32_t domid,
+                     uint32_t machine_bdf,
+                     uint32_t max_sdevs,
+                     uint32_t *num_sdevs,
+                     uint32_t *sdev_array);
+
 int xc_test_assign_device(int xc_handle,
                           uint32_t domid,
                           uint32_t machine_bdf);
diff -r d2a239224cb2 -r f1508348ffab tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/libxc/xg_private.c  Mon Jun 02 11:35:39 2008 +0900
@@ -11,22 +11,6 @@
 #include <malloc.h>
 
 #include "xg_private.h"
-
-int lock_pages(void *addr, size_t len)
-{
-    int e = 0;
-#ifndef __sun__
-    e = mlock(addr, len);
-#endif
-    return (e);
-}
-
-void unlock_pages(void *addr, size_t len)
-{
-#ifndef __sun__
-    safe_munlock(addr, len);
-#endif
-}
 
 char *xc_read_image(const char *filename, unsigned long *size)
 {
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/lowlevel/xc/xc.c Mon Jun 02 11:35:39 2008 +0900
@@ -106,7 +106,7 @@ static PyObject *pyxc_domain_create(XcOb
     static char *kwd_list[] = { "domid", "ssidref", "handle", "flags", 
"target", NULL };
 
     if ( !PyArg_ParseTupleAndKeywords(args, kwds, "|iiOii", kwd_list,
-                                     &dom, &ssidref, &pyhandle, &flags, 
&target))
+                                      &dom, &ssidref, &pyhandle, &flags, 
&target))
         return NULL;
     if ( pyhandle != NULL )
     {
@@ -434,44 +434,44 @@ static PyObject *pyxc_linux_build(XcObje
     dom->vhpt_size_log2 = vhpt;
 
     if ( xc_dom_linux_build(self->xc_handle, dom, domid, mem_mb, image,
-                           ramdisk, flags, store_evtchn, &store_mfn,
-                           console_evtchn, &console_mfn) != 0 ) {
-       goto out;
+                            ramdisk, flags, store_evtchn, &store_mfn,
+                            console_evtchn, &console_mfn) != 0 ) {
+        goto out;
     }
 
     if ( !(elfnote_dict = PyDict_New()) )
-       goto out;
+        goto out;
     
     for ( i = 0; i < ARRAY_SIZE(dom->parms.elf_notes); i++ )
     {
-       switch ( dom->parms.elf_notes[i].type )
+        switch ( dom->parms.elf_notes[i].type )
         {
-       case XEN_ENT_NONE:
-           continue;
-       case XEN_ENT_LONG:
-           elfnote = Py_BuildValue("k", dom->parms.elf_notes[i].data.num);
-           break;
-       case XEN_ENT_STR:
-           elfnote = Py_BuildValue("s", dom->parms.elf_notes[i].data.str);
-           break;
-       }
-       PyDict_SetItemString(elfnote_dict,
-                            dom->parms.elf_notes[i].name,
-                            elfnote);
-       Py_DECREF(elfnote);
+        case XEN_ENT_NONE:
+            continue;
+        case XEN_ENT_LONG:
+            elfnote = Py_BuildValue("k", dom->parms.elf_notes[i].data.num);
+            break;
+        case XEN_ENT_STR:
+            elfnote = Py_BuildValue("s", dom->parms.elf_notes[i].data.str);
+            break;
+        }
+        PyDict_SetItemString(elfnote_dict,
+                             dom->parms.elf_notes[i].name,
+                             elfnote);
+        Py_DECREF(elfnote);
     }
 
     ret = Py_BuildValue("{s:i,s:i,s:N}",
-                       "store_mfn", store_mfn,
-                       "console_mfn", console_mfn,
-                       "notes", elfnote_dict);
+                        "store_mfn", store_mfn,
+                        "console_mfn", console_mfn,
+                        "notes", elfnote_dict);
 
     if ( dom->arch_hooks->native_protocol )
     {
-       PyObject *native_protocol =
-           Py_BuildValue("s", dom->arch_hooks->native_protocol);
-       PyDict_SetItemString(ret, "native_protocol", native_protocol);
-       Py_DECREF(native_protocol);
+        PyObject *native_protocol =
+            Py_BuildValue("s", dom->arch_hooks->native_protocol);
+        PyDict_SetItemString(ret, "native_protocol", native_protocol);
+        Py_DECREF(native_protocol);
     }
 
     xc_dom_release(dom);
@@ -556,7 +556,7 @@ static PyObject *pyxc_test_assign_device
 {
     uint32_t dom;
     char *pci_str;
-    uint32_t bdf = 0;
+    int32_t bdf = 0;
     int seg, bus, dev, func;
 
     static char *kwd_list[] = { "domid", "pci", NULL };
@@ -571,12 +571,141 @@ static PyObject *pyxc_test_assign_device
         bdf |= (func & 0x7) << 8;
 
         if ( xc_test_assign_device(self->xc_handle, dom, bdf) != 0 )
+        {
+            if (errno == ENOSYS)
+                bdf = -1;
             break;
-
+        }
         bdf = 0;
     }
 
     return Py_BuildValue("i", bdf);
+}
+
+static PyObject *pyxc_assign_device(XcObject *self,
+                                    PyObject *args,
+                                    PyObject *kwds)
+{
+    uint32_t dom;
+    char *pci_str;
+    int32_t bdf = 0;
+    int seg, bus, dev, func;
+
+    static char *kwd_list[] = { "domid", "pci", NULL };
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list,
+                                      &dom, &pci_str) )
+        return NULL;
+
+    while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) )
+    {
+        bdf |= (bus & 0xff) << 16;
+        bdf |= (dev & 0x1f) << 11;
+        bdf |= (func & 0x7) << 8;
+
+        if ( xc_assign_device(self->xc_handle, dom, bdf) != 0 )
+        {
+            if (errno == ENOSYS)
+                bdf = -1;
+            break;
+        }
+        bdf = 0;
+    }
+
+    return Py_BuildValue("i", bdf);
+}
+
+static PyObject *pyxc_deassign_device(XcObject *self,
+                                      PyObject *args,
+                                      PyObject *kwds)
+{
+    uint32_t dom;
+    char *pci_str;
+    int32_t bdf = 0;
+    int seg, bus, dev, func;
+
+    static char *kwd_list[] = { "domid", "pci", NULL };
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list,
+                                      &dom, &pci_str) )
+        return NULL;
+
+    while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) )
+    {
+        bdf |= (bus & 0xff) << 16;
+        bdf |= (dev & 0x1f) << 11;
+        bdf |= (func & 0x7) << 8;
+
+        if ( xc_deassign_device(self->xc_handle, dom, bdf) != 0 )
+        {
+            if (errno == ENOSYS)
+                bdf = -1;
+            break;
+        }
+        bdf = 0;
+    }
+
+    return Py_BuildValue("i", bdf);
+}
+
+static PyObject *pyxc_get_device_group(XcObject *self,
+                                         PyObject *args)
+{
+    domid_t domid;
+    uint32_t bdf = 0;
+    uint32_t max_sdevs, num_sdevs;
+    int seg, bus, dev, func, rc, i;
+    PyObject *Pystr;
+    char *group_str;
+    char dev_str[9];
+    uint32_t *sdev_array;
+
+    if ( !PyArg_ParseTuple(args, "iiiii", &domid, &seg, &bus, &dev, &func) )
+        return NULL;
+
+    /* Maximum allowed siblings device number per group */
+    max_sdevs = 1024;
+
+    if ( (sdev_array = malloc(max_sdevs * sizeof(*sdev_array))) == NULL )
+        return PyErr_NoMemory();
+    memset(sdev_array, 0, max_sdevs * sizeof(*sdev_array));
+
+    bdf |= (bus & 0xff) << 16;
+    bdf |= (dev & 0x1f) << 11;
+    bdf |= (func & 0x7) << 8;
+
+    rc = xc_get_device_group(self->xc_handle,
+        domid, bdf, max_sdevs, &num_sdevs, sdev_array);
+
+    if ( rc < 0 )
+    {
+      free(sdev_array); 
+      return pyxc_error_to_exception();
+    }
+
+    if ( !num_sdevs )
+    {
+       free(sdev_array);
+       return Py_BuildValue("s", "");
+    }
+
+    if ( (group_str = malloc(num_sdevs * sizeof(dev_str))) == NULL )
+        return PyErr_NoMemory();
+    memset(group_str, '\0', num_sdevs * sizeof(dev_str));
+
+    for ( i = 0; i < num_sdevs; i++ )
+    {
+        bus = (sdev_array[i] >> 16) & 0xff;
+        dev = (sdev_array[i] >> 11) & 0x1f;
+        func = (sdev_array[i] >> 8) & 0x7;
+        sprintf(dev_str, "%02x:%02x.%x,", bus, dev, func);
+        strcat(group_str, dev_str);
+    }
+
+    Pystr = Py_BuildValue("s", group_str);
+
+    free(sdev_array);
+    free(group_str);
+
+    return Pystr;
 }
 
 #ifdef __ia64__
@@ -729,8 +858,8 @@ static PyObject *pyxc_hvm_build(XcObject
     int memsize, vcpus = 1, acpi = 0, apic = 1;
 
     static char *kwd_list[] = { "domid",
-                               "memsize", "image", "vcpus", "acpi",
-                               "apic", NULL };
+                                "memsize", "image", "vcpus", "acpi",
+                                "apic", NULL };
     if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list,
                                       &dom, &memsize,
                                       &image, &vcpus, &acpi, &apic) )
@@ -782,8 +911,8 @@ static PyObject *pyxc_evtchn_alloc_unbou
 }
 
 static PyObject *pyxc_evtchn_reset(XcObject *self,
-                                  PyObject *args,
-                                  PyObject *kwds)
+                                   PyObject *args,
+                                   PyObject *kwds)
 {
     uint32_t dom;
 
@@ -947,11 +1076,11 @@ static PyObject *pyxc_physinfo(XcObject 
 
     for ( i = 0; i < info.nr_nodes; i++ )
     {
-       xc_availheap(self->xc_handle, 0, 0, i, &free_heap);
-       PyList_Append(node_to_memory_obj,
-           PyInt_FromLong(free_heap / 1024));
-    }
-       
+        xc_availheap(self->xc_handle, 0, 0, i, &free_heap);
+        PyList_Append(node_to_memory_obj,
+                      PyInt_FromLong(free_heap / 1024));
+    }
+
     PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
     PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj);
  
@@ -1517,6 +1646,17 @@ static PyMethodDef pyxc_methods[] = {
       " value   [long]:     Value of param.\n"
       "Returns: [int] 0 on success.\n" },
 
+    { "get_device_group",
+      (PyCFunction)pyxc_get_device_group,
+      METH_VARARGS, "\n"
      "get sibling devices information.\n"
+      " dom     [int]:      Domain to assign device to.\n"
+      " seg     [int]:      PCI segment.\n"
+      " bus     [int]:      PCI bus.\n"
+      " dev     [int]:      PCI dev.\n"
+      " func    [int]:      PCI func.\n"
+      "Returns: [string]:   Sibling devices \n" },
+
      { "test_assign_device",
        (PyCFunction)pyxc_test_assign_device,
        METH_VARARGS | METH_KEYWORDS, "\n"
@@ -1524,6 +1664,22 @@ static PyMethodDef pyxc_methods[] = {
        " dom     [int]:      Identifier of domain to build into.\n"
        " pci_str [str]:      PCI devices.\n"
        "Returns: [int] 0 on success, or device bdf that can't be assigned.\n" 
},
+
+     { "assign_device",
+       (PyCFunction)pyxc_assign_device,
+       METH_VARARGS | METH_KEYWORDS, "\n"
+       "Assign device to IOMMU domain.\n"
+       " dom     [int]:      Domain to assign device to.\n"
+       " pci_str [str]:      PCI devices.\n"
+       "Returns: [int] 0 on success, or device bdf that can't be assigned.\n" 
},
+
+     { "deassign_device",
+       (PyCFunction)pyxc_deassign_device,
+       METH_VARARGS | METH_KEYWORDS, "\n"
+       "Deassign device from IOMMU domain.\n"
+       " dom     [int]:      Domain to deassign device from.\n"
+       " pci_str [str]:      PCI devices.\n"
+       "Returns: [int] 0 on success, or device bdf that can't be deassigned.\n" },
   
     { "sched_id_get",
       (PyCFunction)pyxc_sched_id_get,
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/XendAPI.py  Mon Jun 02 11:35:39 2008 +0900
@@ -1759,12 +1759,12 @@ class XendAPI(object):
         xendom = XendDomain.instance()
         xeninfo = xendom.get_vm_by_uuid(vm_ref)
 
-        resource = other_config.get("resource", 0)
         port = other_config.get("port", 0)
-        node = other_config.get("node", 0)
+        node = other_config.get("node", -1)
+        ssl = other_config.get("ssl", None)
         
         xendom.domain_migrate(xeninfo.getDomid(), destination_url,
-                              bool(live), resource, port, node)
+                              bool(live), port, node, ssl)
         return xen_api_success_void()
 
     def VM_save(self, _, vm_ref, dest, checkpoint):
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/XendDomain.py       Mon Jun 02 11:35:39 2008 +0900
@@ -43,8 +43,8 @@ from xen.xend.XendConstants import DOM_S
 from xen.xend.XendConstants import DOM_STATE_HALTED, DOM_STATE_PAUSED
 from xen.xend.XendConstants import DOM_STATE_RUNNING, DOM_STATE_SUSPENDED
 from xen.xend.XendConstants import DOM_STATE_SHUTDOWN, DOM_STATE_UNKNOWN
-from xen.xend.XendConstants import DOM_STATE_CRASHED
-from xen.xend.XendConstants import TRIGGER_TYPE
+from xen.xend.XendConstants import DOM_STATE_CRASHED, HVM_PARAM_ACPI_S_STATE
+from xen.xend.XendConstants import TRIGGER_TYPE, TRIGGER_S3RESUME
 from xen.xend.XendDevices import XendDevices
 from xen.xend.XendAPIConstants import *
 
@@ -1258,22 +1258,24 @@ class XendDomain:
 
         return val       
 
-    def domain_migrate(self, domid, dst, live=False, port=0, node=-1):
+    def domain_migrate(self, domid, dst, live=False, port=0, node=-1, ssl=None):
         """Start domain migration.
         
         @param domid: Domain ID or Name
         @type domid: int or string.
         @param dst: Destination IP address
         @type dst: string
-        @keyword port: relocation port on destination
-        @type port: int        
         @keyword live: Live migration
         @type live: bool
+        @keyword port: relocation port on destination
+        @type port: int
+        @keyword node: use node number for target
+        @type node: int
+        @keyword ssl: use ssl connection
+        @type ssl: bool
         @rtype: None
-        @keyword node: use node number for target
-        @rtype: int 
         @raise XendError: Failed to migrate
-        @raise XendInvalidDomain: Domain is not valid        
+        @raise XendInvalidDomain: Domain is not valid
         """
 
         dominfo = self.domain_lookup_nr(domid)
@@ -1294,13 +1296,14 @@ class XendDomain:
             """ Make sure there's memory free for enabling shadow mode """
             dominfo.checkLiveMigrateMemory()
 
-        if port == 0:
-            port = xoptions.get_xend_relocation_port()
-
-        tls = xoptions.get_xend_relocation_tls()
-        if tls:
+        if ssl is None:
+            ssl = xoptions.get_xend_relocation_ssl()
+
+        if ssl:
             from OpenSSL import SSL
             from xen.web import connection
+            if port == 0:
+                port = xoptions.get_xend_relocation_ssl_port()
             try:
                 ctx = SSL.Context(SSL.SSLv23_METHOD)
                 sock = SSL.Connection(ctx,
@@ -1328,6 +1331,8 @@ class XendDomain:
             os.close(p2cread)
             os.close(p2cwrite)
         else:
+            if port == 0:
+                port = xoptions.get_xend_relocation_port()
             try:
                 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                 # When connecting to our ssl enabled relocation server using a
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py   Mon Jun 02 11:35:39 2008 +0900
@@ -2091,28 +2091,28 @@ class XendDomainInfo:
                         xc.vcpu_setaffinity(self.domid, v, 
self.info['cpus'][v])
             else:
                 def find_relaxed_node(node_list):
-                    import sys 
+                    import sys
+                    nr_nodes = info['nr_nodes']
                     if node_list is None:
-                        node_list = range(0, info['nr_nodes'])
+                        node_list = range(0, nr_nodes)
                     nodeload = [0]
-                    nodeload = nodeload * info['nr_nodes']
+                    nodeload = nodeload * nr_nodes
                     from xen.xend import XendDomain
                     doms = XendDomain.instance().list('all')
-                    for dom in doms:
+                    for dom in filter (lambda d: d.domid != self.domid, doms):
                         cpuinfo = dom.getVCPUInfo()
                         for vcpu in sxp.children(cpuinfo, 'vcpu'):
-                            def vinfo(n, t):
-                                return t(sxp.child_value(vcpu, n))
-                            cpumap = vinfo('cpumap', list)
-                            for i in node_list:
+                            if sxp.child_value(vcpu, 'online') == 0: continue
+                            cpumap = list(sxp.child_value(vcpu,'cpumap'))
+                            for i in range(0, nr_nodes):
                                 node_cpumask = info['node_to_cpu'][i]
                                 for j in node_cpumask:
                                     if j in cpumap:
                                         nodeload[i] += 1
                                         break
-                    for i in node_list:
-                        if len(info['node_to_cpu'][i]) > 0:
-                            nodeload[i] = int(nodeload[i] / 
len(info['node_to_cpu'][i]))
+                    for i in range(0, nr_nodes):
+                        if len(info['node_to_cpu'][i]) > 0 and i in node_list:
+                            nodeload[i] = int(nodeload[i] * 16 / 
len(info['node_to_cpu'][i]))
                         else:
                             nodeload[i] = sys.maxint
                     index = nodeload.index( min(nodeload) )    
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/XendOptions.py
--- a/tools/python/xen/xend/XendOptions.py      Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/XendOptions.py      Mon Jun 02 11:35:39 2008 +0900
@@ -72,6 +72,9 @@ class XendOptions:
     """Default for the flag indicating whether xend should run a relocation 
server."""
     xend_relocation_server_default = 'no'
 
+    """Default for the flag indicating whether xend should run a ssl 
relocation server."""
+    xend_relocation_ssl_server_default = 'no'
+
     """Default interface address the xend relocation server listens at. """
     xend_relocation_address_default = ''
 
@@ -80,6 +83,9 @@ class XendOptions:
 
     """Default port xend serves relocation at. """
     xend_relocation_port_default = 8002
+
+    """Default port xend serves ssl relocation at. """
+    xend_relocation_ssl_port_default = 8003
 
     xend_relocation_hosts_allow_default = ''
 
@@ -192,6 +198,12 @@ class XendOptions:
         return self.get_config_bool("xend-relocation-server",
                                     self.xend_relocation_server_default)
 
+    def get_xend_relocation_ssl_server(self):
+        """Get the flag indicating whether xend should run a ssl relocation 
server.
+        """
+        return self.get_config_bool("xend-relocation-ssl-server",
+                                    self.xend_relocation_ssl_server_default)
+
     def get_xend_relocation_server_ssl_key_file(self):
         return self.get_config_string("xend-relocation-server-ssl-key-file")
 
@@ -209,10 +221,17 @@ class XendOptions:
         return self.get_config_int('xend-relocation-port',
                                    self.xend_relocation_port_default)
 
-    def get_xend_relocation_tls(self):
-        """Whether to use tls when relocating.
-        """
-        return self.get_config_bool('xend-relocation-tls', 'no')
+    def get_xend_relocation_ssl_port(self):
+        """Get the port xend listens at for ssl connection to its relocation
+        server.
+        """
+        return self.get_config_int('xend-relocation-ssl-port',
+                                   self.xend_relocation_ssl_port_default)
+
+    def get_xend_relocation_ssl(self):
+        """Whether to use ssl when relocating.
+        """
+        return self.get_config_bool('xend-relocation-ssl', 'no')
 
     def get_xend_relocation_hosts_allow(self):
         return self.get_config_string("xend-relocation-hosts-allow",
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/server/SrvDomain.py Mon Jun 02 11:35:39 2008 +0900
@@ -115,7 +115,9 @@ class SrvDomain(SrvDir):
                     [['dom',         'int'],
                      ['destination', 'str'],
                      ['live',        'int'],
-                     ['port',        'int']])
+                     ['port',        'int'],
+                     ['node',        'int'],
+                     ['ssl',         'int']])
         return fn(req.args, {'dom': self.dom.domid})
 
     def op_pincpu(self, _, req):
@@ -215,6 +217,11 @@ class SrvDomain(SrvDir):
 
     def op_vcpuinfo(self, _1, req):
         return self.call(self.dom.getVCPUInfo, [], req)
+
+
+    def op_reset(self, _, req):
+        self.acceptCommand(req)
+        return self.xd.domain_reset(self.dom.getName())
 
 
     def render_POST(self, req):
@@ -257,6 +264,10 @@ class SrvDomain(SrvDir):
         req.write('</form>')
 
         req.write('<form method="post" action="%s">' % url)
+        req.write('<input type="submit" name="op" value="reset">')
+        req.write('</form>')
+
+        req.write('<form method="post" action="%s">' % url)
         req.write('<input type="submit" name="op" value="shutdown">')
         req.write('<input type="radio" name="reason" value="poweroff" 
checked>Poweroff')
         req.write('<input type="radio" name="reason" value="halt">Halt')
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/server/pciif.py     Mon Jun 02 11:35:39 2008 +0900
@@ -226,6 +226,39 @@ class PciController(DevController):
 
         return sxpr    
 
+    def CheckSiblingDevices(self, domid, dev):
+        """ Check if all sibling devices of dev are owned by pciback
+        """
+        if not self.vm.info.is_hvm():
+            return
+
+        group_str = xc.get_device_group(domid, dev.domain, dev.bus, dev.slot, 
dev.func)
+        if group_str == "":
+            return
+
+        #group string format xx:xx.x,xx:xx.x,
+        devstr_len = group_str.find(',')
+        for i in range(0, len(group_str), devstr_len + 1):
+            (bus, slotfunc) = group_str[i:i + devstr_len].split(':')
+            (slot, func) = slotfunc.split('.')
+            b = parse_hex(bus)
+            d = parse_hex(slot)
+            f = parse_hex(func)
+            try:
+                sdev = PciDevice(dev.domain, b, d, f)
+            except Exception, e:
+                #no dom0 drivers bound to sdev
+                continue
+
+            if sdev.driver!='pciback':
+                raise VmError(("pci: PCI Backend does not own\n "+ \
+                    "sibling device %s of device %s\n"+ \
+                    "See the pciback.hide kernel "+ \
+                    "command-line parameter or\n"+ \
+                    "bind your slot/device to the PCI backend using sysfs" \
+                    )%(sdev.name, dev.name))
+        return
+
     def setupOneDevice(self, domain, bus, slot, func):
         """ Attach I/O resources for device to frontend domain
         """
@@ -245,8 +278,19 @@ class PciController(DevController):
                     "bind your slot/device to the PCI backend using sysfs" \
                     )%(dev.name))
 
+        self.CheckSiblingDevices(fe_domid, dev)
+
         PCIQuirk(dev.vendor, dev.device, dev.subvendor, dev.subdevice, domain, 
                 bus, slot, func)
+
+        if not self.vm.info.is_hvm():
+            # Setup IOMMU device assignment
+            pci_str = "0x%x, 0x%x, 0x%x, 0x%x" % (domain, bus, slot, func)
+            bdf = xc.assign_device(fe_domid, pci_str)
+            if bdf > 0:
+                raise VmError("Failed to assign device to IOMMU (%x:%x.%x)"
+                              % (bus, slot, func))
+            log.debug("pci: assign device %x:%x.%x" % (bus, slot, func))
 
         for (start, size) in dev.ioports:
             log.debug('pci: enabling ioport 0x%x/0x%x'%(start,size))
@@ -329,6 +373,14 @@ class PciController(DevController):
                     "command-line parameter or\n"+ \
                     "bind your slot/device to the PCI backend using sysfs" \
                     )%(dev.name))
+
+        if not self.vm.info.is_hvm():
+            pci_str = "0x%x, 0x%x, 0x%x, 0x%x" % (domain, bus, slot, func)
+            bdf = xc.deassign_device(fe_domid, pci_str)
+            if bdf > 0:
+                raise VmError("Failed to deassign device from IOMMU (%x:%x.%x)"
+                              % (bus, slot, func))
+            log.debug("pci: deassign device %x:%x.%x" % (bus, slot, func))
 
         for (start, size) in dev.ioports:
             log.debug('pci: disabling ioport 0x%x/0x%x'%(start,size))
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py  Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xend/server/relocate.py  Mon Jun 02 11:35:39 2008 +0900
@@ -142,16 +142,22 @@ def listenRelocation():
     if xoptions.get_xend_unix_server():
         path = '/var/lib/xend/relocation-socket'
         unix.UnixListener(path, RelocationProtocol)
+
+    interface = xoptions.get_xend_relocation_address()
+
+    hosts_allow = xoptions.get_xend_relocation_hosts_allow()
+    if hosts_allow == '':
+        hosts_allow = None
+    else:
+        hosts_allow = map(re.compile, hosts_allow.split(" "))
+
     if xoptions.get_xend_relocation_server():
         port = xoptions.get_xend_relocation_port()
-        interface = xoptions.get_xend_relocation_address()
+        tcp.TCPListener(RelocationProtocol, port, interface = interface,
+                        hosts_allow = hosts_allow)
 
-        hosts_allow = xoptions.get_xend_relocation_hosts_allow()
-        if hosts_allow == '':
-            hosts_allow = None
-        else:
-            hosts_allow = map(re.compile, hosts_allow.split(" "))
-
+    if xoptions.get_xend_relocation_ssl_server():
+        port = xoptions.get_xend_relocation_ssl_port()
         ssl_key_file = xoptions.get_xend_relocation_server_ssl_key_file()
         ssl_cert_file = xoptions.get_xend_relocation_server_ssl_cert_file()
 
@@ -161,5 +167,5 @@ def listenRelocation():
                                ssl_key_file = ssl_key_file,
                                ssl_cert_file = ssl_cert_file)
         else:
-            tcp.TCPListener(RelocationProtocol, port, interface = interface,
-                            hosts_allow = hosts_allow)
+            raise XendError("ssl_key_file or ssl_cert_file for ssl relocation server is missing.")
+
diff -r d2a239224cb2 -r f1508348ffab tools/python/xen/xm/migrate.py
--- a/tools/python/xen/xm/migrate.py    Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/python/xen/xm/migrate.py    Mon Jun 02 11:35:39 2008 +0900
@@ -47,6 +47,10 @@ gopts.opt('node', short='n', val='nodenu
           fn=set_int, default=-1,
           use="Use specified NUMA node on target.")
 
+gopts.opt('ssl', short='s',
+          fn=set_true, default=None,
+          use="Use ssl connection for migration.")
+
 def help():
     return str(gopts)
     
@@ -65,11 +69,13 @@ def main(argv):
         vm_ref = get_single_vm(dom)
         other_config = {
             "port":     opts.vals.port,
-            "node":     opts.vals.node
+            "node":     opts.vals.node,
+            "ssl":      opts.vals.ssl
             }
         server.xenapi.VM.migrate(vm_ref, dst, bool(opts.vals.live),
                                  other_config)
     else:
         server.xend.domain.migrate(dom, dst, opts.vals.live,
                                    opts.vals.port,
-                                   opts.vals.node)
+                                   opts.vals.node,
+                                   opts.vals.ssl)
diff -r d2a239224cb2 -r f1508348ffab tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/tools/xenstat/libxenstat/src/xenstat.c    Mon Jun 02 11:35:39 2008 +0900
@@ -655,12 +655,20 @@ unsigned long long xenstat_vbd_wr_reqs(x
 
 static char *xenstat_get_domain_name(xenstat_handle *handle, unsigned int 
domain_id)
 {
-       char path[80];
-
-       snprintf(path, sizeof(path),"/local/domain/%i/name", domain_id);
-       
+       char path[80], *vmpath;
+
+       snprintf(path, sizeof(path),"/local/domain/%i/vm", domain_id);
+
+       vmpath = xs_read(handle->xshandle, XBT_NULL, path, NULL);
+
+       if (vmpath == NULL)
+               return NULL;
+
+       snprintf(path, sizeof(path),"%s/name", vmpath);
+       free(vmpath);
+
        return xs_read(handle->xshandle, XBT_NULL, path, NULL);
-}      
+}
 
 /* Remove specified entry from list of domains */
 static void xenstat_prune_domain(xenstat_node *node, unsigned int entry)
diff -r d2a239224cb2 -r f1508348ffab 
unmodified_drivers/linux-2.6/platform-pci/evtchn.c
--- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c        Mon Jun 02 
11:35:02 2008 +0900
+++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c        Mon Jun 02 
11:35:39 2008 +0900
@@ -284,7 +284,7 @@ static irqreturn_t evtchn_interrupt(int 
 
 #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
        /* Clear master flag /before/ clearing selector flag. */
-       rmb();
+       wmb();
 #endif
        l1 = xchg(&v->evtchn_pending_sel, 0);
 
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/acpi/cpu_idle.c      Mon Jun 02 11:35:39 2008 +0900
@@ -173,6 +173,8 @@ static inline u32 ticks_elapsed(u32 t1, 
 {
     if ( t2 >= t1 )
         return (t2 - t1);
+    else if ( !(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) )
+        return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
     else
         return ((0xFFFFFFFF - t1) + t2);
 }
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/acpi/power.c Mon Jun 02 11:35:39 2008 +0900
@@ -238,9 +238,17 @@ static void tboot_sleep(u8 sleep_state)
 static void tboot_sleep(u8 sleep_state)
 {
    uint32_t shutdown_type;
-   
-   *((struct acpi_sleep_info *)(unsigned long)g_tboot_shared->acpi_sinfo) =
-       acpi_sinfo;
+
+   g_tboot_shared->acpi_sinfo.pm1a_cnt =
+                           (uint16_t)acpi_sinfo.pm1a_cnt_blk.address;
+   g_tboot_shared->acpi_sinfo.pm1b_cnt =
+                           (uint16_t)acpi_sinfo.pm1b_cnt_blk.address;
+   g_tboot_shared->acpi_sinfo.pm1a_evt =
+                           (uint16_t)acpi_sinfo.pm1a_evt_blk.address;
+   g_tboot_shared->acpi_sinfo.pm1b_evt =
+                           (uint16_t)acpi_sinfo.pm1b_evt_blk.address;
+   g_tboot_shared->acpi_sinfo.pm1a_cnt_val = acpi_sinfo.pm1a_cnt_val;
+   g_tboot_shared->acpi_sinfo.pm1b_cnt_val = acpi_sinfo.pm1b_cnt_val;
 
    switch ( sleep_state )
    {
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/cpu/amd.c    Mon Jun 02 11:35:39 2008 +0900
@@ -74,9 +74,11 @@ static void disable_c1_ramping(void)
 static void disable_c1_ramping(void) 
 {
        u8 pmm7;
-       int node;
-
-       for (node=0; node < NR_CPUS; node++) {
+       int node, nr_nodes;
+
+       /* Read the number of nodes from the first Northbridge. */
+       nr_nodes = ((pci_conf_read32(0, 0x18, 0x0, 0x60)>>4)&0x07)+1;
+       for (node = 0; node < nr_nodes; node++) {
                /* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */
                pmm7 = pci_conf_read8(0, 0x18+node, 0x3, 0x87);
                /* Invalid read means we've updated every Northbridge. */
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/crash.c
--- a/xen/arch/x86/crash.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/crash.c      Mon Jun 02 11:35:39 2008 +0900
@@ -102,6 +102,7 @@ void machine_crash_shutdown(void)
     hvm_cpu_down();
 
     info = kexec_crash_save_info();
+    info->xen_phys_start = xen_phys_start;
     info->dom0_pfn_to_mfn_frame_list_list =
         arch_get_pfn_to_mfn_frame_list_list(dom0);
 }
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/domain.c     Mon Jun 02 11:35:39 2008 +0900
@@ -59,8 +59,6 @@ static void default_idle(void);
 static void default_idle(void);
 void (*pm_idle) (void) = default_idle;
 
-static void unmap_vcpu_info(struct vcpu *v);
-
 static void paravirt_ctxt_switch_from(struct vcpu *v);
 static void paravirt_ctxt_switch_to(struct vcpu *v);
 
@@ -432,8 +430,6 @@ void vcpu_destroy(struct vcpu *v)
 {
     if ( is_pv_32on64_vcpu(v) )
         release_compat_l4(v);
-
-    unmap_vcpu_info(v);
 
     if ( is_hvm_vcpu(v) )
         hvm_vcpu_destroy(v);
@@ -825,8 +821,15 @@ int arch_set_info_guest(
 
 void arch_vcpu_reset(struct vcpu *v)
 {
-    destroy_gdt(v);
-    vcpu_destroy_pagetables(v);
+    if ( !is_hvm_vcpu(v) )
+    {
+        destroy_gdt(v);
+        vcpu_destroy_pagetables(v);
+    }
+    else
+    {
+        vcpu_end_shutdown_deferral(v);
+    }
 }
 
 /* 
@@ -1857,16 +1860,19 @@ int domain_relinquish_resources(struct d
         /* Tear down paging-assistance stuff. */
         paging_teardown(d);
 
-        /* Drop the in-use references to page-table bases. */
         for_each_vcpu ( d, v )
+        {
+            /* Drop the in-use references to page-table bases. */
             vcpu_destroy_pagetables(v);
 
-        /*
-         * Relinquish GDT mappings. No need for explicit unmapping of the LDT
-         * as it automatically gets squashed when the guest's mappings go away.
-         */
-        for_each_vcpu(d, v)
+            /*
+             * Relinquish GDT mappings. No need for explicit unmapping of the
+             * LDT as it automatically gets squashed with the guest mappings.
+             */
             destroy_gdt(v);
+
+            unmap_vcpu_info(v);
+        }
 
         d->arch.relmem = RELMEM_xen_l4;
         /* fallthrough */
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/domctl.c     Mon Jun 02 11:35:39 2008 +0900
@@ -526,14 +526,54 @@ long arch_do_domctl(
     }
     break;
 
+    case XEN_DOMCTL_get_device_group:
+    {
+        struct domain *d;
+        u32 max_sdevs;
+        u8 bus, devfn;
+        XEN_GUEST_HANDLE_64(uint32) sdevs;
+        int num_sdevs;
+
+        ret = -ENOSYS;
+        if ( !iommu_enabled )
+            break;
+
+        ret = -EINVAL;
+        if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
+            break;
+
+        bus = (domctl->u.get_device_group.machine_bdf >> 16) & 0xff;
+        devfn = (domctl->u.get_device_group.machine_bdf >> 8) & 0xff;
+        max_sdevs = domctl->u.get_device_group.max_sdevs;
+        sdevs = domctl->u.get_device_group.sdev_array;
+
+        num_sdevs = iommu_get_device_group(d, bus, devfn, sdevs, max_sdevs);
+        if ( num_sdevs < 0 )
+        {
+            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
+            ret = -EFAULT;
+            domctl->u.get_device_group.num_sdevs = 0;
+        }
+        else
+        {
+            ret = 0;
+            domctl->u.get_device_group.num_sdevs = num_sdevs;
+        }
+        if ( copy_to_guest(u_domctl, domctl, 1) )
+            ret = -EFAULT;
+        rcu_unlock_domain(d);
+    }
+    break;
+
     case XEN_DOMCTL_test_assign_device:
     {
         u8 bus, devfn;
 
-        ret = -EINVAL;
+        ret = -ENOSYS;
         if ( !iommu_enabled )
             break;
 
+        ret = -EINVAL;
         bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
         devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
 
@@ -553,10 +593,11 @@ long arch_do_domctl(
         struct domain *d;
         u8 bus, devfn;
 
-        ret = -EINVAL;
+        ret = -ENOSYS;
         if ( !iommu_enabled )
             break;
 
+        ret = -EINVAL;
         if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) )
         {
             gdprintk(XENLOG_ERR,
@@ -565,6 +606,12 @@ long arch_do_domctl(
         }
         bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
         devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
+
+        if ( !iommu_pv_enabled && !is_hvm_domain(d) )
+        {
+            ret = -ENOSYS;
+            break;
+        }
 
         if ( device_assigned(bus, devfn) )
         {
@@ -576,7 +623,7 @@ long arch_do_domctl(
 
         ret = assign_device(d, bus, devfn);
         gdprintk(XENLOG_INFO, "XEN_DOMCTL_assign_device: bdf = %x:%x:%x\n",
-            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
         put_domain(d);
     }
     break;
@@ -586,10 +633,11 @@ long arch_do_domctl(
         struct domain *d;
         u8 bus, devfn;
 
-        ret = -EINVAL;
+        ret = -ENOSYS;
         if ( !iommu_enabled )
             break;
 
+        ret = -EINVAL;
         if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) )
         {
             gdprintk(XENLOG_ERR,
@@ -599,9 +647,16 @@ long arch_do_domctl(
         bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
         devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
 
+        if ( !iommu_pv_enabled && !is_hvm_domain(d) )
+        {
+            ret = -ENOSYS;
+            break;
+        }
+
         if ( !device_assigned(bus, devfn) )
             break;
 
+        ret = 0;
         deassign_device(d, bus, devfn);
         gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/hpet.c   Mon Jun 02 11:35:39 2008 +0900
@@ -29,9 +29,9 @@
 #define S_TO_NS  1000000000ULL           /* 1s  = 10^9  ns */
 #define S_TO_FS  1000000000000000ULL     /* 1s  = 10^15 fs */
 
-/* Frequency_of_TSC / frequency_of_HPET = 32 */
-#define TSC_PER_HPET_TICK 32
-#define guest_time_hpet(v) (hvm_get_guest_time(v) / TSC_PER_HPET_TICK)
+/* Frequency_of_Xen_system_time / frequency_of_HPET = 16 */
+#define STIME_PER_HPET_TICK 16
+#define guest_time_hpet(v) (hvm_get_guest_time(v) / STIME_PER_HPET_TICK)
 
 #define HPET_ID         0x000
 #define HPET_PERIOD     0x004
@@ -192,7 +192,7 @@ static void hpet_stop_timer(HPETState *h
 
 /* the number of HPET tick that stands for
  * 1/(2^10) second, namely, 0.9765625 milliseconds */
-#define  HPET_TINY_TIME_SPAN  ((h->tsc_freq >> 10) / TSC_PER_HPET_TICK)
+#define  HPET_TINY_TIME_SPAN  ((h->stime_freq >> 10) / STIME_PER_HPET_TICK)
 
 static void hpet_set_timer(HPETState *h, unsigned int tn)
 {
@@ -558,17 +558,17 @@ void hpet_init(struct vcpu *v)
     spin_lock_init(&h->lock);
 
     h->vcpu = v;
-    h->tsc_freq = ticks_per_sec(v);
-
-    h->hpet_to_ns_scale = ((S_TO_NS * TSC_PER_HPET_TICK) << 10) / h->tsc_freq;
+    h->stime_freq = S_TO_NS;
+
+    h->hpet_to_ns_scale = ((S_TO_NS * STIME_PER_HPET_TICK) << 10) / h->stime_freq;
     h->hpet_to_ns_limit = ~0ULL / h->hpet_to_ns_scale;
 
     /* 64-bit main counter; 3 timers supported; LegacyReplacementRoute. */
     h->hpet.capability = 0x8086A201ULL;
 
     /* This is the number of femptoseconds per HPET tick. */
-    /* Here we define HPET's frequency to be 1/32 of the TSC's */
-    h->hpet.capability |= ((S_TO_FS*TSC_PER_HPET_TICK/h->tsc_freq) << 32);
+    /* Here we define HPET's frequency to be 1/16 of Xen system time */
+    h->hpet.capability |= ((S_TO_FS*STIME_PER_HPET_TICK/h->stime_freq) << 32);
 
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
     {
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/hvm.c    Mon Jun 02 11:35:39 2008 +0900
@@ -296,6 +296,8 @@ int hvm_domain_initialise(struct domain 
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
     spin_lock_init(&d->arch.hvm_domain.uc_lock);
 
+    hvm_init_guest_time(d);
+
     d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
 
     hvm_init_cacheattr_region_list(d);
@@ -661,7 +663,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
         hpet_init(v);
  
         /* Init guest TSC to start from zero. */
-        hvm_set_guest_time(v, 0);
+        hvm_set_guest_tsc(v, 0);
 
         /* Can start up without SIPI-SIPI or setvcpucontext domctl. */
         v->is_initialised = 1;
@@ -1098,16 +1100,17 @@ int hvm_virtual_to_linear_addr(
     return 0;
 }
 
-static void *hvm_map(unsigned long va, int size)
+static void *hvm_map_entry(unsigned long va)
 {
     unsigned long gfn, mfn;
     p2m_type_t p2mt;
     uint32_t pfec;
 
-    if ( ((va & ~PAGE_MASK) + size) > PAGE_SIZE )
-    {
-        hvm_inject_exception(TRAP_page_fault, PFEC_write_access,
-                             (va + PAGE_SIZE - 1) & PAGE_MASK);
+    if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE )
+    {
+        gdprintk(XENLOG_ERR, "Descriptor table entry "
+                 "straddles page boundary\n");
+        domain_crash(current->domain);
         return NULL;
     }
 
@@ -1119,7 +1122,8 @@ static void *hvm_map(unsigned long va, i
     mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
     if ( !p2m_is_ram(p2mt) )
     {
-        hvm_inject_exception(TRAP_page_fault, pfec, va);
+        gdprintk(XENLOG_ERR, "Failed to look up descriptor table entry\n");
+        domain_crash(current->domain);
         return NULL;
     }
 
@@ -1130,7 +1134,7 @@ static void *hvm_map(unsigned long va, i
     return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK);
 }
 
-static void hvm_unmap(void *p)
+static void hvm_unmap_entry(void *p)
 {
     if ( p )
         unmap_domain_page(p);
@@ -1166,7 +1170,7 @@ static int hvm_load_segment_selector(
     if ( ((sel & 0xfff8) + 7) > desctab.limit )
         goto fail;
 
-    pdesc = hvm_map(desctab.base + (sel & 0xfff8), 8);
+    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8));
     if ( pdesc == NULL )
         goto hvm_map_fail;
 
@@ -1226,7 +1230,7 @@ static int hvm_load_segment_selector(
     desc.b |= 0x100;
 
  skip_accessed_flag:
-    hvm_unmap(pdesc);
+    hvm_unmap_entry(pdesc);
 
     segr.base = (((desc.b <<  0) & 0xff000000u) |
                  ((desc.b << 16) & 0x00ff0000u) |
@@ -1242,7 +1246,7 @@ static int hvm_load_segment_selector(
     return 0;
 
  unmap_and_fail:
-    hvm_unmap(pdesc);
+    hvm_unmap_entry(pdesc);
  fail:
     hvm_inject_exception(fault_type, sel & 0xfffc, 0);
  hvm_map_fail:
@@ -1258,7 +1262,7 @@ void hvm_task_switch(
     struct segment_register gdt, tr, prev_tr, segr;
     struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
     unsigned long eflags;
-    int exn_raised;
+    int exn_raised, rc;
     struct {
         u16 back_link,__blh;
         u32 esp0;
@@ -1270,7 +1274,7 @@ void hvm_task_switch(
         u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
         u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
         u16 trace, iomap;
-    } *ptss, tss;
+    } tss = { 0 };
 
     hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
     hvm_get_segment_register(v, x86_seg_tr, &prev_tr);
@@ -1283,11 +1287,11 @@ void hvm_task_switch(
         goto out;
     }
 
-    optss_desc = hvm_map(gdt.base + (prev_tr.sel & 0xfff8), 8);
+    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8));
     if ( optss_desc == NULL )
         goto out;
 
-    nptss_desc = hvm_map(gdt.base + (tss_sel & 0xfff8), 8);
+    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8));
     if ( nptss_desc == NULL )
         goto out;
 
@@ -1322,84 +1326,89 @@ void hvm_task_switch(
         goto out;
     }
 
-    ptss = hvm_map(prev_tr.base, sizeof(tss));
-    if ( ptss == NULL )
+    rc = hvm_copy_from_guest_virt(
+        &tss, prev_tr.base, sizeof(tss), PFEC_page_present);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
         goto out;
 
     eflags = regs->eflags;
     if ( taskswitch_reason == TSW_iret )
         eflags &= ~X86_EFLAGS_NT;
 
-    ptss->cr3    = v->arch.hvm_vcpu.guest_cr[3];
-    ptss->eip    = regs->eip;
-    ptss->eflags = eflags;
-    ptss->eax    = regs->eax;
-    ptss->ecx    = regs->ecx;
-    ptss->edx    = regs->edx;
-    ptss->ebx    = regs->ebx;
-    ptss->esp    = regs->esp;
-    ptss->ebp    = regs->ebp;
-    ptss->esi    = regs->esi;
-    ptss->edi    = regs->edi;
+    tss.cr3    = v->arch.hvm_vcpu.guest_cr[3];
+    tss.eip    = regs->eip;
+    tss.eflags = eflags;
+    tss.eax    = regs->eax;
+    tss.ecx    = regs->ecx;
+    tss.edx    = regs->edx;
+    tss.ebx    = regs->ebx;
+    tss.esp    = regs->esp;
+    tss.ebp    = regs->ebp;
+    tss.esi    = regs->esi;
+    tss.edi    = regs->edi;
 
     hvm_get_segment_register(v, x86_seg_es, &segr);
-    ptss->es = segr.sel;
+    tss.es = segr.sel;
     hvm_get_segment_register(v, x86_seg_cs, &segr);
-    ptss->cs = segr.sel;
+    tss.cs = segr.sel;
     hvm_get_segment_register(v, x86_seg_ss, &segr);
-    ptss->ss = segr.sel;
+    tss.ss = segr.sel;
     hvm_get_segment_register(v, x86_seg_ds, &segr);
-    ptss->ds = segr.sel;
+    tss.ds = segr.sel;
     hvm_get_segment_register(v, x86_seg_fs, &segr);
-    ptss->fs = segr.sel;
+    tss.fs = segr.sel;
     hvm_get_segment_register(v, x86_seg_gs, &segr);
-    ptss->gs = segr.sel;
+    tss.gs = segr.sel;
     hvm_get_segment_register(v, x86_seg_ldtr, &segr);
-    ptss->ldt = segr.sel;
-
-    hvm_unmap(ptss);
-
-    ptss = hvm_map(tr.base, sizeof(tss));
-    if ( ptss == NULL )
+    tss.ldt = segr.sel;
+
+    rc = hvm_copy_to_guest_virt(
+        prev_tr.base, &tss, sizeof(tss), PFEC_page_present);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
         goto out;
 
-    if ( hvm_set_cr3(ptss->cr3) )
-    {
-        hvm_unmap(ptss);
+    rc = hvm_copy_from_guest_virt(
+        &tss, tr.base, sizeof(tss), PFEC_page_present);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
         goto out;
-    }
-
-    regs->eip    = ptss->eip;
-    regs->eflags = ptss->eflags | 2;
-    regs->eax    = ptss->eax;
-    regs->ecx    = ptss->ecx;
-    regs->edx    = ptss->edx;
-    regs->ebx    = ptss->ebx;
-    regs->esp    = ptss->esp;
-    regs->ebp    = ptss->ebp;
-    regs->esi    = ptss->esi;
-    regs->edi    = ptss->edi;
+
+    if ( hvm_set_cr3(tss.cr3) )
+        goto out;
+
+    regs->eip    = tss.eip;
+    regs->eflags = tss.eflags | 2;
+    regs->eax    = tss.eax;
+    regs->ecx    = tss.ecx;
+    regs->edx    = tss.edx;
+    regs->ebx    = tss.ebx;
+    regs->esp    = tss.esp;
+    regs->ebp    = tss.ebp;
+    regs->esi    = tss.esi;
+    regs->edi    = tss.edi;
 
     if ( (taskswitch_reason == TSW_call_or_int) )
     {
         regs->eflags |= X86_EFLAGS_NT;
-        ptss->back_link = prev_tr.sel;
+        tss.back_link = prev_tr.sel;
     }
 
     exn_raised = 0;
-    if ( hvm_load_segment_selector(v, x86_seg_es, ptss->es) ||
-         hvm_load_segment_selector(v, x86_seg_cs, ptss->cs) ||
-         hvm_load_segment_selector(v, x86_seg_ss, ptss->ss) ||
-         hvm_load_segment_selector(v, x86_seg_ds, ptss->ds) ||
-         hvm_load_segment_selector(v, x86_seg_fs, ptss->fs) ||
-         hvm_load_segment_selector(v, x86_seg_gs, ptss->gs) ||
-         hvm_load_segment_selector(v, x86_seg_ldtr, ptss->ldt) )
+    if ( hvm_load_segment_selector(v, x86_seg_es, tss.es) ||
+         hvm_load_segment_selector(v, x86_seg_cs, tss.cs) ||
+         hvm_load_segment_selector(v, x86_seg_ss, tss.ss) ||
+         hvm_load_segment_selector(v, x86_seg_ds, tss.ds) ||
+         hvm_load_segment_selector(v, x86_seg_fs, tss.fs) ||
+         hvm_load_segment_selector(v, x86_seg_gs, tss.gs) ||
+         hvm_load_segment_selector(v, x86_seg_ldtr, tss.ldt) )
         exn_raised = 1;
 
-    if ( (ptss->trace & 1) && !exn_raised )
+    rc = hvm_copy_to_guest_virt(
+        tr.base, &tss, sizeof(tss), PFEC_page_present);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        exn_raised = 1;
+
+    if ( (tss.trace & 1) && !exn_raised )
         hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
-
-    hvm_unmap(ptss);
 
     tr.attr.fields.type = 0xb; /* busy 32-bit tss */
     hvm_set_segment_register(v, x86_seg_tr, &tr);
@@ -1428,8 +1437,8 @@ void hvm_task_switch(
     }
 
  out:
-    hvm_unmap(optss_desc);
-    hvm_unmap(nptss_desc);
+    hvm_unmap_entry(optss_desc);
+    hvm_unmap_entry(nptss_desc);
 }
 
 #define HVMCOPY_from_guest (0u<<0)
@@ -1632,7 +1641,7 @@ int hvm_msr_read_intercept(struct cpu_us
     switch ( ecx )
     {
     case MSR_IA32_TSC:
-        msr_content = hvm_get_guest_time(v);
+        msr_content = hvm_get_guest_tsc(v);
         break;
 
     case MSR_IA32_APICBASE:
@@ -1725,7 +1734,7 @@ int hvm_msr_write_intercept(struct cpu_u
     switch ( ecx )
     {
      case MSR_IA32_TSC:
-        hvm_set_guest_time(v, msr_content);
+        hvm_set_guest_tsc(v, msr_content);
         pt_reset(v);
         break;
 
@@ -2071,6 +2080,13 @@ void hvm_vcpu_reset_state(struct vcpu *v
     if ( v->is_initialised )
         goto out;
 
+    if ( !paging_mode_hap(d) )
+    {
+        if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
+            put_page(pagetable_get_page(v->arch.guest_table));
+        v->arch.guest_table = pagetable_null();
+    }
+
     ctxt = &v->arch.guest_context;
     memset(ctxt, 0, sizeof(*ctxt));
     ctxt->flags = VGCF_online;
@@ -2122,6 +2138,8 @@ void hvm_vcpu_reset_state(struct vcpu *v
     v->arch.hvm_vcpu.cache_tsc_offset =
         v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
     hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+
+    paging_update_paging_modes(v);
 
     v->arch.flags |= TF_kernel_mode;
     v->is_initialised = 1;
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/i8254.c  Mon Jun 02 11:35:39 2008 +0900
@@ -31,6 +31,7 @@
 #include <xen/lib.h>
 #include <xen/errno.h>
 #include <xen/sched.h>
+#include <asm/time.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/support.h>
@@ -52,6 +53,9 @@ static int handle_pit_io(
     int dir, uint32_t port, uint32_t bytes, uint32_t *val);
 static int handle_speaker_io(
     int dir, uint32_t port, uint32_t bytes, uint32_t *val);
+
+#define get_guest_time(v) \
+   (is_hvm_vcpu(v) ? hvm_get_guest_time(v) : (u64)get_s_time())
 
 /* Compute with 96 bit intermediate result: (a*b)/c */
 static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
@@ -86,8 +90,8 @@ static int pit_get_count(PITState *pit, 
 
     ASSERT(spin_is_locked(&pit->lock));
 
-    d = muldiv64(hvm_get_guest_time(v) - pit->count_load_time[channel],
-                 PIT_FREQ, ticks_per_sec(v));
+    d = muldiv64(get_guest_time(v) - pit->count_load_time[channel],
+                 PIT_FREQ, SYSTEM_TIME_HZ);
 
     switch ( c->mode )
     {
@@ -117,8 +121,8 @@ static int pit_get_out(PITState *pit, in
 
     ASSERT(spin_is_locked(&pit->lock));
 
-    d = muldiv64(hvm_get_guest_time(v) - pit->count_load_time[channel], 
-                 PIT_FREQ, ticks_per_sec(v));
+    d = muldiv64(get_guest_time(v) - pit->count_load_time[channel], 
+                 PIT_FREQ, SYSTEM_TIME_HZ);
 
     switch ( s->mode )
     {
@@ -164,7 +168,7 @@ static void pit_set_gate(PITState *pit, 
     case 3:
         /* Restart counting on rising edge. */
         if ( s->gate < val )
-            pit->count_load_time[channel] = hvm_get_guest_time(v);
+            pit->count_load_time[channel] = get_guest_time(v);
         break;
     }
 
@@ -180,7 +184,7 @@ static void pit_time_fired(struct vcpu *
 static void pit_time_fired(struct vcpu *v, void *priv)
 {
     uint64_t *count_load_time = priv;
-    *count_load_time = hvm_get_guest_time(v);
+    *count_load_time = get_guest_time(v);
 }
 
 static void pit_load_count(PITState *pit, int channel, int val)
@@ -195,11 +199,11 @@ static void pit_load_count(PITState *pit
         val = 0x10000;
 
     if ( v == NULL )
-        rdtscll(pit->count_load_time[channel]);
-    else
-        pit->count_load_time[channel] = hvm_get_guest_time(v);
+        pit->count_load_time[channel] = 0;
+    else
+        pit->count_load_time[channel] = get_guest_time(v);
     s->count = val;
-    period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ);
+    period = DIV_ROUND(val * SYSTEM_TIME_HZ, PIT_FREQ);
 
     if ( (v == NULL) || !is_hvm_vcpu(v) || (channel != 0) )
         return;
@@ -435,7 +439,7 @@ static int pit_load(struct domain *d, hv
      * time jitter here, but the wall-clock will have jumped massively, so 
      * we hope the guest can handle it.
      */
-    pit->pt0.last_plt_gtime = hvm_get_guest_time(d->vcpu[0]);
+    pit->pt0.last_plt_gtime = get_guest_time(d->vcpu[0]);
     for ( i = 0; i < 3; i++ )
         pit_load_count(pit, i, pit->hw.channels[i].count);
 
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/pmtimer.c
--- a/xen/arch/x86/hvm/pmtimer.c        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/pmtimer.c        Mon Jun 02 11:35:39 2008 +0900
@@ -257,7 +257,7 @@ void pmtimer_init(struct vcpu *v)
 
     spin_lock_init(&s->lock);
 
-    s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / ticks_per_sec(v);
+    s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / SYSTEM_TIME_HZ;
     s->vcpu = v;
 
     /* Intercept port I/O (need two handlers because PM1a_CNT is between
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Jun 02 11:35:39 2008 +0900
@@ -299,7 +299,7 @@ static void svm_save_cpu_state(struct vc
     data->msr_efer         = v->arch.hvm_vcpu.guest_efer;
     data->msr_flags        = -1ULL;
 
-    data->tsc = hvm_get_guest_time(v);
+    data->tsc = hvm_get_guest_tsc(v);
 }
 
 
@@ -315,7 +315,7 @@ static void svm_load_cpu_state(struct vc
     v->arch.hvm_vcpu.guest_efer = data->msr_efer;
     svm_update_guest_efer(v);
 
-    hvm_set_guest_time(v, data->tsc);
+    hvm_set_guest_tsc(v, data->tsc);
 }
 
 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/vlapic.c Mon Jun 02 11:35:39 2008 +0900
@@ -22,18 +22,19 @@
 #include <xen/types.h>
 #include <xen/mm.h>
 #include <xen/xmalloc.h>
+#include <xen/domain.h>
 #include <xen/domain_page.h>
-#include <asm/page.h>
 #include <xen/event.h>
 #include <xen/trace.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/numa.h>
+#include <asm/current.h>
+#include <asm/page.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/support.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <asm/current.h>
 #include <asm/hvm/vmx/vmx.h>
-#include <xen/numa.h>
 #include <public/hvm/ioreq.h>
 #include <public/hvm/params.h>
 
@@ -259,6 +260,7 @@ static void vlapic_init_action(unsigned 
 {
     struct vcpu *v = (struct vcpu *)_vcpu;
     struct domain *d = v->domain;
+    bool_t fpu_initialised;
 
     /* If the VCPU is not on its way down we have nothing to do. */
     if ( !test_bit(_VPF_down, &v->pause_flags) )
@@ -270,15 +272,12 @@ static void vlapic_init_action(unsigned 
         return;
     }
 
+    /* Reset necessary VCPU state. This does not include FPU state. */
     domain_lock(d);
-
-    /* Paranoia makes us re-assert VPF_down under the domain lock. */
-    set_bit(_VPF_down, &v->pause_flags);
-    v->is_initialised = 0;
-    clear_bit(_VPF_blocked, &v->pause_flags);
-
+    fpu_initialised = v->fpu_initialised;
+    vcpu_reset(v);
+    v->fpu_initialised = fpu_initialised;
     vlapic_reset(vcpu_vlapic(v));
-
     domain_unlock(d);
 
     vcpu_unpause(v);
@@ -474,7 +473,6 @@ static uint32_t vlapic_get_tmcct(struct 
     uint64_t counter_passed;
 
     counter_passed = ((hvm_get_guest_time(v) - vlapic->timer_last_update)
-                      * 1000000000ULL / ticks_per_sec(v)
                       / APIC_BUS_CYCLE_NS / vlapic->hw.timer_divisor);
     tmcct = tmict - counter_passed;
 
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Jun 02 11:35:39 2008 +0900
@@ -607,7 +607,7 @@ static void vmx_save_cpu_state(struct vc
     data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
 #endif
 
-    data->tsc = hvm_get_guest_time(v);
+    data->tsc = hvm_get_guest_tsc(v);
 }
 
 static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
@@ -625,7 +625,7 @@ static void vmx_load_cpu_state(struct vc
     v->arch.hvm_vmx.shadow_gs = data->shadow_gs;
 #endif
 
-    hvm_set_guest_time(v, data->tsc);
+    hvm_set_guest_tsc(v, data->tsc);
 }
 
 
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/hvm/vpt.c    Mon Jun 02 11:35:39 2008 +0900
@@ -25,6 +25,39 @@
 #define mode_is(d, name) \
     ((d)->arch.hvm_domain.params[HVM_PARAM_TIMER_MODE] == HVMPTM_##name)
 
+void hvm_init_guest_time(struct domain *d)
+{
+    struct pl_time *pl = &d->arch.hvm_domain.pl_time;
+
+    spin_lock_init(&pl->pl_time_lock);
+    pl->stime_offset = -(u64)get_s_time();
+    pl->last_guest_time = 0;
+}
+
+u64 hvm_get_guest_time(struct vcpu *v)
+{
+    struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time;
+    u64 now;
+
+    /* Called from device models shared with PV guests. Be careful. */
+    ASSERT(is_hvm_vcpu(v));
+
+    spin_lock(&pl->pl_time_lock);
+    now = get_s_time() + pl->stime_offset;
+    if ( (int64_t)(now - pl->last_guest_time) >= 0 )
+        pl->last_guest_time = now;
+    else
+        now = pl->last_guest_time;
+    spin_unlock(&pl->pl_time_lock);
+
+    return now + v->arch.hvm_vcpu.stime_offset;
+}
+
+void hvm_set_guest_time(struct vcpu *v, u64 guest_time)
+{
+    v->arch.hvm_vcpu.stime_offset += guest_time - hvm_get_guest_time(v);
+}
+
 static int pt_irq_vector(struct periodic_time *pt, enum hvm_intsrc src)
 {
     struct vcpu *v = pt->vcpu;
@@ -348,7 +381,7 @@ void create_periodic_time(
     pt->vcpu = v;
     pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
     pt->irq = irq;
-    pt->period_cycles = (u64)period * cpu_khz / 1000000L;
+    pt->period_cycles = (u64)period;
     pt->one_shot = one_shot;
     pt->scheduled = NOW() + period;
     /*
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/mm.c Mon Jun 02 11:35:39 2008 +0900
@@ -1939,6 +1939,20 @@ int get_page_type(struct page_info *page
     }
     while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
 
+    if ( unlikely((x & PGT_type_mask) != type) )
+    {
+        /* Special pages should not be accessible from devices. */
+        struct domain *d = page_get_owner(page);
+        if ( d && unlikely(need_iommu(d)) )
+        {
+            if ( (x & PGT_type_mask) == PGT_writable_page )
+                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
+            else if ( type == PGT_writable_page )
+                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
+                               page_to_mfn(page));
+        }
+    }
+
     if ( unlikely(!(nx & PGT_validated)) )
     {
         /* Try to validate page type; drop the new reference on failure. */
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Mon Jun 02 11:35:39 2008 +0900
@@ -266,12 +266,6 @@ out:
                 iommu_unmap_page(d, gfn);
         }
     }
-
-#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
-    /* If p2m table is shared with vtd page-table. */
-    if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
-        iommu_flush(d, gfn, (u64*)ept_entry);
-#endif
 
     return rv;
 }
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/mm/p2m.c     Mon Jun 02 11:35:39 2008 +0900
@@ -325,7 +325,7 @@ p2m_set_entry(struct domain *d, unsigned
     if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
         d->arch.p2m->max_mapped_pfn = gfn;
 
-    if ( iommu_enabled && is_hvm_domain(d) )
+    if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) )
     {
         if ( p2mt == p2m_ram_rw )
             for ( i = 0; i < (1UL << page_order); i++ )
@@ -868,7 +868,12 @@ p2m_remove_page(struct domain *d, unsign
     unsigned long i;
 
     if ( !paging_mode_translate(d) )
+    {
+        if ( need_iommu(d) )
+            for ( i = 0; i < (1 << page_order); i++ )
+                iommu_unmap_page(d, mfn + i);
         return;
+    }
 
     P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
@@ -899,7 +904,19 @@ guest_physmap_add_entry(struct domain *d
     int rc = 0;
 
     if ( !paging_mode_translate(d) )
-        return -EINVAL;
+    {
+        if ( need_iommu(d) && t == p2m_ram_rw )
+        {
+            for ( i = 0; i < (1 << page_order); i++ )
+                if ( (rc = iommu_map_page(d, mfn + i, mfn + i)) != 0 )
+                {
+                    while ( i-- > 0 )
+                        iommu_unmap_page(d, mfn + i);
+                    return rc;
+                }
+        }
+        return 0;
+    }
 
 #if CONFIG_PAGING_LEVELS == 3
     /*
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Jun 02 11:35:39 2008 +0900
@@ -2799,8 +2799,11 @@ int shadow_track_dirty_vram(struct domai
     if ( !d->dirty_vram )
     {
         /* Just recount from start. */
-        for ( i = begin_pfn; i < end_pfn; i++ )
-            flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, i, &t));
+        for ( i = begin_pfn; i < end_pfn; i++ ) {
+            mfn_t mfn = gfn_to_mfn(d, i, &t);
+            if (mfn_x(mfn) != INVALID_MFN)
+                flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
+        }
 
         gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
 
@@ -2840,61 +2843,70 @@ int shadow_track_dirty_vram(struct domai
         /* Iterate over VRAM to track dirty bits. */
         for ( i = 0; i < nr; i++ ) {
             mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
-            struct page_info *page = mfn_to_page(mfn);
-            u32 count_info = page->u.inuse.type_info & PGT_count_mask;
+            struct page_info *page;
+            u32 count_info;
             int dirty = 0;
             paddr_t sl1ma = d->dirty_vram->sl1ma[i];
 
-            switch (count_info)
+            if (mfn_x(mfn) == INVALID_MFN)
             {
-            case 0:
-                /* No guest reference, nothing to track. */
-                break;
-            case 1:
-                /* One guest reference. */
-                if ( sl1ma == INVALID_PADDR )
+                dirty = 1;
+            }
+            else
+            {
+                page = mfn_to_page(mfn);
+                count_info = page->u.inuse.type_info & PGT_count_mask;
+                switch (count_info)
                 {
-                    /* We don't know which sl1e points to this, too bad. */
+                case 0:
+                    /* No guest reference, nothing to track. */
+                    break;
+                case 1:
+                    /* One guest reference. */
+                    if ( sl1ma == INVALID_PADDR )
+                    {
+                        /* We don't know which sl1e points to this, too bad. */
+                        dirty = 1;
+                        /* TODO: Heuristics for finding the single mapping of
+                         * this gmfn */
+                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
+                    }
+                    else
+                    {
+                        /* Hopefully the most common case: only one mapping,
+                         * whose dirty bit we can use. */
+                        l1_pgentry_t *sl1e;
+#ifdef __i386__
+                        void *sl1p = map_sl1p;
+                        unsigned long sl1mfn = paddr_to_pfn(sl1ma);
+
+                        if ( sl1mfn != map_mfn ) {
+                            if ( map_sl1p )
+                                sh_unmap_domain_page(map_sl1p);
+                            map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
+                            map_mfn = sl1mfn;
+                        }
+                        sl1e = sl1p + (sl1ma & ~PAGE_MASK);
+#else
+                        sl1e = maddr_to_virt(sl1ma);
+#endif
+
+                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+                        {
+                            dirty = 1;
+                            /* Note: this is atomic, so we may clear a
+                             * _PAGE_ACCESSED set by another processor. */
+                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+                            flush_tlb = 1;
+                        }
+                    }
+                    break;
+                default:
+                    /* More than one guest reference,
+                     * we don't afford tracking that. */
                     dirty = 1;
-                    /* TODO: Heuristics for finding the single mapping of
-                     * this gmfn */
-                    flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, begin_pfn + i, &t));
+                    break;
                 }
-                else
-                {
-                    /* Hopefully the most common case: only one mapping,
-                     * whose dirty bit we can use. */
-                    l1_pgentry_t *sl1e;
-#ifdef __i386__
-                    void *sl1p = map_sl1p;
-                    unsigned long sl1mfn = paddr_to_pfn(sl1ma);
-
-                    if ( sl1mfn != map_mfn ) {
-                        if ( map_sl1p )
-                            sh_unmap_domain_page(map_sl1p);
-                        map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
-                        map_mfn = sl1mfn;
-                    }
-                    sl1e = sl1p + (sl1ma & ~PAGE_MASK);
-#else
-                    sl1e = maddr_to_virt(sl1ma);
-#endif
-
-                    if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
-                    {
-                        dirty = 1;
-                        /* Note: this is atomic, so we may clear a
-                         * _PAGE_ACCESSED set by another processor. */
-                        l1e_remove_flags(*sl1e, _PAGE_DIRTY);
-                        flush_tlb = 1;
-                    }
-                }
-                break;
-            default:
-                /* More than one guest reference,
-                 * we don't afford tracking that. */
-                dirty = 1;
-                break;
             }
 
             if ( dirty )
@@ -2916,8 +2928,11 @@ int shadow_track_dirty_vram(struct domai
             {
                 /* was clean for more than two seconds, try to disable guest
                  * write access */
-                for ( i = begin_pfn; i < end_pfn; i++ )
-                    flush_tlb |= sh_remove_write_access(d->vcpu[0], gfn_to_mfn(d, i, &t), 1, 0);
+                for ( i = begin_pfn; i < end_pfn; i++ ) {
+                    mfn_t mfn = gfn_to_mfn(d, i, &t);
+                    if (mfn_x(mfn) != INVALID_MFN)
+                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
+                }
                 d->dirty_vram->last_dirty = -1;
             }
             rc = 0;
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/msi.c        Mon Jun 02 11:35:39 2008 +0900
@@ -25,6 +25,7 @@
 #include <mach_apic.h>
 #include <io_ports.h>
 #include <public/physdev.h>
+#include <xen/iommu.h>
 
 extern int msi_irq_enable;
 
@@ -156,6 +157,9 @@ void read_msi_msg(unsigned int irq, stru
     default:
         BUG();
     }
+
+    if ( vtd_enabled )
+        msi_msg_read_remap_rte(entry, msg);
 }
 
 static int set_vector_msi(struct msi_desc *entry)
@@ -201,6 +205,9 @@ void write_msi_msg(unsigned int irq, str
 void write_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
     struct msi_desc *entry = irq_desc[irq].msi_desc;
+
+    if ( vtd_enabled )
+        msi_msg_write_remap_rte(entry, msg);
 
     switch ( entry->msi_attrib.type )
     {
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/setup.c      Mon Jun 02 11:35:39 2008 +0900
@@ -1100,6 +1100,14 @@ void arch_get_xen_caps(xen_capabilities_
 #endif
 }
 
+int xen_in_range(paddr_t start, paddr_t end)
+{
+    start = max_t(paddr_t, start, xenheap_phys_start);
+    end = min_t(paddr_t, end, xenheap_phys_end);
+ 
+    return start < end; 
+}
+
 /*
  * Local variables:
  * mode: C
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/smpboot.c    Mon Jun 02 11:35:39 2008 +0900
@@ -1391,6 +1391,11 @@ void enable_nonboot_cpus(void)
                panic("Not enough cpus");
        }
        cpus_clear(frozen_cpus);
+
+       /*
+        * Cleanup possible dangling ends after sleep...
+        */
+       smpboot_restore_warm_reset_vector();
 }
 #else /* ... !CONFIG_HOTPLUG_CPU */
 int __cpu_disable(void)
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/tboot.c
--- a/xen/arch/x86/tboot.c      Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/tboot.c      Mon Jun 02 11:35:39 2008 +0900
@@ -96,6 +96,18 @@ int tboot_in_measured_env(void)
     return (g_tboot_shared != NULL);
 }
 
+int tboot_in_range(paddr_t start, paddr_t end)
+{
+    if ( g_tboot_shared == NULL || g_tboot_shared->version < 0x02 )
+        return 0;
+
+    start = max_t(paddr_t, start, g_tboot_shared->tboot_base);
+    end = min_t(paddr_t, end, 
+                g_tboot_shared->tboot_base + g_tboot_shared->tboot_size);
+ 
+    return start < end; 
+}
+
 /*
  * Local variables:
  * mode: C
diff -r d2a239224cb2 -r f1508348ffab xen/arch/x86/x86_emulate/x86_emulate.c
--- a/xen/arch/x86/x86_emulate/x86_emulate.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c    Mon Jun 02 11:35:39 2008 +0900
@@ -2105,12 +2105,14 @@ x86_emulate(
         break;
     }
 
+    /* Inject #DB if single-step tracing was enabled at instruction start. */
+    if ( (ctxt->regs->eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
+         (ops->inject_hw_exception != NULL) )
+        rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
+
     /* Commit shadow register state. */
     _regs.eflags &= ~EFLG_RF;
     *ctxt->regs = _regs;
-    if ( (_regs.eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
-         (ops->inject_hw_exception != NULL) )
-        rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
 
  done:
     return rc;
diff -r d2a239224cb2 -r f1508348ffab xen/common/domain.c
--- a/xen/common/domain.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/common/domain.c       Mon Jun 02 11:35:39 2008 +0900
@@ -637,7 +637,7 @@ void vcpu_reset(struct vcpu *v)
 {
     struct domain *d = v->domain;
 
-    domain_pause(d);
+    vcpu_pause(v);
     domain_lock(d);
 
     arch_vcpu_reset(v);
@@ -653,7 +653,7 @@ void vcpu_reset(struct vcpu *v)
     clear_bit(_VPF_blocked, &v->pause_flags);
 
     domain_unlock(v->domain);
-    domain_unpause(d);
+    vcpu_unpause(v);
 }
 
 
diff -r d2a239224cb2 -r f1508348ffab xen/common/grant_table.c
--- a/xen/common/grant_table.c  Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/common/grant_table.c  Mon Jun 02 11:35:39 2008 +0900
@@ -32,6 +32,8 @@
 #include <xen/trace.h>
 #include <xen/guest_access.h>
 #include <xen/domain_page.h>
+#include <xen/iommu.h>
+#include <xen/paging.h>
 #include <xsm/xsm.h>
 
 #ifndef max_nr_grant_frames
@@ -196,8 +198,9 @@ __gnttab_map_grant_ref(
     struct domain *ld, *rd;
     struct vcpu   *led;
     int            handle;
-    unsigned long  frame = 0;
+    unsigned long  frame = 0, nr_gets = 0;
     int            rc = GNTST_okay;
+    u32            old_pin;
     unsigned int   cache_flags;
     struct active_grant_entry *act;
     struct grant_mapping *mt;
@@ -318,6 +321,7 @@ __gnttab_map_grant_ref(
         }
     }
 
+    old_pin = act->pin;
     if ( op->flags & GNTMAP_device_map )
         act->pin += (op->flags & GNTMAP_readonly) ?
             GNTPIN_devr_inc : GNTPIN_devw_inc;
@@ -361,20 +365,17 @@ __gnttab_map_grant_ref(
             rc = GNTST_general_error;
             goto undo_out;
         }
-        
+
+        nr_gets++;
         if ( op->flags & GNTMAP_host_map )
         {
             rc = create_grant_host_mapping(op->host_addr, frame, op->flags, 0);
             if ( rc != GNTST_okay )
-            {
-                if ( gnttab_host_mapping_get_page_type(op, ld, rd) )
-                    put_page_type(mfn_to_page(frame));
-                put_page(mfn_to_page(frame));
                 goto undo_out;
-            }
 
             if ( op->flags & GNTMAP_device_map )
             {
+                nr_gets++;
                 (void)get_page(mfn_to_page(frame), rd);
                 if ( !(op->flags & GNTMAP_readonly) )
                     get_page_type(mfn_to_page(frame), PGT_writable_page);
@@ -382,6 +383,17 @@ __gnttab_map_grant_ref(
         }
     }
 
+    if ( need_iommu(ld) &&
+         !(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
+         (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
+    {
+        if ( iommu_map_page(ld, mfn_to_gmfn(ld, frame), frame) )
+        {
+            rc = GNTST_general_error;
+            goto undo_out;
+        }
+    }
+
     TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom);
 
     mt = &maptrack_entry(ld->grant_table, handle);
@@ -397,6 +409,19 @@ __gnttab_map_grant_ref(
     return;
 
  undo_out:
+    if ( nr_gets > 1 )
+    {
+        if ( !(op->flags & GNTMAP_readonly) )
+            put_page_type(mfn_to_page(frame));
+        put_page(mfn_to_page(frame));
+    }
+    if ( nr_gets > 0 )
+    {
+        if ( gnttab_host_mapping_get_page_type(op, ld, rd) )
+            put_page_type(mfn_to_page(frame));
+        put_page(mfn_to_page(frame));
+    }
+
     spin_lock(&rd->grant_table->lock);
 
     act = &active_entry(rd->grant_table, op->ref);
@@ -451,6 +476,7 @@ __gnttab_unmap_common(
     struct active_grant_entry *act;
     grant_entry_t   *sha;
     s16              rc = 0;
+    u32              old_pin;
 
     ld = current->domain;
 
@@ -497,6 +523,7 @@ __gnttab_unmap_common(
 
     act = &active_entry(rd->grant_table, op->map->ref);
     sha = &shared_entry(rd->grant_table, op->map->ref);
+    old_pin = act->pin;
 
     if ( op->frame == 0 )
     {
@@ -532,6 +559,17 @@ __gnttab_unmap_common(
             act->pin -= GNTPIN_hstr_inc;
         else
             act->pin -= GNTPIN_hstw_inc;
+    }
+
+    if ( need_iommu(ld) &&
+         (old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
+         !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
+    {
+        if ( iommu_unmap_page(ld, mfn_to_gmfn(ld, op->frame)) )
+        {
+            rc = GNTST_general_error;
+            goto unmap_out;
+        }
     }
 
     /* If just unmapped a writable mapping, mark as dirtied */
@@ -1073,6 +1111,11 @@ gnttab_transfer(
             gop.status = GNTST_bad_page;
             goto copyback;
         }
+
+#ifndef __ia64__ /* IA64 implicitly replaces the old page in steal_page(). */
+        guest_physmap_remove_page(d, gop.mfn, mfn, 0);
+#endif
+        flush_tlb_mask(d->domain_dirty_cpumask);
 
         /* Find the target domain. */
         if ( unlikely((e = rcu_lock_domain_by_id(gop.domid)) == NULL) )
diff -r d2a239224cb2 -r f1508348ffab xen/common/libelf/libelf-private.h
--- a/xen/common/libelf/libelf-private.h        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/common/libelf/libelf-private.h        Mon Jun 02 11:35:39 2008 +0900
@@ -43,7 +43,7 @@
 #define bswap_16(x) swap16(x)
 #define bswap_32(x) swap32(x)
 #define bswap_64(x) swap64(x)
-#elif defined(__linux__) || defined(__Linux__)
+#elif defined(__linux__) || defined(__Linux__) || defined(__MINIOS__)
 #include <byteswap.h>
 #else
 #error Unsupported OS
diff -r d2a239224cb2 -r f1508348ffab xen/common/memory.c
--- a/xen/common/memory.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/common/memory.c       Mon Jun 02 11:35:39 2008 +0900
@@ -124,12 +124,9 @@ static void populate_physmap(struct memo
         }
 
         mfn = page_to_mfn(page);
-
-        if ( unlikely(paging_mode_translate(d)) )
-        {
-            guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
-        }
-        else
+        guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+
+        if ( !paging_mode_translate(d) )
         {
             for ( j = 0; j < (1 << a->extent_order); j++ )
                 set_gpfn_from_mfn(mfn + j, gpfn + j);
@@ -436,11 +433,9 @@ static long memory_exchange(XEN_GUEST_HA
                 &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);
 
             mfn = page_to_mfn(page);
-            if ( unlikely(paging_mode_translate(d)) )
-            {
-                guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
-            }
-            else
+            guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
+
+            if ( !paging_mode_translate(d) )
             {
                 for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                     set_gpfn_from_mfn(mfn + k, gpfn + k);
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Mon Jun 02 11:35:02 
2008 +0900
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Mon Jun 02 11:35:39 
2008 +0900
@@ -635,6 +635,16 @@ static void amd_iommu_return_device(
     reassign_device(s, t, bus, devfn);
 }
 
+static int amd_iommu_group_id(u8 bus, u8 devfn)
+{
+    int rt;
+    int bdf = (bus << 8) | devfn;
+    rt = ( bdf < ivrs_bdf_entries ) ?
+        ivrs_mappings[bdf].dte_requestor_id :
+        bdf;
+    return rt;
+}
+
 struct iommu_ops amd_iommu_ops = {
     .init = amd_iommu_domain_init,
     .assign_device  = amd_iommu_assign_device,
@@ -642,4 +652,5 @@ struct iommu_ops amd_iommu_ops = {
     .map_page = amd_iommu_map_page,
     .unmap_page = amd_iommu_unmap_page,
     .reassign_device = amd_iommu_return_device,
+    .get_device_group_id = amd_iommu_group_id,
 };
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/iommu.c   Mon Jun 02 11:35:39 2008 +0900
@@ -15,14 +15,20 @@
 
 #include <xen/sched.h>
 #include <xen/iommu.h>
+#include <xen/paging.h>
+#include <xen/guest_access.h>
 
 extern struct iommu_ops intel_iommu_ops;
 extern struct iommu_ops amd_iommu_ops;
+static int iommu_populate_page_table(struct domain *d);
 int intel_vtd_setup(void);
 int amd_iov_detect(void);
 
 int iommu_enabled = 1;
 boolean_param("iommu", iommu_enabled);
+
+int iommu_pv_enabled = 0;
+boolean_param("iommu_pv", iommu_pv_enabled);
 
 int iommu_domain_init(struct domain *domain)
 {
@@ -54,11 +60,46 @@ int assign_device(struct domain *d, u8 b
 int assign_device(struct domain *d, u8 bus, u8 devfn)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
-
-    if ( !iommu_enabled || !hd->platform_ops )
-        return 0;
-
-    return hd->platform_ops->assign_device(d, bus, devfn);
+    int rc;
+
+    if ( !iommu_enabled || !hd->platform_ops )
+        return 0;
+
+    if ( (rc = hd->platform_ops->assign_device(d, bus, devfn)) )
+        return rc;
+
+    if ( has_iommu_pdevs(d) && !need_iommu(d) )
+    {
+        d->need_iommu = 1;
+        return iommu_populate_page_table(d);
+    }
+    return 0;
+}
+
+static int iommu_populate_page_table(struct domain *d)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct page_info *page;
+    int rc;
+
+    spin_lock(&d->page_alloc_lock);
+
+    list_for_each_entry ( page, &d->page_list, list )
+    {
+        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
+        {
+            rc = hd->platform_ops->map_page(
+                d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page));
+            if (rc)
+            {
+                spin_unlock(&d->page_alloc_lock);
+                hd->platform_ops->teardown(d);
+                return rc;
+            }
+        }
+    }
+    spin_unlock(&d->page_alloc_lock);
+    return 0;
 }
 
 void iommu_domain_destroy(struct domain *d)
@@ -137,7 +178,13 @@ void deassign_device(struct domain *d, u
     if ( !iommu_enabled || !hd->platform_ops )
         return;
 
-    return hd->platform_ops->reassign_device(d, dom0, bus, devfn);
+    hd->platform_ops->reassign_device(d, dom0, bus, devfn);
+
+    if ( !has_iommu_pdevs(d) && need_iommu(d) )
+    {
+        d->need_iommu = 0;
+        hd->platform_ops->teardown(d);
+    }
 }
 
 static int iommu_setup(void)
@@ -160,7 +207,56 @@ static int iommu_setup(void)
     iommu_enabled = (rc == 0);
 
  out:
+    if ( !iommu_enabled || !vtd_enabled )
+        iommu_pv_enabled = 0;
     printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
+    if (iommu_enabled)
+        printk("I/O virtualisation for PV guests %sabled\n",
+               iommu_pv_enabled ? "en" : "dis");
     return rc;
 }
 __initcall(iommu_setup);
+
+int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
+    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct pci_dev *pdev;
+    int group_id, sdev_id;
+    u32 bdf;
+    int i = 0;
+    struct iommu_ops *ops = hd->platform_ops;
+
+    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
+        return 0;
+
+    group_id = ops->get_device_group_id(bus, devfn);
+
+    list_for_each_entry(pdev,
+        &(dom0->arch.hvm_domain.hvm_iommu.pdev_list), list)
+    {
+        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
+            continue;
+
+        sdev_id = ops->get_device_group_id(pdev->bus, pdev->devfn);
+        if ( (sdev_id == group_id) && (i < max_sdevs) )
+        {
+            bdf = 0;
+            bdf |= (pdev->bus & 0xff) << 16;
+            bdf |= (pdev->devfn & 0xff) << 8;
+            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
+                return -1;
+            i++;
+        }
+    }
+
+    return i;
+}
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/dmar.c        Mon Jun 02 11:35:39 2008 +0900
@@ -147,39 +147,6 @@ struct acpi_drhd_unit * acpi_find_matche
     return NULL;
 }
 
-struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev)
-{
-    struct acpi_rmrr_unit *rmrr;
-
-    list_for_each_entry ( rmrr, &acpi_rmrr_units, list )
-        if ( acpi_pci_device_match(rmrr->devices,
-                                   rmrr->devices_cnt, dev) )
-            return rmrr;
-
-    return NULL;
-}
-
-struct acpi_atsr_unit * acpi_find_matched_atsr_unit(struct pci_dev *dev)
-{
-    struct acpi_atsr_unit *atsru;
-    struct acpi_atsr_unit *all_ports_atsru;
-
-    all_ports_atsru = NULL;
-    list_for_each_entry ( atsru, &acpi_atsr_units, list )
-    {
-        if ( atsru->all_ports )
-            all_ports_atsru = atsru;
-        if ( acpi_pci_device_match(atsru->devices,
-                                   atsru->devices_cnt, dev) )
-            return atsru;
-    }
-
-    if ( all_ports_atsru )
-        return all_ports_atsru;;
-
-    return NULL;
-}
-
 static int scope_device_count(void *start, void *end)
 {
     struct acpi_dev_scope *scope;
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/dmar.h
--- a/xen/drivers/passthrough/vtd/dmar.h        Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/dmar.h        Mon Jun 02 11:35:39 2008 +0900
@@ -86,7 +86,6 @@ struct acpi_atsr_unit {
     }
 
 struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev);
-struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev);
 
 #define DMAR_TYPE 1
 #define RMRR_TYPE 2
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/extern.h
--- a/xen/drivers/passthrough/vtd/extern.h      Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/extern.h      Mon Jun 02 11:35:39 2008 +0900
@@ -27,8 +27,7 @@ extern struct ir_ctrl *ir_ctrl;
 extern struct ir_ctrl *ir_ctrl;
 
 void print_iommu_regs(struct acpi_drhd_unit *drhd);
-void print_vtd_entries(struct domain *d, struct iommu *iommu,
-                       int bus, int devfn, unsigned long gmfn);
+void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn);
 void pdev_flr(u8 bus, u8 devfn);
 
 int qinval_setup(struct iommu *iommu);
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/intremap.c    Mon Jun 02 11:35:39 2008 +0900
@@ -48,14 +48,14 @@ static void remap_entry_to_ioapic_rte(
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct IO_APIC_route_remap_entry *remap_rte;
-    unsigned int index;
+    int index = 0;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    if ( ir_ctrl == NULL )
+    if ( ir_ctrl == NULL || ir_ctrl->iremap_index < 0 )
     {
         dprintk(XENLOG_ERR VTDPREFIX,
-                "remap_entry_to_ioapic_rte: ir_ctl == NULL");
+                "remap_entry_to_ioapic_rte: ir_ctl is not ready\n");
         return;
     }
 
@@ -63,11 +63,8 @@ static void remap_entry_to_ioapic_rte(
     index = (remap_rte->index_15 << 15) + remap_rte->index_0_14;
 
     if ( index > ir_ctrl->iremap_index )
-    {
-        dprintk(XENLOG_ERR VTDPREFIX,
-            "Index is larger than remap table entry size. Error!\n");
-        return;
-    }
+        panic("%s: index (%d) is larger than remap table entry size (%d)!\n",
+              __func__, index, ir_ctrl->iremap_index);
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
@@ -81,79 +78,90 @@ static void remap_entry_to_ioapic_rte(
     old_rte->trigger = iremap_entry->lo.tm;
     old_rte->__reserved_2 = 0;
     old_rte->dest.logical.__reserved_1 = 0;
-    old_rte->dest.logical.logical_dest = iremap_entry->lo.dst;
+    old_rte->dest.logical.logical_dest = iremap_entry->lo.dst >> 8;
 
     unmap_vtd_domain_page(iremap_entries);
     spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
 }
 
 static void ioapic_rte_to_remap_entry(struct iommu *iommu,
-    int apic_id, struct IO_APIC_route_entry *old_rte)
+    int apic_id, struct IO_APIC_route_entry *old_rte,
+    unsigned int rte_upper, unsigned int value)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
+    struct iremap_entry new_ire;
     struct IO_APIC_route_remap_entry *remap_rte;
-    unsigned int index;
+    struct IO_APIC_route_entry new_rte;
+    int index;
     unsigned long flags;
-    int ret = 0;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
     remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
-    index = ir_ctrl->iremap_index;
+
+    if ( remap_rte->format == 0 )
+    {
+        ir_ctrl->iremap_index++;
+        index = ir_ctrl->iremap_index;
+    }
+    else
+        index = (remap_rte->index_15 << 15) | remap_rte->index_0_14;
+
     if ( index > IREMAP_ENTRY_NR - 1 )
-    {
-        dprintk(XENLOG_ERR VTDPREFIX,
-               "The interrupt number is more than 256!\n");
-        goto out;
-    }
+        panic("ioapic_rte_to_remap_entry: intremap index is more than 256!\n");
 
     iremap_entries =
         (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
     iremap_entry = &iremap_entries[index];
 
-    if ( *(u64 *)iremap_entry != 0 )
-        dprintk(XENLOG_WARNING VTDPREFIX,
-               "Interrupt remapping entry is in use already!\n");
-    iremap_entry->lo.fpd = 0;
-    iremap_entry->lo.dm = old_rte->dest_mode;
-    iremap_entry->lo.rh = 0;
-    iremap_entry->lo.tm = old_rte->trigger;
-    iremap_entry->lo.dlm = old_rte->delivery_mode;
-    iremap_entry->lo.avail = 0;
-    iremap_entry->lo.res_1 = 0;
-    iremap_entry->lo.vector = old_rte->vector;
-    iremap_entry->lo.res_2 = 0;
-    iremap_entry->lo.dst = (old_rte->dest.logical.logical_dest << 8);
-    iremap_entry->hi.sid = apicid_to_bdf(apic_id);
-    iremap_entry->hi.sq = 0;    /* comparing all 16-bit of SID */
-    iremap_entry->hi.svt = 1;   /* turn on requestor ID verification SID/SQ */
-    iremap_entry->hi.res_1 = 0;
-    iremap_entry->lo.p = 1;    /* finally, set present bit */
-    ir_ctrl->iremap_index++;
+    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
+
+    if ( rte_upper )
+        new_ire.lo.dst = (value >> 24) << 8;
+    else
+    {
+        *(((u32 *)&new_rte) + 0) = value;
+        new_ire.lo.fpd = 0;
+        new_ire.lo.dm = new_rte.dest_mode;
+        new_ire.lo.rh = 0;
+        new_ire.lo.tm = new_rte.trigger;
+        new_ire.lo.dlm = new_rte.delivery_mode;
+        new_ire.lo.avail = 0;
+        new_ire.lo.res_1 = 0;
+        new_ire.lo.vector = new_rte.vector;
+        new_ire.lo.res_2 = 0;
+        new_ire.hi.sid = apicid_to_bdf(apic_id);
+
+        new_ire.hi.sq = 0;    /* comparing all 16-bit of SID */
+        new_ire.hi.svt = 1;   /* requestor ID verification SID/SQ */
+        new_ire.hi.res_1 = 0;
+        new_ire.lo.p = 1;     /* finally, set present bit */
+
+        /* now construct new ioapic rte entry */
+        remap_rte->vector = new_rte.vector;
+        remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
+        remap_rte->index_15 = index & 0x8000;
+        remap_rte->index_0_14 = index & 0x7fff;
+
+        remap_rte->delivery_status = new_rte.delivery_status;
+        remap_rte->polarity = new_rte.polarity;
+        remap_rte->irr = new_rte.irr;
+        remap_rte->trigger = new_rte.trigger;
+        remap_rte->mask = new_rte.mask;
+        remap_rte->reserved = 0;
+        remap_rte->format = 1;    /* indicate remap format */
+    }
+
+    memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
+    iommu_flush_iec_index(iommu, 0, index);
+    invalidate_sync(iommu);
 
     unmap_vtd_domain_page(iremap_entries);
-    iommu_flush_iec_index(iommu, 0, index);
-    ret = invalidate_sync(iommu);
-
-    /* now construct new ioapic rte entry */
-    remap_rte->vector = old_rte->vector;
-    remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
-    remap_rte->index_15 = index & 0x8000;
-    remap_rte->index_0_14 = index & 0x7fff;
-    remap_rte->delivery_status = old_rte->delivery_status;
-    remap_rte->polarity = old_rte->polarity;
-    remap_rte->irr = old_rte->irr;
-    remap_rte->trigger = old_rte->trigger;
-    remap_rte->mask = 1;
-    remap_rte->reserved = 0;
-    remap_rte->format = 1;    /* indicate remap format */
-out:
     spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
     return;
 }
 
-unsigned int
-io_apic_read_remap_rte(
+unsigned int io_apic_read_remap_rte(
     unsigned int apic, unsigned int reg)
 {
     struct IO_APIC_route_entry old_rte = { 0 };
@@ -198,15 +206,15 @@ io_apic_read_remap_rte(
     }
 }
 
-void
-io_apic_write_remap_rte(
+void io_apic_write_remap_rte(
     unsigned int apic, unsigned int reg, unsigned int value)
 {
     struct IO_APIC_route_entry old_rte = { 0 };
     struct IO_APIC_route_remap_entry *remap_rte;
-    int rte_upper = (reg & 1) ? 1 : 0;
+    unsigned int rte_upper = (reg & 1) ? 1 : 0;
     struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    int saved_mask;
 
     if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
     {
@@ -225,21 +233,192 @@ io_apic_write_remap_rte(
     *(((u32 *)&old_rte) + 1) = *(IO_APIC_BASE(apic)+4);
 
     remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
-    if ( remap_rte->mask || (remap_rte->format == 0) )
-    {
-        *IO_APIC_BASE(apic) = rte_upper ? ++reg : reg;
-        *(IO_APIC_BASE(apic)+4) = value;
-        return;
-    }
-
-    *(((u32 *)&old_rte) + rte_upper) = value;
-    ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid, &old_rte);
+
+    /* mask the interrupt while we change the intremap table */
+    saved_mask = remap_rte->mask;
+    remap_rte->mask = 1;
+    *IO_APIC_BASE(apic) = reg;
+    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
+    remap_rte->mask = saved_mask;
+
+    ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid,
+                              &old_rte, rte_upper, value);
 
     /* write new entry to ioapic */
     *IO_APIC_BASE(apic) = reg;
-    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
+    *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+0);
     *IO_APIC_BASE(apic) = reg + 1;
-    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+1);
+    *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+1);
+}
+
+static void remap_entry_to_msi_msg(
+    struct iommu *iommu, struct msi_msg *msg)
+{
+    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
+    struct msi_msg_remap_entry *remap_rte;
+    int index;
+    unsigned long flags;
+    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+
+    if ( ir_ctrl == NULL )
+    {
+        dprintk(XENLOG_ERR VTDPREFIX,
+                "remap_entry_to_msi_msg: ir_ctl == NULL");
+        return;
+    }
+
+    remap_rte = (struct msi_msg_remap_entry *) msg;
+    index = (remap_rte->address_lo.index_15 << 15) |
+            remap_rte->address_lo.index_0_14;
+
+    if ( index > ir_ctrl->iremap_index )
+        panic("%s: index (%d) is larger than remap table entry size (%d)\n",
+              __func__, index, ir_ctrl->iremap_index);
+
+    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+
+    iremap_entries =
+        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
+    iremap_entry = &iremap_entries[index];
+
+    msg->address_hi = MSI_ADDR_BASE_HI;
+    msg->address_lo =
+        MSI_ADDR_BASE_LO |
+        ((iremap_entry->lo.dm == 0) ?
+            MSI_ADDR_DESTMODE_PHYS:
+            MSI_ADDR_DESTMODE_LOGIC) |
+        ((iremap_entry->lo.dlm != dest_LowestPrio) ?
+            MSI_ADDR_REDIRECTION_CPU:
+            MSI_ADDR_REDIRECTION_LOWPRI) |
+        iremap_entry->lo.dst >> 8;
+
+    msg->data =
+        MSI_DATA_TRIGGER_EDGE |
+        MSI_DATA_LEVEL_ASSERT |
+        ((iremap_entry->lo.dlm != dest_LowestPrio) ?
+            MSI_DATA_DELIVERY_FIXED:
+            MSI_DATA_DELIVERY_LOWPRI) |
+        iremap_entry->lo.vector;
+
+    unmap_vtd_domain_page(iremap_entries);
+    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+}
+
+static void msi_msg_to_remap_entry(
+    struct iommu *iommu, struct pci_dev *pdev, struct msi_msg *msg)
+{
+    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
+    struct iremap_entry new_ire;
+    struct msi_msg_remap_entry *remap_rte;
+    unsigned int index;
+    unsigned long flags;
+    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    int i = 0;
+
+    remap_rte = (struct msi_msg_remap_entry *) msg;
+    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+
+    iremap_entries =
+        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
+
+    /* If the entry for a PCI device has been there, use the old entry,
+     * Or, assign a new entry for it.
+     */
+    for ( i = 0; i <= ir_ctrl->iremap_index; i++ )
+    {
+        iremap_entry = &iremap_entries[i];
+        if ( iremap_entry->hi.sid ==
+             ((pdev->bus << 8) | pdev->devfn) )
+           break;
+    }
+
+    if ( i > ir_ctrl->iremap_index )
+    {
+       ir_ctrl->iremap_index++;
+        index = ir_ctrl->iremap_index;
+    }
+    else
+        index = i;
+
+    if ( index > IREMAP_ENTRY_NR - 1 )
+        panic("msi_msg_to_remap_entry: intremap index is more than 256!\n");
+
+    iremap_entry = &iremap_entries[index];
+    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
+
+    /* Set interrupt remapping table entry */
+    new_ire.lo.fpd = 0;
+    new_ire.lo.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
+    new_ire.lo.rh = 0;
+    new_ire.lo.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    new_ire.lo.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
+    new_ire.lo.avail = 0;
+    new_ire.lo.res_1 = 0;
+    new_ire.lo.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
+                        MSI_DATA_VECTOR_MASK;
+    new_ire.lo.res_2 = 0;
+    new_ire.lo.dst = ((msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT)
+                      & 0xff) << 8;
+
+    new_ire.hi.sid = (pdev->bus << 8) | pdev->devfn;
+    new_ire.hi.sq = 0;
+    new_ire.hi.svt = 1;
+    new_ire.hi.res_1 = 0;
+    new_ire.lo.p = 1;    /* finally, set present bit */
+
+    /* now construct new MSI/MSI-X rte entry */
+    remap_rte->address_lo.dontcare = 0;
+    remap_rte->address_lo.index_15 = index & 0x8000;
+    remap_rte->address_lo.index_0_14 = index & 0x7fff;
+    remap_rte->address_lo.SHV = 1;
+    remap_rte->address_lo.format = 1;
+
+    remap_rte->address_hi = 0;
+    remap_rte->data = 0;
+
+    memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
+    iommu_flush_iec_index(iommu, 0, index);
+    invalidate_sync(iommu);
+
+    unmap_vtd_domain_page(iremap_entries);
+    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
+    return;
+}
+
+void msi_msg_read_remap_rte(
+    struct msi_desc *msi_desc, struct msi_msg *msg)
+{
+    struct pci_dev *pdev = msi_desc->dev;
+    struct acpi_drhd_unit *drhd = NULL;
+    struct iommu *iommu = NULL;
+    struct ir_ctrl *ir_ctrl;
+
+    drhd = acpi_find_matched_drhd_unit(pdev);
+    iommu = drhd->iommu;
+
+    ir_ctrl = iommu_ir_ctrl(iommu);
+    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
+        return;
+
+    remap_entry_to_msi_msg(iommu, msg);
+}
+
+void msi_msg_write_remap_rte(
+    struct msi_desc *msi_desc, struct msi_msg *msg)
+{
+    struct pci_dev *pdev = msi_desc->dev;
+    struct acpi_drhd_unit *drhd = NULL;
+    struct iommu *iommu = NULL;
+    struct ir_ctrl *ir_ctrl;
+
+    drhd = acpi_find_matched_drhd_unit(msi_desc->dev);
+    iommu = drhd->iommu;
+
+    ir_ctrl = iommu_ir_ctrl(iommu);
+    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
+        return;
+
+    msi_msg_to_remap_entry(iommu, pdev, msg);
 }
 
 int intremap_setup(struct iommu *iommu)
@@ -260,6 +439,7 @@ int intremap_setup(struct iommu *iommu)
                     "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
             return -ENODEV;
         }
+        ir_ctrl->iremap_index = -1;
     }
 
 #if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT)
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c       Mon Jun 02 11:35:39 2008 +0900
@@ -112,28 +112,27 @@ struct iommu_flush *iommu_get_flush(stru
     return iommu ? &iommu->intel->flush : NULL;
 }
 
-unsigned int clflush_size;
-void clflush_cache_range(void *adr, int size)
+static unsigned int clflush_size;
+static int iommus_incoherent;
+static void __iommu_flush_cache(void *addr, int size)
 {
     int i;
+
+    if ( !iommus_incoherent )
+        return;
+
     for ( i = 0; i < size; i += clflush_size )
-        clflush(adr + i);
-}
-
-static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
-{
-    if ( !ecap_coherent(iommu->ecap) )
-        clflush_cache_range(addr, size);
-}
-
-void iommu_flush_cache_entry(struct iommu *iommu, void *addr)
-{
-    __iommu_flush_cache(iommu, addr, 8);
-}
-
-void iommu_flush_cache_page(struct iommu *iommu, void *addr)
-{
-    __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K);
+        clflush((char *)addr + i);
+}
+
+void iommu_flush_cache_entry(void *addr)
+{
+    __iommu_flush_cache(addr, 8);
+}
+
+void iommu_flush_cache_page(void *addr)
+{
+    __iommu_flush_cache(addr, PAGE_SIZE_4K);
 }
 
 int nr_iommus;
@@ -157,7 +156,7 @@ static u64 bus_to_context_maddr(struct i
         }
         set_root_value(*root, maddr);
         set_root_present(*root);
-        iommu_flush_cache_entry(iommu, root);
+        iommu_flush_cache_entry(root);
     }
     maddr = (u64) get_context_addr(*root);
     unmap_vtd_domain_page(root_entries);
@@ -191,30 +190,22 @@ static int device_context_mapped(struct 
     return ret;
 }
 
-static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr)
+static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
     struct dma_pte *parent, *pte = NULL;
     int level = agaw_to_level(hd->agaw);
     int offset;
     unsigned long flags;
-    u64 pte_maddr = 0;
+    u64 pte_maddr = 0, maddr;
     u64 *vaddr = NULL;
-
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
 
     addr &= (((u64)1) << addr_width) - 1;
     spin_lock_irqsave(&hd->mapping_lock, flags);
     if ( hd->pgd_maddr == 0 )
-    {
-        hd->pgd_maddr = alloc_pgtable_maddr();
-        if ( hd->pgd_maddr == 0 )
-            return 0;
-    }
+        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr()) == 0) )
+            goto out;
 
     parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
     while ( level > 1 )
@@ -224,7 +215,9 @@ static u64 addr_to_dma_page_maddr(struct
 
         if ( dma_pte_addr(*pte) == 0 )
         {
-            u64 maddr = alloc_pgtable_maddr();
+            if ( !alloc )
+                break;
+            maddr = alloc_pgtable_maddr();
             dma_set_pte_addr(*pte, maddr);
             vaddr = map_vtd_domain_page(maddr);
             if ( !vaddr )
@@ -236,7 +229,7 @@ static u64 addr_to_dma_page_maddr(struct
              */
             dma_set_pte_readable(*pte);
             dma_set_pte_writable(*pte);
-            iommu_flush_cache_entry(iommu, pte);
+            iommu_flush_cache_entry(pte);
         }
         else
         {
@@ -259,43 +252,9 @@ static u64 addr_to_dma_page_maddr(struct
     }
 
     unmap_vtd_domain_page(parent);
+ out:
     spin_unlock_irqrestore(&hd->mapping_lock, flags);
     return pte_maddr;
-}
-
-/* return address's page at specific level */
-static u64 dma_addr_level_page_maddr(
-    struct domain *domain, u64 addr, int level)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct dma_pte *parent, *pte = NULL;
-    int total = agaw_to_level(hd->agaw);
-    int offset;
-    u64 pg_maddr = hd->pgd_maddr;
-
-    if ( pg_maddr == 0 )
-        return 0;
-
-    parent = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
-    while ( level <= total )
-    {
-        offset = address_level_offset(addr, total);
-        pte = &parent[offset];
-        if ( dma_pte_addr(*pte) == 0 )
-            break;
-
-        pg_maddr = pte->val & PAGE_MASK_4K;
-        unmap_vtd_domain_page(parent);
-
-        if ( level == total )
-            return pg_maddr;
-
-        parent = map_vtd_domain_page(pte->val);
-        total--;
-    }
-
-    unmap_vtd_domain_page(parent);
-    return 0;
 }
 
 static void iommu_flush_write_buffer(struct iommu *iommu)
@@ -485,9 +444,12 @@ static int flush_iotlb_reg(void *_iommu,
     /* check IOTLB invalidation granularity */
     if ( DMA_TLB_IAIG(val) == 0 )
         printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
+
+#ifdef VTD_DEBUG
     if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
         printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
                (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
+#endif
     /* flush context entry will implictly flush write buffer */
     return 0;
 }
@@ -572,34 +534,36 @@ void iommu_flush_all(void)
 /* clear one page's page table */
 static void dma_pte_clear_one(struct domain *domain, u64 addr)
 {
+    struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     struct dma_pte *page = NULL, *pte = NULL;
     u64 pg_maddr;
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-
     /* get last level pte */
-    pg_maddr = dma_addr_level_page_maddr(domain, addr, 1);
+    pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
     if ( pg_maddr == 0 )
         return;
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
     pte = page + address_level_offset(addr, 1);
-    if ( pte )
-    {
-        dma_clear_pte(*pte);
-        iommu_flush_cache_entry(drhd->iommu, pte);
-
-        for_each_drhd_unit ( drhd )
-        {
-            iommu = drhd->iommu;
-            if ( cap_caching_mode(iommu->cap) )
-                iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
-                                      addr, 1, 0);
-            else if (cap_rwbf(iommu->cap))
-                iommu_flush_write_buffer(iommu);
-        }
-    }
+
+    if ( !dma_pte_present(*pte) )
+    {
+        unmap_vtd_domain_page(page);
+        return;
+    }
+
+    dma_clear_pte(*pte); 
+    iommu_flush_cache_entry(pte);
+
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
+        if ( test_bit(iommu->index, &hd->iommu_bitmap) )
+            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
+                                  addr, 1, 0);
+    }
+
     unmap_vtd_domain_page(page);
 }
 
@@ -626,7 +590,6 @@ static void iommu_free_next_pagetable(u6
 static void iommu_free_next_pagetable(u64 pt_maddr, unsigned long index,
                                       int level)
 {
-    struct acpi_drhd_unit *drhd;
     unsigned long next_index;
     struct dma_pte *pt_vaddr, *pde;
     int next_level;
@@ -636,50 +599,38 @@ static void iommu_free_next_pagetable(u6
 
     pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
     pde = &pt_vaddr[index];
-    if ( dma_pte_addr(*pde) != 0 )
-    {
-        next_level = level - 1;
-        if ( next_level > 1 )
-        {
-            next_index = 0;
-            do
-            {
-                iommu_free_next_pagetable(pde->val,
-                                          next_index, next_level);
-                next_index++;
-            } while ( next_index < PTE_NUM );
-        }
-
-        dma_clear_pte(*pde);
-        drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-        iommu_flush_cache_entry(drhd->iommu, pde);
-        free_pgtable_maddr(pde->val);
-        unmap_vtd_domain_page(pt_vaddr);
-    }
-    else
-        unmap_vtd_domain_page(pt_vaddr);
+    if ( dma_pte_addr(*pde) == 0 )
+        goto out;
+
+    next_level = level - 1;
+    if ( next_level > 1 )
+    {
+        for ( next_index = 0; next_index < PTE_NUM; next_index++ )
+            iommu_free_next_pagetable(pde->val, next_index, next_level);
+    }
+
+    dma_clear_pte(*pde);
+    iommu_flush_cache_entry(pde);
+    free_pgtable_maddr(pde->val);
+
+ out:
+    unmap_vtd_domain_page(pt_vaddr);
 }
 
 /* free all VT-d page tables when shut down or destroy domain. */
 static void iommu_free_pagetable(struct domain *domain)
 {
-    unsigned long index;
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    int total_level = agaw_to_level(hd->agaw);
-
-    if ( hd->pgd_maddr != 0 )
-    {
-        index = 0;
-        do
-        {
-            iommu_free_next_pagetable(hd->pgd_maddr,
-                                      index, total_level + 1);
-            index++;
-        } while ( index < PTE_NUM );
-
-        free_pgtable_maddr(hd->pgd_maddr);
-        hd->pgd_maddr = 0;
-    }
+    int i, total_level = agaw_to_level(hd->agaw);
+
+    if ( hd->pgd_maddr == 0 )
+        return;
+
+    for ( i = 0; i < PTE_NUM; i++ )
+        iommu_free_next_pagetable(hd->pgd_maddr, i, total_level + 1);
+
+    free_pgtable_maddr(hd->pgd_maddr);
+    hd->pgd_maddr = 0;
 }
 
 static int iommu_set_root_entry(struct iommu *iommu)
@@ -777,16 +728,17 @@ int iommu_disable_translation(struct iom
 
 static struct iommu *vector_to_iommu[NR_VECTORS];
 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
-                                   u8 fault_reason, u16 source_id, u32 addr)
+                                   u8 fault_reason, u16 source_id, u64 addr)
 {
     dprintk(XENLOG_WARNING VTDPREFIX,
-            "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n",
+            "iommu_fault:%s: %x:%x.%x addr %"PRIx64" REASON %x "
+            "iommu->reg = %p\n",
             (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
             PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
             fault_reason, iommu->reg);
 
     if ( fault_reason < 0x20 )
-        print_vtd_entries(current->domain, iommu, (source_id >> 8),
+        print_vtd_entries(iommu, (source_id >> 8),
                           (source_id & 0xff), (addr >> PAGE_SHIFT));
 
     return 0;
@@ -844,7 +796,8 @@ static void iommu_page_fault(int vector,
     {
         u8 fault_reason;
         u16 source_id;
-        u32 guest_addr, data;
+        u32 data;
+        u64 guest_addr;
         int type;
 
         /* highest 32 bits */
@@ -998,6 +951,8 @@ static int iommu_alloc(struct acpi_drhd_
 static int iommu_alloc(struct acpi_drhd_unit *drhd)
 {
     struct iommu *iommu;
+    unsigned long sagaw;
+    int agaw;
 
     if ( nr_iommus > MAX_IOMMUS )
     {
@@ -1020,10 +975,27 @@ static int iommu_alloc(struct acpi_drhd_
 
     set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
     iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
-    nr_iommus++;
+    iommu->index = nr_iommus++;
 
     iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
     iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
+
+    /* Calculate number of pagetable levels: between 2 and 4. */
+    sagaw = cap_sagaw(iommu->cap);
+    for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
+        if ( test_bit(agaw, &sagaw) )
+            break;
+    if ( agaw < 0 )
+    {
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                 "IOMMU: unsupported sagaw %lx\n", sagaw);
+        xfree(iommu);
+        return -ENODEV;
+    }
+    iommu->nr_pt_levels = agaw_to_level(agaw);
+
+    if ( !ecap_coherent(iommu->ecap) )
+        iommus_incoherent = 1;
 
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
@@ -1066,9 +1038,7 @@ static int intel_iommu_domain_init(struc
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
     struct iommu *iommu = NULL;
-    int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
-    int i, adjust_width, agaw;
-    unsigned long sagaw;
+    u64 i;
     struct acpi_drhd_unit *drhd;
 
     INIT_LIST_HEAD(&hd->pdev_list);
@@ -1076,28 +1046,25 @@ static int intel_iommu_domain_init(struc
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
 
-    /* Calculate AGAW. */
-    if ( guest_width > cap_mgaw(iommu->cap) )
-        guest_width = cap_mgaw(iommu->cap);
-    adjust_width = guestwidth_to_adjustwidth(guest_width);
-    agaw = width_to_agaw(adjust_width);
-    /* FIXME: hardware doesn't support it, choose a bigger one? */
-    sagaw = cap_sagaw(iommu->cap);
-    if ( !test_bit(agaw, &sagaw) )
-    {
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "IOMMU: hardware doesn't support the agaw\n");
-        agaw = find_next_bit(&sagaw, 5, agaw);
-        if ( agaw >= 5 )
-            return -ENODEV;
-    }
-    hd->agaw = agaw;
+    hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
 
     if ( d->domain_id == 0 )
     {
-        /* Set up 1:1 page table for dom0. */
+        extern int xen_in_range(paddr_t start, paddr_t end);
+        extern int tboot_in_range(paddr_t start, paddr_t end);
+
+        /* 
+         * Set up 1:1 page table for dom0 except the critical segments
+         * like Xen and tboot.
+         */
         for ( i = 0; i < max_page; i++ )
+        {
+            if ( xen_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) ||
+                 tboot_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) )
+                continue;
+
             iommu_map_page(d, i, i);
+        }
 
         setup_dom0_devices(d);
         setup_dom0_rmrr(d);
@@ -1123,7 +1090,8 @@ static int domain_context_mapping_one(
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct context_entry *context, *context_entries;
     unsigned long flags;
-    u64 maddr;
+    u64 maddr, pgd_maddr;
+    int agaw;
 
     maddr = bus_to_context_maddr(iommu, bus);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
@@ -1136,38 +1104,64 @@ static int domain_context_mapping_one(
     }
 
     spin_lock_irqsave(&iommu->lock, flags);
+
+#ifdef CONTEXT_PASSTHRU
+    if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
+        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+    else
+    {
+#endif
+        /* Ensure we have pagetables allocated down to leaf PTE. */
+        if ( hd->pgd_maddr == 0 )
+        {
+            addr_to_dma_page_maddr(domain, 0, 1);
+            if ( hd->pgd_maddr == 0 )
+            {
+            nomem:
+                unmap_vtd_domain_page(context_entries);
+                spin_unlock_irqrestore(&iommu->lock, flags);
+                return -ENOMEM;
+            }
+        }
+
+        /* Skip top levels of page tables for 2- and 3-level DRHDs. */
+        pgd_maddr = hd->pgd_maddr;
+        for ( agaw = level_to_agaw(4);
+              agaw != level_to_agaw(iommu->nr_pt_levels);
+              agaw-- )
+        {
+            struct dma_pte *p = map_vtd_domain_page(pgd_maddr);
+            pgd_maddr = dma_pte_addr(*p);
+            unmap_vtd_domain_page(p);
+            if ( pgd_maddr == 0 )
+                goto nomem;
+        }
+
+        context_set_address_root(*context, pgd_maddr);
+        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+#ifdef CONTEXT_PASSTHRU
+    }
+#endif
+
     /*
      * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
      * be 1 based as required by intel's iommu hw.
      */
     context_set_domain_id(context, domain);
-    context_set_address_width(*context, hd->agaw);
-
-    if ( ecap_pass_thru(iommu->ecap) )
-        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
-#ifdef CONTEXT_PASSTHRU
-    else
-    {
-#endif
-        ASSERT(hd->pgd_maddr != 0);
-        context_set_address_root(*context, hd->pgd_maddr);
-        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
-#ifdef CONTEXT_PASSTHRU
-    }
-#endif
-
+    context_set_address_width(*context, agaw);
     context_set_fault_enable(*context);
     context_set_present(*context);
-    iommu_flush_cache_entry(iommu, context);
+    iommu_flush_cache_entry(context);
 
     unmap_vtd_domain_page(context_entries);
 
-    if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
-                                    (((u16)bus) << 8) | devfn,
-                                    DMA_CCMD_MASK_NOBIT, 1) )
+    /* Context entry was previously non-present (with domid 0). */
+    iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
+                               DMA_CCMD_MASK_NOBIT, 1);
+    if ( iommu_flush_iotlb_dsi(iommu, 0, 1) )
         iommu_flush_write_buffer(iommu);
-    else
-        iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
+
+    set_bit(iommu->index, &hd->iommu_bitmap);
     spin_unlock_irqrestore(&iommu->lock, flags);
 
     return 0;
@@ -1314,7 +1308,7 @@ static int domain_context_unmap_one(
     spin_lock_irqsave(&iommu->lock, flags);
     context_clear_present(*context);
     context_clear_entry(*context);
-    iommu_flush_cache_entry(iommu, context);
+    iommu_flush_cache_entry(context);
     iommu_flush_context_global(iommu, 0);
     iommu_flush_iotlb_global(iommu, 0);
     unmap_vtd_domain_page(context_entries);
@@ -1395,11 +1389,12 @@ void reassign_device_ownership(
 {
     struct hvm_iommu *source_hd = domain_hvm_iommu(source);
     struct hvm_iommu *target_hd = domain_hvm_iommu(target);
-    struct pci_dev *pdev;
+    struct pci_dev *pdev, *pdev2;
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     int status;
     unsigned long flags;
+    int found = 0;
 
     pdev_flr(bus, devfn);
 
@@ -1420,6 +1415,18 @@ void reassign_device_ownership(
     list_move(&pdev->list, &target_hd->pdev_list);
     spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
     spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
+
+    for_each_pdev ( source, pdev2 )
+    {
+        drhd = acpi_find_matched_drhd_unit(pdev2);
+        if ( drhd->iommu == iommu )
+        {
+            found = 1;
+            break;
+        }
+    }
+    if ( !found )
+        clear_bit(iommu->index, &source_hd->iommu_bitmap);
 
     status = domain_context_mapping(target, iommu, pdev);
     if ( status != 0 )
@@ -1477,13 +1484,12 @@ int intel_iommu_map_page(
 int intel_iommu_map_page(
     struct domain *d, unsigned long gfn, unsigned long mfn)
 {
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     struct dma_pte *page = NULL, *pte = NULL;
     u64 pg_maddr;
-
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
+    int pte_present;
 
 #ifdef CONTEXT_PASSTHRU
     /* do nothing if dom0 and iommu supports pass thru */
@@ -1491,23 +1497,27 @@ int intel_iommu_map_page(
         return 0;
 #endif
 
-    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K);
+    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
     if ( pg_maddr == 0 )
         return -ENOMEM;
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
     pte = page + (gfn & LEVEL_MASK);
+    pte_present = dma_pte_present(*pte);
     dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
-    iommu_flush_cache_entry(iommu, pte);
+    iommu_flush_cache_entry(pte);
     unmap_vtd_domain_page(page);
 
     for_each_drhd_unit ( drhd )
     {
         iommu = drhd->iommu;
-        if ( cap_caching_mode(iommu->cap) )
-            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
-                                  (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
-        else if ( cap_rwbf(iommu->cap) )
+
+        if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
+            continue;
+
+        if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
+                                   (paddr_t)gfn << PAGE_SHIFT_4K, 1,
+                                   !pte_present) )
             iommu_flush_write_buffer(iommu);
     }
 
@@ -1536,6 +1546,7 @@ int iommu_page_mapping(struct domain *do
 int iommu_page_mapping(struct domain *domain, paddr_t iova,
                        paddr_t hpa, size_t size, int prot)
 {
+    struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     u64 start_pfn, end_pfn;
@@ -1543,24 +1554,23 @@ int iommu_page_mapping(struct domain *do
     int index;
     u64 pg_maddr;
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
     if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
         return -EINVAL;
+
     iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
     start_pfn = hpa >> PAGE_SHIFT_4K;
     end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
     index = 0;
     while ( start_pfn < end_pfn )
     {
-        pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K * index);
+        pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K*index, 
1);
         if ( pg_maddr == 0 )
             return -ENOMEM;
         page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
         pte = page + (start_pfn & LEVEL_MASK);
         dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
-        iommu_flush_cache_entry(iommu, pte);
+        iommu_flush_cache_entry(pte);
         unmap_vtd_domain_page(page);
         start_pfn++;
         index++;
@@ -1569,10 +1579,12 @@ int iommu_page_mapping(struct domain *do
     for_each_drhd_unit ( drhd )
     {
         iommu = drhd->iommu;
-        if ( cap_caching_mode(iommu->cap) )
-            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
-                                  iova, index, 0);
-        else if ( cap_rwbf(iommu->cap) )
+
+        if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
+            continue;
+
+        if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
+                                   iova, index, 1) )
             iommu_flush_write_buffer(iommu);
     }
 
@@ -1584,25 +1596,6 @@ int iommu_page_unmapping(struct domain *
     dma_pte_clear_range(domain, addr, addr + size);
 
     return 0;
-}
-
-void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry)
-{
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu = NULL;
-    struct dma_pte *pte = (struct dma_pte *) p2m_entry;
-
-    for_each_drhd_unit ( drhd )
-    {
-        iommu = drhd->iommu;
-        if ( cap_caching_mode(iommu->cap) )
-            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
-                                  (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
-        else if ( cap_rwbf(iommu->cap) )
-            iommu_flush_write_buffer(iommu);
-    }
-
-    iommu_flush_cache_entry(iommu, pte);
 }
 
 static int iommu_prepare_rmrr_dev(
@@ -1916,6 +1909,7 @@ struct iommu_ops intel_iommu_ops = {
     .map_page = intel_iommu_map_page,
     .unmap_page = intel_iommu_unmap_page,
     .reassign_device = reassign_device_ownership,
+    .get_device_group_id = NULL,
 };
 
 /*
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.h       Mon Jun 02 11:35:39 2008 +0900
@@ -236,6 +236,7 @@ struct context_entry {
 #define LEVEL_STRIDE       (9)
 #define LEVEL_MASK         ((1 << LEVEL_STRIDE) - 1)
 #define PTE_NUM            (1 << LEVEL_STRIDE)
+#define level_to_agaw(val) ((val) - 2)
 #define agaw_to_level(val) ((val) + 2)
 #define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
 #define width_to_agaw(w)   ((w - 30)/LEVEL_STRIDE)
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/utils.c
--- a/xen/drivers/passthrough/vtd/utils.c       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/utils.c       Mon Jun 02 11:35:39 2008 +0900
@@ -213,109 +213,97 @@ u32 get_level_index(unsigned long gmfn, 
     return gmfn & LEVEL_MASK;
 }
 
-void print_vtd_entries(
-    struct domain *d,
-    struct iommu *iommu,
-    int bus, int devfn,
-    unsigned long gmfn)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(d);
-    struct acpi_drhd_unit *drhd;
+void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn)
+{
     struct context_entry *ctxt_entry;
     struct root_entry *root_entry;
     struct dma_pte pte;
     u64 *l;
-    u32 l_index;
-    u32 i = 0;
-    int level = agaw_to_level(hd->agaw);
-
-    printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x gmfn = %lx\n",
-           d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);
-
-    if ( hd->pgd_maddr == 0 )
-    {
-        printk("    hd->pgd_maddr == 0\n");
-        return;
-    }
-    printk("    hd->pgd_maddr = %"PRIx64"\n", hd->pgd_maddr);
-
-    for_each_drhd_unit ( drhd )
-    {
-        printk("---- print_vtd_entries %d ----\n", i++);
-
-        if ( iommu->root_maddr == 0 )
-        {
-            printk("    iommu->root_maddr = 0\n");
-            continue;
-        }
-
-        root_entry =
-            (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+    u32 l_index, level;
+
+    printk("print_vtd_entries: iommu = %p bdf = %x:%x:%x gmfn = %"PRIx64"\n",
+           iommu, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);
+
+    if ( iommu->root_maddr == 0 )
+    {
+        printk("    iommu->root_maddr = 0\n");
+        return;
+    }
+
+    root_entry = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
  
-        printk("    root_entry = %p\n", root_entry);
-        printk("    root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
-        if ( !root_present(root_entry[bus]) )
-        {
-            unmap_vtd_domain_page(root_entry);
-            printk("    root_entry[%x] not present\n", bus);
-            continue;
-        }
-
-        ctxt_entry =
-            (struct context_entry *)map_vtd_domain_page(root_entry[bus].val);
-        if ( ctxt_entry == NULL )
-        {
-            unmap_vtd_domain_page(root_entry);
-            printk("    ctxt_entry == NULL\n");
-            continue;
-        }
-
-        printk("    context = %p\n", ctxt_entry);
-        printk("    context[%x] = %"PRIx64" %"PRIx64"\n",
-               devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
-        if ( !context_present(ctxt_entry[devfn]) )
+    printk("    root_entry = %p\n", root_entry);
+    printk("    root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
+    if ( !root_present(root_entry[bus]) )
+    {
+        unmap_vtd_domain_page(root_entry);
+        printk("    root_entry[%x] not present\n", bus);
+        return;
+    }
+
+    ctxt_entry =
+        (struct context_entry *)map_vtd_domain_page(root_entry[bus].val);
+    if ( ctxt_entry == NULL )
+    {
+        unmap_vtd_domain_page(root_entry);
+        printk("    ctxt_entry == NULL\n");
+        return;
+    }
+
+    printk("    context = %p\n", ctxt_entry);
+    printk("    context[%x] = %"PRIx64"_%"PRIx64"\n",
+           devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
+    if ( !context_present(ctxt_entry[devfn]) )
+    {
+        unmap_vtd_domain_page(ctxt_entry);
+        unmap_vtd_domain_page(root_entry);
+        printk("    ctxt_entry[%x] not present\n", devfn);
+        return;
+    }
+
+    level = agaw_to_level(context_address_width(ctxt_entry[devfn]));
+    if ( level != VTD_PAGE_TABLE_LEVEL_3 &&
+         level != VTD_PAGE_TABLE_LEVEL_4)
+    {
+        unmap_vtd_domain_page(ctxt_entry);
+        unmap_vtd_domain_page(root_entry);
+        printk("Unsupported VTD page table level (%d)!\n", level);
+    }
+
+    l = maddr_to_virt(ctxt_entry[devfn].lo);
+    do
+    {
+        l = (u64*)(((unsigned long)l >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
+        printk("    l%d = %p\n", level, l);
+        if ( l == NULL )
         {
             unmap_vtd_domain_page(ctxt_entry);
             unmap_vtd_domain_page(root_entry);
-            printk("    ctxt_entry[%x] not present\n", devfn);
-            continue;
-        }
-
-        if ( level != VTD_PAGE_TABLE_LEVEL_3 &&
-             level != VTD_PAGE_TABLE_LEVEL_4)
+            printk("    l%d == NULL\n", level);
+            break;
+        }
+        l_index = get_level_index(gmfn, level);
+        printk("    l%d_index = %x\n", level, l_index);
+        printk("    l%d[%x] = %"PRIx64"\n", level, l_index, l[l_index]);
+
+        pte.val = l[l_index];
+        if ( !dma_pte_present(pte) )
         {
             unmap_vtd_domain_page(ctxt_entry);
             unmap_vtd_domain_page(root_entry);
-            printk("Unsupported VTD page table level (%d)!\n", level);
-            continue;
-        }
-
-        l = maddr_to_virt(ctxt_entry[devfn].lo);
-        do
-        {
-            l = (u64*)(((unsigned long)l >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
-            printk("    l%d = %p\n", level, l);
-            if ( l == NULL )
-            {
-                unmap_vtd_domain_page(ctxt_entry);
-                unmap_vtd_domain_page(root_entry);
-                printk("    l%d == NULL\n", level);
-                break;
-            }
-            l_index = get_level_index(gmfn, level);
-            printk("    l%d_index = %x\n", level, l_index);
-            printk("    l%d[%x] = %"PRIx64"\n", level, l_index, l[l_index]);
-
-            pte.val = l[l_index];
-            if ( !dma_pte_present(pte) )
-            {
-                unmap_vtd_domain_page(ctxt_entry);
-                unmap_vtd_domain_page(root_entry);
-                printk("    l%d[%x] not present\n", level, l_index);
-                break;
-            }
-
-            l = maddr_to_virt(l[l_index]);
-        } while ( --level );
-    }
-}
+            printk("    l%d[%x] not present\n", level, l_index);
+            break;
+        }
+
+        l = maddr_to_virt(l[l_index]);
+    } while ( --level );
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/vtd.h
--- a/xen/drivers/passthrough/vtd/vtd.h Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/vtd.h Mon Jun 02 11:35:39 2008 +0900
@@ -42,13 +42,31 @@ struct IO_APIC_route_remap_entry {
     };
 };
 
+struct msi_msg_remap_entry {
+    union {
+        u32 val;
+        struct {
+            u32 dontcare:2,
+                index_15:1,
+                SHV:1,
+                format:1,
+                index_0_14:15,
+                addr_id_val:12; /* Interrupt address identifier value,
+                                   must be 0FEEh */
+        };
+    } address_lo;   /* low 32 bits of msi message address */
+
+    u32        address_hi;     /* high 32 bits of msi message address */
+    u32        data;           /* msi message data */
+};
+
 unsigned int get_clflush_size(void);
 u64 alloc_pgtable_maddr(void);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
 void unmap_vtd_domain_page(void *va);
 
-void iommu_flush_cache_entry(struct iommu *iommu, void *addr);
-void iommu_flush_cache_page(struct iommu *iommu, void *addr);
+void iommu_flush_cache_entry(void *addr);
+void iommu_flush_cache_page(void *addr);
 
 #endif // _VTD_H_
diff -r d2a239224cb2 -r f1508348ffab xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c     Mon Jun 02 11:35:39 2008 +0900
@@ -41,8 +41,6 @@ u64 alloc_pgtable_maddr(void)
 {
     struct page_info *pg;
     u64 *vaddr;
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
 
     pg = alloc_domheap_page(NULL, 0);
     vaddr = map_domain_page(page_to_mfn(pg));
@@ -50,9 +48,7 @@ u64 alloc_pgtable_maddr(void)
         return 0;
     memset(vaddr, 0, PAGE_SIZE);
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
-    iommu_flush_cache_page(iommu, vaddr);
+    iommu_flush_cache_page(vaddr);
     unmap_domain_page(vaddr);
 
     return page_to_maddr(pg);
@@ -123,181 +119,3 @@ void hvm_dpci_isairq_eoi(struct domain *
         }
     }
 }
-
-void iommu_set_pgd(struct domain *d)
-{
-    struct hvm_iommu *hd  = domain_hvm_iommu(d);
-    unsigned long p2m_table;
-
-    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
-
-    if ( paging_mode_hap(d) )
-    {
-        int level = agaw_to_level(hd->agaw);
-        struct dma_pte *dpte = NULL;
-        mfn_t pgd_mfn;
-
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:
-            dpte = map_domain_page(p2m_table);
-            if ( !dma_pte_present(*dpte) )
-            {
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "iommu_set_pgd: second level wasn't there\n");
-                unmap_domain_page(dpte);
-                return;
-            }
-            pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
-            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
-            unmap_domain_page(dpte);
-            break;
-        case VTD_PAGE_TABLE_LEVEL_4:
-            pgd_mfn = _mfn(p2m_table);
-            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-    }
-    else
-    {
-#if CONFIG_PAGING_LEVELS == 3
-        struct dma_pte *pte = NULL, *pgd_vaddr = NULL, *pmd_vaddr = NULL;
-        int i;
-        u64 pmd_maddr;
-        unsigned long flags;
-        l3_pgentry_t *l3e;
-        int level = agaw_to_level(hd->agaw);
-
-        spin_lock_irqsave(&hd->mapping_lock, flags);
-        hd->pgd_maddr = alloc_pgtable_maddr();
-        if ( hd->pgd_maddr == 0 )
-        {
-            spin_unlock_irqrestore(&hd->mapping_lock, flags);
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "Allocate pgd memory failed!\n");
-            return;
-        }
-
-        pgd_vaddr = map_vtd_domain_page(hd->pgd_maddr);
-        l3e = map_domain_page(p2m_table);
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
-            /* We only support 8 entries for the PAE L3 p2m table */
-            for ( i = 0; i < 8 ; i++ )
-            {
-                /* Don't create new L2 entry, use ones from p2m table */
-                pgd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
-            }
-            break;
-
-        case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
-            /* We allocate one more page for the top vtd page table. */
-            pmd_maddr = alloc_pgtable_maddr();
-            if ( pmd_maddr == 0 )
-            {
-                unmap_vtd_domain_page(pgd_vaddr);
-                unmap_domain_page(l3e);
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "Allocate pmd memory failed!\n");
-                return;
-            }
-
-            pte = &pgd_vaddr[0];
-            dma_set_pte_addr(*pte, pmd_maddr);
-            dma_set_pte_readable(*pte);
-            dma_set_pte_writable(*pte);
-
-            pmd_vaddr = map_vtd_domain_page(pmd_maddr);
-            for ( i = 0; i < 8; i++ )
-            {
-                /* Don't create new L2 entry, use ones from p2m table */
-                pmd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
-            }
-
-            unmap_vtd_domain_page(pmd_vaddr);
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-
-        unmap_vtd_domain_page(pgd_vaddr);
-        unmap_domain_page(l3e);
-        spin_unlock_irqrestore(&hd->mapping_lock, flags);
-
-#elif CONFIG_PAGING_LEVELS == 4
-        mfn_t pgd_mfn;
-        l3_pgentry_t *l3e;
-        int level = agaw_to_level(hd->agaw);
-
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:
-            l3e = map_domain_page(p2m_table);
-            if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
-            {
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "iommu_set_pgd: second level wasn't there\n");
-                unmap_domain_page(l3e);
-                return;
-            }
-
-            pgd_mfn = _mfn(l3e_get_pfn(*l3e));
-            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
-            unmap_domain_page(l3e);
-            break;
-        case VTD_PAGE_TABLE_LEVEL_4:
-            pgd_mfn = _mfn(p2m_table);
-            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-#endif
-    }
-}
-
-void iommu_free_pgd(struct domain *d)
-{
-#if CONFIG_PAGING_LEVELS == 3
-    struct hvm_iommu *hd  = domain_hvm_iommu(d);
-    int level = agaw_to_level(hd->agaw);
-    struct dma_pte *pgd_vaddr = NULL;
-
-    switch ( level )
-    {
-    case VTD_PAGE_TABLE_LEVEL_3:
-        if ( hd->pgd_maddr != 0 )
-        {
-            free_pgtable_maddr(hd->pgd_maddr);
-            hd->pgd_maddr = 0;
-        }
-        break;
-    case VTD_PAGE_TABLE_LEVEL_4:
-        if ( hd->pgd_maddr != 0 )
-        {
-            pgd_vaddr = (struct dma_pte*)map_vtd_domain_page(hd->pgd_maddr);
-            if ( pgd_vaddr[0].val != 0 )
-                free_pgtable_maddr(pgd_vaddr[0].val);
-            unmap_vtd_domain_page(pgd_vaddr);
-            free_pgtable_maddr(hd->pgd_maddr);
-            hd->pgd_maddr = 0;
-        }
-        break;
-    default:
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "Unsupported p2m table sharing level!\n");
-        break;
-    }
-#endif
-}
-
diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/asm-x86/hvm/hvm.h     Mon Jun 02 11:35:39 2008 +0900
@@ -147,8 +147,10 @@ void hvm_send_assist_req(struct vcpu *v)
 
 void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc);
 u64 hvm_get_guest_tsc(struct vcpu *v);
-#define hvm_set_guest_time(vcpu, gtime) hvm_set_guest_tsc(vcpu, gtime)
-#define hvm_get_guest_time(vcpu)        hvm_get_guest_tsc(vcpu)
+
+void hvm_init_guest_time(struct domain *d);
+void hvm_set_guest_time(struct vcpu *v, u64 guest_time);
+u64 hvm_get_guest_time(struct vcpu *v);
 
 #define hvm_paging_enabled(v) \
     (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG))
diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/asm-x86/hvm/vcpu.h    Mon Jun 02 11:35:39 2008 +0900
@@ -68,6 +68,9 @@ struct hvm_vcpu {
     struct mtrr_state   mtrr;
     u64                 pat_cr;
 
+    /* In mode delay_for_missed_ticks, VCPUs have differing guest times. */
+    int64_t             stime_offset;
+
     /* Which cache mode is this VCPU in (CR0:CD/NW)? */
     u8                  cache_mode;
 
diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jun 02 11:35:39 2008 +0900
@@ -49,7 +49,6 @@ void vmx_asm_do_vmentry(void);
 void vmx_asm_do_vmentry(void);
 void vmx_intr_assist(void);
 void vmx_do_resume(struct vcpu *);
-void set_guest_time(struct vcpu *v, u64 gtime);
 void vmx_vlapic_msr_changed(struct vcpu *v);
 void vmx_realmode(struct cpu_user_regs *regs);
 
diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h     Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/asm-x86/hvm/vpt.h     Mon Jun 02 11:35:39 2008 +0900
@@ -57,7 +57,7 @@ typedef struct HPETState {
 typedef struct HPETState {
     struct hpet_registers hpet;
     struct vcpu *vcpu;
-    uint64_t tsc_freq;
+    uint64_t stime_freq;
     uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
     uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns      */
     uint64_t mc_offset;
@@ -137,6 +137,11 @@ struct pl_time {    /* platform time */
     struct RTCState  vrtc;
     struct HPETState vhpet;
     struct PMTState  vpmt;
+    /* guest_time = Xen sys time + stime_offset */
+    int64_t stime_offset;
+    /* Ensures monotonicity in appropriate timer modes. */
+    uint64_t last_guest_time;
+    spinlock_t pl_time_lock;
 };
 
 #define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency)
diff -r d2a239224cb2 -r f1508348ffab xen/include/asm-x86/tboot.h
--- a/xen/include/asm-x86/tboot.h       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/asm-x86/tboot.h       Mon Jun 02 11:35:39 2008 +0900
@@ -46,7 +46,15 @@ typedef struct __attribute__ ((__packed_
 } uuid_t;
 
 /* used to communicate between tboot and the launched kernel (i.e. Xen) */
-#define MAX_TB_ACPI_SINFO_SIZE   64
+
+typedef struct __attribute__ ((__packed__)) {
+    uint16_t pm1a_cnt;
+    uint16_t pm1b_cnt;
+    uint16_t pm1a_evt;
+    uint16_t pm1b_evt;
+    uint16_t pm1a_cnt_val;
+    uint16_t pm1b_cnt_val;
+} tboot_acpi_sleep_info;
 
 typedef struct __attribute__ ((__packed__)) {
     /* version 0x01+ fields: */
@@ -58,8 +66,9 @@ typedef struct __attribute__ ((__packed_
     uint32_t  shutdown_type;     /* type of shutdown (TB_SHUTDOWN_*) */
     uint32_t  s3_tb_wakeup_entry;/* entry point for tboot s3 wake up */
     uint32_t  s3_k_wakeup_entry; /* entry point for xen s3 wake up */
-    uint8_t   acpi_sinfo[MAX_TB_ACPI_SINFO_SIZE];
-                                 /* where kernel put acpi sleep info in Sx */
+    tboot_acpi_sleep_info
+              acpi_sinfo;        /* where kernel put acpi sleep info in Sx */
+    uint8_t   reserved[52];      /* this pad is for compat with old field */
     /* version 0x02+ fields: */
     uint32_t  tboot_base;        /* starting addr for tboot */
     uint32_t  tboot_size;        /* size of tboot */
diff -r d2a239224cb2 -r f1508348ffab xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/public/domctl.h       Mon Jun 02 11:35:39 2008 +0900
@@ -448,6 +448,16 @@ typedef struct xen_domctl_assign_device 
 typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
 
+/* Retrieve sibling device information for machine_bdf */
+#define XEN_DOMCTL_get_device_group 50
+struct xen_domctl_get_device_group {
+    uint32_t  machine_bdf;      /* IN */
+    uint32_t  max_sdevs;        /* IN */
+    uint32_t  num_sdevs;        /* OUT */
+    XEN_GUEST_HANDLE_64(uint32)  sdev_array;   /* OUT */
+};
+typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
 
 /* Pass-through interrupts: bind real irq -> hvm devfn. */
 #define XEN_DOMCTL_bind_pt_irq       38
@@ -619,6 +629,7 @@ struct xen_domctl {
         struct xen_domctl_hvmcontext        hvmcontext;
         struct xen_domctl_address_size      address_size;
         struct xen_domctl_sendtrigger       sendtrigger;
+        struct xen_domctl_get_device_group  get_device_group;
         struct xen_domctl_assign_device     assign_device;
         struct xen_domctl_bind_pt_irq       bind_pt_irq;
         struct xen_domctl_memory_mapping    memory_mapping;
diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/elfcore.h
--- a/xen/include/xen/elfcore.h Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/xen/elfcore.h Mon Jun 02 11:35:39 2008 +0900
@@ -66,6 +66,7 @@ typedef struct {
     unsigned long xen_compile_time;
     unsigned long tainted;
 #if defined(__i386__) || defined(__x86_64__)
+    unsigned long xen_phys_start;
     unsigned long dom0_pfn_to_mfn_frame_list_list;
 #endif
 #if defined(__ia64__)
diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/hvm/iommu.h
--- a/xen/include/xen/hvm/iommu.h       Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/xen/hvm/iommu.h       Mon Jun 02 11:35:39 2008 +0900
@@ -43,6 +43,7 @@ struct hvm_iommu {
     int agaw;     /* adjusted guest address width, 0 is level 2 30-bit */
     struct list_head g2m_ioport_list;  /* guest to machine ioport mapping */
     domid_t iommu_domid;           /* domain id stored in iommu */
+    u64 iommu_bitmap;              /* bitmap of iommu(s) that the domain uses */
 
     /* amd iommu support */
     int domain_id;
@@ -54,4 +55,7 @@ struct hvm_iommu {
     struct iommu_ops *platform_ops;
 };
 
+#define has_iommu_pdevs(domain) \
+    (!list_empty(&(domain->arch.hvm_domain.hvm_iommu.pdev_list)))
+
 #endif /* __ASM_X86_HVM_IOMMU_H__ */
diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/xen/iommu.h   Mon Jun 02 11:35:39 2008 +0900
@@ -29,6 +29,7 @@
 
 extern int vtd_enabled;
 extern int iommu_enabled;
+extern int iommu_pv_enabled;
 
 #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
 #define domain_vmx_iommu(d)     (&d->arch.hvm_domain.hvm_iommu.vmx_iommu)
@@ -43,7 +44,9 @@ struct iommu {
 struct iommu {
     struct list_head list;
     void __iomem *reg; /* Pointer to hardware regs, virtual addr */
+    u32        index;         /* Sequence number of iommu */
     u32        gcmd;          /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+    u32 nr_pt_levels;
     u64        cap;
     u64        ecap;
     spinlock_t lock; /* protect context, domain ids */
@@ -58,14 +61,13 @@ int device_assigned(u8 bus, u8 devfn);
 int device_assigned(u8 bus, u8 devfn);
 int assign_device(struct domain *d, u8 bus, u8 devfn);
 void deassign_device(struct domain *d, u8 bus, u8 devfn);
+int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
+    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
 void reassign_device_ownership(struct domain *source,
                                struct domain *target,
                                u8 bus, u8 devfn);
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
-void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry);
-void iommu_set_pgd(struct domain *d);
-void iommu_free_pgd(struct domain *d);
 void iommu_domain_teardown(struct domain *d);
 int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
 int dpci_ioport_intercept(ioreq_t *p);
@@ -76,6 +78,11 @@ unsigned int io_apic_read_remap_rte(unsi
 unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg);
 void io_apic_write_remap_rte(unsigned int apic,
                              unsigned int reg, unsigned int value);
+
+struct msi_desc;
+struct msi_msg;
+void msi_msg_read_remap_rte(struct msi_desc *msi_desc, struct msi_msg *msg);
+void msi_msg_write_remap_rte(struct msi_desc *msi_desc, struct msi_msg *msg);
 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu);
 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu);
 struct iommu_flush *iommu_get_flush(struct iommu *iommu);
@@ -94,6 +101,7 @@ struct iommu_ops {
     int (*unmap_page)(struct domain *d, unsigned long gfn);
     void (*reassign_device)(struct domain *s, struct domain *t,
                             u8 bus, u8 devfn);
+    int (*get_device_group_id)(u8 bus, u8 devfn);
 };
 
 #endif /* _IOMMU_H_ */
diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/xen/sched.h   Mon Jun 02 11:35:39 2008 +0900
@@ -186,6 +186,8 @@ struct domain
 
     /* Is this an HVM guest? */
     bool_t           is_hvm;
+    /* Does this guest need iommu mappings? */
+    bool_t           need_iommu;
     /* Is this guest fully privileged (aka dom0)? */
     bool_t           is_privileged;
     /* Which guest this guest has privileges on */
@@ -515,6 +517,7 @@ static inline void vcpu_unblock(struct v
 
 #define is_hvm_domain(d) ((d)->is_hvm)
 #define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
+#define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)
 
 extern enum cpufreq_controller {
     FREQCTL_none, FREQCTL_dom0_kernel
diff -r d2a239224cb2 -r f1508348ffab xen/include/xen/time.h
--- a/xen/include/xen/time.h    Mon Jun 02 11:35:02 2008 +0900
+++ b/xen/include/xen/time.h    Mon Jun 02 11:35:39 2008 +0900
@@ -47,6 +47,7 @@ struct tm {
 };
 struct tm gmtime(unsigned long t);
 
+#define SYSTEM_TIME_HZ  1000000000ULL
 #define NOW()           ((s_time_t)get_s_time())
 #define SECONDS(_s)     ((s_time_t)((_s)  * 1000000000ULL))
 #define MILLISECS(_ms)  ((s_time_t)((_ms) * 1000000ULL))

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>