WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 20 May 2008 08:30:16 -0700
Delivery-date: Tue, 20 May 2008 08:30:45 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1209067709 21600
# Node ID dc510776dd598f3f479af749865bec225e32634d
# Parent  239b44eeb2d6d235ddee581b6e89398c80278a2f
# Parent  97da69831384f0819caeeb8b8bdff0f942b2d690
merge with xen-unstable.hg
---
 xen/arch/x86/hvm/svm/x86_32/Makefile          |    1 
 xen/arch/x86/hvm/svm/x86_32/exits.S           |  131 -------
 xen/arch/x86/hvm/svm/x86_64/Makefile          |    1 
 xen/arch/x86/hvm/svm/x86_64/exits.S           |  148 --------
 xen/arch/x86/hvm/vmx/x86_32/Makefile          |    1 
 xen/arch/x86/hvm/vmx/x86_32/exits.S           |  148 --------
 xen/arch/x86/hvm/vmx/x86_64/Makefile          |    1 
 xen/arch/x86/hvm/vmx/x86_64/exits.S           |  165 ---------
 .hgignore                                     |    1 
 Makefile                                      |    7 
 docs/misc/vtd.txt                             |    2 
 docs/src/user.tex                             |    7 
 docs/xen-api/revision-history.tex             |   10 
 docs/xen-api/xenapi-coversheet.tex            |    2 
 docs/xen-api/xenapi-datamodel.tex             |  180 ++++++++--
 extras/mini-os/Makefile                       |   29 +
 extras/mini-os/app.lds                        |   11 
 extras/mini-os/arch/ia64/minios-ia64.lds      |    5 
 extras/mini-os/arch/x86/minios-x86_32.lds     |    1 
 extras/mini-os/arch/x86/minios-x86_64.lds     |    1 
 extras/mini-os/arch/x86/mm.c                  |    7 
 extras/mini-os/fbfront.c                      |   98 +++--
 extras/mini-os/hypervisor.c                   |   15 
 extras/mini-os/include/fbfront.h              |    3 
 extras/mini-os/include/hypervisor.h           |    4 
 extras/mini-os/include/ia64/arch_mm.h         |    2 
 extras/mini-os/include/ia64/os.h              |   11 
 extras/mini-os/include/lib.h                  |    1 
 extras/mini-os/include/mm.h                   |    2 
 extras/mini-os/include/x86/os.h               |    1 
 extras/mini-os/kernel.c                       |   15 
 extras/mini-os/lib/sys.c                      |   35 ++
 extras/mini-os/main.c                         |    2 
 extras/mini-os/mm.c                           |   12 
 extras/mini-os/sched.c                        |  136 ++------
 tools/blktap/drivers/blktapctrl.c             |   83 ++++
 tools/blktap/drivers/tapdisk.h                |    2 
 tools/console/daemon/io.c                     |   18 -
 tools/console/daemon/main.c                   |   13 
 tools/examples/blktap                         |   22 -
 tools/firmware/hvmloader/Makefile             |    5 
 tools/firmware/hvmloader/acpi/build.c         |    4 
 tools/firmware/hvmloader/cacheattr.c          |   99 +++++
 tools/firmware/hvmloader/config.h             |    3 
 tools/firmware/hvmloader/hvmloader.c          |    3 
 tools/firmware/hvmloader/smp.c                |    9 
 tools/ioemu/Makefile                          |    2 
 tools/ioemu/hw/cirrus_vga.c                   |    4 
 tools/ioemu/hw/pci.c                          |   16 
 tools/ioemu/hw/vga.c                          |    6 
 tools/ioemu/hw/xen_blktap.c                   |   45 --
 tools/ioemu/hw/xenfb.c                        |  153 ++++++---
 tools/ioemu/tapdisk-ioemu.c                   |   14 
 tools/ioemu/target-i386-dm/helper2.c          |    2 
 tools/ioemu/vl.c                              |   17 -
 tools/ioemu/vl.h                              |    1 
 tools/libfsimage/ext2fs/fsys_ext2fs.c         |   61 +++
 tools/libxc/xc_hvm_build.c                    |    2 
 tools/python/xen/util/acmpolicy.py            |   27 +
 tools/python/xen/xend/XendDomain.py           |   26 +
 tools/python/xen/xend/XendDomainInfo.py       |   31 +
 tools/python/xen/xend/XendXSPolicyAdmin.py    |    1 
 tools/python/xen/xend/image.py                |   46 ++
 tools/python/xen/xm/main.py                   |   11 
 tools/tests/test_x86_emulator.c               |   48 --
 tools/tests/x86_emulate.c                     |    6 
 tools/xenmon/xenbaked.c                       |   32 +
 xen/Makefile                                  |   12 
 xen/arch/x86/Makefile                         |    2 
 xen/arch/x86/bitops.c                         |   32 +
 xen/arch/x86/cpu/mtrr/main.c                  |    7 
 xen/arch/x86/domain.c                         |   11 
 xen/arch/x86/domain_build.c                   |    4 
 xen/arch/x86/hvm/emulate.c                    |  133 +++++++-
 xen/arch/x86/hvm/hvm.c                        |  152 ++++++++-
 xen/arch/x86/hvm/mtrr.c                       |  312 ++++--------------
 xen/arch/x86/hvm/svm/Makefile                 |    4 
 xen/arch/x86/hvm/svm/entry.S                  |  178 ++++++++++
 xen/arch/x86/hvm/svm/intr.c                   |    6 
 xen/arch/x86/hvm/svm/svm.c                    |   40 --
 xen/arch/x86/hvm/vmx/Makefile                 |    4 
 xen/arch/x86/hvm/vmx/entry.S                  |  198 +++++++++++
 xen/arch/x86/hvm/vmx/intr.c                   |    6 
 xen/arch/x86/hvm/vmx/vmx.c                    |  101 ------
 xen/arch/x86/mm.c                             |   35 --
 xen/arch/x86/mm/shadow/common.c               |   94 ++---
 xen/arch/x86/mm/shadow/multi.c                |   13 
 xen/arch/x86/setup.c                          |    4 
 xen/arch/x86/smp.c                            |   27 -
 xen/arch/x86/traps.c                          |   41 +-
 xen/arch/x86/x86_emulate.c                    |   18 -
 xen/arch/x86/x86_emulate/x86_emulate.c        |  282 +++++++++++-----
 xen/arch/x86/x86_emulate/x86_emulate.h        |   54 +--
 xen/common/trace.c                            |   11 
 xen/common/xencomm.c                          |    2 
 xen/drivers/char/console.c                    |    2 
 xen/drivers/char/serial.c                     |   53 ++-
 xen/drivers/passthrough/amd/iommu_acpi.c      |  214 +++++-------
 xen/drivers/passthrough/amd/iommu_detect.c    |   62 +++
 xen/drivers/passthrough/amd/iommu_init.c      |  306 ++++++++++++++++++
 xen/drivers/passthrough/amd/iommu_map.c       |   21 -
 xen/drivers/passthrough/amd/pci_amd_iommu.c   |  210 +++++-------
 xen/drivers/passthrough/iommu.c               |   30 +
 xen/drivers/passthrough/vtd/dmar.c            |   58 ---
 xen/drivers/passthrough/vtd/iommu.c           |  432 ++++++++------------------
 xen/drivers/passthrough/vtd/utils.c           |    2 
 xen/drivers/passthrough/vtd/x86/vtd.c         |   16 
 xen/include/asm-x86/amd-iommu.h               |    6 
 xen/include/asm-x86/bitops.h                  |   52 +--
 xen/include/asm-x86/hvm/hvm.h                 |    2 
 xen/include/asm-x86/hvm/support.h             |    2 
 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h  |   37 ++
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |   19 -
 xen/include/asm-x86/hvm/vcpu.h                |   11 
 xen/include/asm-x86/mtrr.h                    |    8 
 xen/include/asm-x86/paging.h                  |    2 
 xen/include/public/xsm/acm.h                  |    7 
 xen/include/xen/iommu.h                       |    4 
 xen/include/xen/serial.h                      |    8 
 xen/include/xsm/acm/acm_core.h                |    1 
 xen/tools/Makefile                            |    4 
 xen/tools/figlet/figlet.c                     |   24 -
 xen/xsm/acm/acm_policy.c                      |    8 
 123 files changed, 2967 insertions(+), 2425 deletions(-)

diff -r 239b44eeb2d6 -r dc510776dd59 .hgignore
--- a/.hgignore Thu Apr 24 14:02:16 2008 -0600
+++ b/.hgignore Thu Apr 24 14:08:29 2008 -0600
@@ -243,6 +243,7 @@
 ^tools/xm-test/lib/XmTestLib/config.py$
 ^tools/xm-test/lib/XmTestReport/xmtest.py$
 ^tools/xm-test/tests/.*\.test$
+^xen/\.banner.*$
 ^xen/BLOG$
 ^xen/System.map$
 ^xen/TAGS$
diff -r 239b44eeb2d6 -r dc510776dd59 Makefile
--- a/Makefile  Thu Apr 24 14:02:16 2008 -0600
+++ b/Makefile  Thu Apr 24 14:08:29 2008 -0600
@@ -121,6 +121,13 @@ distclean:
 # Linux name for GNU distclean
 .PHONY: mrproper
 mrproper: distclean
+
+# Prepare for source tarball
+.PHONY: src-tarball
+src-tarball: distclean
+       $(MAKE) -C xen .banner
+       rm -rf xen/tools/figlet .[a-z]*
+       $(MAKE) -C xen distclean
 
 .PHONY: help
 help:
diff -r 239b44eeb2d6 -r dc510776dd59 docs/misc/vtd.txt
--- a/docs/misc/vtd.txt Thu Apr 24 14:02:16 2008 -0600
+++ b/docs/misc/vtd.txt Thu Apr 24 14:08:29 2008 -0600
@@ -21,7 +21,7 @@ 11) "hide" pci device from dom0 as follo
 
 title Xen-Fedora Core (2.6.18-xen)
         root (hd0,0)
-        kernel /boot/xen.gz com1=115200,8n1 console=com1 vtd=1
+        kernel /boot/xen.gz com1=115200,8n1 console=com1
         module /boot/vmlinuz-2.6.18.8-xen root=LABEL=/ ro console=tty0 
console=ttyS0,115200,8n1 pciback.hide=(01:00.0)(03:00.0) 
pciback.verbose_request=1 apic=debug
         module /boot/initrd-2.6.18-xen.img
 
diff -r 239b44eeb2d6 -r dc510776dd59 docs/src/user.tex
--- a/docs/src/user.tex Thu Apr 24 14:02:16 2008 -0600
+++ b/docs/src/user.tex Thu Apr 24 14:08:29 2008 -0600
@@ -4088,6 +4088,8 @@ editing \path{grub.conf}.
   a list of pages not to be allocated for use because they contain bad
   bytes. For example, if your memory tester says that byte 0x12345678
   is bad, you would place `badpage=0x12345' on Xen's command line.
+\item [ serial\_tx\_buffer=$<$size$>$ ] Size of serial transmit
+  buffers. Default is 16kB.
 \item [ com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
   com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\
   Xen supports up to two 16550-compatible serial ports.  For example:
@@ -4239,10 +4241,11 @@ In addition to the standard Linux kernel
     \begin{tabular}{l}
       `xencons=off': disable virtual console \\
       `xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
-      `xencons=ttyS': attach console to /dev/ttyS0
+      `xencons=ttyS': attach console to /dev/ttyS0 \\
+      `xencons=xvc': attach console to /dev/xvc0
     \end{tabular}
 \end{center}
-The default is ttyS for dom0 and tty for all other domains.
+The default is ttyS for dom0 and xvc for all other domains.
 \end{description}
 
 
diff -r 239b44eeb2d6 -r dc510776dd59 docs/xen-api/revision-history.tex
--- a/docs/xen-api/revision-history.tex Thu Apr 24 14:02:16 2008 -0600
+++ b/docs/xen-api/revision-history.tex Thu Apr 24 14:08:29 2008 -0600
@@ -37,5 +37,15 @@
     \end{flushleft}
    \end{minipage}\\
   \hline
+  1.0.5 & 17th Apr. 08 & S. Berger &
+   \begin{minipage}[t]{7cm}
+    \begin{flushleft}
+     Added undocumented fields and methods for default\_netmask and
+     default\_gateway to the Network class. Removed an unimplemented
+     method from the XSPolicy class and removed the 'optional' from
+     'oldlabel' parameters.
+    \end{flushleft}
+   \end{minipage}\\
+  \hline
  \end{tabular}
 \end{center}
diff -r 239b44eeb2d6 -r dc510776dd59 docs/xen-api/xenapi-coversheet.tex
--- a/docs/xen-api/xenapi-coversheet.tex        Thu Apr 24 14:02:16 2008 -0600
+++ b/docs/xen-api/xenapi-coversheet.tex        Thu Apr 24 14:08:29 2008 -0600
@@ -22,7 +22,7 @@
 \newcommand{\releasestatement}{Stable Release}
 
 %% Document revision
-\newcommand{\revstring}{API Revision 1.0.4}
+\newcommand{\revstring}{API Revision 1.0.5}
 
 %% Document authors
 \newcommand{\docauthors}{
diff -r 239b44eeb2d6 -r dc510776dd59 docs/xen-api/xenapi-datamodel.tex
--- a/docs/xen-api/xenapi-datamodel.tex Thu Apr 24 14:02:16 2008 -0600
+++ b/docs/xen-api/xenapi-datamodel.tex Thu Apr 24 14:08:29 2008 -0600
@@ -4467,7 +4467,7 @@ security_label, string old_label)\end{ve
 {\bf type} & {\bf name} & {\bf description} \\ \hline
 {\tt VM ref } & self & reference to the object \\ \hline
 {\tt string } & security\_label & security label for the VM \\ \hline
-{\tt string } & old\_label & Optional label value that the security label \\
+{\tt string } & old\_label & Label value that the security label \\
 & & must currently have for the change to succeed.\\ \hline
 
 \end{tabular}
@@ -7619,6 +7619,8 @@ Quals & Field & Type & Description \\
 $\mathit{RW}$ &  {\tt name/description} & string & a notes field containg 
human-readable description \\
 $\mathit{RO}_\mathit{run}$ &  {\tt VIFs} & (VIF ref) Set & list of connected 
vifs \\
 $\mathit{RO}_\mathit{run}$ &  {\tt PIFs} & (PIF ref) Set & list of connected 
pifs \\
+$\mathit{RW}$ &  {\tt default\_gateway} & string & default gateway \\
+$\mathit{RW}$ &  {\tt default\_netmask} & string & default netmask \\
 $\mathit{RW}$ &  {\tt other\_config} & (string $\rightarrow$ string) Map & 
additional configuration \\
 \hline
 \end{longtable}
@@ -7869,6 +7871,138 @@ Get the PIFs field of the given network.
 
 
 value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_default\_gateway}
+
+{\bf Overview:} 
+Get the default\_gateway field of the given network.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_default_gateway (session_id s, network ref 
self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt network ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~set\_default\_gateway}
+
+{\bf Overview:} 
+Set the default\_gateway field of the given network.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} void set_default_gateway (session_id s, network ref self, 
string value)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt network ref } & self & reference to the object \\ \hline 
+
+{\tt string } & value & New value to set \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+void
+}
+
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_default\_netmask}
+
+{\bf Overview:} 
+Get the default\_netmask field of the given network.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} string get_default_netmask (session_id s, network ref 
self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt network ref } & self & reference to the object \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~set\_default\_netmask}
+
+{\bf Overview:} 
+Set the default\_netmask field of the given network.
+
+ \noindent {\bf Signature:} 
+\begin{verbatim} void set_default_netmask (session_id s, network ref self, 
string value)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+ 
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt network ref } & self & reference to the object \\ \hline 
+
+{\tt string } & value & New value to set \\ \hline 
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:} 
+{\tt 
+void
+}
+
+
+
 \vspace{0.3cm}
 \vspace{0.3cm}
 \vspace{0.3cm}
@@ -8999,7 +9133,7 @@ security_label, string old_label)\end{ve
 {\tt VIF ref } & self & reference to the object \\ \hline
 
 {\tt string } & security\_label & New value of the security label \\ \hline
-{\tt string } & old\_label & Optional label value that the security label \\
+{\tt string } & old\_label & Label value that the security label \\
 & & must currently have for the change to succeed.\\ \hline
 \end{tabular}
 
@@ -11504,7 +11638,7 @@ security_label, string old_label)\end{ve
 {\tt VDI ref } & self & reference to the object \\ \hline
 
 {\tt string } & security\_label & New value of the security label \\ \hline
-{\tt string } & old\_label & Optional label value that the security label \\
+{\tt string } & old\_label & Label value that the security label \\
 & & must currently have for the change to succeed.\\ \hline
 \end{tabular}
 
@@ -14898,46 +15032,6 @@ The label of the given resource.
 \vspace{0.3cm}
 \vspace{0.3cm}
 \vspace{0.3cm}
-\subsubsection{RPC name:~activate\_xspolicy}
-
-{\bf Overview:}
-Load the referenced policy into the hypervisor.
-
- \noindent {\bf Signature:}
-\begin{verbatim} xs_instantiationflags activate_xspolicy (session_id s, xs_ref 
xspolicy,
-xs_instantiationflags flags)\end{verbatim}
-
-
-\noindent{\bf Arguments:}
-
-
-\vspace{0.3cm}
-\begin{tabular}{|c|c|p{7cm}|}
- \hline
-{\bf type} & {\bf name} & {\bf description} \\ \hline
-{\tt xs ref } & self & reference to the object \\ \hline
-{\tt xs\_instantiationflags } & flags & flags to activate on a policy; flags
-  can only be set \\ \hline
-
-\end{tabular}
-
-\vspace{0.3cm}
-
-
- \noindent {\bf Return Type:}
-{\tt
-xs\_instantiationflags
-}
-
-
-Currently active instantiation flags.
-\vspace{0.3cm}
-
-\noindent{\bf Possible Error Codes:} {\tt SECURITY\_ERROR}
-
-\vspace{0.3cm}
-\vspace{0.3cm}
-\vspace{0.3cm}
 \subsubsection{RPC name:~can\_run}
 
 {\bf Overview:}
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/Makefile   Thu Apr 24 14:08:29 2008 -0600
@@ -19,6 +19,7 @@ include minios.mk
 
 # Define some default flags for linking.
 LDLIBS := 
+APP_LDLIBS := 
 LDARCHLIB := -L$(TARGET_ARCH_DIR) -l$(ARCH_LIB_NAME)
 LDFLAGS_FINAL := -T $(TARGET_ARCH_DIR)/minios-$(XEN_TARGET_ARCH).lds
 
@@ -33,6 +34,7 @@ SUBDIRS := lib xenbus console
 SUBDIRS := lib xenbus console
 
 # The common mini-os objects to build.
+APP_OBJS :=
 OBJS := $(patsubst %.c,%.o,$(wildcard *.c))
 OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c))
 OBJS += $(patsubst %.c,%.o,$(wildcard xenbus/*.c))
@@ -75,28 +77,28 @@ OBJS := $(filter-out lwip%.o $(LWO), $(O
 
 ifeq ($(caml),y)
 CAMLLIB = $(shell ocamlc -where)
-OBJS += $(CAMLDIR)/caml.o
-OBJS += $(CAMLLIB)/libasmrun.a
+APP_OBJS += main-caml.o
+APP_OBJS += $(CAMLDIR)/caml.o
+APP_OBJS += $(CAMLLIB)/libasmrun.a
 CFLAGS += -I$(CAMLLIB)
-LDLIBS += -lm
-else
+APP_LDLIBS += -lm
+endif
 OBJS := $(filter-out main-caml.o, $(OBJS))
-endif
 
 ifeq ($(qemu),y)
-OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a $(QEMUDIR)/i386-dm-stubdom/libqemu.a
+APP_OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a 
$(QEMUDIR)/i386-dm-stubdom/libqemu.a
 CFLAGS += -DCONFIG_QEMU
 endif
 
 ifneq ($(CDIR),)
-OBJS += $(CDIR)/main.a
-LDLIBS += 
+APP_OBJS += $(CDIR)/main.a
+APP_LDLIBS += 
 endif
 
 ifeq ($(libc),y)
 LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -lxenctrl -lxenguest
-LDLIBS += -lpci
-LDLIBS += -lz
+APP_LDLIBS += -lpci
+APP_LDLIBS += -lz
 LDLIBS += -lc
 endif
 
@@ -104,8 +106,11 @@ OBJS := $(filter-out daytime.o, $(OBJS))
 OBJS := $(filter-out daytime.o, $(OBJS))
 endif
 
-$(TARGET): links $(OBJS) arch_lib
-       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
+app.o: $(APP_OBJS) app.lds
+       $(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined main -o $@
+
+$(TARGET): links $(OBJS) app.o arch_lib
+       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o 
$@.o
        $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
        $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
        gzip -f -9 -c $@ >$@.gz
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/app.lds
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/extras/mini-os/app.lds    Thu Apr 24 14:08:29 2008 -0600
@@ -0,0 +1,11 @@
+SECTIONS
+{
+        .app.bss : {
+                __app_bss_start = . ;
+                *(.bss .bss.*)
+                *(COMMON)
+                *(.lbss .lbss.*)
+                *(LARGE_COMMON)
+                __app_bss_end = . ;
+        }
+}
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/ia64/minios-ia64.lds
--- a/extras/mini-os/arch/ia64/minios-ia64.lds  Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/arch/ia64/minios-ia64.lds  Thu Apr 24 14:08:29 2008 -0600
@@ -59,7 +59,10 @@ SECTIONS
   { *(.IA_64.unwind) }
 
   .bss : AT(ADDR(.bss) - (((5<<(61))+0x100000000) - (1 << 20)))
-  { *(.bss) }
+  {
+    *(.bss)
+    *(.app.bss)
+  }
 
   _end = .;
 
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/x86/minios-x86_32.lds
--- a/extras/mini-os/arch/x86/minios-x86_32.lds Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/arch/x86/minios-x86_32.lds Thu Apr 24 14:08:29 2008 -0600
@@ -38,6 +38,7 @@ SECTIONS
   __bss_start = .;             /* BSS */
   .bss : {
        *(.bss)
+        *(.app.bss)
        }
   _end = . ;
 
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/x86/minios-x86_64.lds
--- a/extras/mini-os/arch/x86/minios-x86_64.lds Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/arch/x86/minios-x86_64.lds Thu Apr 24 14:08:29 2008 -0600
@@ -38,6 +38,7 @@ SECTIONS
   __bss_start = .;             /* BSS */
   .bss : {
        *(.bss)
+        *(.app.bss)
        }
   _end = . ;
 
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/arch/x86/mm.c      Thu Apr 24 14:08:29 2008 -0600
@@ -556,7 +556,6 @@ void *map_frames_ex(unsigned long *f, un
 
 static void clear_bootstrap(void)
 {
-    struct xen_memory_reservation reservation;
     xen_pfn_t mfns[] = { virt_to_mfn(&shared_info) };
     int n = sizeof(mfns)/sizeof(*mfns);
     pte_t nullpte = { };
@@ -567,11 +566,7 @@ static void clear_bootstrap(void)
     if (HYPERVISOR_update_va_mapping((unsigned long) &_text, nullpte, 
UVMF_INVLPG))
        printk("Unable to unmap first page\n");
 
-    set_xen_guest_handle(reservation.extent_start, mfns);
-    reservation.nr_extents = n;
-    reservation.extent_order = 0;
-    reservation.domid = DOMID_SELF;
-    if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != n)
+    if (free_physical_pages(mfns, n) != n)
        printk("Unable to free bootstrap pages\n");
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/fbfront.c
--- a/extras/mini-os/fbfront.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/fbfront.c  Thu Apr 24 14:08:29 2008 -0600
@@ -243,12 +243,12 @@ struct fbfront_dev {
     char *backend;
     int request_update;
 
-    char *data;
     int width;
     int height;
     int depth;
-    int line_length;
+    int stride;
     int mem_length;
+    int offset;
 };
 
 void fbfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
@@ -256,7 +256,7 @@ void fbfront_handler(evtchn_port_t port,
     wake_up(&fbfront_queue);
 }
 
-struct fbfront_dev *init_fbfront(char *nodename, void *data, int width, int 
height, int depth, int line_length, int mem_length)
+struct fbfront_dev *init_fbfront(char *nodename, unsigned long *mfns, int 
width, int height, int depth, int stride, int n)
 {
     xenbus_transaction_t xbt;
     char* err;
@@ -289,24 +289,17 @@ struct fbfront_dev *init_fbfront(char *n
     dev->width = s->width = width;
     dev->height = s->height = height;
     dev->depth = s->depth = depth;
-    dev->line_length = s->line_length = line_length;
-    dev->mem_length = s->mem_length = mem_length;
-
-    ASSERT(!((unsigned long)data & ~PAGE_MASK));
-    dev->data = data;
+    dev->stride = s->line_length = stride;
+    dev->mem_length = s->mem_length = n * PAGE_SIZE;
+    dev->offset = 0;
 
     const int max_pd = sizeof(s->pd) / sizeof(s->pd[0]);
     unsigned long mapped = 0;
 
-    for (i = 0; mapped < mem_length && i < max_pd; i++) {
+    for (i = 0; mapped < n && i < max_pd; i++) {
         unsigned long *pd = (unsigned long *) alloc_page();
-        for (j = 0; mapped < mem_length && j < PAGE_SIZE / sizeof(unsigned 
long); j++) {
-            /* Trigger CoW */
-            * ((char *)data + mapped) = 0;
-            barrier();
-            pd[j] = virtual_to_mfn((unsigned long) data + mapped);
-            mapped += PAGE_SIZE;
-        }
+        for (j = 0; mapped < n && j < PAGE_SIZE / sizeof(unsigned long); j++)
+            pd[j] = mfns[mapped++];
         for ( ; j < PAGE_SIZE / sizeof(unsigned long); j++)
             pd[j] = 0;
         s->pd[i] = virt_to_mfn(pd);
@@ -395,31 +388,11 @@ done:
     return dev;
 }
 
-void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int 
height)
+static void fbfront_out_event(struct fbfront_dev *dev, union xenfb_out_event 
*event)
 {
     struct xenfb_page *page = dev->page;
     uint32_t prod;
     DEFINE_WAIT(w);
-
-    if (dev->request_update <= 0)
-        return;
-
-    if (x < 0) {
-        width += x;
-        x = 0;
-    }
-    if (x + width > dev->width)
-        width = dev->width - x;
-
-    if (y < 0) {
-        height += y;
-        y = 0;
-    }
-    if (y + height > dev->height)
-        height = dev->height - y;
-
-    if (width <= 0 || height <= 0)
-        return;
 
     add_waiter(w, fbfront_queue);
     while (page->out_prod - page->out_cons == XENFB_OUT_RING_LEN)
@@ -428,14 +401,55 @@ void fbfront_update(struct fbfront_dev *
 
     prod = page->out_prod;
     mb(); /* ensure ring space available */
-    XENFB_OUT_RING_REF(page, prod).type = XENFB_TYPE_UPDATE;
-    XENFB_OUT_RING_REF(page, prod).update.x = x;
-    XENFB_OUT_RING_REF(page, prod).update.y = y;
-    XENFB_OUT_RING_REF(page, prod).update.width = width;
-    XENFB_OUT_RING_REF(page, prod).update.height = height;
+    XENFB_OUT_RING_REF(page, prod) = *event;
     wmb(); /* ensure ring contents visible */
     page->out_prod = prod + 1;
     notify_remote_via_evtchn(dev->evtchn);
+}
+
+void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int 
height)
+{
+    struct xenfb_update update;
+
+    if (dev->request_update <= 0)
+        return;
+
+    if (x < 0) {
+        width += x;
+        x = 0;
+    }
+    if (x + width > dev->width)
+        width = dev->width - x;
+
+    if (y < 0) {
+        height += y;
+        y = 0;
+    }
+    if (y + height > dev->height)
+        height = dev->height - y;
+
+    if (width <= 0 || height <= 0)
+        return;
+
+    update.type = XENFB_TYPE_UPDATE;
+    update.x = x;
+    update.y = y;
+    update.width = width;
+    update.height = height;
+    fbfront_out_event(dev, (union xenfb_out_event *) &update);
+}
+
+void fbfront_resize(struct fbfront_dev *dev, int width, int height, int 
stride, int depth, int offset)
+{
+    struct xenfb_resize resize;
+
+    resize.type = XENFB_TYPE_RESIZE;
+    dev->width  = resize.width = width;
+    dev->height = resize.height = height;
+    dev->stride = resize.stride = stride;
+    dev->depth  = resize.depth = depth;
+    dev->offset = resize.offset = offset;
+    fbfront_out_event(dev, (union xenfb_out_event *) &resize);
 }
 
 void shutdown_fbfront(struct fbfront_dev *dev)
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/hypervisor.c
--- a/extras/mini-os/hypervisor.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/hypervisor.c       Thu Apr 24 14:08:29 2008 -0600
@@ -66,6 +66,21 @@ void do_hypervisor_callback(struct pt_re
     in_callback = 0;
 }
 
+void force_evtchn_callback(void)
+{
+    vcpu_info_t *vcpu;
+    vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
+    int save = vcpu->evtchn_upcall_mask;
+
+    while (vcpu->evtchn_upcall_pending) {
+        vcpu->evtchn_upcall_mask = 1;
+        barrier();
+        do_hypervisor_callback(NULL);
+        barrier();
+        vcpu->evtchn_upcall_mask = save;
+        barrier();
+    };
+}
 
 inline void mask_evtchn(u32 port)
 {
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/fbfront.h
--- a/extras/mini-os/include/fbfront.h  Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/fbfront.h  Thu Apr 24 14:08:29 2008 -0600
@@ -31,11 +31,12 @@ void shutdown_kbdfront(struct kbdfront_d
 void shutdown_kbdfront(struct kbdfront_dev *dev);
 
 
-struct fbfront_dev *init_fbfront(char *nodename, void *data, int width, int 
height, int depth, int line_length, int mem_length);
+struct fbfront_dev *init_fbfront(char *nodename, unsigned long *mfns, int 
width, int height, int depth, int stride, int n);
 #ifdef HAVE_LIBC
 int fbfront_open(struct fbfront_dev *dev);
 #endif
 
 void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int 
height);
+void fbfront_resize(struct fbfront_dev *dev, int width, int height, int 
stride, int depth, int offset);
 
 void shutdown_fbfront(struct fbfront_dev *dev);
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/hypervisor.h
--- a/extras/mini-os/include/hypervisor.h       Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/hypervisor.h       Thu Apr 24 14:08:29 2008 -0600
@@ -24,6 +24,7 @@
 #else
 #error "Unsupported architecture"
 #endif
+#include <traps.h>
 
 /*
  * a placeholder for the start of day information passed up from the hypervisor
@@ -37,7 +38,8 @@ extern union start_info_union start_info
 #define start_info (start_info_union.start_info)
 
 /* hypervisor.c */
-//void do_hypervisor_callback(struct pt_regs *regs);
+void force_evtchn_callback(void);
+void do_hypervisor_callback(struct pt_regs *regs);
 void mask_evtchn(u32 port);
 void unmask_evtchn(u32 port);
 void clear_evtchn(u32 port);
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/ia64/arch_mm.h
--- a/extras/mini-os/include/ia64/arch_mm.h     Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/ia64/arch_mm.h     Thu Apr 24 14:08:29 2008 -0600
@@ -38,6 +38,6 @@
 #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, 0)
 /* TODO */
 #define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, 0, 0)
-#define do_map_zero(start, n) ((void)0)
+#define do_map_zero(start, n) ASSERT(n == 0)
 
 #endif /* __ARCH_MM_H__ */
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/ia64/os.h
--- a/extras/mini-os/include/ia64/os.h  Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/ia64/os.h  Thu Apr 24 14:08:29 2008 -0600
@@ -189,17 +189,6 @@ __synch_cmpxchg(volatile void *ptr, uint
        return ia64_cmpxchg_acq_64(ptr, old, new);
 }
 
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-static inline void
-force_evtchn_callback(void)
-{
-       (void)HYPERVISOR_xen_version(0, NULL);
-}
-
 extern shared_info_t *HYPERVISOR_shared_info;
 
 static inline int
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/lib.h
--- a/extras/mini-os/include/lib.h      Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/lib.h      Thu Apr 24 14:08:29 2008 -0600
@@ -187,6 +187,7 @@ int alloc_fd(enum fd_type type);
 int alloc_fd(enum fd_type type);
 void close_all_files(void);
 extern struct thread *main_thread;
+void sparse(unsigned long data, size_t size);
 #endif
 
 #endif /* _LIB_H_ */
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/mm.h
--- a/extras/mini-os/include/mm.h       Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/mm.h       Thu Apr 24 14:08:29 2008 -0600
@@ -70,4 +70,6 @@ extern unsigned long heap, brk, heap_map
 extern unsigned long heap, brk, heap_mapped, heap_end;
 #endif
 
+int free_physical_pages(xen_pfn_t *mfns, int n);
+
 #endif /* _MM_H_ */
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/include/x86/os.h
--- a/extras/mini-os/include/x86/os.h   Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/include/x86/os.h   Thu Apr 24 14:08:29 2008 -0600
@@ -28,7 +28,6 @@ extern void do_exit(void) __attribute__(
 #include <xen/xen.h>
 
 
-#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0, 0))
 
 #define __KERNEL_CS  FLAT_KERNEL_CS
 #define __KERNEL_DS  FLAT_KERNEL_DS
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/kernel.c   Thu Apr 24 14:08:29 2008 -0600
@@ -297,9 +297,20 @@ static void fbfront_thread(void *p)
 {
     size_t line_length = WIDTH * (DEPTH / 8);
     size_t memsize = HEIGHT * line_length;
-
+    unsigned long *mfns;
+    int i, n = (memsize + PAGE_SIZE-1) / PAGE_SIZE;
+
+    memsize = n * PAGE_SIZE;
     fb = _xmalloc(memsize, PAGE_SIZE);
-    fb_dev = init_fbfront(NULL, fb, WIDTH, HEIGHT, DEPTH, line_length, 
memsize);
+    mfns = xmalloc_array(unsigned long, n);
+    for (i = 0; i < n; i++) {
+        /* trigger CoW */
+        ((char *) fb) [i * PAGE_SIZE] = 0;
+        barrier();
+        mfns[i] = virtual_to_mfn((char *) fb + i * PAGE_SIZE);
+    }
+    fb_dev = init_fbfront(NULL, mfns, WIDTH, HEIGHT, DEPTH, line_length, n);
+    xfree(mfns);
     if (!fb_dev) {
         xfree(fb);
         return;
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/lib/sys.c
--- a/extras/mini-os/lib/sys.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/lib/sys.c  Thu Apr 24 14:08:29 2008 -0600
@@ -1108,6 +1108,41 @@ int munmap(void *start, size_t length)
     return 0;
 }
 
+void sparse(unsigned long data, size_t size)
+{
+    unsigned long newdata;
+    xen_pfn_t *mfns;
+    int i, n;
+
+    newdata = (data + PAGE_SIZE - 1) & PAGE_MASK;
+    if (newdata - data > size)
+        return;
+    size -= newdata - data;
+    data = newdata;
+    n = size / PAGE_SIZE;
+    size = n * PAGE_SIZE;
+
+    mfns = malloc(n * sizeof(*mfns));
+    for (i = 0; i < n; i++) {
+#ifdef LIBC_DEBUG
+        int j;
+        for (j=0; j<PAGE_SIZE; j++)
+            if (((char*)data + i * PAGE_SIZE)[j]) {
+                printk("%lx is not zero!\n", data + i * PAGE_SIZE + j);
+                exit(1);
+            }
+#endif
+        mfns[i] = virtual_to_mfn(data + i * PAGE_SIZE);
+    }
+
+    printk("sparsing %ldMB at %lx\n", size >> 20, data);
+
+    munmap((void *) data, size);
+    free_physical_pages(mfns, n);
+    do_map_zero(data, n);
+}
+
+
 /* Not supported by FS yet.  */
 unsupported_function_crash(link);
 unsupported_function(int, readlink, -1);
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/main.c
--- a/extras/mini-os/main.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/main.c     Thu Apr 24 14:08:29 2008 -0600
@@ -39,6 +39,7 @@ void _fini(void)
 {
 }
 
+extern char __app_bss_start, __app_bss_end;
 static void call_main(void *p)
 {
     char *args, /**path,*/ *msg, *c;
@@ -56,6 +57,7 @@ static void call_main(void *p)
      * crashing. */
     //sleep(1);
 
+    sparse((unsigned long) &__app_bss_start, &__app_bss_end - 
&__app_bss_start);
     start_networking();
     init_fs_frontend();
 
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/mm.c
--- a/extras/mini-os/mm.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/mm.c       Thu Apr 24 14:08:29 2008 -0600
@@ -36,6 +36,7 @@
 
 #include <os.h>
 #include <hypervisor.h>
+#include <xen/memory.h>
 #include <mm.h>
 #include <types.h>
 #include <lib.h>
@@ -360,6 +361,17 @@ void free_pages(void *pointer, int order
    
 }
 
+int free_physical_pages(xen_pfn_t *mfns, int n)
+{
+    struct xen_memory_reservation reservation;
+
+    set_xen_guest_handle(reservation.extent_start, mfns);
+    reservation.nr_extents = n;
+    reservation.extent_order = 0;
+    reservation.domid = DOMID_SELF;
+    return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+}
+
 #ifdef HAVE_LIBC
 void *sbrk(ptrdiff_t increment)
 {
diff -r 239b44eeb2d6 -r dc510776dd59 extras/mini-os/sched.c
--- a/extras/mini-os/sched.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/extras/mini-os/sched.c    Thu Apr 24 14:08:29 2008 -0600
@@ -70,62 +70,15 @@ void inline print_runqueue(void)
     printk("\n");
 }
 
-/* Find the time when the next timeout expires. If this is more than
-   10 seconds from now, return 10 seconds from now. */
-static s_time_t blocking_time(void)
-{
-    struct thread *thread;
-    struct list_head *iterator;
-    s_time_t min_wakeup_time;
-    unsigned long flags;
-    local_irq_save(flags);
-    /* default-block the domain for 10 seconds: */
-    min_wakeup_time = NOW() + SECONDS(10);
-
-    /* Thread list needs to be protected */
-    list_for_each(iterator, &idle_thread->thread_list)
-    {
-        thread = list_entry(iterator, struct thread, thread_list);
-        if(!is_runnable(thread) && thread->wakeup_time != 0LL)
-        {
-            if(thread->wakeup_time < min_wakeup_time)
-            {
-                min_wakeup_time = thread->wakeup_time;
-            }
-        }
-    }
-    local_irq_restore(flags);
-    return(min_wakeup_time);
-}
-
-/* Wake up all threads with expired timeouts. */
-static void wake_expired(void)
-{
-    struct thread *thread;
-    struct list_head *iterator;
-    s_time_t now = NOW();
-    unsigned long flags;
-    local_irq_save(flags);
-    /* Thread list needs to be protected */
-    list_for_each(iterator, &idle_thread->thread_list)
-    {
-        thread = list_entry(iterator, struct thread, thread_list);
-        if(!is_runnable(thread) && thread->wakeup_time != 0LL)
-        {
-            if(thread->wakeup_time <= now)
-                wake(thread);
-        }
-    }
-    local_irq_restore(flags);
-}
-
 void schedule(void)
 {
     struct thread *prev, *next, *thread;
     struct list_head *iterator;
     unsigned long flags;
+
     prev = current;
     local_irq_save(flags); 
+
     if (in_callback) {
         printk("Must not call schedule() from a callback\n");
         BUG();
@@ -134,6 +87,45 @@ void schedule(void)
         printk("Must not call schedule() with IRQs disabled\n");
         BUG();
     }
+
+    do {
+        /* Examine all threads.
+           Find a runnable thread, but also wake up expired ones and find the
+           time when the next timeout expires, else use 10 seconds. */
+        s_time_t now = NOW();
+        s_time_t min_wakeup_time = now + SECONDS(10);
+        next = NULL;   
+        list_for_each(iterator, &idle_thread->thread_list)
+        {
+            thread = list_entry(iterator, struct thread, thread_list);
+            if (!is_runnable(thread) && thread->wakeup_time != 0LL)
+            {
+                if (thread->wakeup_time <= now)
+                    wake(thread);
+                else if (thread->wakeup_time < min_wakeup_time)
+                    min_wakeup_time = thread->wakeup_time;
+            }
+            if(is_runnable(thread)) 
+            {
+                next = thread;
+                /* Put this thread on the end of the list */
+                list_del(&thread->thread_list);
+                list_add_tail(&thread->thread_list, &idle_thread->thread_list);
+                break;
+            }
+        }
+        if (next)
+            break;
+        /* block until the next timeout expires, or for 10 secs, whichever 
comes first */
+        block_domain(min_wakeup_time);
+        /* handle pending events if any */
+        force_evtchn_callback();
+    } while(1);
+    local_irq_restore(flags);
+    /* Interrupting the switch is equivalent to having the next thread
+       interrupted at the return instruction. And therefore at safe point. */
+    if(prev != next) switch_threads(prev, next);
+
     list_for_each(iterator, &exited_threads)
     {
         thread = list_entry(iterator, struct thread, thread_list);
@@ -144,24 +136,6 @@ void schedule(void)
             xfree(thread);
         }
     }
-    next = idle_thread;   
-    /* Thread list needs to be protected */
-    list_for_each(iterator, &idle_thread->thread_list)
-    {
-        thread = list_entry(iterator, struct thread, thread_list);
-        if(is_runnable(thread)) 
-        {
-            next = thread;
-            /* Put this thread on the end of the list */
-            list_del(&thread->thread_list);
-            list_add_tail(&thread->thread_list, &idle_thread->thread_list);
-            break;
-        }
-    }
-    local_irq_restore(flags);
-    /* Interrupting the switch is equivalent to having the next thread
-       inturrupted at the return instruction. And therefore at safe point. */
-    if(prev != next) switch_threads(prev, next);
 }
 
 struct thread* create_thread(char *name, void (*function)(void *), void *data)
@@ -267,32 +241,10 @@ void wake(struct thread *thread)
 
 void idle_thread_fn(void *unused)
 {
-    s_time_t until;
     threads_started = 1;
-    unsigned long flags;
-    struct list_head *iterator;
-    struct thread *next, *thread;
-    for(;;)
-    {
-        schedule();
-        next = NULL;
-        local_irq_save(flags);
-        list_for_each(iterator, &idle_thread->thread_list)
-        {
-            thread = list_entry(iterator, struct thread, thread_list);
-            if(is_runnable(thread)) 
-            {
-                next = thread;
-                break;
-            }
-        }
-        if (!next) {
-            /* block until the next timeout expires, or for 10 secs, whichever 
comes first */
-            until = blocking_time();
-            block_domain(until);
-        }
-        local_irq_restore(flags);
-        wake_expired();
+    while (1) {
+        block(current);
+        schedule();
     }
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/blktap/drivers/blktapctrl.c
--- a/tools/blktap/drivers/blktapctrl.c Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/blktap/drivers/blktapctrl.c Thu Apr 24 14:08:29 2008 -0600
@@ -474,9 +474,8 @@ static int read_msg(int fd, int msgtype,
 
 }
 
-int launch_tapdisk(char *wrctldev, char *rdctldev)
-{
-       char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL };
+static int launch_tapdisk_provider(char **argv)
+{
        pid_t child;
        
        if ((child = fork()) < 0)
@@ -490,7 +489,9 @@ int launch_tapdisk(char *wrctldev, char 
                            i != STDERR_FILENO)
                                close(i);
 
-               execvp("tapdisk", argv);
+               execvp(argv[0], argv);
+               DPRINTF("execvp failed: %d (%s)\n", errno, strerror(errno));
+               DPRINTF("PATH = %s\n", getenv("PATH"));
                _exit(1);
        } else {
                pid_t got;
@@ -498,28 +499,78 @@ int launch_tapdisk(char *wrctldev, char 
                        got = waitpid(child, NULL, 0);
                } while (got != child);
        }
+       return child;
+}
+
+static int launch_tapdisk(char *wrctldev, char *rdctldev)
+{
+       char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL };
+
+       if (launch_tapdisk_provider(argv) < 0)
+               return -1;
+
        return 0;
 }
 
-/* Connect to qemu-dm */
-static int connect_qemu(blkif_t *blkif)
+static int launch_tapdisk_ioemu(void)
+{
+       char *argv[] = { "tapdisk-ioemu", NULL };
+       return launch_tapdisk_provider(argv);
+}
+
+/* 
+ * Connect to an ioemu based disk provider (qemu-dm or tapdisk-ioemu)
+ *
+ * If the domain has a device model, connect to qemu-dm through the
+ * domain specific pipe. Otherwise use a single tapdisk-ioemu instance
+ * which is represented by domid 0 and provides access for Dom0 and
+ * all DomUs without device model.
+ */
+static int connect_qemu(blkif_t *blkif, int domid)
 {
        char *rdctldev, *wrctldev;
-       
-       if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", 
-                       blkif->domid) < 0)
-               return -1;
-
-       if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", 
-                       blkif->domid) < 0) {
+
+       static int tapdisk_ioemu_pid = 0;
+       static int dom0_readfd = 0;
+       static int dom0_writefd = 0;
+       
+       if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) < 0)
+               return -1;
+
+       if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) < 0) {
                free(rdctldev);
                return -1;
        }
 
        DPRINTF("Using qemu blktap pipe: %s\n", rdctldev);
        
-       blkif->fds[READ] = open_ctrl_socket(wrctldev);
-       blkif->fds[WRITE] = open_ctrl_socket(rdctldev);
+       if (domid == 0) {
+               /*
+                * tapdisk-ioemu exits as soon as the last image is 
+                * disconnected. Check if it is still running.
+                */
+               if (tapdisk_ioemu_pid == 0 || kill(tapdisk_ioemu_pid, 0)) {
+                       /* No device model and tapdisk-ioemu doesn't run yet */
+                       DPRINTF("Launching tapdisk-ioemu\n");
+                       tapdisk_ioemu_pid = launch_tapdisk_ioemu();
+                       
+                       dom0_readfd = open_ctrl_socket(wrctldev);
+                       dom0_writefd = open_ctrl_socket(rdctldev);
+               }
+
+               DPRINTF("Using tapdisk-ioemu connection\n");
+               blkif->fds[READ] = dom0_readfd;
+               blkif->fds[WRITE] = dom0_writefd;
+       } else if (access(rdctldev, R_OK | W_OK) == 0) {
+               /* Use existing pipe to the device model */
+               DPRINTF("Using qemu-dm connection\n");
+               blkif->fds[READ] = open_ctrl_socket(wrctldev);
+               blkif->fds[WRITE] = open_ctrl_socket(rdctldev);
+       } else {
+               /* No device model => try with tapdisk-ioemu */
+               DPRINTF("No device model\n");
+               connect_qemu(blkif, 0);
+       }
        
        free(rdctldev);
        free(wrctldev);
@@ -599,7 +650,7 @@ int blktapctrl_new_blkif(blkif_t *blkif)
 
                if (!exist) {
                        if (type == DISK_TYPE_IOEMU) {
-                               if (connect_qemu(blkif))
+                               if (connect_qemu(blkif, blkif->domid))
                                        goto fail;
                        } else {
                                if (connect_tapdisk(blkif, minor))
diff -r 239b44eeb2d6 -r dc510776dd59 tools/blktap/drivers/tapdisk.h
--- a/tools/blktap/drivers/tapdisk.h    Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/blktap/drivers/tapdisk.h    Thu Apr 24 14:08:29 2008 -0600
@@ -235,7 +235,7 @@ static disk_info_t ioemu_disk = {
        DISK_TYPE_IOEMU,
        "ioemu disk",
        "ioemu",
-       0,
+       1,
 #ifdef TAPDISK
        NULL
 #endif
diff -r 239b44eeb2d6 -r dc510776dd59 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/console/daemon/io.c Thu Apr 24 14:08:29 2008 -0600
@@ -63,6 +63,7 @@ extern int log_time_hv;
 extern int log_time_hv;
 extern int log_time_guest;
 extern char *log_dir;
+extern int discard_overflowed_data;
 
 static int log_time_hv_needts = 1;
 static int log_time_guest_needts = 1;
@@ -201,7 +202,7 @@ static void buffer_append(struct domain 
                              dom->domid, errno, strerror(errno));
        }
 
-       if (buffer->max_capacity &&
+       if (discard_overflowed_data && buffer->max_capacity &&
            buffer->size > buffer->max_capacity) {
                /* Discard the middle of the data. */
 
@@ -228,6 +229,11 @@ static void buffer_advance(struct buffer
        if (buffer->consumed == buffer->size) {
                buffer->consumed = 0;
                buffer->size = 0;
+               if (buffer->max_capacity &&
+                   buffer->capacity > buffer->max_capacity) {
+                       buffer->data = realloc(buffer->data, 
buffer->max_capacity);
+                       buffer->capacity = buffer->max_capacity;
+               }
        }
 }
 
@@ -1005,9 +1011,13 @@ void handle_io(void)
                                    d->next_period < next_timeout)
                                        next_timeout = d->next_period;
                        } else if (d->xce_handle != -1) {
-                               int evtchn_fd = xc_evtchn_fd(d->xce_handle);
-                               FD_SET(evtchn_fd, &readfds);
-                               max_fd = MAX(evtchn_fd, max_fd);
+                               if (discard_overflowed_data ||
+                                   !d->buffer.max_capacity ||
+                                   d->buffer.size < d->buffer.max_capacity) {
+                                       int evtchn_fd = 
xc_evtchn_fd(d->xce_handle);
+                                       FD_SET(evtchn_fd, &readfds);
+                                       max_fd = MAX(evtchn_fd, max_fd);
+                               }
                        }
 
                        if (d->master_fd != -1) {
diff -r 239b44eeb2d6 -r dc510776dd59 tools/console/daemon/main.c
--- a/tools/console/daemon/main.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/console/daemon/main.c       Thu Apr 24 14:08:29 2008 -0600
@@ -38,6 +38,7 @@ int log_time_hv = 0;
 int log_time_hv = 0;
 int log_time_guest = 0;
 char *log_dir = NULL;
+int discard_overflowed_data = 1;
 
 static void handle_hup(int sig)
 {
@@ -46,7 +47,7 @@ static void handle_hup(int sig)
 
 static void usage(char *name)
 {
-       printf("Usage: %s [-h] [-V] [-v] [-i] [--log=none|guest|hv|all] 
[--log-dir=DIR] [--pid-file=PATH] [-t, --timestamp=none|guest|hv|all]\n", name);
+       printf("Usage: %s [-h] [-V] [-v] [-i] [--log=none|guest|hv|all] 
[--log-dir=DIR] [--pid-file=PATH] [-t, --timestamp=none|guest|hv|all] [-o, 
--overflow-data=discard|keep]\n", name);
 }
 
 static void version(char *name)
@@ -56,7 +57,7 @@ static void version(char *name)
 
 int main(int argc, char **argv)
 {
-       const char *sopts = "hVvit:";
+       const char *sopts = "hVvit:o:";
        struct option lopts[] = {
                { "help", 0, 0, 'h' },
                { "version", 0, 0, 'V' },
@@ -66,6 +67,7 @@ int main(int argc, char **argv)
                { "log-dir", 1, 0, 'r' },
                { "pid-file", 1, 0, 'p' },
                { "timestamp", 1, 0, 't' },
+               { "overflow-data", 1, 0, 'o'},
                { 0 },
        };
        bool is_interactive = false;
@@ -119,6 +121,13 @@ int main(int argc, char **argv)
                                log_time_hv = 0;
                        }
                        break;
+               case 'o':
+                       if (!strcmp(optarg, "keep")) {
+                               discard_overflowed_data = 0;
+                       } else if (!strcmp(optarg, "discard")) {
+                               discard_overflowed_data = 1;
+                       }
+                       break;
                case '?':
                        fprintf(stderr,
                                "Try `%s --help' for more information\n",
diff -r 239b44eeb2d6 -r dc510776dd59 tools/examples/blktap
--- a/tools/examples/blktap     Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/examples/blktap     Thu Apr 24 14:08:29 2008 -0600
@@ -54,10 +54,6 @@ check_blktap_sharing()
     echo 'ok'
 }
 
-FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id")
-FRONTEND_UUID=$(xenstore_read "/local/domain/$FRONTEND_ID/vm")
-mode=$(xenstore_read "$XENBUS_PATH/mode")
-mode=$(canonicalise_mode "$mode")
 
 t=$(xenstore_read_default "$XENBUS_PATH/type" 'MISSING')
 if [ -n "$t" ]
@@ -77,15 +73,21 @@ else
     file="$p"
 fi
 
-if [ "$mode" != '!' ] 
-then
-    result=$(check_blktap_sharing "$file" "$mode")
-    [ "$result" = 'ok' ] || ebusy "$file already in use by other domain"
-fi
-
 if [ "$command" = 'add' ]
 then
     [ -e "$file" ] || { fatal $file does not exist; }
+
+    FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id")
+    FRONTEND_UUID=$(xenstore_read "/local/domain/$FRONTEND_ID/vm")
+    mode=$(xenstore_read "$XENBUS_PATH/mode")
+    mode=$(canonicalise_mode "$mode")
+
+    if [ "$mode" != '!' ] 
+    then
+        result=$(check_blktap_sharing "$file" "$mode")
+        [ "$result" = 'ok' ] || ebusy "$file already in use by other domain"
+    fi
+
     success
 fi
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/Makefile
--- a/tools/firmware/hvmloader/Makefile Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/firmware/hvmloader/Makefile Thu Apr 24 14:08:29 2008 -0600
@@ -28,8 +28,9 @@ LOADADDR = 0x100000
 
 CFLAGS += $(CFLAGS_include) -I.
 
-SRCS = hvmloader.c mp_tables.c util.c smbios.c 32bitbios_support.c smp.c
-OBJS = $(patsubst %.c,%.o,$(SRCS))
+SRCS  = hvmloader.c mp_tables.c util.c smbios.c 
+SRCS += 32bitbios_support.c smp.c cacheattr.c
+OBJS  = $(patsubst %.c,%.o,$(SRCS))
 
 .PHONY: all
 all: hvmloader
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/acpi/build.c
--- a/tools/firmware/hvmloader/acpi/build.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/firmware/hvmloader/acpi/build.c     Thu Apr 24 14:08:29 2008 -0600
@@ -84,8 +84,8 @@ static int construct_bios_info_table(uin
 
     bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
 
-    bios_info->pci_min = 0xf0000000;
-    bios_info->pci_len = 0x0c000000;
+    bios_info->pci_min = PCI_MEMBASE;
+    bios_info->pci_len = PCI_MEMSIZE;
 
     return align16(sizeof(*bios_info));
 }
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/cacheattr.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/firmware/hvmloader/cacheattr.c      Thu Apr 24 14:08:29 2008 -0600
@@ -0,0 +1,99 @@
+/*
+ * cacheattr.c: MTRR and PAT initialisation.
+ *
+ * Copyright (c) 2008, Citrix Systems, Inc.
+ * 
+ * Authors:
+ *    Keir Fraser <keir.fraser@xxxxxxxxxx>
+ * 
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include "util.h"
+#include "config.h"
+
+#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg))
+#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1)
+#define MSR_MTRRcap          0x00fe
+#define MSR_MTRRfix64K_00000 0x0250
+#define MSR_MTRRfix16K_80000 0x0258
+#define MSR_MTRRfix16K_A0000 0x0259
+#define MSR_MTRRfix4K_C0000  0x0268
+#define MSR_MTRRfix4K_C8000  0x0269
+#define MSR_MTRRfix4K_D0000  0x026a
+#define MSR_MTRRfix4K_D8000  0x026b
+#define MSR_MTRRfix4K_E0000  0x026c
+#define MSR_MTRRfix4K_E8000  0x026d
+#define MSR_MTRRfix4K_F0000  0x026e
+#define MSR_MTRRfix4K_F8000  0x026f
+#define MSR_PAT              0x0277
+#define MSR_MTRRdefType      0x02ff
+
+void cacheattr_init(void)
+{
+    uint32_t eax, ebx, ecx, edx;
+    uint64_t mtrr_cap, mtrr_def, content, addr_mask;
+    unsigned int i, nr_var_ranges, phys_bits = 36;
+
+    /* Does the CPU support architectural MTRRs? */
+    cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+    if ( !(edx & (1u << 12)) )
+         return;
+
+    /* Find the physical address size for this CPU. */
+    cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+    if ( eax >= 0x80000008 )
+    {
+        cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+        phys_bits = (uint8_t)eax;
+    }
+
+    printf("%u-bit phys ... ", phys_bits);
+
+    addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1);
+    mtrr_cap = rdmsr(MSR_MTRRcap);
+    mtrr_def = (1u << 11) | 6; /* E, default type WB */
+
+    /* Fixed-range MTRRs supported? */
+    if ( mtrr_cap & (1u << 8) )
+    {
+        /* 0x00000-0x9ffff: Write Back (WB) */
+        content = 0x0606060606060606ull;
+        wrmsr(MSR_MTRRfix64K_00000, content);
+        wrmsr(MSR_MTRRfix16K_80000, content);
+        /* 0xa0000-0xbffff: Write Combining (WC) */
+        if ( mtrr_cap & (1u << 10) ) /* WC supported? */
+            content = 0x0101010101010101ull;
+        wrmsr(MSR_MTRRfix16K_A0000, content);
+        /* 0xc0000-0xfffff: Write Back (WB) */
+        content = 0x0606060606060606ull;
+        for ( i = 0; i < 8; i++ )
+            wrmsr(MSR_MTRRfix4K_C0000 + i, content);
+        mtrr_def |= 1u << 10; /* FE */
+        printf("fixed MTRRs ... ");
+    }
+
+    /* Variable-range MTRRs supported? */
+    nr_var_ranges = (uint8_t)mtrr_cap;
+    if ( nr_var_ranges != 0 )
+    {
+        /* A single UC range covering PCI space. */
+        wrmsr(MSR_MTRRphysBase(0), PCI_MEMBASE);
+        wrmsr(MSR_MTRRphysMask(0),
+              ((uint64_t)(int32_t)PCI_MEMBASE & addr_mask) | (1u << 11));
+        printf("var MTRRs ... ");
+    }
+
+    wrmsr(MSR_MTRRdefType, mtrr_def);
+}
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/config.h
--- a/tools/firmware/hvmloader/config.h Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/firmware/hvmloader/config.h Thu Apr 24 14:08:29 2008 -0600
@@ -10,6 +10,9 @@
 
 #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
 #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
+
+#define PCI_MEMBASE         0xf0000000
+#define PCI_MEMSIZE         0x0c000000
 
 #define ROMBIOS_SEG            0xF000
 #define ROMBIOS_BEGIN          0x000F0000
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/hvmloader.c
--- a/tools/firmware/hvmloader/hvmloader.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/firmware/hvmloader/hvmloader.c      Thu Apr 24 14:08:29 2008 -0600
@@ -96,6 +96,7 @@ asm (
     "stack:                          \n"
     "    .skip    0x4000             \n"
     "stack_top:                      \n"
+    "    .text                       \n"
     );
 
 void smp_initialise(void);
@@ -158,7 +159,7 @@ static void pci_setup(void)
     struct resource {
         uint32_t base, max;
     } *resource;
-    struct resource mem_resource = { 0xf0000000, 0xfc000000 };
+    struct resource mem_resource = { PCI_MEMBASE, PCI_MEMBASE + PCI_MEMSIZE };
     struct resource io_resource  = { 0xc000, 0x10000 };
 
     /* Create a list of device BARs in descending order of size. */
diff -r 239b44eeb2d6 -r dc510776dd59 tools/firmware/hvmloader/smp.c
--- a/tools/firmware/hvmloader/smp.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/firmware/hvmloader/smp.c    Thu Apr 24 14:08:29 2008 -0600
@@ -66,12 +66,15 @@ asm (
     "stack:                          \n"
     "    .skip    0x4000             \n"
     "stack_top:                      \n"
+    "    .text                       \n"
     );
+
+extern void cacheattr_init(void);
 
 /*static*/ void ap_start(void)
 {
     printf(" - CPU%d ... ", ap_cpuid);
-
+    cacheattr_init();
     printf("done.\n");
     wmb();
     ap_callin = 1;
@@ -121,12 +124,10 @@ void smp_initialise(void)
 {
     unsigned int i, nr_cpus = get_vcpu_nr();
 
-    if ( nr_cpus <= 1 )
-        return;
-
     memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start);
 
     printf("Multiprocessor initialisation:\n");
+    ap_start();
     for ( i = 1; i < nr_cpus; i++ )
         boot_cpu(i);
 }
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/Makefile
--- a/tools/ioemu/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/Makefile      Thu Apr 24 14:08:29 2008 -0600
@@ -87,7 +87,7 @@ endif
 
 install: all $(if $(BUILD_DOCS),install-doc)
        mkdir -p "$(DESTDIR)$(bindir)"
-       $(INSTALL) -m 755 -s $(TOOLS) "$(DESTDIR)$(prefix)/sbin"
+       $(INSTALL) -m 755 $(TOOLS) "$(DESTDIR)$(SBINDIR)"
 #      mkdir -p "$(DESTDIR)$(datadir)"
 #      for x in bios.bin vgabios.bin vgabios-cirrus.bin ppc_rom.bin \
 #              video.x openbios-sparc32 linux_boot.bin pxe-ne2k_pci.bin \
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/cirrus_vga.c
--- a/tools/ioemu/hw/cirrus_vga.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/hw/cirrus_vga.c       Thu Apr 24 14:08:29 2008 -0600
@@ -2595,6 +2595,10 @@ static void *set_vram_mapping(unsigned l
 
     memset(vram_pointer, 0, nr_extents * TARGET_PAGE_SIZE);
 
+#ifdef CONFIG_STUBDOM
+    xenfb_pv_display_start(vram_pointer);
+#endif
+
     free(extent_start);
 
     return vram_pointer;
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/hw/pci.c      Thu Apr 24 14:08:29 2008 -0600
@@ -79,18 +79,30 @@ int pci_bus_num(PCIBus *s)
 
 void pci_device_save(PCIDevice *s, QEMUFile *f)
 {
-    qemu_put_be32(f, 1); /* PCI device version */
+    uint8_t irq_state = 0;
+    int i;
+    qemu_put_be32(f, 2); /* PCI device version */
     qemu_put_buffer(f, s->config, 256);
+    for (i = 0; i < 4; i++)
+        irq_state |= !!s->irq_state[i] << i;
+    qemu_put_buffer(f, &irq_state, 1);
 }
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
 {
     uint32_t version_id;
     version_id = qemu_get_be32(f);
-    if (version_id != 1)
+    if (version_id != 1 && version_id != 2)
         return -EINVAL;
     qemu_get_buffer(f, s->config, 256);
     pci_update_mappings(s);
+    if (version_id == 2) {
+        uint8_t irq_state;
+        int i;
+        qemu_get_buffer(f, &irq_state, 1);
+        for (i = 0; i < 4; i++)
+            pci_set_irq(s, i, !!(irq_state >> i));
+    }
     return 0;
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/vga.c
--- a/tools/ioemu/hw/vga.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/hw/vga.c      Thu Apr 24 14:08:29 2008 -0600
@@ -2067,8 +2067,8 @@ void vga_common_init(VGAState *s, Displa
                                  & ~(TARGET_PAGE_SIZE - 1));
 
     /* Video RAM must be 128-bit aligned for SSE optimizations later */
-    s->vram_alloc = qemu_malloc(vga_ram_size + 15);
-    s->vram_ptr = (uint8_t *)((long)(s->vram_alloc + 15) & ~15L);
+    /* and page-aligned for PVFB memory sharing */
+    s->vram_ptr = s->vram_alloc = qemu_memalign(TARGET_PAGE_SIZE, 
vga_ram_size);
 
     s->vram_offset = vga_ram_offset;
     s->vram_size = vga_ram_size;
@@ -2210,7 +2210,7 @@ void *vga_update_vram(VGAState *s, void 
     }
 
     if (!vga_ram_base) {
-        vga_ram_base = qemu_malloc(vga_ram_size + TARGET_PAGE_SIZE + 1);
+        vga_ram_base = qemu_memalign(TARGET_PAGE_SIZE, vga_ram_size + 
TARGET_PAGE_SIZE + 1);
         if (!vga_ram_base) {
             fprintf(stderr, "reallocate error\n");
             return NULL;
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/xen_blktap.c
--- a/tools/ioemu/hw/xen_blktap.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/hw/xen_blktap.c       Thu Apr 24 14:08:29 2008 -0600
@@ -581,17 +581,13 @@ static void handle_blktap_ctrlmsg(void* 
  */
 static int open_ctrl_socket(char *devname)
 {
-       int ret;
        int ipc_fd;
 
        if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0)
                DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR);
 
-       ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO);
-       if ( (ret != 0) && (errno != EEXIST) ) {
-               DPRINTF("ERROR: pipe failed (%d)\n", errno);
+       if (access(devname, R_OK | W_OK))
                return -1;
-       }
 
        ipc_fd = open(devname,O_RDWR|O_NONBLOCK);
 
@@ -601,42 +597,6 @@ static int open_ctrl_socket(char *devnam
        }
 
        return ipc_fd;
-}
-
-/**
- * Unmaps all disks and closes their pipes
- */
-void shutdown_blktap(void)
-{
-       fd_list_entry_t *ptr;
-       struct td_state *s;
-       char *devname;
-
-       DPRINTF("Shutdown blktap\n");
-
-       /* Unmap all disks */
-       ptr = fd_start;
-       while (ptr != NULL) {
-               s = ptr->s;
-               unmap_disk(s);
-               close(ptr->tap_fd);
-               ptr = ptr->next;
-       }
-
-       /* Delete control pipes */
-       if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) {
-               DPRINTF("Delete %s\n", devname);
-               if (unlink(devname))
-                       DPRINTF("Could not delete: %s\n", strerror(errno));
-               free(devname);
-       }
-       
-       if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) { 
-               DPRINTF("Delete %s\n", devname);
-               if (unlink(devname))
-                       DPRINTF("Could not delete: %s\n", strerror(errno));
-               free(devname);
-       }
 }
 
 /**
@@ -679,8 +639,5 @@ int init_blktap(void)
        /* Attach a handler to the read pipe (called from qemu main loop) */
        qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL);
 
-       /* Register handler to clean up when the domain is destroyed */
-       atexit(&shutdown_blktap);
-
        return 0;
 }
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/hw/xenfb.c
--- a/tools/ioemu/hw/xenfb.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/hw/xenfb.c    Thu Apr 24 14:08:29 2008 -0600
@@ -1235,14 +1235,10 @@ static struct semaphore kbd_sem = __SEMA
 static struct semaphore kbd_sem = __SEMAPHORE_INITIALIZER(kbd_sem, 0);
 static struct kbdfront_dev *kbd_dev;
 static char *kbd_path, *fb_path;
+static void *vga_vram, *nonshared_vram;
+static DisplayState *xenfb_ds;
 
 static unsigned char linux2scancode[KEY_MAX + 1];
-
-#define WIDTH 1024
-#define HEIGHT 768
-#define DEPTH 32
-#define LINESIZE (1280 * (DEPTH / 8))
-#define MEMSIZE (LINESIZE * HEIGHT)
 
 int xenfb_connect_vkbd(const char *path)
 {
@@ -1256,33 +1252,73 @@ int xenfb_connect_vfb(const char *path)
     return 0;
 }
 
-static void xenfb_pv_update(DisplayState *s, int x, int y, int w, int h)
-{
-    struct fbfront_dev *fb_dev = s->opaque;
+static void xenfb_pv_update(DisplayState *ds, int x, int y, int w, int h)
+{
+    struct fbfront_dev *fb_dev = ds->opaque;
+    if (!fb_dev)
+        return;
     fbfront_update(fb_dev, x, y, w, h);
 }
 
-static void xenfb_pv_resize(DisplayState *s, int w, int h, int linesize)
-{
-    struct fbfront_dev *fb_dev = s->opaque;
-    fprintf(stderr,"resize to %dx%d required\n", w, h);
-    s->width = w;
-    s->height = h;
-    /* TODO: send resize event if supported */
-    memset(s->data, 0, MEMSIZE);
-    fbfront_update(fb_dev, 0, 0, WIDTH, HEIGHT);
+static void xenfb_pv_resize(DisplayState *ds, int w, int h, int linesize)
+{
+    struct fbfront_dev *fb_dev = ds->opaque;
+    fprintf(stderr,"resize to %dx%d, %d required\n", w, h, linesize);
+    ds->width = w;
+    ds->height = h;
+    if (!linesize)
+        ds->shared_buf = 0;
+    if (!ds->shared_buf)
+        linesize = w * 4;
+    ds->linesize = linesize;
+    if (!fb_dev)
+        return;
+    if (ds->shared_buf) {
+        ds->data = NULL;
+    } else {
+        ds->data = nonshared_vram;
+        fbfront_resize(fb_dev, w, h, linesize, ds->depth, VGA_RAM_SIZE);
+    }
 }
 
 static void xenfb_pv_colourdepth(DisplayState *ds, int depth)
 {
-    /* TODO: send redepth event if supported */
+    struct fbfront_dev *fb_dev = ds->opaque;
     static int lastdepth = -1;
+    if (!depth) {
+        ds->shared_buf = 0;
+        ds->depth = 32;
+    } else {
+        ds->shared_buf = 1;
+        ds->depth = depth;
+    }
     if (depth != lastdepth) {
         fprintf(stderr,"redepth to %d required\n", depth);
         lastdepth = depth;
+    } else return;
+    if (!fb_dev)
+        return;
+    if (ds->shared_buf) {
+        ds->data = NULL;
+    } else {
+        ds->data = nonshared_vram;
+        fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, 
VGA_RAM_SIZE);
     }
-    /* We can't redepth for now */
-    ds->depth = DEPTH;
+}
+
+static void xenfb_pv_setdata(DisplayState *ds, void *pixels)
+{
+    struct fbfront_dev *fb_dev = ds->opaque;
+    int offset = pixels - vga_vram;
+    ds->data = pixels;
+    if (!fb_dev)
+        return;
+    fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, 
offset);
+}
+
+static void xenfb_pv_refresh(DisplayState *ds)
+{
+    vga_hw_update();
 }
 
 static void xenfb_kbd_handler(void *opaque)
@@ -1373,13 +1409,6 @@ static void xenfb_kbd_handler(void *opaq
     }
 }
 
-static void xenfb_pv_refresh(DisplayState *ds)
-{
-    /* always request negociation */
-    ds->depth = -1;
-    vga_hw_update();
-}
-
 static void kbdfront_thread(void *p)
 {
     int scancode, keycode;
@@ -1399,40 +1428,72 @@ static void kbdfront_thread(void *p)
 
 int xenfb_pv_display_init(DisplayState *ds)
 {
-    void *data;
+    if (!fb_path || !kbd_path)
+        return -1;
+
+    create_thread("kbdfront", kbdfront_thread, (void*) kbd_path);
+
+    xenfb_ds = ds;
+
+    ds->data = nonshared_vram = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE);
+    memset(ds->data, 0, VGA_RAM_SIZE);
+    ds->depth = 32;
+    ds->bgr = 0;
+    ds->width = 640;
+    ds->height = 400;
+    ds->linesize = 640 * 4;
+    ds->dpy_update = xenfb_pv_update;
+    ds->dpy_resize = xenfb_pv_resize;
+    ds->dpy_colourdepth = xenfb_pv_colourdepth;
+    ds->dpy_setdata = xenfb_pv_setdata;
+    ds->dpy_refresh = xenfb_pv_refresh;
+    return 0;
+}
+
+int xenfb_pv_display_start(void *data)
+{
+    DisplayState *ds = xenfb_ds;
     struct fbfront_dev *fb_dev;
     int kbd_fd;
+    int offset = 0;
+    unsigned long *mfns;
+    int n = VGA_RAM_SIZE / PAGE_SIZE;
+    int i;
 
     if (!fb_path || !kbd_path)
-        return -1;
-
-    create_thread("kbdfront", kbdfront_thread, (void*) kbd_path);
-
-    data = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE);
-    fb_dev = init_fbfront(fb_path, data, WIDTH, HEIGHT, DEPTH, LINESIZE, 
MEMSIZE);
+        return 0;
+
+    vga_vram = data;
+    mfns = malloc(2 * n * sizeof(*mfns));
+    for (i = 0; i < n; i++)
+        mfns[i] = virtual_to_mfn(vga_vram + i * PAGE_SIZE);
+    for (i = 0; i < n; i++)
+        mfns[n + i] = virtual_to_mfn(nonshared_vram + i * PAGE_SIZE);
+
+    fb_dev = init_fbfront(fb_path, mfns, ds->width, ds->height, ds->depth, 
ds->linesize, 2 * n);
+    free(mfns);
     if (!fb_dev) {
         fprintf(stderr,"can't open frame buffer\n");
         exit(1);
     }
     free(fb_path);
 
+    if (ds->shared_buf) {
+        offset = (void*) ds->data - vga_vram;
+    } else {
+        offset = VGA_RAM_SIZE;
+        ds->data = nonshared_vram;
+    }
+    if (offset)
+        fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, 
offset);
+
     down(&kbd_sem);
     free(kbd_path);
 
     kbd_fd = kbdfront_open(kbd_dev);
     qemu_set_fd_handler(kbd_fd, xenfb_kbd_handler, NULL, ds);
 
-    ds->data = data;
-    ds->linesize = LINESIZE;
-    ds->depth = DEPTH;
-    ds->bgr = 0;
-    ds->width = WIDTH;
-    ds->height = HEIGHT;
-    ds->dpy_update = xenfb_pv_update;
-    ds->dpy_resize = xenfb_pv_resize;
-    ds->dpy_colourdepth = xenfb_pv_colourdepth;
-    ds->dpy_refresh = xenfb_pv_refresh;
-    ds->opaque = fb_dev;
+    xenfb_ds->opaque = fb_dev;
     return 0;
 }
 #endif
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/tapdisk-ioemu.c
--- a/tools/ioemu/tapdisk-ioemu.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/tapdisk-ioemu.c       Thu Apr 24 14:08:29 2008 -0600
@@ -4,6 +4,7 @@
 #include <string.h>
 #include <stdint.h>
 #include <signal.h>
+#include <unistd.h>
 #include <sys/time.h>
 
 #include <assert.h>
@@ -15,6 +16,8 @@ extern void bdrv_init(void);
 
 extern void *qemu_mallocz(size_t size);
 extern void qemu_free(void *ptr);
+
+extern void *fd_start;
 
 int domid = 0;
 FILE* logfile;
@@ -95,12 +98,17 @@ int main(void)
     int max_fd;
     fd_set rfds;
     struct timeval tv;
+    void *old_fd_start = NULL;
 
     logfile = stderr;
     
     bdrv_init();
     qemu_aio_init();
     init_blktap();
+
+    /* Daemonize */
+    if (fork() != 0)
+       exit(0);
    
     /* 
      * Main loop: Pass events to the corrsponding handlers and check for
@@ -137,6 +145,12 @@ int main(void)
             } else 
                 pioh = &ioh->next;
         }
+
+        /* Exit when the last image has been closed */
+        if (old_fd_start != NULL && fd_start == NULL)
+            exit(0);
+
+        old_fd_start = fd_start;
     }
     return 0;
 }
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/target-i386-dm/helper2.c      Thu Apr 24 14:08:29 2008 -0600
@@ -482,7 +482,7 @@ void cpu_handle_ioreq(void *opaque)
     CPUState *env = opaque;
     ioreq_t *req = cpu_get_ioreq();
 
-    handle_buffered_io(env);
+    __handle_buffered_iopage(env);
     if (req) {
         __handle_ioreq(env, req);
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/vl.c  Thu Apr 24 14:08:29 2008 -0600
@@ -140,9 +140,9 @@
 #define MAX_IOPORTS 65536
 
 const char *bios_dir = CONFIG_QEMU_SHAREDIR;
-void **ioport_opaque;
-IOPortReadFunc *(*ioport_read_table)[MAX_IOPORTS];
-IOPortWriteFunc *(*ioport_write_table)[MAX_IOPORTS];
+void *ioport_opaque[MAX_IOPORTS];
+IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
+IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
 /* Note: bs_table[MAX_DISKS] is a dummy block driver if none available
    to store the VM snapshots */
 BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS + 1], *fd_table[MAX_FD];
@@ -281,9 +281,6 @@ void default_ioport_writel(void *opaque,
 
 void init_ioports(void)
 {
-    ioport_opaque = calloc(MAX_IOPORTS, sizeof(*ioport_opaque));
-    ioport_read_table = calloc(3 * MAX_IOPORTS, sizeof(**ioport_read_table));
-    ioport_write_table = calloc(3 * MAX_IOPORTS, sizeof(**ioport_write_table));
 }
 
 /* size is the word size in byte */
@@ -6276,12 +6273,6 @@ void qemu_system_powerdown_request(void)
     powerdown_requested = 1;
     if (cpu_single_env)
         cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
-}
-
-static void qemu_sighup_handler(int signal)
-{
-    fprintf(stderr, "Received SIGHUP, terminating.\n");
-    exit(0);
 }
 
 void main_loop_wait(int timeout)
@@ -7979,7 +7970,7 @@ int main(int argc, char **argv)
 
 #ifndef CONFIG_STUBDOM
     /* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller */
-    signal(SIGHUP, qemu_sighup_handler);
+    signal(SIGHUP, SIG_DFL);
     sigemptyset(&set);
     sigaddset(&set, SIGTERM);
     sigaddset(&set, SIGHUP);
diff -r 239b44eeb2d6 -r dc510776dd59 tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/ioemu/vl.h  Thu Apr 24 14:08:29 2008 -0600
@@ -1545,6 +1545,7 @@ char *xenstore_vm_read(int domid, char *
 
 /* xenfb.c */
 int xenfb_pv_display_init(DisplayState *ds);
+int xenfb_pv_display_start(void *vram_start);
 int xenfb_connect_vkbd(const char *path);
 int xenfb_connect_vfb(const char *path);
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/libfsimage/ext2fs/fsys_ext2fs.c
--- a/tools/libfsimage/ext2fs/fsys_ext2fs.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/libfsimage/ext2fs/fsys_ext2fs.c     Thu Apr 24 14:08:29 2008 -0600
@@ -77,7 +77,52 @@ struct ext2_super_block
     __u32 s_rev_level;         /* Revision level */
     __u16 s_def_resuid;                /* Default uid for reserved blocks */
     __u16 s_def_resgid;                /* Default gid for reserved blocks */
-    __u32 s_reserved[235];     /* Padding to the end of the block */
+    /*
+     * These fields are for EXT2_DYNAMIC_REV superblocks only.
+     *
+     * Note: the difference between the compatible feature set and
+     * the incompatible feature set is that if there is a bit set
+     * in the incompatible feature set that the kernel doesn't
+     * know about, it should refuse to mount the filesystem.
+     *
+     * e2fsck's requirements are more strict; if it doesn't know
+     * about a feature in either the compatible or incompatible
+     * feature set, it must abort and not try to meddle with
+     * things it doesn't understand...
+     */
+    __u32 s_first_ino;         /* First non-reserved inode */
+    __u16 s_inode_size;                /* size of inode structure */
+    __u16 s_block_group_nr;    /* block group # of this superblock */
+    __u32 s_feature_compat;    /* compatible feature set */
+    __u32 s_feature_incompat;  /* incompatible feature set */
+    __u32 s_feature_ro_compat; /* readonly-compatible feature set */
+    __u8  s_uuid[16];          /* 128-bit uuid for volume */
+    char  s_volume_name[16];   /* volume name */
+    char  s_last_mounted[64];  /* directory where last mounted */
+    __u32 s_algorithm_usage_bitmap; /* For compression */
+    /*
+     * Performance hints.  Directory preallocation should only
+     * happen if the EXT2_FEATURE_COMPAT_DIR_PREALLOC flag is on.
+     */
+    __u8  s_prealloc_blocks;   /* Nr of blocks to try to preallocate*/
+    __u8  s_prealloc_dir_blocks;       /* Nr to preallocate for dirs */
+    __u16 s_reserved_gdt_blocks;/* Per group table for online growth */
+    /*
+     * Journaling support valid if EXT2_FEATURE_COMPAT_HAS_JOURNAL set.
+     */
+    __u8 s_journal_uuid[16];   /* uuid of journal superblock */
+    __u32 s_journal_inum;      /* inode number of journal file */
+    __u32 s_journal_dev;       /* device number of journal file */
+    __u32 s_last_orphan;       /* start of list of inodes to delete */
+    __u32 s_hash_seed[4];      /* HTREE hash seed */
+    __u8  s_def_hash_version;  /* Default hash version to use */
+    __u8  s_jnl_backup_type;   /* Default type of journal backup */
+    __u16 s_reserved_word_pad;
+    __u32 s_default_mount_opts;
+    __u32 s_first_meta_bg;     /* First metablock group */
+    __u32 s_mkfs_time;         /* When the filesystem was created */
+    __u32 s_jnl_blocks[17];    /* Backup of the journal inode */
+    __u32 s_reserved[172];     /* Padding to the end of the block */
   };
 
 struct ext2_group_desc
@@ -216,6 +261,9 @@ struct ext2_dir_entry
 #define EXT2_ADDR_PER_BLOCK(s)          (EXT2_BLOCK_SIZE(s) / sizeof (__u32))
 #define EXT2_ADDR_PER_BLOCK_BITS(s)            (log2(EXT2_ADDR_PER_BLOCK(s)))
 
+#define EXT2_INODE_SIZE(s)             (SUPERBLOCK->s_inode_size)
+#define EXT2_INODES_PER_BLOCK(s)       (EXT2_BLOCK_SIZE(s)/EXT2_INODE_SIZE(s))
+
 /* linux/ext2_fs.h */
 #define EXT2_BLOCK_SIZE_BITS(s)        ((s)->s_log_block_size + 10)
 /* kind of from ext2/super.c */
@@ -537,7 +585,7 @@ ext2fs_dir (fsi_file_t *ffi, char *dirna
       gdp = GROUP_DESC;
       ino_blk = gdp[desc].bg_inode_table +
        (((current_ino - 1) % (SUPERBLOCK->s_inodes_per_group))
-        >> log2 (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode)));
+        >> log2 (EXT2_INODES_PER_BLOCK (SUPERBLOCK)));
 #ifdef E2DEBUG
       printf ("inode table fsblock=%d\n", ino_blk);
 #endif /* E2DEBUG */
@@ -549,13 +597,12 @@ ext2fs_dir (fsi_file_t *ffi, char *dirna
       /* reset indirect blocks! */
       mapblock2 = mapblock1 = -1;
 
-      raw_inode = INODE +
-       ((current_ino - 1)
-        & (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode) - 1));
+      raw_inode = (struct ext2_inode *)((char *)INODE +
+       ((current_ino - 1) & (EXT2_INODES_PER_BLOCK (SUPERBLOCK) - 1)) *
+       EXT2_INODE_SIZE (SUPERBLOCK));
 #ifdef E2DEBUG
       printf ("ipb=%d, sizeof(inode)=%d\n",
-             (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode)),
-             sizeof (struct ext2_inode));
+             EXT2_INODES_PER_BLOCK (SUPERBLOCK), EXT2_INODE_SIZE (SUPERBLOCK));
       printf ("inode=%x, raw_inode=%x\n", INODE, raw_inode);
       printf ("offset into inode table block=%d\n", (int) raw_inode - (int) 
INODE);
       for (i = (unsigned char *) INODE; i <= (unsigned char *) raw_inode;
diff -r 239b44eeb2d6 -r dc510776dd59 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/libxc/xc_hvm_build.c        Thu Apr 24 14:08:29 2008 -0600
@@ -298,7 +298,7 @@ static int setup_guest(int xc_handle,
                        _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
     munmap(ident_pt, PAGE_SIZE);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
-                     special_page_nr + SPECIALPAGE_IDENT_PT);
+                     (special_page_nr + SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
 
     /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
     entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/util/acmpolicy.py
--- a/tools/python/xen/util/acmpolicy.py        Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/python/xen/util/acmpolicy.py        Thu Apr 24 14:08:29 2008 -0600
@@ -17,6 +17,7 @@
 #============================================================================
 
 import os
+import sha
 import stat
 import array
 import struct
@@ -35,7 +36,7 @@ ACM_POLICIES_DIR = security.policy_dir_p
 
 # Constants needed for generating a binary policy from its XML
 # representation
-ACM_POLICY_VERSION = 3  # Latest one
+ACM_POLICY_VERSION = 4  # Latest one
 ACM_CHWALL_VERSION = 1
 
 ACM_STE_VERSION = 1
@@ -965,6 +966,10 @@ class ACMPolicy(XSPolicy):
             return dom.toxml()
         return None
 
+    def hash(self):
+        """ Calculate a SHA1 hash of the XML policy """
+        return sha.sha(self.toxml())
+
     def save(self):
         ### Save the XML policy into a file ###
         rc = -xsconstants.XSERR_FILE_ERROR
@@ -1403,7 +1408,7 @@ class ACMPolicy(XSPolicy):
             ste_bin += "\x00"
 
         #Write binary header:
-        headerformat="!iiiiiiiiii"
+        headerformat="!iiiiiiiiii20s"
         totallen_bin = struct.calcsize(headerformat) + \
                        len(pr_bin) + len(chw_bin) + len(ste_bin)
         polref_offset = struct.calcsize(headerformat)
@@ -1425,7 +1430,8 @@ class ACMPolicy(XSPolicy):
                               primpoloffset,
                               secpolcode,
                               secpoloffset,
-                              major, minor)
+                              major, minor,
+                              self.hash().digest())
 
         all_bin = array.array('B')
         for s in [ hdr_bin, pr_bin, chw_bin, ste_bin ]:
@@ -1443,6 +1449,21 @@ class ACMPolicy(XSPolicy):
             rc = -xsconstants.XSERR_BAD_LABEL
         return rc, mapfile, all_bin.tostring()
 
+    def validate_enforced_policy_hash(self):
+        """ Verify that the policy hash embedded in the binary policy
+            that is currently enforced matches the one of the XML policy.
+        """
+        if self.hash().digest() != self.get_enforced_policy_hash():
+            raise Exception('Policy hashes do not match')
+
+    def get_enforced_policy_hash(self):
+        binpol = self.get_enforced_binary()
+        headerformat="!iiiiiiiiii20s"
+        res = struct.unpack(headerformat, binpol[:60])
+        if len(res) >= 11:
+            return res[10]
+        return None
+
     def get_enforced_binary(self):
         rc, binpol = security.hv_get_policy()
         if rc != 0:
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/python/xen/xend/XendDomain.py       Thu Apr 24 14:08:29 2008 -0600
@@ -1622,7 +1622,31 @@ class XendDomain:
                                           vcpu)
         except Exception, ex:
             raise XendError(str(ex))
- 
+
+    def domain_reset(self, domid):
+        """Terminate domain immediately, and then create domain.
+
+        @param domid: Domain ID or Name
+        @type domid: int or string.
+        @rtype: None
+        @raise XendError: Failed to destroy or create
+        @raise XendInvalidDomain: Domain is not valid
+        """
+
+        dominfo = self.domain_lookup_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        if dominfo and dominfo.getDomid() == DOM0_ID:
+            raise XendError("Cannot reset privileged domain %s" % domid)
+        if dominfo._stateGet() not in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
+            raise VMBadState("Domain '%s' is not started" % domid,
+                             POWER_STATE_NAMES[DOM_STATE_RUNNING],
+                             POWER_STATE_NAMES[dominfo._stateGet()])
+        try:
+            dominfo.resetDomain()
+        except Exception, ex:
+            raise XendError(str(ex))
+
 
 def instance():
     """Singleton constructor. Use this instead of the class constructor.
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Thu Apr 24 14:08:29 2008 -0600
@@ -1837,6 +1837,9 @@ class XendDomainInfo:
 
         @raise: VmError for invalid devices
         """
+        if self.image:
+            self.image.prepareEnvironment()
+
         ordered_refs = self.info.ordered_device_refs()
         for dev_uuid in ordered_refs:
             devclass, config = self.info['devices'][dev_uuid]
@@ -2323,6 +2326,34 @@ class XendDomainInfo:
         self._cleanup_phantom_devs(paths)
 
 
+    def resetDomain(self):
+        log.debug("XendDomainInfo.resetDomain(%s)", str(self.domid))
+
+        old_domid = self.domid
+        prev_vm_xend = self._listRecursiveVm('xend')
+        new_dom_info = self.info
+        try:
+            self._unwatchVm()
+            self.destroy()
+
+            new_dom = None
+            try:
+                from xen.xend import XendDomain
+                new_dom_info['domid'] = None
+                new_dom = XendDomain.instance().domain_create_from_dict(
+                    new_dom_info)
+                for x in prev_vm_xend[0][1]:
+                    new_dom._writeVm('xend/%s' % x[0], x[1])
+                new_dom.waitForDevices()
+                new_dom.unpause()
+            except:
+                if new_dom:
+                    new_dom.destroy()
+                raise
+        except:
+            log.exception('Failed to reset domain %s.', str(old_domid))
+
+
     def resumeDomain(self):
         log.debug("XendDomainInfo.resumeDomain(%s)", str(self.domid))
 
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/XendXSPolicyAdmin.py
--- a/tools/python/xen/xend/XendXSPolicyAdmin.py        Thu Apr 24 14:02:16 
2008 -0600
+++ b/tools/python/xen/xend/XendXSPolicyAdmin.py        Thu Apr 24 14:08:29 
2008 -0600
@@ -54,6 +54,7 @@ class XSPolicyAdmin:
         try:
             self.xsobjs[ref] = ACMPolicy(name=act_pol_name, ref=ref)
             self.policies[ref] = (act_pol_name, xsconstants.ACM_POLICY_ID)
+            self.xsobjs[ref].validate_enforced_policy_hash()
         except Exception, e:
             log.error("Could not find XML representation of policy '%s': "
                       "%s" % (act_pol_name,e))
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/python/xen/xend/image.py    Thu Apr 24 14:08:29 2008 -0600
@@ -184,6 +184,42 @@ class ImageHandler:
     def buildDomain(self):
         """Build the domain. Define in subclass."""
         raise NotImplementedError()
+
+    def prepareEnvironment(self):
+        """Prepare the environment for the execution of the domain. This
+        method is called before any devices are set up."""
+        
+        domid = self.vm.getDomid()
+       
+        # Delete left-over pipes
+        try:
+            os.unlink('/var/run/tap/qemu-read-%d' % domid)
+            os.unlink('/var/run/tap/qemu-write-%d' % domid)
+        except:
+            pass
+
+        # No device model, don't create pipes
+        if self.device_model is None:
+            return
+
+        # If we use a device model, the pipes for communication between
+        # blktapctrl and ioemu must be present before the devices are 
+        # created (blktapctrl must access them for new block devices)
+
+        # mkdir throws an exception if the path already exists
+        try:
+            os.mkdir('/var/run/tap', 0755)
+        except:
+            pass
+
+        try:
+            os.mkfifo('/var/run/tap/qemu-read-%d' % domid, 0600)
+            os.mkfifo('/var/run/tap/qemu-write-%d' % domid, 0600)
+        except OSError, e:
+            log.warn('Could not create blktap pipes for domain %d' % domid)
+            log.exception(e)
+            pass
+
 
     # Return a list of cmd line args to the device models based on the
     # xm config file
@@ -411,6 +447,12 @@ class ImageHandler:
             self.pid = None
             state = xstransact.Remove("/local/domain/0/device-model/%i"
                                       % self.vm.getDomid())
+            
+            try:
+                os.unlink('/var/run/tap/qemu-read-%d' % self.vm.getDomid())
+                os.unlink('/var/run/tap/qemu-write-%d' % self.vm.getDomid())
+            except:
+                pass
 
 
 class LinuxImageHandler(ImageHandler):
@@ -643,7 +685,9 @@ class IA64_HVM_ImageHandler(HVMImageHand
         # ROM size for guest firmware, io page, xenstore page
         # buffer io page, buffer pio page and memmap info page
         extra_pages = 1024 + 5
-        return mem_kb + extra_pages * page_kb
+        mem_kb += extra_pages * page_kb
+        # Add 8 MiB overhead for QEMU's video RAM.
+        return mem_kb + 8192
 
     def getRequiredInitialReservation(self):
         return self.vm.getMemoryTarget()
diff -r 239b44eeb2d6 -r dc510776dd59 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/python/xen/xm/main.py       Thu Apr 24 14:08:29 2008 -0600
@@ -107,6 +107,7 @@ SUBCOMMAND_HELP = {
                      'Migrate a domain to another machine.'),
     'pause'       : ('<Domain>', 'Pause execution of a domain.'),
     'reboot'      : ('<Domain> [-wa]', 'Reboot a domain.'),
+    'reset'       : ('<Domain>', 'Reset a domain.'),
     'restore'     : ('<CheckpointFile> [-p]',
                      'Restore a domain from a saved state.'),
     'save'        : ('[-c] <Domain> <CheckpointFile>',
@@ -274,6 +275,7 @@ common_commands = [
     "migrate",
     "pause",
     "reboot",
+    "reset",
     "restore",
     "resume",
     "save",
@@ -303,6 +305,7 @@ domain_commands = [
     "pause",
     "reboot",
     "rename",
+    "reset",
     "restore",
     "resume",
     "save",
@@ -1247,6 +1250,13 @@ def xm_shutdown(args):
     arg_check(args, "shutdown", 1, 4)
     from xen.xm import shutdown
     shutdown.main(["shutdown"] + args)
+
+def xm_reset(args):
+    arg_check(args, "reset", 1)
+    dom = args[0]
+
+    # TODO: XenAPI
+    server.xend.domain.reset(dom)
 
 def xm_pause(args):
     arg_check(args, "pause", 1)
@@ -2474,6 +2484,7 @@ commands = {
     "dump-core": xm_dump_core,
     "reboot": xm_reboot,
     "rename": xm_rename,
+    "reset": xm_reset,
     "restore": xm_restore,
     "resume": xm_resume,
     "save": xm_save,
diff -r 239b44eeb2d6 -r dc510776dd59 tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/tests/test_x86_emulator.c   Thu Apr 24 14:08:29 2008 -0600
@@ -26,14 +26,8 @@ static int read(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    unsigned long addr = offset;
-    switch ( bytes )
-    {
-    case 1: *val = *(uint8_t *)addr; break;
-    case 2: *val = *(uint16_t *)addr; break;
-    case 4: *val = *(uint32_t *)addr; break;
-    case 8: *val = *(unsigned long *)addr; break;
-    }
+    *val = 0;
+    memcpy(val, (void *)offset, bytes);
     return X86EMUL_OKAY;
 }
 
@@ -44,48 +38,19 @@ static int write(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    unsigned long addr = offset;
-    switch ( bytes )
-    {
-    case 1: *(uint8_t *)addr = (uint8_t)val; break;
-    case 2: *(uint16_t *)addr = (uint16_t)val; break;
-    case 4: *(uint32_t *)addr = (uint32_t)val; break;
-    case 8: *(unsigned long *)addr = val; break;
-    }
+    memcpy((void *)offset, &val, bytes);
     return X86EMUL_OKAY;
 }
 
 static int cmpxchg(
     unsigned int seg,
     unsigned long offset,
-    unsigned long old,
-    unsigned long new,
+    void *old,
+    void *new,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    unsigned long addr = offset;
-    switch ( bytes )
-    {
-    case 1: *(uint8_t *)addr = (uint8_t)new; break;
-    case 2: *(uint16_t *)addr = (uint16_t)new; break;
-    case 4: *(uint32_t *)addr = (uint32_t)new; break;
-    case 8: *(unsigned long *)addr = new; break;
-    }
-    return X86EMUL_OKAY;
-}
-
-static int cmpxchg8b(
-    unsigned int seg,
-    unsigned long offset,
-    unsigned long old_lo,
-    unsigned long old_hi,
-    unsigned long new_lo,
-    unsigned long new_hi,
-    struct x86_emulate_ctxt *ctxt)
-{
-    unsigned long addr = offset;
-    ((unsigned long *)addr)[0] = new_lo;
-    ((unsigned long *)addr)[1] = new_hi;
+    memcpy((void *)offset, new, bytes);
     return X86EMUL_OKAY;
 }
 
@@ -94,7 +59,6 @@ static struct x86_emulate_ops emulops = 
     .insn_fetch = read,
     .write      = write,
     .cmpxchg    = cmpxchg,
-    .cmpxchg8b  = cmpxchg8b
 };
 
 int main(int argc, char **argv)
diff -r 239b44eeb2d6 -r dc510776dd59 tools/tests/x86_emulate.c
--- a/tools/tests/x86_emulate.c Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/tests/x86_emulate.c Thu Apr 24 14:08:29 2008 -0600
@@ -4,10 +4,4 @@
 #include <public/xen.h>
 
 #include "x86_emulate/x86_emulate.h"
-
-#define __emulate_fpu_insn(_op)                 \
-do{ rc = X86EMUL_UNHANDLEABLE;                  \
-    goto done;                                  \
-} while (0)
-
 #include "x86_emulate/x86_emulate.c"
diff -r 239b44eeb2d6 -r dc510776dd59 tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/tools/xenmon/xenbaked.c   Thu Apr 24 14:08:29 2008 -0600
@@ -509,14 +509,36 @@ int monitor_tbufs(void)
     {
         for ( i = 0; (i < num) && !interrupted; i++ )
         {
-            while ( meta[i]->cons != meta[i]->prod )
+            unsigned long start_offset, end_offset, cons, prod;
+
+            cons = meta[i]->cons;
+            prod = meta[i]->prod;
+            xen_rmb(); /* read prod, then read item. */
+
+            if ( cons == prod )
+                continue;
+
+            start_offset = cons % data_size;
+            end_offset = prod % data_size;
+
+            if ( start_offset >= end_offset )
             {
-                xen_rmb(); /* read prod, then read item. */
+                while ( start_offset != data_size )
+                {
+                    rec_size = process_record(
+                        i, (struct t_rec *)(data[i] + start_offset));
+                    start_offset += rec_size;
+                }
+                start_offset = 0;
+            }
+            while ( start_offset != end_offset )
+            {
                 rec_size = process_record(
-                    i, (struct t_rec *)(data[i] + meta[i]->cons % data_size));
-                xen_mb(); /* read item, then update cons. */
-                meta[i]->cons += rec_size;
+                    i, (struct t_rec *)(data[i] + start_offset));
+                start_offset += rec_size;
             }
+            xen_mb(); /* read item, then update cons. */
+            meta[i]->cons = prod;
         }
 
        wait_for_event();
diff -r 239b44eeb2d6 -r dc510776dd59 xen/Makefile
--- a/xen/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/Makefile      Thu Apr 24 14:08:29 2008 -0600
@@ -44,6 +44,7 @@ _clean: delete-unfresh-files
        $(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) clean
        rm -f include/asm *.o $(TARGET)* *~ core
        rm -f include/asm-*/asm-offsets.h
+       [ -d tools/figlet ] && rm -f .banner*
 
 .PHONY: _distclean
 _distclean: clean
@@ -70,8 +71,14 @@ delete-unfresh-files:
                rm -f include/xen/compile.h; \
        fi
 
+.banner: Makefile
+       $(MAKE) -C tools
+       @tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) 2>$@2 >$@1
+       @cat $@1 $@2 >$@
+       @rm -f $@1 $@2
+
 # compile.h contains dynamic build info. Rebuilt on every 'make' invocation.
-include/xen/compile.h: include/xen/compile.h.in
+include/xen/compile.h: include/xen/compile.h.in .banner
        @sed -e 's/@@date@@/$(shell LC_ALL=C date)/g' \
            -e 's/@@time@@/$(shell LC_ALL=C date +%T)/g' \
            -e 's/@@whoami@@/$(USER)/g' \
@@ -83,7 +90,8 @@ include/xen/compile.h: include/xen/compi
            -e 's/@@extraversion@@/$(XEN_EXTRAVERSION)/g' \
            -e 's!@@changeset@@!$(shell ((hg parents --template "{date|date} 
{rev}:{node|short}" >/dev/null && hg parents --template "{date|date} 
{rev}:{node|short}") || echo "unavailable") 2>/dev/null)!g' \
            < include/xen/compile.h.in > $@.new
-       tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) >> $@.new
+       @grep \" .banner >> $@.new
+       @grep -v \" .banner
        @mv -f $@.new $@
 
 include/asm-$(TARGET_ARCH)/asm-offsets.h: arch/$(TARGET_ARCH)/asm-offsets.s
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/Makefile     Thu Apr 24 14:08:29 2008 -0600
@@ -52,6 +52,8 @@ obj-y += tboot.o
 
 obj-$(crash_debug) += gdbstub.o
 
+x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
+
 $(TARGET): $(TARGET)-syms boot/mkelf32
        ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \
        `$(NM) -nr $(TARGET)-syms | head -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'`
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/bitops.c
--- a/xen/arch/x86/bitops.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/bitops.c     Thu Apr 24 14:08:29 2008 -0600
@@ -8,17 +8,18 @@ unsigned int __find_first_bit(
     unsigned long d0, d1, res;
 
     asm volatile (
-        "   xor %%eax,%%eax\n\t" /* also ensures ZF==1 if size==0 */
+        "1: xor %%eax,%%eax\n\t" /* also ensures ZF==1 if size==0 */
         "   repe; scas"__OS"\n\t"
-        "   je 1f\n\t"
+        "   je 2f\n\t"
+        "   bsf -"STR(BITS_PER_LONG/8)"(%2),%0\n\t"
+        "   jz 1b\n\t"
         "   lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
-        "   bsf (%2),%0\n"
-        "1: sub %%ebx,%%edi\n\t"
+        "2: sub %%ebx,%%edi\n\t"
         "   shl $3,%%edi\n\t"
         "   add %%edi,%%eax"
         : "=&a" (res), "=&c" (d0), "=&D" (d1)
-        : "1" ((size + BITS_PER_LONG - 1) / BITS_PER_LONG),
-          "2" (addr), "b" ((int)(long)addr) : "memory" );
+        : "1" (BITS_TO_LONGS(size)), "2" (addr), "b" ((int)(long)addr)
+        : "memory" );
 
     return res;
 }
@@ -34,8 +35,7 @@ unsigned int __find_next_bit(
     if ( bit != 0 )
     {
         /* Look for a bit in the first word. */
-        asm ( "bsf %1,%%"__OP"ax"
-              : "=a" (set) : "r" (*p >> bit), "0" (BITS_PER_LONG) );
+        set = __scanbit(*p >> bit, BITS_PER_LONG - bit);
         if ( set < (BITS_PER_LONG - bit) )
             return (offset + set);
         offset += BITS_PER_LONG - bit;
@@ -56,18 +56,20 @@ unsigned int __find_first_zero_bit(
     unsigned long d0, d1, d2, res;
 
     asm volatile (
+        "1: xor %%eax,%%eax ; not %3\n\t" /* rAX == ~0ul */
         "   xor %%edx,%%edx\n\t" /* also ensures ZF==1 if size==0 */
         "   repe; scas"__OS"\n\t"
-        "   je 1f\n\t"
+        "   je 2f\n\t"
+        "   xor -"STR(BITS_PER_LONG/8)"(%2),%3\n\t"
+        "   jz 1b\n\t"
+        "   bsf %3,%0\n\t"
         "   lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
-        "   xor (%2),%3\n\t"
-        "   bsf %3,%0\n"
-        "1: sub %%ebx,%%edi\n\t"
+        "2: sub %%ebx,%%edi\n\t"
         "   shl $3,%%edi\n\t"
         "   add %%edi,%%edx"
         : "=&d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
-        : "1" ((size + BITS_PER_LONG - 1) / BITS_PER_LONG),
-          "2" (addr), "b" ((int)(long)addr), "3" (-1L) : "memory" );
+        : "1" (BITS_TO_LONGS(size)), "2" (addr), "b" ((int)(long)addr)
+        : "memory" );
 
     return res;
 }
@@ -83,7 +85,7 @@ unsigned int __find_next_zero_bit(
     if ( bit != 0 )
     {
         /* Look for zero in the first word. */
-        asm ( "bsf %1,%%"__OP"ax" : "=a" (set) : "r" (~(*p >> bit)) );
+        set = __scanbit(~(*p >> bit), BITS_PER_LONG - bit);
         if ( set < (BITS_PER_LONG - bit) )
             return (offset + set);
         offset += BITS_PER_LONG - bit;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/cpu/mtrr/main.c
--- a/xen/arch/x86/cpu/mtrr/main.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/cpu/mtrr/main.c      Thu Apr 24 14:08:29 2008 -0600
@@ -586,8 +586,6 @@ struct mtrr_value {
        unsigned long   lsize;
 };
 
-extern void global_init_mtrr_pat(void);
-
 /**
  * mtrr_bp_init - initialize mtrrs on the boot CPU
  *
@@ -654,11 +652,8 @@ void __init mtrr_bp_init(void)
        if (mtrr_if) {
                set_num_var_ranges();
                init_table();
-               if (use_intel()) {
+               if (use_intel())
                        get_mtrr_state();
-                       /* initialize some global data for MTRR/PAT 
virutalization */
-                       global_init_mtrr_pat();
-               }
        }
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/domain.c     Thu Apr 24 14:08:29 2008 -0600
@@ -521,10 +521,10 @@ int arch_domain_create(struct domain *d,
         clear_page(d->shared_info);
         share_xen_page_with_guest(
             virt_to_page(d->shared_info), d, XENSHARE_writable);
-    }
-
-    if ( (rc = iommu_domain_init(d)) != 0 )
-        goto fail;
+
+        if ( (rc = iommu_domain_init(d)) != 0 )
+            goto fail;
+    }
 
     if ( is_hvm_domain(d) )
     {
@@ -562,7 +562,8 @@ void arch_domain_destroy(struct domain *
     if ( is_hvm_domain(d) )
         hvm_domain_destroy(d);
 
-    iommu_domain_destroy(d);
+    if ( !is_idle_domain(d) )
+        iommu_domain_destroy(d);
 
     paging_final_teardown(d);
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/domain_build.c       Thu Apr 24 14:08:29 2008 -0600
@@ -957,8 +957,8 @@ int __init construct_dom0(
     rc |= ioports_deny_access(dom0, 0x40, 0x43);
     /* PIT Channel 2 / PC Speaker Control. */
     rc |= ioports_deny_access(dom0, 0x61, 0x61);
-    /* PCI configuration spaces. */
-    rc |= ioports_deny_access(dom0, 0xcf8, 0xcff);
+    /* PCI configuration space (NB. 0xcf8 has special treatment). */
+    rc |= ioports_deny_access(dom0, 0xcfc, 0xcff);
     /* Command-line I/O ranges. */
     process_dom0_ioports_disable();
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/emulate.c        Thu Apr 24 14:08:29 2008 -0600
@@ -28,6 +28,33 @@ static int hvmemul_do_io(
     ioreq_t *p = &vio->vp_ioreq;
     int rc;
 
+    /* Only retrieve the value from singleton (non-REP) reads. */
+    ASSERT((val == NULL) || ((dir == IOREQ_READ) && !value_is_ptr));
+
+    if ( is_mmio && !value_is_ptr )
+    {
+        /* Part of a multi-cycle read or write? */
+        if ( dir == IOREQ_WRITE )
+        {
+            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
+            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
+            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
+                return X86EMUL_OKAY;
+        }
+        else
+        {
+            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
+            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
+            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
+            {
+                *val = 0;
+                memcpy(val, &curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
+                       size);
+                return X86EMUL_OKAY;
+            }
+        }
+    }
+
     switch ( curr->arch.hvm_vcpu.io_state )
     {
     case HVMIO_none:
@@ -36,8 +63,13 @@ static int hvmemul_do_io(
         curr->arch.hvm_vcpu.io_state = HVMIO_none;
         if ( val == NULL )
             return X86EMUL_UNHANDLEABLE;
-        *val = curr->arch.hvm_vcpu.io_data;
-        return X86EMUL_OKAY;
+        goto finish_access;
+    case HVMIO_dispatched:
+        /* May have to wait for previous cycle of a multi-write to complete. */
+        if ( is_mmio && !value_is_ptr && (dir == IOREQ_WRITE) &&
+             (addr == (curr->arch.hvm_vcpu.mmio_large_write_pa +
+                       curr->arch.hvm_vcpu.mmio_large_write_bytes)) )
+            return X86EMUL_RETRY;
     default:
         return X86EMUL_UNHANDLEABLE;
     }
@@ -80,8 +112,6 @@ static int hvmemul_do_io(
         *reps = p->count;
         p->state = STATE_IORESP_READY;
         hvm_io_assist();
-        if ( val != NULL )
-            *val = curr->arch.hvm_vcpu.io_data;
         curr->arch.hvm_vcpu.io_state = HVMIO_none;
         break;
     case X86EMUL_UNHANDLEABLE:
@@ -92,7 +122,43 @@ static int hvmemul_do_io(
         BUG();
     }
 
-    return rc;
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+ finish_access:
+    if ( val != NULL )
+        *val = curr->arch.hvm_vcpu.io_data;
+
+    if ( is_mmio && !value_is_ptr )
+    {
+        /* Part of a multi-cycle read or write? */
+        if ( dir == IOREQ_WRITE )
+        {
+            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
+            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
+            if ( bytes == 0 )
+                pa = curr->arch.hvm_vcpu.mmio_large_write_pa = addr;
+            if ( addr == (pa + bytes) )
+                curr->arch.hvm_vcpu.mmio_large_write_bytes += size;
+        }
+        else
+        {
+            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
+            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
+            if ( bytes == 0 )
+                pa = curr->arch.hvm_vcpu.mmio_large_read_pa = addr;
+            if ( (addr == (pa + bytes)) &&
+                 ((bytes + size) <
+                  sizeof(curr->arch.hvm_vcpu.mmio_large_read)) )
+            {
+                memcpy(&curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
+                       val, size);
+                curr->arch.hvm_vcpu.mmio_large_read_bytes += size;
+            }
+        }
+    }
+
+    return X86EMUL_OKAY;
 }
 
 static int hvmemul_do_pio(
@@ -371,11 +437,15 @@ static int hvmemul_cmpxchg(
 static int hvmemul_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
-    unsigned long old,
-    unsigned long new,
+    void *p_old,
+    void *p_new,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    unsigned long new = 0;
+    if ( bytes > sizeof(new) )
+        return X86EMUL_UNHANDLEABLE;
+    memcpy(&new, p_new, bytes);
     /* Fix this in case the guest is really relying on r-m-w atomicity. */
     return hvmemul_write(seg, offset, new, bytes, ctxt);
 }
@@ -603,7 +673,7 @@ static int hvmemul_read_msr(
 
     _regs.ecx = (uint32_t)reg;
 
-    if ( (rc = hvm_funcs.msr_read_intercept(&_regs)) != 0 )
+    if ( (rc = hvm_msr_read_intercept(&_regs)) != 0 )
         return rc;
 
     *val = ((uint64_t)(uint32_t)_regs.edx << 32) || (uint32_t)_regs.eax;
@@ -621,7 +691,7 @@ static int hvmemul_write_msr(
     _regs.eax = (uint32_t)val;
     _regs.ecx = (uint32_t)reg;
 
-    return hvm_funcs.msr_write_intercept(&_regs);
+    return hvm_msr_write_intercept(&_regs);
 }
 
 static int hvmemul_wbinvd(
@@ -674,11 +744,40 @@ static int hvmemul_inject_sw_interrupt(
     return X86EMUL_OKAY;
 }
 
-static void hvmemul_load_fpu_ctxt(
-    struct x86_emulate_ctxt *ctxt)
-{
-    if ( !current->fpu_dirtied )
+static int hvmemul_get_fpu(
+    void (*exception_callback)(void *, struct cpu_user_regs *),
+    void *exception_callback_arg,
+    enum x86_emulate_fpu_type type,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *curr = current;
+
+    switch ( type )
+    {
+    case X86EMUL_FPU_fpu:
+        break;
+    case X86EMUL_FPU_mmx:
+        if ( !cpu_has_mmx )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+    default:
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    if ( !curr->fpu_dirtied )
         hvm_funcs.fpu_dirty_intercept();
+
+    curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
+    curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
+
+    return X86EMUL_OKAY;
+}
+
+static void hvmemul_put_fpu(
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *curr = current;
+    curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
 }
 
 static int hvmemul_invlpg(
@@ -720,7 +819,8 @@ static struct x86_emulate_ops hvm_emulat
     .cpuid         = hvmemul_cpuid,
     .inject_hw_exception = hvmemul_inject_hw_exception,
     .inject_sw_interrupt = hvmemul_inject_sw_interrupt,
-    .load_fpu_ctxt = hvmemul_load_fpu_ctxt,
+    .get_fpu       = hvmemul_get_fpu,
+    .put_fpu       = hvmemul_put_fpu,
     .invlpg        = hvmemul_invlpg
 };
 
@@ -763,6 +863,11 @@ int hvm_emulate_one(
     hvmemul_ctxt->exn_pending = 0;
 
     rc = x86_emulate(&hvmemul_ctxt->ctxt, &hvm_emulate_ops);
+
+    if ( rc != X86EMUL_RETRY )
+        curr->arch.hvm_vcpu.mmio_large_read_bytes =
+            curr->arch.hvm_vcpu.mmio_large_write_bytes = 0;
+
     if ( rc != X86EMUL_OKAY )
         return rc;
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Thu Apr 24 14:08:29 2008 -0600
@@ -494,14 +494,14 @@ static int hvm_load_cpu_ctxt(struct doma
          ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) )
     {
         gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n",
-                 ctxt.msr_efer);
+                 ctxt.cr0);
         return -EINVAL;
     }
 
     if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS )
     {
         gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n",
-                 ctxt.msr_efer);
+                 ctxt.cr4);
         return -EINVAL;
     }
 
@@ -620,8 +620,6 @@ HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_
 HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
                           1, HVMSR_PER_VCPU);
 
-extern int reset_vmsr(struct mtrr_state *m, u64 *p);
-
 int hvm_vcpu_initialise(struct vcpu *v)
 {
     int rc;
@@ -647,7 +645,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
     spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
     INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
 
-    rc = reset_vmsr(&v->arch.hvm_vcpu.mtrr, &v->arch.hvm_vcpu.pat_cr);
+    rc = hvm_vcpu_cacheattr_init(v);
     if ( rc != 0 )
         goto fail3;
 
@@ -681,6 +679,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
 
 void hvm_vcpu_destroy(struct vcpu *v)
 {
+    hvm_vcpu_cacheattr_destroy(v);
     vlapic_destroy(v);
     hvm_funcs.vcpu_destroy(v);
 
@@ -1604,6 +1603,9 @@ void hvm_cpuid(unsigned int input, unsig
         *ebx &= 0x0000FFFFu;
         *ebx |= (current->vcpu_id * 2) << 24;
 
+        /* We always support MTRR MSRs. */
+        *edx |= bitmaskof(X86_FEATURE_MTRR);
+
         *ecx &= (bitmaskof(X86_FEATURE_XMM3) |
                  bitmaskof(X86_FEATURE_SSSE3) |
                  bitmaskof(X86_FEATURE_CX16) |
@@ -1653,6 +1655,146 @@ void hvm_cpuid(unsigned int input, unsig
 #endif
         break;
     }
+}
+
+int hvm_msr_read_intercept(struct cpu_user_regs *regs)
+{
+    uint32_t ecx = regs->ecx;
+    uint64_t msr_content = 0;
+    struct vcpu *v = current;
+    uint64_t *var_range_base, *fixed_range_base;
+    int index;
+
+    var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges;
+    fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges;
+
+    switch ( ecx )
+    {
+    case MSR_IA32_TSC:
+        msr_content = hvm_get_guest_time(v);
+        break;
+
+    case MSR_IA32_APICBASE:
+        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+        break;
+
+    case MSR_IA32_MCG_CAP:
+    case MSR_IA32_MCG_STATUS:
+    case MSR_IA32_MC0_STATUS:
+    case MSR_IA32_MC1_STATUS:
+    case MSR_IA32_MC2_STATUS:
+    case MSR_IA32_MC3_STATUS:
+    case MSR_IA32_MC4_STATUS:
+    case MSR_IA32_MC5_STATUS:
+        /* No point in letting the guest see real MCEs */
+        msr_content = 0;
+        break;
+
+    case MSR_IA32_CR_PAT:
+        msr_content = v->arch.hvm_vcpu.pat_cr;
+        break;
+
+    case MSR_MTRRcap:
+        msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
+        break;
+    case MSR_MTRRdefType:
+        msr_content = v->arch.hvm_vcpu.mtrr.def_type
+                        | (v->arch.hvm_vcpu.mtrr.enabled << 10);
+        break;
+    case MSR_MTRRfix64K_00000:
+        msr_content = fixed_range_base[0];
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        index = regs->ecx - MSR_MTRRfix16K_80000;
+        msr_content = fixed_range_base[index + 1];
+        break;
+    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
+        index = regs->ecx - MSR_MTRRfix4K_C0000;
+        msr_content = fixed_range_base[index + 3];
+        break;
+    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
+        index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
+        msr_content = var_range_base[index];
+        break;
+
+    default:
+        return hvm_funcs.msr_read_intercept(regs);
+    }
+
+    regs->eax = (uint32_t)msr_content;
+    regs->edx = (uint32_t)(msr_content >> 32);
+    return X86EMUL_OKAY;
+}
+
+int hvm_msr_write_intercept(struct cpu_user_regs *regs)
+{
+    extern bool_t mtrr_var_range_msr_set(
+        struct mtrr_state *v, u32 msr, u64 msr_content);
+    extern bool_t mtrr_fix_range_msr_set(
+        struct mtrr_state *v, int row, u64 msr_content);
+    extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
+    extern bool_t pat_msr_set(u64 *pat, u64 msr);
+
+    uint32_t ecx = regs->ecx;
+    uint64_t msr_content = (uint32_t)regs->eax | ((uint64_t)regs->edx << 32);
+    struct vcpu *v = current;
+    int index;
+
+    switch ( ecx )
+    {
+     case MSR_IA32_TSC:
+        hvm_set_guest_time(v, msr_content);
+        pt_reset(v);
+        break;
+
+    case MSR_IA32_APICBASE:
+        vlapic_msr_set(vcpu_vlapic(v), msr_content);
+        break;
+
+    case MSR_IA32_CR_PAT:
+        if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
+           goto gp_fault;
+        break;
+
+    case MSR_MTRRcap:
+        goto gp_fault;
+    case MSR_MTRRdefType:
+        if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
+           goto gp_fault;
+        break;
+    case MSR_MTRRfix64K_00000:
+        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
+            goto gp_fault;
+        break;
+    case MSR_MTRRfix16K_80000:
+    case MSR_MTRRfix16K_A0000:
+        index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
+        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+                                     index, msr_content) )
+            goto gp_fault;
+        break;
+    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
+        index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
+        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+                                     index, msr_content) )
+            goto gp_fault;
+        break;
+    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
+        if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
+                                     regs->ecx, msr_content) )
+            goto gp_fault;
+        break;
+
+    default:
+        return hvm_funcs.msr_write_intercept(regs);
+    }
+
+    return X86EMUL_OKAY;
+
+gp_fault:
+    hvm_inject_exception(TRAP_gp_fault, 0, 0);
+    return X86EMUL_EXCEPTION;
 }
 
 enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/mtrr.c
--- a/xen/arch/x86/hvm/mtrr.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/mtrr.c   Thu Apr 24 14:08:29 2008 -0600
@@ -27,7 +27,6 @@
 #include <asm/hvm/support.h>
 #include <asm/hvm/cacheattr.h>
 
-/* Xen holds the native MTRR MSRs */
 extern struct mtrr_state mtrr_state;
 
 static uint64_t phys_base_msr_mask;
@@ -35,19 +34,17 @@ static uint32_t size_or_mask;
 static uint32_t size_or_mask;
 static uint32_t size_and_mask;
 
-static void init_pat_entry_tbl(uint64_t pat);
-static void init_mtrr_epat_tbl(void);
-static uint8_t get_mtrr_type(struct mtrr_state *m, paddr_t pa);
-/* get page attribute fields (PAn) from PAT MSR */
+/* Get page attribute fields (PAn) from PAT MSR. */
 #define pat_cr_2_paf(pat_cr,n)  ((((uint64_t)pat_cr) >> ((n)<<3)) & 0xff)
-/* pat entry to PTE flags (PAT, PCD, PWT bits) */
+
+/* PAT entry to PTE flags (PAT, PCD, PWT bits). */
 static uint8_t pat_entry_2_pte_flags[8] = {
     0,           _PAGE_PWT,
     _PAGE_PCD,   _PAGE_PCD | _PAGE_PWT,
     _PAGE_PAT,   _PAGE_PAT | _PAGE_PWT,
     _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT };
 
-/* effective mm type lookup table, according to MTRR and PAT */
+/* Effective mm type lookup table, according to MTRR and PAT. */
 static uint8_t mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
 /********PAT(UC,WC,RS,RS,WT,WP,WB,UC-)*/
 /* RS means reserved type(2,3), and type is hardcoded here */
@@ -67,12 +64,13 @@ static uint8_t mm_type_tbl[MTRR_NUM_TYPE
             {0, 1, 2, 2, 4, 5, 6, 0}
 };
 
-/* reverse lookup table, to find a pat type according to MTRR and effective
- * memory type. This table is dynamically generated
+/*
+ * Reverse lookup table, to find a pat type according to MTRR and effective
+ * memory type. This table is dynamically generated.
  */
 static uint8_t mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES];
 
-/* lookup table for PAT entry of a given PAT value in host pat */
+/* Lookup table for PAT entry of a given PAT value in host PAT. */
 static uint8_t pat_entry_tbl[PAT_TYPE_NUMS];
 
 static void get_mtrr_range(uint64_t base_msr, uint64_t mask_msr,
@@ -139,220 +137,63 @@ bool_t is_var_mtrr_overlapped(struct mtr
     return 0;
 }
 
-/* reserved mtrr for guest OS */
-#define RESERVED_MTRR 2
+#define MTRR_PHYSMASK_VALID_BIT  11
+#define MTRR_PHYSMASK_SHIFT      12
+
+#define MTRR_PHYSBASE_TYPE_MASK  0xff   /* lowest 8 bits */
+#define MTRR_PHYSBASE_SHIFT      12
+#define MTRR_VCNT                8
+
 #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
 #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
 bool_t mtrr_var_range_msr_set(struct mtrr_state *m, uint32_t msr,
                               uint64_t msr_content);
-bool_t mtrr_def_type_msr_set(struct mtrr_state *m, uint64_t msr_content);
 bool_t mtrr_fix_range_msr_set(struct mtrr_state *m, uint32_t row,
                               uint64_t msr_content);
-static void set_var_mtrr(uint32_t reg, struct mtrr_state *m,
-                         uint32_t base, uint32_t size,
-                         uint32_t type)
-{
-    struct mtrr_var_range *vr;
-
-    vr = &m->var_ranges[reg];
-
-    if ( size == 0 )
-    {
-        /* The invalid bit is kept in the mask, so we simply clear the
-         * relevant mask register to disable a range.
-         */
-        mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg), 0);
-    }
-    else
-    {
-        vr->base_lo = base << PAGE_SHIFT | type;
-        vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
-        vr->mask_lo = -size << PAGE_SHIFT | 0x800;
-        vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
-
-        mtrr_var_range_msr_set(m, MTRRphysBase_MSR(reg), *(uint64_t *)vr);
-        mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg),
-                               *((uint64_t *)vr + 1));
-    }
-}
-/* From Intel Vol. III Section 10.11.4, the Range Size and Base Alignment has
- * some kind of requirement:
- * 1. The range size must be 2^N byte for N >= 12 (i.e 4KB minimum).
- * 2. The base address must be 2^N aligned, where the N here is equal to
- * the N in previous requirement. So a 8K range must be 8K aligned not 4K 
aligned.
- */
-static uint32_t range_to_mtrr(uint32_t reg, struct mtrr_state *m,
-                              uint32_t range_startk, uint32_t range_sizek,
-                              uint8_t type)
-{
-    if ( !range_sizek || (reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR)) )
-    {
-        gdprintk(XENLOG_WARNING,
-                "Failed to init var mtrr msr[%d]"
-                "range_size:%x, total available MSR:%d\n",
-                reg, range_sizek,
-                (uint32_t)((m->mtrr_cap & 0xff) - RESERVED_MTRR));
-        return reg;
-    }
-
-    while ( range_sizek )
-    {
-        uint32_t max_align, align, sizek;
-
-        max_align = (range_startk == 0) ? 32 : ffs(range_startk);
-        align = min_t(uint32_t, fls(range_sizek), max_align);
-        sizek = 1 << (align - 1);
-
-        set_var_mtrr(reg++, m, range_startk, sizek, type);
-
-        range_startk += sizek;
-        range_sizek  -= sizek;
-
-        if ( reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR) )
-        {
-            gdprintk(XENLOG_WARNING,
-                    "Failed to init var mtrr msr[%d],"
-                    "total available MSR:%d\n",
-                    reg, (uint32_t)((m->mtrr_cap & 0xff) - RESERVED_MTRR));
-            break;
-        }
-    }
-
-    return reg;
-}
-
-static void setup_fixed_mtrrs(struct vcpu *v)
-{
-    uint64_t content;
-    int32_t i;
-    struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;
-
-    /* 1. Map (0~A0000) as WB */
-    content = 0x0606060606060606ull;
-    mtrr_fix_range_msr_set(m, 0, content);
-    mtrr_fix_range_msr_set(m, 1, content);
-    /* 2. Map VRAM(A0000~C0000) as WC */
-    content = 0x0101010101010101;
-    mtrr_fix_range_msr_set(m, 2, content);
-    /* 3. Map (C0000~100000) as UC */
-    for ( i = 3; i < 11; i++)
-        mtrr_fix_range_msr_set(m, i, 0);
-}
-
-static void setup_var_mtrrs(struct vcpu *v)
-{
-    p2m_type_t p2m;
-    uint64_t e820_mfn;
-    int8_t *p = NULL;
-    uint8_t nr = 0;
-    int32_t i;
-    uint32_t reg = 0;
-    uint64_t size = 0;
-    uint64_t addr = 0;
-    struct e820entry *e820_table;
-
-    e820_mfn = mfn_x(gfn_to_mfn(v->domain,
-                    HVM_E820_PAGE >> PAGE_SHIFT, &p2m));
-
-    p = (int8_t *)map_domain_page(e820_mfn);
-
-    nr = *(uint8_t*)(p + HVM_E820_NR_OFFSET);
-    e820_table = (struct e820entry*)(p + HVM_E820_OFFSET);
-    /* search E820 table, set MTRR for RAM */
-    for ( i = 0; i < nr; i++)
-    {
-        if ( (e820_table[i].addr >= 0x100000) &&
-             (e820_table[i].type == E820_RAM) )
-        {
-            if ( e820_table[i].addr == 0x100000 )
-            {
-                size = e820_table[i].size + 0x100000 + PAGE_SIZE * 5;
-                addr = 0;
-            }
-            else
-            {
-                /* Larger than 4G */
-                size = e820_table[i].size;
-                addr = e820_table[i].addr;
-            }
-
-            reg = range_to_mtrr(reg, &v->arch.hvm_vcpu.mtrr,
-                                addr >> PAGE_SHIFT, size >> PAGE_SHIFT,
-                                MTRR_TYPE_WRBACK);
-        }
-    }
-}
-
-void init_mtrr_in_hyper(struct vcpu *v)
-{
-    /* TODO:MTRR should be initialized in BIOS or other places.
-     * workaround to do it in here
-     */
-    if ( v->arch.hvm_vcpu.mtrr.is_initialized )
-        return;
-
-    setup_fixed_mtrrs(v);
-    setup_var_mtrrs(v);
-    /* enable mtrr */
-    mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, 0xc00);
-
-    v->arch.hvm_vcpu.mtrr.is_initialized = 1;
-}
-
-static int32_t reset_mtrr(struct mtrr_state *m)
-{
-    m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT);
-    if ( m->var_ranges == NULL )
-        return -ENOMEM;
-    memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range));
-    memset(m->fixed_ranges, 0, sizeof(m->fixed_ranges));
-    m->enabled = 0;
-    m->def_type = 0;/*mtrr is disabled*/
-    m->mtrr_cap = (0x5<<8)|MTRR_VCNT;/*wc,fix enabled, and vcnt=8*/
-    m->overlapped = 0;
-    return 0;
-}
-
-/* init global variables for MTRR and PAT */
-void global_init_mtrr_pat(void)
+
+static int hvm_mtrr_pat_init(void)
 {
     extern uint64_t host_pat;
-    uint32_t phys_addr;
-
-    init_mtrr_epat_tbl();
-    init_pat_entry_tbl(host_pat);
-    /* Get max physical address, set some global variable */
-    if ( cpuid_eax(0x80000000) < 0x80000008 )
-        phys_addr = 36;
-    else
-        phys_addr = cpuid_eax(0x80000008);
-
-    phys_base_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0xf00UL;
-    phys_mask_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0x7ffUL;
-
-    size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
-    size_and_mask = ~size_or_mask & 0xfff00000;
-}
-
-static void init_pat_entry_tbl(uint64_t pat)
-{
-    int32_t i, j;
+    unsigned int i, j, phys_addr;
+
+    memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
+    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
+    {
+        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
+        {
+            int32_t tmp = mm_type_tbl[i][j];
+            if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) )
+                mtrr_epat_tbl[i][tmp] = j;
+        }
+    }
 
     memset(&pat_entry_tbl, INVALID_MEM_TYPE,
            PAT_TYPE_NUMS * sizeof(pat_entry_tbl[0]));
-
     for ( i = 0; i < PAT_TYPE_NUMS; i++ )
     {
         for ( j = 0; j < PAT_TYPE_NUMS; j++ )
         {
-            if ( pat_cr_2_paf(pat, j) == i )
+            if ( pat_cr_2_paf(host_pat, j) == i )
             {
                 pat_entry_tbl[i] = j;
                 break;
             }
         }
     }
-}
+
+    phys_addr = 36;
+    if ( cpuid_eax(0x80000000) >= 0x80000008 )
+        phys_addr = (uint8_t)cpuid_eax(0x80000008);
+
+    phys_base_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0xf00UL;
+    phys_mask_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0x7ffUL;
+
+    size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
+    size_and_mask = ~size_or_mask & 0xfff00000;
+
+    return 0;
+}
+__initcall(hvm_mtrr_pat_init);
 
 uint8_t pat_type_2_pte_flags(uint8_t pat_type)
 {
@@ -368,24 +209,35 @@ uint8_t pat_type_2_pte_flags(uint8_t pat
     return pat_entry_2_pte_flags[pat_entry_tbl[PAT_TYPE_UNCACHABLE]];
 }
 
-int32_t reset_vmsr(struct mtrr_state *m, uint64_t *pat_ptr)
-{
-    int32_t rc;
-
-    rc = reset_mtrr(m);
-    if ( rc != 0 )
-        return rc;
-
-    *pat_ptr = ((uint64_t)PAT_TYPE_WRBACK) |               /* PAT0: WB */
-               ((uint64_t)PAT_TYPE_WRTHROUGH << 8) |       /* PAT1: WT */
-               ((uint64_t)PAT_TYPE_UC_MINUS << 16) |       /* PAT2: UC- */
-               ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |     /* PAT3: UC */
-               ((uint64_t)PAT_TYPE_WRBACK << 32) |         /* PAT4: WB */
-               ((uint64_t)PAT_TYPE_WRTHROUGH << 40) |      /* PAT5: WT */
-               ((uint64_t)PAT_TYPE_UC_MINUS << 48) |       /* PAT6: UC- */
-               ((uint64_t)PAT_TYPE_UNCACHABLE << 56);      /* PAT7: UC */
-
-    return 0;
+int hvm_vcpu_cacheattr_init(struct vcpu *v)
+{
+    struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;
+
+    memset(m, 0, sizeof(*m));
+
+    m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT);
+    if ( m->var_ranges == NULL )
+        return -ENOMEM;
+    memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range));
+
+    m->mtrr_cap = (1u << 10) | (1u << 8) | MTRR_VCNT;
+
+    v->arch.hvm_vcpu.pat_cr =
+        ((uint64_t)PAT_TYPE_WRBACK) |               /* PAT0: WB */
+        ((uint64_t)PAT_TYPE_WRTHROUGH << 8) |       /* PAT1: WT */
+        ((uint64_t)PAT_TYPE_UC_MINUS << 16) |       /* PAT2: UC- */
+        ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |     /* PAT3: UC */
+        ((uint64_t)PAT_TYPE_WRBACK << 32) |         /* PAT4: WB */
+        ((uint64_t)PAT_TYPE_WRTHROUGH << 40) |      /* PAT5: WT */
+        ((uint64_t)PAT_TYPE_UC_MINUS << 48) |       /* PAT6: UC- */
+        ((uint64_t)PAT_TYPE_UNCACHABLE << 56);      /* PAT7: UC */
+
+    return 0;
+}
+
+void hvm_vcpu_cacheattr_destroy(struct vcpu *v)
+{
+    xfree(v->arch.hvm_vcpu.mtrr.var_ranges);
 }
 
 /*
@@ -512,23 +364,6 @@ static uint8_t effective_mm_type(struct 
     return effective;
 }
 
-static void init_mtrr_epat_tbl(void)
-{
-    int32_t i, j;
-    /* set default value to an invalid type, just for checking conflict */
-    memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
-
-    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
-    {
-        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
-        {
-            int32_t tmp = mm_type_tbl[i][j];
-            if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) )
-                mtrr_epat_tbl[i][tmp] = j;
-        }
-    }
-}
-
 uint32_t get_pat_flags(struct vcpu *v,
                        uint32_t gl1e_flags,
                        paddr_t gpaddr,
@@ -856,7 +691,6 @@ static int hvm_load_mtrr_msr(struct doma
 
     mtrr_def_type_msr_set(mtrr_state, hw_mtrr.msr_mtrr_def_type);
 
-    v->arch.hvm_vcpu.mtrr.is_initialized = 1;
     return 0;
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/Makefile
--- a/xen/arch/x86/hvm/svm/Makefile     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/svm/Makefile     Thu Apr 24 14:08:29 2008 -0600
@@ -1,8 +1,6 @@ subdir-$(x86_32) += x86_32
-subdir-$(x86_32) += x86_32
-subdir-$(x86_64) += x86_64
-
 obj-y += asid.o
 obj-y += emulate.o
+obj-y += entry.o
 obj-y += intr.o
 obj-y += svm.o
 obj-y += vmcb.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/entry.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/svm/entry.S      Thu Apr 24 14:08:29 2008 -0600
@@ -0,0 +1,178 @@
+/*
+ * entry.S: SVM architecture-specific entry/exit handling.
+ * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2008, Citrix Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/softirq.h>
+#include <asm/types.h>
+#include <asm/asm_defns.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <public/xen.h>
+
+#define VMRUN  .byte 0x0F,0x01,0xD8
+#define STGI   .byte 0x0F,0x01,0xDC
+#define CLGI   .byte 0x0F,0x01,0xDD
+
+#define get_current(reg)                        \
+        mov $STACK_SIZE-BYTES_PER_LONG, r(reg); \
+        or  r(sp), r(reg);                      \
+        and $~(BYTES_PER_LONG-1),r(reg);        \
+        mov (r(reg)),r(reg);
+
+#if defined(__x86_64__)
+#define r(reg) %r##reg
+#define addr_of(lbl) lbl(%rip)
+#define call_with_regs(fn)                      \
+        mov  %rsp,%rdi;                         \
+        call fn;
+#else /* defined(__i386__) */
+#define r(reg) %e##reg
+#define addr_of(lbl) lbl
+#define UREGS_rax UREGS_eax
+#define UREGS_rip UREGS_eip
+#define UREGS_rsp UREGS_esp
+#define call_with_regs(fn)                      \
+        mov  %esp,%eax;                         \
+        push %eax;                              \
+        call fn;                                \
+        add  $4,%esp;
+#endif
+
+ENTRY(svm_asm_do_resume)
+        get_current(bx)
+        CLGI
+
+        mov  VCPU_processor(r(bx)),%eax
+        shl  $IRQSTAT_shift,r(ax)
+        lea  addr_of(irq_stat),r(dx)
+        testl $~0,(r(dx),r(ax),1)
+        jnz  .Lsvm_process_softirqs
+
+        call svm_asid_handle_vmrun
+        call svm_intr_assist
+
+        cmpb $0,addr_of(tb_init_done)
+        jnz  .Lsvm_trace
+.Lsvm_trace_done:
+
+        mov  VCPU_svm_vmcb(r(bx)),r(cx)
+        mov  UREGS_rax(r(sp)),r(ax)
+        mov  r(ax),VMCB_rax(r(cx))
+        mov  UREGS_rip(r(sp)),r(ax)
+        mov  r(ax),VMCB_rip(r(cx))
+        mov  UREGS_rsp(r(sp)),r(ax)
+        mov  r(ax),VMCB_rsp(r(cx))
+        mov  UREGS_eflags(r(sp)),r(ax)
+        mov  r(ax),VMCB_rflags(r(cx))
+
+        mov  VCPU_svm_vmcb_pa(r(bx)),r(ax)
+
+#if defined(__x86_64__)
+        pop  %r15
+        pop  %r14
+        pop  %r13
+        pop  %r12
+        pop  %rbp
+        pop  %rbx
+        pop  %r11
+        pop  %r10
+        pop  %r9
+        pop  %r8
+        add  $8,%rsp /* Skip %rax: restored by VMRUN. */
+        pop  %rcx
+        pop  %rdx
+        pop  %rsi
+        pop  %rdi
+#else /* defined(__i386__) */
+        pop  %ebx
+        pop  %ecx
+        pop  %edx
+        pop  %esi
+        pop  %edi
+        pop  %ebp
+#endif
+
+        VMRUN
+
+#if defined(__x86_64__)
+        push %rdi
+        push %rsi
+        push %rdx
+        push %rcx
+        push %rax
+        push %r8
+        push %r9
+        push %r10
+        push %r11
+        push %rbx
+        push %rbp
+        push %r12
+        push %r13
+        push %r14
+        push %r15
+#else /* defined(__i386__) */
+        push %ebp
+        push %edi
+        push %esi
+        push %edx
+        push %ecx
+        push %ebx
+#endif
+
+        get_current(bx)
+        movb $0,VCPU_svm_vmcb_in_sync(r(bx))
+        mov  VCPU_svm_vmcb(r(bx)),r(cx)
+        mov  VMCB_rax(r(cx)),r(ax)
+        mov  r(ax),UREGS_rax(r(sp))
+        mov  VMCB_rip(r(cx)),r(ax)
+        mov  r(ax),UREGS_rip(r(sp))
+        mov  VMCB_rsp(r(cx)),r(ax)
+        mov  r(ax),UREGS_rsp(r(sp))
+        mov  VMCB_rflags(r(cx)),r(ax)
+        mov  r(ax),UREGS_eflags(r(sp))
+
+#ifndef NDEBUG
+        mov  $0xbeef,%ax
+        mov  %ax,UREGS_error_code(r(sp))
+        mov  %ax,UREGS_entry_vector(r(sp))
+        mov  %ax,UREGS_saved_upcall_mask(r(sp))
+        mov  %ax,UREGS_cs(r(sp))
+        mov  %ax,UREGS_ds(r(sp))
+        mov  %ax,UREGS_es(r(sp))
+        mov  %ax,UREGS_fs(r(sp))
+        mov  %ax,UREGS_gs(r(sp))
+        mov  %ax,UREGS_ss(r(sp))
+#endif
+
+        STGI
+.globl svm_stgi_label
+svm_stgi_label:
+        call_with_regs(svm_vmexit_handler)
+        jmp  svm_asm_do_resume
+
+.Lsvm_process_softirqs:
+        STGI
+        call do_softirq
+        jmp  svm_asm_do_resume
+
+.Lsvm_trace:
+        call svm_trace_vmentry
+        jmp  .Lsvm_trace_done
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c       Thu Apr 24 14:08:29 2008 -0600
@@ -102,15 +102,17 @@ static void svm_dirq_assist(struct vcpu 
     struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
     struct dev_intx_gsi_link *digl;
 
-    if ( !amd_iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
+    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
         return;
 
     for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
           irq < NR_IRQS;
           irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
     {
+        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
+            continue;
+
         stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]);
-        clear_bit(irq, &hvm_irq_dpci->dirq_mask);
 
         list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
         {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Thu Apr 24 14:08:29 2008 -0600
@@ -911,6 +911,9 @@ static void svm_cpuid_intercept(
             __clear_bit(X86_FEATURE_PAE & 31, edx);
         __clear_bit(X86_FEATURE_PSE36 & 31, edx);
 
+        /* We always support MTRR MSRs. */
+        *edx |= bitmaskof(X86_FEATURE_MTRR);
+
         /* Filter all other features according to a whitelist. */
         *ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) |
                  bitmaskof(X86_FEATURE_ALTMOVCR) |
@@ -924,7 +927,9 @@ static void svm_cpuid_intercept(
                  bitmaskof(X86_FEATURE_SYSCALL) |
                  bitmaskof(X86_FEATURE_MP) |
                  bitmaskof(X86_FEATURE_MMXEXT) |
-                 bitmaskof(X86_FEATURE_FFXSR));
+                 bitmaskof(X86_FEATURE_FFXSR) |
+                 bitmaskof(X86_FEATURE_3DNOW) |
+                 bitmaskof(X86_FEATURE_3DNOWEXT));
         break;
 
     case 0x80000007:
@@ -981,14 +986,6 @@ static int svm_msr_read_intercept(struct
 
     switch ( ecx )
     {
-    case MSR_IA32_TSC:
-        msr_content = hvm_get_guest_time(v);
-        break;
-
-    case MSR_IA32_APICBASE:
-        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
-        break;
-
     case MSR_EFER:
         msr_content = v->arch.hvm_vcpu.guest_efer;
         break;
@@ -1013,18 +1010,6 @@ static int svm_msr_read_intercept(struct
 
     case MSR_K8_VM_HSAVE_PA:
         goto gpf;
-
-    case MSR_IA32_MCG_CAP:
-    case MSR_IA32_MCG_STATUS:
-    case MSR_IA32_MC0_STATUS:
-    case MSR_IA32_MC1_STATUS:
-    case MSR_IA32_MC2_STATUS:
-    case MSR_IA32_MC3_STATUS:
-    case MSR_IA32_MC4_STATUS:
-    case MSR_IA32_MC5_STATUS:
-        /* No point in letting the guest see real MCEs */
-        msr_content = 0;
-        break;
 
     case MSR_IA32_DEBUGCTLMSR:
         msr_content = vmcb->debugctlmsr;
@@ -1083,15 +1068,6 @@ static int svm_msr_write_intercept(struc
 
     switch ( ecx )
     {
-    case MSR_IA32_TSC:
-        hvm_set_guest_time(v, msr_content);
-        pt_reset(v);
-        break;
-
-    case MSR_IA32_APICBASE:
-        vlapic_msr_set(vcpu_vlapic(v), msr_content);
-        break;
-
     case MSR_K8_VM_HSAVE_PA:
         goto gpf;
 
@@ -1152,12 +1128,12 @@ static void svm_do_msr_access(struct cpu
 
     if ( vmcb->exitinfo1 == 0 )
     {
-        rc = svm_msr_read_intercept(regs);
+        rc = hvm_msr_read_intercept(regs);
         inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
     }
     else
     {
-        rc = svm_msr_write_intercept(regs);
+        rc = hvm_msr_write_intercept(regs);
         inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
     }
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_32/Makefile
--- a/xen/arch/x86/hvm/svm/x86_32/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-obj-y += exits.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_32/exits.S
--- a/xen/arch/x86/hvm/svm/x86_32/exits.S       Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,131 +0,0 @@
-/*
- * exits.S: SVM architecture-specific exit handling.
- * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <xen/softirq.h>
-#include <asm/asm_defns.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <public/xen.h>
-
-#define GET_CURRENT(reg)         \
-        movl $STACK_SIZE-4,reg;  \
-        orl  %esp,reg;           \
-        andl $~3,reg;            \
-        movl (reg),reg;
-
-#define VMRUN  .byte 0x0F,0x01,0xD8
-#define STGI   .byte 0x0F,0x01,0xDC
-#define CLGI   .byte 0x0F,0x01,0xDD
-
-ENTRY(svm_asm_do_resume)
-        GET_CURRENT(%ebx)
-        CLGI
-
-        movl VCPU_processor(%ebx),%eax
-        shl  $IRQSTAT_shift,%eax
-        testl $~0,irq_stat(%eax,1)
-        jnz  svm_process_softirqs
-
-        call svm_asid_handle_vmrun
-        call svm_intr_assist
-
-        /* Check if the trace buffer is initialized. 
-         * Because the below condition is unlikely, we jump out of line
-         * instead of having a mostly taken branch over the unlikely code.
-         */
-        cmpb $0,tb_init_done
-        jnz  svm_trace
-svm_trace_done:
-
-        movl VCPU_svm_vmcb(%ebx),%ecx
-        movl UREGS_eax(%esp),%eax
-        movl %eax,VMCB_rax(%ecx)
-        movl UREGS_eip(%esp),%eax
-        movl %eax,VMCB_rip(%ecx)
-        movl UREGS_esp(%esp),%eax
-        movl %eax,VMCB_rsp(%ecx)
-        movl UREGS_eflags(%esp),%eax
-        movl %eax,VMCB_rflags(%ecx)
-
-        movl VCPU_svm_vmcb_pa(%ebx),%eax
-        popl %ebx
-        popl %ecx
-        popl %edx
-        popl %esi
-        popl %edi
-        popl %ebp
-
-        VMRUN
-
-        pushl %ebp
-        pushl %edi
-        pushl %esi
-        pushl %edx
-        pushl %ecx
-        pushl %ebx
-
-        GET_CURRENT(%ebx)
-        movb $0,VCPU_svm_vmcb_in_sync(%ebx)
-        movl VCPU_svm_vmcb(%ebx),%ecx
-        movl VMCB_rax(%ecx),%eax
-        movl %eax,UREGS_eax(%esp)
-        movl VMCB_rip(%ecx),%eax
-        movl %eax,UREGS_eip(%esp)
-        movl VMCB_rsp(%ecx),%eax
-        movl %eax,UREGS_esp(%esp)
-        movl VMCB_rflags(%ecx),%eax
-        movl %eax,UREGS_eflags(%esp)
-
-#ifndef NDEBUG
-        movw $0xbeef,%ax
-        movw %ax,UREGS_error_code(%esp)
-        movw %ax,UREGS_entry_vector(%esp)
-        movw %ax,UREGS_saved_upcall_mask(%esp)
-        movw %ax,UREGS_cs(%esp)
-        movw %ax,UREGS_ds(%esp)
-        movw %ax,UREGS_es(%esp)
-        movw %ax,UREGS_fs(%esp)
-        movw %ax,UREGS_gs(%esp)
-        movw %ax,UREGS_ss(%esp)
-#endif
-
-        STGI
-.globl svm_stgi_label;
-svm_stgi_label:
-        movl %esp,%eax
-        push %eax
-        call svm_vmexit_handler
-        addl $4,%esp
-        jmp  svm_asm_do_resume
-
-        ALIGN
-svm_process_softirqs:
-        STGI
-        call do_softirq
-        jmp  svm_asm_do_resume
-
-svm_trace:
-        /* Call out to C, as this is not speed critical path
-         * Note: svm_trace_vmentry will recheck the tb_init_done,
-         * but this is on the slow path, so who cares 
-         */
-        call svm_trace_vmentry
-        jmp  svm_trace_done
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_64/Makefile
--- a/xen/arch/x86/hvm/svm/x86_64/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-obj-y += exits.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/svm/x86_64/exits.S
--- a/xen/arch/x86/hvm/svm/x86_64/exits.S       Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,148 +0,0 @@
-/*
- * exits.S: AMD-V architecture-specific exit handling.
- * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <xen/softirq.h>
-#include <asm/asm_defns.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <public/xen.h>
-
-#define GET_CURRENT(reg)         \
-        movq $STACK_SIZE-8,reg;  \
-        orq  %rsp,reg;           \
-        andq $~7,reg;            \
-        movq (reg),reg;
-
-#define VMRUN  .byte 0x0F,0x01,0xD8
-#define STGI   .byte 0x0F,0x01,0xDC
-#define CLGI   .byte 0x0F,0x01,0xDD
-
-ENTRY(svm_asm_do_resume)
-        GET_CURRENT(%rbx)
-        CLGI
-
-        movl VCPU_processor(%rbx),%eax
-        shl  $IRQSTAT_shift,%rax
-        leaq irq_stat(%rip),%rdx
-        testl $~0,(%rdx,%rax,1)
-        jnz  svm_process_softirqs
-
-        call svm_asid_handle_vmrun
-        call svm_intr_assist
-
-        /* Check if the trace buffer is initialized. 
-         * Because the below condition is unlikely, we jump out of line
-         * instead of having a mostly taken branch over the unlikely code.
-         */
-        cmpb $0,tb_init_done(%rip)
-        jnz  svm_trace
-svm_trace_done:
-
-        movq VCPU_svm_vmcb(%rbx),%rcx
-        movq UREGS_rax(%rsp),%rax
-        movq %rax,VMCB_rax(%rcx)
-        movq UREGS_rip(%rsp),%rax
-        movq %rax,VMCB_rip(%rcx)
-        movq UREGS_rsp(%rsp),%rax
-        movq %rax,VMCB_rsp(%rcx)
-        movq UREGS_eflags(%rsp),%rax
-        movq %rax,VMCB_rflags(%rcx)
-
-        movq VCPU_svm_vmcb_pa(%rbx),%rax
-        popq %r15
-        popq %r14
-        popq %r13
-        popq %r12
-        popq %rbp
-        popq %rbx
-        popq %r11
-        popq %r10
-        popq %r9
-        popq %r8
-        addq $8,%rsp /* Skip %rax: restored by VMRUN. */
-        popq %rcx
-        popq %rdx
-        popq %rsi
-        popq %rdi
-
-        VMRUN
-
-        pushq %rdi
-        pushq %rsi
-        pushq %rdx
-        pushq %rcx
-        pushq %rax
-        pushq %r8
-        pushq %r9
-        pushq %r10
-        pushq %r11
-        pushq %rbx
-        pushq %rbp
-        pushq %r12
-        pushq %r13
-        pushq %r14
-        pushq %r15
-
-        GET_CURRENT(%rbx)
-        movb $0,VCPU_svm_vmcb_in_sync(%rbx)
-        movq VCPU_svm_vmcb(%rbx),%rcx
-        movq VMCB_rax(%rcx),%rax
-        movq %rax,UREGS_rax(%rsp)
-        movq VMCB_rip(%rcx),%rax
-        movq %rax,UREGS_rip(%rsp)
-        movq VMCB_rsp(%rcx),%rax
-        movq %rax,UREGS_rsp(%rsp)
-        movq VMCB_rflags(%rcx),%rax
-        movq %rax,UREGS_eflags(%rsp)
-
-#ifndef NDEBUG
-        movw $0xbeef,%ax
-        movw %ax,UREGS_error_code(%rsp)
-        movw %ax,UREGS_entry_vector(%rsp)
-        movw %ax,UREGS_saved_upcall_mask(%rsp)
-        movw %ax,UREGS_cs(%rsp)
-        movw %ax,UREGS_ds(%rsp)
-        movw %ax,UREGS_es(%rsp)
-        movw %ax,UREGS_fs(%rsp)
-        movw %ax,UREGS_gs(%rsp)
-        movw %ax,UREGS_ss(%rsp)
-#endif
-
-        STGI
-.globl svm_stgi_label;
-svm_stgi_label:
-        movq %rsp,%rdi
-        call svm_vmexit_handler
-        jmp  svm_asm_do_resume
-
-        ALIGN
-svm_process_softirqs:
-        STGI
-        call do_softirq
-        jmp  svm_asm_do_resume
-
-svm_trace:
-        /* Call out to C, as this is not speed critical path
-         * Note: svm_trace_vmentry will recheck the tb_init_done,
-         * but this is on the slow path, so who cares 
-         */
-        call svm_trace_vmentry
-        jmp  svm_trace_done
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/vmx/Makefile     Thu Apr 24 14:08:29 2008 -0600
@@ -1,6 +1,4 @@ subdir-$(x86_32) += x86_32
-subdir-$(x86_32) += x86_32
-subdir-$(x86_64) += x86_64
-
+obj-y += entry.o
 obj-y += intr.o
 obj-y += realmode.o
 obj-y += vmcs.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/entry.S
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/entry.S      Thu Apr 24 14:08:29 2008 -0600
@@ -0,0 +1,198 @@
+/*
+ * entry.S: VMX architecture-specific entry/exit handling.
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2008, Citrix Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/softirq.h>
+#include <asm/types.h>
+#include <asm/asm_defns.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <public/xen.h>
+
+#define VMRESUME     .byte 0x0f,0x01,0xc3
+#define VMLAUNCH     .byte 0x0f,0x01,0xc2
+#define VMREAD(off)  .byte 0x0f,0x78,0x47,((off)-UREGS_rip)
+#define VMWRITE(off) .byte 0x0f,0x79,0x47,((off)-UREGS_rip)
+
+/* VMCS field encodings */
+#define GUEST_RSP    0x681c
+#define GUEST_RIP    0x681e
+#define GUEST_RFLAGS 0x6820
+
+#define get_current(reg)                        \
+        mov $STACK_SIZE-BYTES_PER_LONG, r(reg); \
+        or  r(sp), r(reg);                      \
+        and $~(BYTES_PER_LONG-1),r(reg);        \
+        mov (r(reg)),r(reg);
+
+#if defined(__x86_64__)
+#define r(reg) %r##reg
+#define addr_of(lbl) lbl(%rip)
+#define call_with_regs(fn)                      \
+        mov  %rsp,%rdi;                         \
+        call fn;
+#else /* defined(__i386__) */
+#define r(reg) %e##reg
+#define addr_of(lbl) lbl
+#define UREGS_rip UREGS_eip
+#define UREGS_rsp UREGS_esp
+#define call_with_regs(fn)                      \
+        mov  %esp,%eax;                         \
+        push %eax;                              \
+        call fn;                                \
+        add  $4,%esp;
+#endif
+
+        ALIGN
+.globl vmx_asm_vmexit_handler
+vmx_asm_vmexit_handler:
+#if defined(__x86_64__)
+        push %rdi
+        push %rsi
+        push %rdx
+        push %rcx
+        push %rax
+        push %r8
+        push %r9
+        push %r10
+        push %r11
+        push %rbx
+        push %rbp
+        push %r12
+        push %r13
+        push %r14
+        push %r15
+#else /* defined(__i386__) */
+        push %eax
+        push %ebp
+        push %edi
+        push %esi
+        push %edx
+        push %ecx
+        push %ebx
+#endif
+
+        get_current(bx)
+
+        movb $1,VCPU_vmx_launched(r(bx))
+
+        lea  UREGS_rip(r(sp)),r(di)
+        mov  $GUEST_RIP,%eax
+        /*VMREAD(UREGS_rip)*/
+        .byte 0x0f,0x78,0x07  /* vmread r(ax),(r(di)) */
+        mov  $GUEST_RSP,%eax
+        VMREAD(UREGS_rsp)
+        mov  $GUEST_RFLAGS,%eax
+        VMREAD(UREGS_eflags)
+
+        mov  %cr2,r(ax)
+        mov  r(ax),VCPU_hvm_guest_cr2(r(bx))
+
+#ifndef NDEBUG
+        mov  $0xbeef,%ax
+        mov  %ax,UREGS_error_code(r(sp))
+        mov  %ax,UREGS_entry_vector(r(sp))
+        mov  %ax,UREGS_saved_upcall_mask(r(sp))
+        mov  %ax,UREGS_cs(r(sp))
+        mov  %ax,UREGS_ds(r(sp))
+        mov  %ax,UREGS_es(r(sp))
+        mov  %ax,UREGS_fs(r(sp))
+        mov  %ax,UREGS_gs(r(sp))
+        mov  %ax,UREGS_ss(r(sp))
+#endif
+
+        call_with_regs(vmx_vmexit_handler)
+
+.globl vmx_asm_do_vmentry
+vmx_asm_do_vmentry:
+        get_current(bx)
+        cli
+
+        mov  VCPU_processor(r(bx)),%eax
+        shl  $IRQSTAT_shift,r(ax)
+        lea  addr_of(irq_stat),r(dx)
+        cmpl $0,(r(dx),r(ax),1)
+        jnz  .Lvmx_process_softirqs
+
+        call vmx_intr_assist
+
+        testb $0xff,VCPU_vmx_emul(r(bx))
+        jnz  .Lvmx_goto_realmode
+
+        mov  VCPU_hvm_guest_cr2(r(bx)),r(ax)
+        mov  r(ax),%cr2
+        call vmx_trace_vmentry
+
+        lea  UREGS_rip(r(sp)),r(di)
+        mov  $GUEST_RIP,%eax
+        /*VMWRITE(UREGS_rip)*/
+        .byte 0x0f,0x79,0x07  /* vmwrite (r(di)),r(ax) */
+        mov  $GUEST_RSP,%eax
+        VMWRITE(UREGS_rsp)
+        mov  $GUEST_RFLAGS,%eax
+        VMWRITE(UREGS_eflags)
+
+        cmpb $0,VCPU_vmx_launched(r(bx))
+#if defined(__x86_64__)
+        pop  %r15
+        pop  %r14
+        pop  %r13
+        pop  %r12
+        pop  %rbp
+        pop  %rbx
+        pop  %r11
+        pop  %r10
+        pop  %r9
+        pop  %r8
+        pop  %rax
+        pop  %rcx
+        pop  %rdx
+        pop  %rsi
+        pop  %rdi
+#else /* defined(__i386__) */
+        pop  %ebx
+        pop  %ecx
+        pop  %edx
+        pop  %esi
+        pop  %edi
+        pop  %ebp
+        pop  %eax
+#endif
+        je   .Lvmx_launch
+
+/*.Lvmx_resume:*/
+        VMRESUME
+        call vm_resume_fail
+        ud2
+
+.Lvmx_launch:
+        VMLAUNCH
+        call vm_launch_fail
+        ud2
+
+.Lvmx_goto_realmode:
+        sti
+        call_with_regs(vmx_realmode)
+        jmp  vmx_asm_do_vmentry
+
+.Lvmx_process_softirqs:
+        sti
+        call do_softirq
+        jmp  vmx_asm_do_vmentry
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/vmx/intr.c       Thu Apr 24 14:08:29 2008 -0600
@@ -111,15 +111,17 @@ static void vmx_dirq_assist(struct vcpu 
     struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
     struct dev_intx_gsi_link *digl;
 
-    if ( !vtd_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
+    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
         return;
 
     for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
           irq < NR_IRQS;
           irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
     {
+        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
+            continue;
+
         stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]);
-        clear_bit(irq, &hvm_irq_dpci->dirq_mask);
 
         list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
         {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Thu Apr 24 14:08:29 2008 -0600
@@ -1622,17 +1622,11 @@ static int vmx_msr_read_intercept(struct
     u64 msr_content = 0;
     u32 ecx = regs->ecx, eax, edx;
     struct vcpu *v = current;
-    int index;
-    u64 *var_range_base = (u64*)v->arch.hvm_vcpu.mtrr.var_ranges;
-    u64 *fixed_range_base =  (u64*)v->arch.hvm_vcpu.mtrr.fixed_ranges;
 
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
 
     switch ( ecx )
     {
-    case MSR_IA32_TSC:
-        msr_content = hvm_get_guest_time(v);
-        break;
     case MSR_IA32_SYSENTER_CS:
         msr_content = (u32)__vmread(GUEST_SYSENTER_CS);
         break;
@@ -1641,35 +1635,6 @@ static int vmx_msr_read_intercept(struct
         break;
     case MSR_IA32_SYSENTER_EIP:
         msr_content = __vmread(GUEST_SYSENTER_EIP);
-        break;
-    case MSR_IA32_APICBASE:
-        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
-        break;
-    case MSR_IA32_CR_PAT:
-        msr_content = v->arch.hvm_vcpu.pat_cr;
-        break;
-    case MSR_MTRRcap:
-        msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
-        break;
-    case MSR_MTRRdefType:
-        msr_content = v->arch.hvm_vcpu.mtrr.def_type
-                        | (v->arch.hvm_vcpu.mtrr.enabled << 10);
-        break;
-    case MSR_MTRRfix64K_00000:
-        msr_content = fixed_range_base[0];
-        break;
-    case MSR_MTRRfix16K_80000:
-    case MSR_MTRRfix16K_A0000:
-        index = regs->ecx - MSR_MTRRfix16K_80000;
-        msr_content = fixed_range_base[index + 1];
-        break;
-    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
-        index = regs->ecx - MSR_MTRRfix4K_C0000;
-        msr_content = fixed_range_base[index + 3];
-        break;
-    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
-        index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
-        msr_content = var_range_base[index];
         break;
     case MSR_IA32_DEBUGCTLMSR:
         msr_content = __vmread(GUEST_IA32_DEBUGCTL);
@@ -1679,17 +1644,6 @@ static int vmx_msr_read_intercept(struct
         break;
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
-    case MSR_IA32_MCG_CAP:
-    case MSR_IA32_MCG_STATUS:
-    case MSR_IA32_MC0_STATUS:
-    case MSR_IA32_MC1_STATUS:
-    case MSR_IA32_MC2_STATUS:
-    case MSR_IA32_MC3_STATUS:
-    case MSR_IA32_MC4_STATUS:
-    case MSR_IA32_MC5_STATUS:
-        /* No point in letting the guest see real MCEs */
-        msr_content = 0;
-        break;
     case MSR_IA32_MISC_ENABLE:
         rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
         /* Debug Trace Store is not supported. */
@@ -1729,8 +1683,8 @@ static int vmx_msr_read_intercept(struct
         goto gp_fault;
     }
 
-    regs->eax = msr_content & 0xFFFFFFFF;
-    regs->edx = msr_content >> 32;
+    regs->eax = (uint32_t)msr_content;
+    regs->edx = (uint32_t)(msr_content >> 32);
 
 done:
     hvmtrace_msr_read(v, ecx, msr_content);
@@ -1833,19 +1787,11 @@ void vmx_vlapic_msr_changed(struct vcpu 
     vmx_vmcs_exit(v);
 }
 
-extern bool_t mtrr_var_range_msr_set(struct mtrr_state *v,
-        u32 msr, u64 msr_content);
-extern bool_t mtrr_fix_range_msr_set(struct mtrr_state *v,
-        int row, u64 msr_content);
-extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
-extern bool_t pat_msr_set(u64 *pat, u64 msr);
-
 static int vmx_msr_write_intercept(struct cpu_user_regs *regs)
 {
     u32 ecx = regs->ecx;
     u64 msr_content;
     struct vcpu *v = current;
-    int index;
 
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
                 ecx, (u32)regs->eax, (u32)regs->edx);
@@ -1856,10 +1802,6 @@ static int vmx_msr_write_intercept(struc
 
     switch ( ecx )
     {
-    case MSR_IA32_TSC:
-        hvm_set_guest_time(v, msr_content);
-        pt_reset(v);
-        break;
     case MSR_IA32_SYSENTER_CS:
         __vmwrite(GUEST_SYSENTER_CS, msr_content);
         break;
@@ -1869,41 +1811,6 @@ static int vmx_msr_write_intercept(struc
     case MSR_IA32_SYSENTER_EIP:
         __vmwrite(GUEST_SYSENTER_EIP, msr_content);
         break;
-    case MSR_IA32_APICBASE:
-        vlapic_msr_set(vcpu_vlapic(v), msr_content);
-        break;
-    case MSR_IA32_CR_PAT:
-        if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
-           goto gp_fault;
-        break;
-    case MSR_MTRRdefType:
-        if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
-           goto gp_fault;
-        break;
-    case MSR_MTRRfix64K_00000:
-        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
-            goto gp_fault;
-        break;
-    case MSR_MTRRfix16K_80000:
-    case MSR_MTRRfix16K_A0000:
-        index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
-        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
-                                     index, msr_content) )
-            goto gp_fault;
-        break;
-    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
-        index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
-        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
-                                     index, msr_content) )
-            goto gp_fault;
-        break;
-    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
-        if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
-                                     regs->ecx, msr_content) )
-            goto gp_fault;
-        break;
-    case MSR_MTRRcap:
-        goto gp_fault;
     case MSR_IA32_DEBUGCTLMSR: {
         int i, rc = 0;
 
@@ -2330,12 +2237,12 @@ asmlinkage void vmx_vmexit_handler(struc
         break;
     case EXIT_REASON_MSR_READ:
         inst_len = __get_instruction_length(); /* Safe: RDMSR */
-        if ( vmx_msr_read_intercept(regs) == X86EMUL_OKAY )
+        if ( hvm_msr_read_intercept(regs) == X86EMUL_OKAY )
             __update_guest_eip(inst_len);
         break;
     case EXIT_REASON_MSR_WRITE:
         inst_len = __get_instruction_length(); /* Safe: WRMSR */
-        if ( vmx_msr_write_intercept(regs) == X86EMUL_OKAY )
+        if ( hvm_msr_write_intercept(regs) == X86EMUL_OKAY )
             __update_guest_eip(inst_len);
         break;
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_32/Makefile
--- a/xen/arch/x86/hvm/vmx/x86_32/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-obj-y += exits.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_32/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S       Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,148 +0,0 @@
-/*
- * exits.S: VMX architecture-specific exit handling.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <xen/softirq.h>
-#include <asm/asm_defns.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <public/xen.h>
-
-#define VMRESUME     .byte 0x0f,0x01,0xc3
-#define VMLAUNCH     .byte 0x0f,0x01,0xc2
-#define VMREAD(off)  .byte 0x0f,0x78,0x44,0x24,off
-#define VMWRITE(off) .byte 0x0f,0x79,0x44,0x24,off
-
-/* VMCS field encodings */
-#define GUEST_RSP    0x681c
-#define GUEST_RIP    0x681e
-#define GUEST_RFLAGS 0x6820
-
-#define GET_CURRENT(reg)         \
-        movl $STACK_SIZE-4, reg; \
-        orl  %esp, reg;          \
-        andl $~3,reg;            \
-        movl (reg),reg;
-
-#define HVM_SAVE_ALL_NOSEGREGS                                              \
-        pushl %eax;                                                         \
-        pushl %ebp;                                                         \
-        pushl %edi;                                                         \
-        pushl %esi;                                                         \
-        pushl %edx;                                                         \
-        pushl %ecx;                                                         \
-        pushl %ebx;
-
-#define HVM_RESTORE_ALL_NOSEGREGS               \
-        popl %ebx;                              \
-        popl %ecx;                              \
-        popl %edx;                              \
-        popl %esi;                              \
-        popl %edi;                              \
-        popl %ebp;                              \
-        popl %eax
-
-        ALIGN
-ENTRY(vmx_asm_vmexit_handler)
-        HVM_SAVE_ALL_NOSEGREGS
-        GET_CURRENT(%ebx)
-
-        movl $GUEST_RIP,%eax
-        VMREAD(UREGS_eip)
-        movl $GUEST_RSP,%eax
-        VMREAD(UREGS_esp)
-        movl $GUEST_RFLAGS,%eax
-        VMREAD(UREGS_eflags)
-
-        movl %cr2,%eax
-        movl %eax,VCPU_hvm_guest_cr2(%ebx)
-
-#ifndef NDEBUG
-        movw $0xbeef,%ax
-        movw %ax,UREGS_error_code(%esp)
-        movw %ax,UREGS_entry_vector(%esp)
-        movw %ax,UREGS_saved_upcall_mask(%esp)
-        movw %ax,UREGS_cs(%esp)
-        movw %ax,UREGS_ds(%esp)
-        movw %ax,UREGS_es(%esp)
-        movw %ax,UREGS_fs(%esp)
-        movw %ax,UREGS_gs(%esp)
-        movw %ax,UREGS_ss(%esp)
-#endif
-
-        movl %esp,%eax
-        push %eax
-        call vmx_vmexit_handler
-        addl $4,%esp
-        jmp vmx_asm_do_vmentry
-
-        ALIGN
-vmx_process_softirqs:
-        sti
-        call do_softirq
-        jmp vmx_asm_do_vmentry
-
-        ALIGN
-ENTRY(vmx_asm_do_vmentry)
-        GET_CURRENT(%ebx)
-        cli                             # tests must not race interrupts
-
-        movl VCPU_processor(%ebx),%eax
-        shl  $IRQSTAT_shift,%eax
-        cmpl $0,irq_stat(%eax,1)
-        jnz  vmx_process_softirqs
-
-        call vmx_intr_assist
-
-        testb $0xff,VCPU_vmx_emul(%ebx)
-        jnz  vmx_goto_realmode
-
-        movl VCPU_hvm_guest_cr2(%ebx),%eax
-        movl %eax,%cr2
-        call vmx_trace_vmentry
-
-        movl $GUEST_RIP,%eax
-        VMWRITE(UREGS_eip)
-        movl $GUEST_RSP,%eax
-        VMWRITE(UREGS_esp)
-        movl $GUEST_RFLAGS,%eax
-        VMWRITE(UREGS_eflags)
-
-        cmpb $0,VCPU_vmx_launched(%ebx)
-        je   vmx_launch
-
-/*vmx_resume:*/
-        HVM_RESTORE_ALL_NOSEGREGS
-        VMRESUME
-        call vm_resume_fail
-        ud2
-
-vmx_launch:
-        movb $1,VCPU_vmx_launched(%ebx)
-        HVM_RESTORE_ALL_NOSEGREGS
-        VMLAUNCH
-        call vm_launch_fail
-        ud2
-
-vmx_goto_realmode:
-        sti
-        movl %esp,%eax
-        push %eax
-        call vmx_realmode
-        addl $4,%esp
-        jmp vmx_asm_do_vmentry
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_64/Makefile
--- a/xen/arch/x86/hvm/vmx/x86_64/Makefile      Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-obj-y += exits.o
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/hvm/vmx/x86_64/exits.S
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S       Thu Apr 24 14:02:16 2008 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,165 +0,0 @@
-/*
- * exits.S: VMX architecture-specific exit handling.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <xen/softirq.h>
-#include <asm/asm_defns.h>
-#include <asm/apicdef.h>
-#include <asm/page.h>
-#include <public/xen.h>
-
-#define VMRESUME     .byte 0x0f,0x01,0xc3
-#define VMLAUNCH     .byte 0x0f,0x01,0xc2
-#define VMREAD(off)  .byte 0x0f,0x78,0x47,((off)-UREGS_rip)
-#define VMWRITE(off) .byte 0x0f,0x79,0x47,((off)-UREGS_rip)
-
-/* VMCS field encodings */
-#define GUEST_RSP    0x681c
-#define GUEST_RIP    0x681e
-#define GUEST_RFLAGS 0x6820
-
-#define GET_CURRENT(reg)         \
-        movq $STACK_SIZE-8, reg; \
-        orq  %rsp, reg;          \
-        andq $~7,reg;            \
-        movq (reg),reg;
-
-#define HVM_SAVE_ALL_NOSEGREGS                  \
-        pushq %rdi;                             \
-        pushq %rsi;                             \
-        pushq %rdx;                             \
-        pushq %rcx;                             \
-        pushq %rax;                             \
-        pushq %r8;                              \
-        pushq %r9;                              \
-        pushq %r10;                             \
-        pushq %r11;                             \
-        pushq %rbx;                             \
-        pushq %rbp;                             \
-        pushq %r12;                             \
-        pushq %r13;                             \
-        pushq %r14;                             \
-        pushq %r15;
-
-#define HVM_RESTORE_ALL_NOSEGREGS               \
-        popq %r15;                              \
-        popq %r14;                              \
-        popq %r13;                              \
-        popq %r12;                              \
-        popq %rbp;                              \
-        popq %rbx;                              \
-        popq %r11;                              \
-        popq %r10;                              \
-        popq %r9;                               \
-        popq %r8;                               \
-        popq %rax;                              \
-        popq %rcx;                              \
-        popq %rdx;                              \
-        popq %rsi;                              \
-        popq %rdi
-
-        ALIGN
-ENTRY(vmx_asm_vmexit_handler)
-        HVM_SAVE_ALL_NOSEGREGS
-        GET_CURRENT(%rbx)
-
-        leaq UREGS_rip(%rsp),%rdi
-        movl $GUEST_RIP,%eax
-        /*VMREAD(UREGS_rip)*/
-        .byte 0x0f,0x78,0x07  /* vmread %rax,(%rdi) */
-        movl $GUEST_RSP,%eax
-        VMREAD(UREGS_rsp)
-        movl $GUEST_RFLAGS,%eax
-        VMREAD(UREGS_eflags)
-
-        movq %cr2,%rax
-        movq %rax,VCPU_hvm_guest_cr2(%rbx)
-
-#ifndef NDEBUG
-        movw $0xbeef,%ax
-        movw %ax,UREGS_error_code(%rsp)
-        movw %ax,UREGS_entry_vector(%rsp)
-        movw %ax,UREGS_saved_upcall_mask(%rsp)
-        movw %ax,UREGS_cs(%rsp)
-        movw %ax,UREGS_ds(%rsp)
-        movw %ax,UREGS_es(%rsp)
-        movw %ax,UREGS_fs(%rsp)
-        movw %ax,UREGS_gs(%rsp)
-        movw %ax,UREGS_ss(%rsp)
-#endif
-
-        movq %rsp,%rdi
-        call vmx_vmexit_handler
-        jmp vmx_asm_do_vmentry
-
-        ALIGN
-vmx_process_softirqs:
-        sti
-        call do_softirq
-        jmp vmx_asm_do_vmentry
-
-        ALIGN
-ENTRY(vmx_asm_do_vmentry)
-        GET_CURRENT(%rbx)
-        cli                             # tests must not race interrupts
-
-        movl  VCPU_processor(%rbx),%eax
-        shl   $IRQSTAT_shift,%rax
-        leaq  irq_stat(%rip),%rdx
-        cmpl  $0,(%rdx,%rax,1)
-        jnz   vmx_process_softirqs
-
-        call vmx_intr_assist
-
-        testb $0xff,VCPU_vmx_emul(%rbx)
-        jnz  vmx_goto_realmode
-
-        movq VCPU_hvm_guest_cr2(%rbx),%rax
-        movq %rax,%cr2
-        call vmx_trace_vmentry
-
-        leaq UREGS_rip(%rsp),%rdi
-        movl $GUEST_RIP,%eax
-        /*VMWRITE(UREGS_rip)*/
-        .byte 0x0f,0x79,0x07  /* vmwrite (%rdi),%rax */
-        movl $GUEST_RSP,%eax
-        VMWRITE(UREGS_rsp)
-        movl $GUEST_RFLAGS,%eax
-        VMWRITE(UREGS_eflags)
-
-        cmpb $0,VCPU_vmx_launched(%rbx)
-        je   vmx_launch
-
-/*vmx_resume:*/
-        HVM_RESTORE_ALL_NOSEGREGS
-        VMRESUME
-        call vm_resume_fail
-        ud2
-
-vmx_launch:
-        movb $1,VCPU_vmx_launched(%rbx)
-        HVM_RESTORE_ALL_NOSEGREGS
-        VMLAUNCH
-        call vm_launch_fail
-        ud2
-
-vmx_goto_realmode:
-        sti
-        movq %rsp,%rdi
-        call vmx_realmode
-        jmp vmx_asm_do_vmentry
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/mm.c Thu Apr 24 14:08:29 2008 -0600
@@ -3279,15 +3279,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
         case XENMAPSPACE_shared_info:
             if ( xatp.idx == 0 )
                 mfn = virt_to_mfn(d->shared_info);
-            /* XXX: assumption here, this is called after E820 table is build
-             * need the E820 to initialize MTRR.
-             */
-            if ( is_hvm_domain(d) ) {
-                extern void init_mtrr_in_hyper(struct vcpu *);
-                struct vcpu *vs;
-                for_each_vcpu(d, vs)
-                    init_mtrr_in_hyper(vs);
-            }
             break;
         case XENMAPSPACE_grant_table:
             spin_lock(&d->grant_table->lock);
@@ -3625,29 +3616,18 @@ static int ptwr_emulated_cmpxchg(
 static int ptwr_emulated_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
-    unsigned long old,
-    unsigned long new,
+    void *p_old,
+    void *p_new,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    paddr_t old = 0, new = 0;
+    if ( bytes > sizeof(paddr_t) )
+        return X86EMUL_UNHANDLEABLE;
+    memcpy(&old, p_old, bytes);
+    memcpy(&new, p_new, bytes);
     return ptwr_emulated_update(
         offset, old, new, bytes, 1,
-        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
-}
-
-static int ptwr_emulated_cmpxchg8b(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long old,
-    unsigned long old_hi,
-    unsigned long new,
-    unsigned long new_hi,
-    struct x86_emulate_ctxt *ctxt)
-{
-    if ( CONFIG_PAGING_LEVELS == 2 )
-        return X86EMUL_UNHANDLEABLE;
-    return ptwr_emulated_update(
-        offset, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1,
         container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
 }
 
@@ -3656,7 +3636,6 @@ static struct x86_emulate_ops ptwr_emula
     .insn_fetch = ptwr_emulated_read,
     .write      = ptwr_emulated_write,
     .cmpxchg    = ptwr_emulated_cmpxchg,
-    .cmpxchg8b  = ptwr_emulated_cmpxchg8b
 };
 
 /* Write page fault handler: check if guest is trying to modify a PTE. */
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/mm/shadow/common.c   Thu Apr 24 14:08:29 2008 -0600
@@ -239,15 +239,15 @@ static int
 static int 
 hvm_emulate_cmpxchg(enum x86_segment seg,
                     unsigned long offset,
-                    unsigned long old,
-                    unsigned long new,
+                    void *p_old,
+                    void *p_new,
                     unsigned int bytes,
                     struct x86_emulate_ctxt *ctxt)
 {
     struct sh_emulate_ctxt *sh_ctxt =
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
     struct vcpu *v = current;
-    unsigned long addr;
+    unsigned long addr, old[2], new[2];
     int rc;
 
     if ( !is_x86_user_segment(seg) )
@@ -258,35 +258,21 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     if ( rc )
         return rc;
 
-    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
-        v, addr, old, new, bytes, sh_ctxt);
-}
-
-static int 
-hvm_emulate_cmpxchg8b(enum x86_segment seg,
-                      unsigned long offset,
-                      unsigned long old_lo,
-                      unsigned long old_hi,
-                      unsigned long new_lo,
-                      unsigned long new_hi,
-                      struct x86_emulate_ctxt *ctxt)
-{
-    struct sh_emulate_ctxt *sh_ctxt =
-        container_of(ctxt, struct sh_emulate_ctxt, ctxt);
-    struct vcpu *v = current;
-    unsigned long addr;
-    int rc;
-
-    if ( !is_x86_user_segment(seg) )
-        return X86EMUL_UNHANDLEABLE;
-
-    rc = hvm_translate_linear_addr(
-        seg, offset, 8, hvm_access_write, sh_ctxt, &addr);
-    if ( rc )
-        return rc;
-
-    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
-        v, addr, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
+    old[0] = new[0] = 0;
+    memcpy(old, p_old, bytes);
+    memcpy(new, p_new, bytes);
+
+    if ( bytes <= sizeof(long) )
+        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
+            v, addr, old[0], new[0], bytes, sh_ctxt);
+
+#ifdef __i386__
+    if ( bytes == 8 )
+        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
+            v, addr, old[0], old[1], new[0], new[1], sh_ctxt);
+#endif
+
+    return X86EMUL_UNHANDLEABLE;
 }
 
 static struct x86_emulate_ops hvm_shadow_emulator_ops = {
@@ -294,7 +280,6 @@ static struct x86_emulate_ops hvm_shadow
     .insn_fetch = hvm_emulate_insn_fetch,
     .write      = hvm_emulate_write,
     .cmpxchg    = hvm_emulate_cmpxchg,
-    .cmpxchg8b  = hvm_emulate_cmpxchg8b,
 };
 
 static int
@@ -338,36 +323,34 @@ static int
 static int 
 pv_emulate_cmpxchg(enum x86_segment seg,
                    unsigned long offset,
-                   unsigned long old,
-                   unsigned long new,
+                   void *p_old,
+                   void *p_new,
                    unsigned int bytes,
                    struct x86_emulate_ctxt *ctxt)
 {
     struct sh_emulate_ctxt *sh_ctxt =
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
+    unsigned long old[2], new[2];
     struct vcpu *v = current;
+
     if ( !is_x86_user_segment(seg) )
         return X86EMUL_UNHANDLEABLE;
-    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
-        v, offset, old, new, bytes, sh_ctxt);
-}
-
-static int 
-pv_emulate_cmpxchg8b(enum x86_segment seg,
-                     unsigned long offset,
-                     unsigned long old_lo,
-                     unsigned long old_hi,
-                     unsigned long new_lo,
-                     unsigned long new_hi,
-                     struct x86_emulate_ctxt *ctxt)
-{
-    struct sh_emulate_ctxt *sh_ctxt =
-        container_of(ctxt, struct sh_emulate_ctxt, ctxt);
-    struct vcpu *v = current;
-    if ( !is_x86_user_segment(seg) )
-        return X86EMUL_UNHANDLEABLE;
-    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
-        v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
+
+    old[0] = new[0] = 0;
+    memcpy(old, p_old, bytes);
+    memcpy(new, p_new, bytes);
+
+    if ( bytes <= sizeof(long) )
+        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
+            v, offset, old[0], new[0], bytes, sh_ctxt);
+
+#ifdef __i386__
+    if ( bytes == 8 )
+        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
+            v, offset, old[0], old[1], new[0], new[1], sh_ctxt);
+#endif
+
+    return X86EMUL_UNHANDLEABLE;
 }
 
 static struct x86_emulate_ops pv_shadow_emulator_ops = {
@@ -375,7 +358,6 @@ static struct x86_emulate_ops pv_shadow_
     .insn_fetch = pv_emulate_read,
     .write      = pv_emulate_write,
     .cmpxchg    = pv_emulate_cmpxchg,
-    .cmpxchg8b  = pv_emulate_cmpxchg8b,
 };
 
 struct x86_emulate_ops *shadow_init_emulation(
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c    Thu Apr 24 14:08:29 2008 -0600
@@ -2089,7 +2089,7 @@ static shadow_l1e_t * shadow_get_and_cre
         else 
         {
             /* Shadowing an actual guest l1 table */
-            if ( !mfn_valid(gw->l2mfn) ) return NULL; /* No guest page. */
+            if ( !mfn_valid(gw->l1mfn) ) return NULL; /* No guest page. */
             *sl1mfn = get_shadow_status(v, gw->l1mfn, SH_type_l1_shadow);
             if ( !mfn_valid(*sl1mfn) ) 
             {
@@ -4365,7 +4365,7 @@ static void emulate_unmap_dest(struct vc
     atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version);
 }
 
-int
+static int
 sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
                       u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
 {
@@ -4389,7 +4389,7 @@ sh_x86_emulate_write(struct vcpu *v, uns
     return X86EMUL_OKAY;
 }
 
-int
+static int
 sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr, 
                         unsigned long old, unsigned long new,
                         unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
@@ -4432,7 +4432,8 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
     return rv;
 }
 
-int
+#ifdef __i386__
+static int
 sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr, 
                           unsigned long old_lo, unsigned long old_hi,
                           unsigned long new_lo, unsigned long new_hi,
@@ -4465,7 +4466,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
     shadow_unlock(v->domain);
     return rv;
 }
-
+#endif
 
 /**************************************************************************/
 /* Audit tools */
@@ -4738,7 +4739,9 @@ struct paging_mode sh_paging_mode = {
     .shadow.detach_old_tables      = sh_detach_old_tables,
     .shadow.x86_emulate_write      = sh_x86_emulate_write,
     .shadow.x86_emulate_cmpxchg    = sh_x86_emulate_cmpxchg,
+#ifdef __i386__
     .shadow.x86_emulate_cmpxchg8b  = sh_x86_emulate_cmpxchg8b,
+#endif
     .shadow.make_monitor_table     = sh_make_monitor_table,
     .shadow.destroy_monitor_table  = sh_destroy_monitor_table,
 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/setup.c      Thu Apr 24 14:08:29 2008 -0600
@@ -1019,10 +1019,6 @@ void __init __start_xen(unsigned long mb
         _initrd_len   = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
     }
 
-    iommu_setup();
-
-    amd_iommu_detect();
-
     /*
      * We're going to setup domain0 using the module(s) that we stashed safely
      * above our heap. The second module, if present, is an initrd ramdisk.
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/smp.c        Thu Apr 24 14:08:29 2008 -0600
@@ -75,20 +75,10 @@ static inline int __prepare_ICR2 (unsign
     return SET_APIC_DEST_FIELD(mask);
 }
 
-static inline void check_IPI_mask(cpumask_t cpumask)
-{
-    /*
-     * Sanity, and necessary. An IPI with no target generates a send accept
-     * error with Pentium and P6 APICs.
-     */
-    ASSERT(cpus_subset(cpumask, cpu_online_map));
-    ASSERT(!cpus_empty(cpumask));
-}
-
 void apic_wait_icr_idle(void)
 {
-       while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
-               cpu_relax();
+    while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
+        cpu_relax();
 }
 
 void send_IPI_mask_flat(cpumask_t cpumask, int vector)
@@ -97,7 +87,8 @@ void send_IPI_mask_flat(cpumask_t cpumas
     unsigned long cfg;
     unsigned long flags;
 
-    check_IPI_mask(cpumask);
+    /* An IPI with no target generates a send accept error from P5/P6 APICs. */
+    WARN_ON(mask == 0);
 
     local_irq_save(flags);
 
@@ -130,17 +121,9 @@ void send_IPI_mask_phys(cpumask_t mask, 
     unsigned long cfg, flags;
     unsigned int query_cpu;
 
-    check_IPI_mask(mask);
-
-    /*
-     * Hack. The clustered APIC addressing mode doesn't allow us to send 
-     * to an arbitrary mask, so I do a unicasts to each CPU instead. This 
-     * should be modified to do 1 message per cluster ID - mbligh
-     */ 
-
     local_irq_save(flags);
 
-    for_each_cpu_mask( query_cpu, mask )
+    for_each_cpu_mask ( query_cpu, mask )
     {
         /*
          * Wait for idle.
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/traps.c      Thu Apr 24 14:08:29 2008 -0600
@@ -479,6 +479,7 @@ static inline void do_trap(
 static inline void do_trap(
     int trapnr, struct cpu_user_regs *regs, int use_error_code)
 {
+    struct vcpu *curr = current;
     unsigned long fixup;
 
     DEBUGGER_trap_entry(trapnr, regs);
@@ -494,6 +495,14 @@ static inline void do_trap(
         dprintk(XENLOG_ERR, "Trap %d: %p -> %p\n",
                 trapnr, _p(regs->eip), _p(fixup));
         regs->eip = fixup;
+        return;
+    }
+
+    if ( ((trapnr == TRAP_copro_error) || (trapnr == TRAP_simd_error)) &&
+         is_hvm_vcpu(curr) && curr->arch.hvm_vcpu.fpu_exception_callback )
+    {
+        curr->arch.hvm_vcpu.fpu_exception_callback(
+            curr->arch.hvm_vcpu.fpu_exception_callback_arg, regs);
         return;
     }
 
@@ -1399,6 +1408,13 @@ static int admin_io_okay(
     unsigned int port, unsigned int bytes,
     struct vcpu *v, struct cpu_user_regs *regs)
 {
+    /*
+     * Port 0xcf8 (CONFIG_ADDRESS) is only visible for DWORD accesses.
+     * We never permit direct access to that register.
+     */
+    if ( (port == 0xcf8) && (bytes == 4) )
+        return 0;
+
     return ioports_access_permitted(v->domain, port, port + bytes - 1);
 }
 
@@ -1431,10 +1447,10 @@ static uint32_t guest_io_read(
         {
             sub_data = pv_pit_handler(port, 0, 0);
         }
-        else if ( (port & 0xfffc) == 0xcf8 )
-        {
-            size = min(bytes, 4 - (port & 3));
-            sub_data = v->domain->arch.pci_cf8 >> ((port & 3) * 8);
+        else if ( (port == 0xcf8) && (bytes == 4) )
+        {
+            size = 4;
+            sub_data = v->domain->arch.pci_cf8;
         }
         else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
         {
@@ -1489,19 +1505,10 @@ static void guest_io_write(
         {
             pv_pit_handler(port, (uint8_t)data, 1);
         }
-        else if ( (port & 0xfffc) == 0xcf8 )
-        {
-            size = min(bytes, 4 - (port & 3));
-            if ( size == 4 )
-            {
-                v->domain->arch.pci_cf8 = data;
-            }
-            else
-            {
-                uint32_t mask = ((1u << (size * 8)) - 1) << ((port & 3) * 8);
-                v->domain->arch.pci_cf8 &= ~mask;
-                v->domain->arch.pci_cf8 |= (data << ((port & 3) * 8)) & mask;
-            }
+        else if ( (port == 0xcf8) && (bytes == 4) )
+        {
+            size = 4;
+            v->domain->arch.pci_cf8 = data;
         }
         else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
         {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/x86_emulate.c        Thu Apr 24 14:08:29 2008 -0600
@@ -11,23 +11,7 @@
 
 #include <asm/x86_emulate.h>
 
+/* Avoid namespace pollution. */
 #undef cmpxchg
 
-#define __emulate_fpu_insn(_op)                 \
-do{ int _exn;                                   \
-    asm volatile (                              \
-        "1: " _op "\n"                          \
-        "2: \n"                                 \
-        ".section .fixup,\"ax\"\n"              \
-        "3: mov $1,%0\n"                        \
-        "   jmp 2b\n"                           \
-        ".previous\n"                           \
-        ".section __ex_table,\"a\"\n"           \
-        "   "__FIXUP_ALIGN"\n"                  \
-        "   "__FIXUP_WORD" 1b,3b\n"             \
-        ".previous"                             \
-        : "=r" (_exn) : "0" (0) );              \
-    generate_exception_if(_exn, EXC_MF, -1);    \
-} while (0)
-
 #include "x86_emulate/x86_emulate.c"
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/x86_emulate/x86_emulate.c
--- a/xen/arch/x86/x86_emulate/x86_emulate.c    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c    Thu Apr 24 14:08:29 2008 -0600
@@ -195,9 +195,9 @@ static uint8_t twobyte_table[256] = {
     /* 0x50 - 0x5F */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     /* 0x60 - 0x6F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
     /* 0x70 - 0x7F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
     /* 0x80 - 0x87 */
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
@@ -546,6 +546,62 @@ do {                                    
                      ? (uint16_t)_regs.eip : (uint32_t)_regs.eip);      \
 } while (0)
 
+struct fpu_insn_ctxt {
+    uint8_t insn_bytes;
+    uint8_t exn_raised;
+};
+
+static void fpu_handle_exception(void *_fic, struct cpu_user_regs *regs)
+{
+    struct fpu_insn_ctxt *fic = _fic;
+    fic->exn_raised = 1;
+    regs->eip += fic->insn_bytes;
+}
+
+#define get_fpu(_type, _fic)                                    \
+do{ (_fic)->exn_raised = 0;                                     \
+    fail_if(ops->get_fpu == NULL);                              \
+    rc = ops->get_fpu(fpu_handle_exception, _fic, _type, ctxt); \
+    if ( rc ) goto done;                                        \
+} while (0)
+#define put_fpu(_fic)                                           \
+do{                                                             \
+    if ( ops->put_fpu != NULL )                                 \
+        ops->put_fpu(ctxt);                                     \
+    generate_exception_if((_fic)->exn_raised, EXC_MF, -1);      \
+} while (0)
+
+#define emulate_fpu_insn(_op)                           \
+do{ struct fpu_insn_ctxt fic;                           \
+    get_fpu(X86EMUL_FPU_fpu, &fic);                     \
+    asm volatile (                                      \
+        "movb $2f-1f,%0 \n"                             \
+        "1: " _op "     \n"                             \
+        "2:             \n"                             \
+        : "=m" (fic.insn_bytes) : : "memory" );         \
+    put_fpu(&fic);                                      \
+} while (0)
+
+#define emulate_fpu_insn_memdst(_op, _arg)              \
+do{ struct fpu_insn_ctxt fic;                           \
+    get_fpu(X86EMUL_FPU_fpu, &fic);                     \
+    asm volatile (                                      \
+        "movb $2f-1f,%0 \n"                             \
+        "1: " _op " %1  \n"                             \
+        "2:             \n"                             \
+        : "=m" (fic.insn_bytes), "=m" (_arg)            \
+        : : "memory" );                                 \
+    put_fpu(&fic);                                      \
+} while (0)
+
+#define emulate_fpu_insn_stub(_bytes...)                                \
+do{ uint8_t stub[] = { _bytes, 0xc3 };                                  \
+    struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };        \
+    get_fpu(X86EMUL_FPU_fpu, &fic);                                     \
+    (*(void(*)(void))stub)();                                           \
+    put_fpu(&fic);                                                      \
+} while (0)
+
 static unsigned long __get_rep_prefix(
     struct cpu_user_regs *int_regs,
     struct cpu_user_regs *ext_regs,
@@ -851,6 +907,7 @@ protmode_load_seg(
     struct { uint32_t a, b; } desc;
     unsigned long val;
     uint8_t dpl, rpl, cpl;
+    uint32_t new_desc_b;
     int rc, fault_type = EXC_TS;
 
     /* NULL selector? */
@@ -933,10 +990,11 @@ protmode_load_seg(
         }
 
         /* Ensure Accessed flag is set. */
+        new_desc_b = desc.b | 0x100;
         rc = ((desc.b & 0x100) ? X86EMUL_OKAY : 
               ops->cmpxchg(
-                  x86_seg_none, desctab.base + (sel & 0xfff8) + 4, desc.b,
-                  desc.b | 0x100, 4, ctxt));
+                  x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
+                  &desc.b, &new_desc_b, 4, ctxt));
     } while ( rc == X86EMUL_CMPXCHG_FAILED );
 
     if ( rc )
@@ -2036,8 +2094,8 @@ x86_emulate(
             /* nothing to do */;
         else if ( lock_prefix )
             rc = ops->cmpxchg(
-                dst.mem.seg, dst.mem.off, dst.orig_val,
-                dst.val, dst.bytes, ctxt);
+                dst.mem.seg, dst.mem.off, &dst.orig_val,
+                &dst.val, dst.bytes, ctxt);
         else
             rc = ops->write(
                 dst.mem.seg, dst.mem.off, dst.val, dst.bytes, ctxt);
@@ -2399,9 +2457,7 @@ x86_emulate(
     }
 
     case 0x9b:  /* wait/fwait */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
-        __emulate_fpu_insn("fwait");
+        emulate_fpu_insn("fwait");
         break;
 
     case 0x9c: /* pushf */
@@ -2721,77 +2777,89 @@ x86_emulate(
     }
 
     case 0xd9: /* FPU 0xd9 */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
         switch ( modrm )
         {
-        case 0xc0: __emulate_fpu_insn(".byte 0xd9,0xc0"); break;
-        case 0xc1: __emulate_fpu_insn(".byte 0xd9,0xc1"); break;
-        case 0xc2: __emulate_fpu_insn(".byte 0xd9,0xc2"); break;
-        case 0xc3: __emulate_fpu_insn(".byte 0xd9,0xc3"); break;
-        case 0xc4: __emulate_fpu_insn(".byte 0xd9,0xc4"); break;
-        case 0xc5: __emulate_fpu_insn(".byte 0xd9,0xc5"); break;
-        case 0xc6: __emulate_fpu_insn(".byte 0xd9,0xc6"); break;
-        case 0xc7: __emulate_fpu_insn(".byte 0xd9,0xc7"); break;
-        case 0xe0: __emulate_fpu_insn(".byte 0xd9,0xe0"); break;
-        case 0xe8: __emulate_fpu_insn(".byte 0xd9,0xe8"); break;
-        case 0xee: __emulate_fpu_insn(".byte 0xd9,0xee"); break;
+        case 0xc0 ... 0xc7: /* fld %stN */
+        case 0xc8 ... 0xcf: /* fxch %stN */
+        case 0xd0: /* fnop */
+        case 0xe0: /* fchs */
+        case 0xe1: /* fabs */
+        case 0xe4: /* ftst */
+        case 0xe5: /* fxam */
+        case 0xe8: /* fld1 */
+        case 0xe9: /* fldl2t */
+        case 0xea: /* fldl2e */
+        case 0xeb: /* fldpi */
+        case 0xec: /* fldlg2 */
+        case 0xed: /* fldln2 */
+        case 0xee: /* fldz */
+        case 0xf0: /* f2xm1 */
+        case 0xf1: /* fyl2x */
+        case 0xf2: /* fptan */
+        case 0xf3: /* fpatan */
+        case 0xf4: /* fxtract */
+        case 0xf5: /* fprem1 */
+        case 0xf6: /* fdecstp */
+        case 0xf7: /* fincstp */
+        case 0xf8: /* fprem */
+        case 0xf9: /* fyl2xp1 */
+        case 0xfa: /* fsqrt */
+        case 0xfb: /* fsincos */
+        case 0xfc: /* frndint */
+        case 0xfd: /* fscale */
+        case 0xfe: /* fsin */
+        case 0xff: /* fcos */
+            emulate_fpu_insn_stub(0xd9, modrm);
+            break;
         default:
             fail_if((modrm_reg & 7) != 7);
             fail_if(modrm >= 0xc0);
             /* fnstcw m2byte */
             ea.bytes = 2;
             dst = ea;
-            asm volatile ( "fnstcw %0" : "=m" (dst.val) );
+            emulate_fpu_insn_memdst("fnstcw", dst.val);
         }
         break;
 
     case 0xdb: /* FPU 0xdb */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
         fail_if(modrm != 0xe3);
         /* fninit */
-        asm volatile ( "fninit" );
+        emulate_fpu_insn("fninit");
         break;
 
     case 0xdd: /* FPU 0xdd */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
         fail_if((modrm_reg & 7) != 7);
         fail_if(modrm >= 0xc0);
         /* fnstsw m2byte */
         ea.bytes = 2;
         dst = ea;
-        asm volatile ( "fnstsw %0" : "=m" (dst.val) );
+        emulate_fpu_insn_memdst("fnstsw", dst.val);
         break;
 
     case 0xde: /* FPU 0xde */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
         switch ( modrm )
         {
-        case 0xd9: __emulate_fpu_insn(".byte 0xde,0xd9"); break;
-        case 0xf8: __emulate_fpu_insn(".byte 0xde,0xf8"); break;
-        case 0xf9: __emulate_fpu_insn(".byte 0xde,0xf9"); break;
-        case 0xfa: __emulate_fpu_insn(".byte 0xde,0xfa"); break;
-        case 0xfb: __emulate_fpu_insn(".byte 0xde,0xfb"); break;
-        case 0xfc: __emulate_fpu_insn(".byte 0xde,0xfc"); break;
-        case 0xfd: __emulate_fpu_insn(".byte 0xde,0xfd"); break;
-        case 0xfe: __emulate_fpu_insn(".byte 0xde,0xfe"); break;
-        case 0xff: __emulate_fpu_insn(".byte 0xde,0xff"); break;
-        default: goto cannot_emulate;
+        case 0xc0 ... 0xc7: /* faddp %stN */
+        case 0xc8 ... 0xcf: /* fmulp %stN */
+        case 0xd9: /* fcompp */
+        case 0xe0 ... 0xe7: /* fsubrp %stN */
+        case 0xe8 ... 0xef: /* fsubp %stN */
+        case 0xf0 ... 0xf7: /* fdivrp %stN */
+        case 0xf8 ... 0xff: /* fdivp %stN */
+            emulate_fpu_insn_stub(0xde, modrm);
+            break;
+        default:
+            goto cannot_emulate;
         }
         break;
 
     case 0xdf: /* FPU 0xdf */
-        fail_if(ops->load_fpu_ctxt == NULL);
-        ops->load_fpu_ctxt(ctxt);
         fail_if(modrm != 0xe0);
         /* fnstsw %ax */
         dst.bytes = 2;
         dst.type = OP_REG;
         dst.reg = (unsigned long *)&_regs.eax;
-        asm volatile ( "fnstsw %0" : "=m" (dst.val) );
+        emulate_fpu_insn_memdst("fnstsw", dst.val);
         break;
 
     case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
@@ -2975,6 +3043,7 @@ x86_emulate(
 
     case 0xa3: bt: /* bt */
         emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
+        dst.type = OP_NONE;
         break;
 
     case 0xa4: /* shld imm8,r,r/m */
@@ -3067,7 +3136,11 @@ x86_emulate(
               : "=r" (dst.val), "=q" (zf)
               : "r" (src.val), "1" (0) );
         _regs.eflags &= ~EFLG_ZF;
-        _regs.eflags |= zf ? EFLG_ZF : 0;
+        if ( zf )
+        {
+            _regs.eflags |= EFLG_ZF;
+            dst.type = OP_NONE;
+        }
         break;
     }
 
@@ -3077,7 +3150,11 @@ x86_emulate(
               : "=r" (dst.val), "=q" (zf)
               : "r" (src.val), "1" (0) );
         _regs.eflags &= ~EFLG_ZF;
-        _regs.eflags |= zf ? EFLG_ZF : 0;
+        if ( zf )
+        {
+            _regs.eflags |= EFLG_ZF;
+            dst.type = OP_NONE;
+        }
         break;
     }
 
@@ -3310,6 +3387,44 @@ x86_emulate(
         break;
     }
 
+    case 0x6f: /* movq mm/m64,mm */ {
+        uint8_t stub[] = { 0x0f, 0x6f, modrm, 0xc3 };
+        struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
+        uint64_t val;
+        if ( ea.type == OP_MEM )
+        {
+            unsigned long lval, hval;
+            if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &lval, 4, ctxt)) ||
+                 (rc = ops->read(ea.mem.seg, ea.mem.off+4, &hval, 4, ctxt)) )
+                goto done;
+            val = ((uint64_t)hval << 32) | (uint32_t)lval;
+            stub[2] = modrm & 0x38; /* movq (%eax),%mmN */
+        }
+        get_fpu(X86EMUL_FPU_mmx, &fic);
+        asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" );
+        put_fpu(&fic);
+        break;
+    }
+
+    case 0x7f: /* movq mm,mm/m64 */ {
+        uint8_t stub[] = { 0x0f, 0x7f, modrm, 0xc3 };
+        struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
+        uint64_t val;
+        if ( ea.type == OP_MEM )
+            stub[2] = modrm & 0x38; /* movq %mmN,(%eax) */
+        get_fpu(X86EMUL_FPU_mmx, &fic);
+        asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" );
+        put_fpu(&fic);
+        if ( ea.type == OP_MEM )
+        {
+            unsigned long lval = (uint32_t)val, hval = (uint32_t)(val >> 32);
+            if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0, lval, 4, ctxt)) ||
+                 (rc = ops->write(ea.mem.seg, ea.mem.off+4, hval, 4, ctxt)) )
+                goto done;
+        }
+        break;
+    }
+
     case 0x80 ... 0x8f: /* jcc (near) */ {
         int rel = (((op_bytes == 2) && !mode_64bit())
                    ? (int32_t)insn_fetch_type(int16_t)
@@ -3346,60 +3461,49 @@ x86_emulate(
         src.val = x86_seg_gs;
         goto pop_seg;
 
-    case 0xc7: /* Grp9 (cmpxchg8b) */
-#if defined(__i386__)
-    {
-        unsigned long old_lo, old_hi;
+    case 0xc7: /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
+        unsigned long old[2], exp[2], new[2];
+        unsigned int i;
+
         generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
         generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
-        if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &old_lo, 4, ctxt)) ||
-             (rc = ops->read(ea.mem.seg, ea.mem.off+4, &old_hi, 4, ctxt)) )
-            goto done;
-        if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
-        {
-            _regs.eax = old_lo;
-            _regs.edx = old_hi;
+        op_bytes *= 2;
+
+        /* Get actual old value. */
+        for ( i = 0; i < (op_bytes/sizeof(long)); i++ )
+            if ( (rc = ops->read(ea.mem.seg, ea.mem.off + i*sizeof(long),
+                                 &old[i], sizeof(long), ctxt)) != 0 )
+                goto done;
+
+        /* Get expected and proposed values. */
+        if ( op_bytes == 8 )
+        {
+            ((uint32_t *)exp)[0] = _regs.eax; ((uint32_t *)exp)[1] = _regs.edx;
+            ((uint32_t *)new)[0] = _regs.ebx; ((uint32_t *)new)[1] = _regs.ecx;
+        }
+        else
+        {
+            exp[0] = _regs.eax; exp[1] = _regs.edx;
+            new[0] = _regs.ebx; new[1] = _regs.ecx;
+        }
+
+        if ( memcmp(old, exp, op_bytes) )
+        {
+            /* Expected != actual: store actual to rDX:rAX and clear ZF. */
+            _regs.eax = (op_bytes == 8) ? ((uint32_t *)old)[0] : old[0];
+            _regs.edx = (op_bytes == 8) ? ((uint32_t *)old)[1] : old[1];
             _regs.eflags &= ~EFLG_ZF;
         }
-        else if ( ops->cmpxchg8b == NULL )
-        {
-            rc = X86EMUL_UNHANDLEABLE;
-            goto done;
-        }
         else
         {
-            if ( (rc = ops->cmpxchg8b(ea.mem.seg, ea.mem.off, old_lo, old_hi,
-                                      _regs.ebx, _regs.ecx, ctxt)) != 0 )
+            /* Expected == actual: attempt atomic cmpxchg and set ZF. */
+            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
+                                    new, op_bytes, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
         break;
     }
-#elif defined(__x86_64__)
-    {
-        unsigned long old, new;
-        generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
-        generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
-        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &old, 8, ctxt)) != 0 )
-            goto done;
-        if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
-             ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
-        {
-            _regs.eax = (uint32_t)(old>>0);
-            _regs.edx = (uint32_t)(old>>32);
-            _regs.eflags &= ~EFLG_ZF;
-        }
-        else
-        {
-            new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
-            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
-                                    new, 8, ctxt)) != 0 )
-                goto done;
-            _regs.eflags |= EFLG_ZF;
-        }
-        break;
-    }
-#endif
 
     case 0xc8 ... 0xcf: /* bswap */
         dst.type = OP_REG;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/arch/x86/x86_emulate/x86_emulate.h
--- a/xen/arch/x86/x86_emulate/x86_emulate.h    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h    Thu Apr 24 14:08:29 2008 -0600
@@ -95,6 +95,12 @@ struct segment_register {
  /* (cmpxchg accessor): CMPXCHG failed. Maps to X86EMUL_RETRY in caller. */
 #define X86EMUL_CMPXCHG_FAILED 3
 
+/* FPU sub-types which may be requested via ->get_fpu(). */
+enum x86_emulate_fpu_type {
+    X86EMUL_FPU_fpu, /* Standard FPU coprocessor instruction set */
+    X86EMUL_FPU_mmx  /* MMX instruction set (%mm0-%mm7) */
+};
+
 /*
  * These operations represent the instruction emulator's interface to memory.
  * 
@@ -104,8 +110,7 @@ struct segment_register {
  *     some out-of-band mechanism, unknown to the emulator. The memop signals
  *     failure by returning X86EMUL_EXCEPTION to the emulator, which will
  *     then immediately bail.
- *  2. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
- *     cmpxchg8b_emulated need support 8-byte accesses.
+ *  2. Valid access sizes are 1, 2, 4 and 8 (x86/64 only) bytes.
  *  3. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
  */
 struct x86_emulate_ops
@@ -153,34 +158,16 @@ struct x86_emulate_ops
 
     /*
      * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation.
-     *  @old:   [IN ] Value expected to be current at @addr.
-     *  @new:   [IN ] Value to write to @addr.
+     *  @p_old: [IN ] Pointer to value expected to be current at @addr.
+     *  @p_new: [IN ] Pointer to value to write to @addr.
+     *  @bytes: [IN ] Operation size (up to 8 (x86/32) or 16 (x86/64) bytes).
      */
     int (*cmpxchg)(
         enum x86_segment seg,
         unsigned long offset,
-        unsigned long old,
-        unsigned long new,
-        unsigned int bytes,
-        struct x86_emulate_ctxt *ctxt);
-
-    /*
-     * cmpxchg8b: Emulate an atomic (LOCKed) CMPXCHG8B operation.
-     *  @old:   [IN ] Value expected to be current at @addr.
-     *  @new:   [IN ] Value to write to @addr.
-     * NOTES:
-     *  1. This function is only ever called when emulating a real CMPXCHG8B.
-     *  2. This function is *never* called on x86/64 systems.
-     *  2. Not defining this function (i.e., specifying NULL) is equivalent
-     *     to defining a function that always returns X86EMUL_UNHANDLEABLE.
-     */
-    int (*cmpxchg8b)(
-        enum x86_segment seg,
-        unsigned long offset,
-        unsigned long old_lo,
-        unsigned long old_hi,
-        unsigned long new_lo,
-        unsigned long new_hi,
+        void *p_old,
+        void *p_new,
+        unsigned int bytes,
         struct x86_emulate_ctxt *ctxt);
 
     /*
@@ -342,8 +329,19 @@ struct x86_emulate_ops
         uint8_t insn_len,
         struct x86_emulate_ctxt *ctxt);
 
-    /* load_fpu_ctxt: Load emulated environment's FPU state onto processor. */
-    void (*load_fpu_ctxt)(
+    /*
+     * get_fpu: Load emulated environment's FPU state onto processor.
+     *  @exn_callback: On any FPU or SIMD exception, pass control to
+     *                 (*exception_callback)(exception_callback_arg, regs).
+     */
+    int (*get_fpu)(
+        void (*exception_callback)(void *, struct cpu_user_regs *),
+        void *exception_callback_arg,
+        enum x86_emulate_fpu_type type,
+        struct x86_emulate_ctxt *ctxt);
+
+    /* put_fpu: Relinquish the FPU. Unhook from FPU/SIMD exception handlers. */
+    void (*put_fpu)(
         struct x86_emulate_ctxt *ctxt);
 
     /* invlpg: Invalidate paging structures which map addressed byte. */
diff -r 239b44eeb2d6 -r dc510776dd59 xen/common/trace.c
--- a/xen/common/trace.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/common/trace.c        Thu Apr 24 14:08:29 2008 -0600
@@ -374,6 +374,15 @@ static inline int insert_lost_records(st
                            (unsigned char *)&ed);
 }
 
+/*
+ * Notification is performed in a tasklet to avoid deadlocks with contexts
+ * which __trace_var() may be called from (e.g., scheduler critical regions).
+ */
+static void trace_notify_dom0(unsigned long unused)
+{
+    send_guest_global_virq(dom0, VIRQ_TBUF);
+}
+static DECLARE_TASKLET(trace_notify_dom0_tasklet, trace_notify_dom0, 0);
 
 /**
  * trace - Enters a trace tuple into the trace buffer for the current CPU.
@@ -506,7 +515,7 @@ void __trace_var(u32 event, int cycles, 
     /* Notify trace buffer consumer that we've crossed the high water mark. */
     if ( started_below_highwater &&
          (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
-        send_guest_global_virq(dom0, VIRQ_TBUF);
+        tasklet_schedule(&trace_notify_dom0_tasklet);
 }
 
 /*
diff -r 239b44eeb2d6 -r dc510776dd59 xen/common/xencomm.c
--- a/xen/common/xencomm.c      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/common/xencomm.c      Thu Apr 24 14:08:29 2008 -0600
@@ -323,7 +323,7 @@ xencomm_copy_chunk_to(
                (unsigned long)xencomm_vaddr(paddr, page));
 
     memcpy(xencomm_vaddr(paddr, page), (void *)from, len);
-    xencomm_mark_dirty(xencomm_vaddr(paddr, page), len);
+    xencomm_mark_dirty((unsigned long)xencomm_vaddr(paddr, page), len);
     put_page(page);
 
     return 0;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/char/console.c        Thu Apr 24 14:08:29 2008 -0600
@@ -322,7 +322,7 @@ static long guest_console_write(XEN_GUES
 
     while ( count > 0 )
     {
-        while ( serial_tx_space(sercon_handle) < (SERIAL_TXBUFSZ / 2) )
+        while ( serial_tx_space(sercon_handle) < (serial_txbufsz / 2) )
         {
             if ( hypercall_preempt_check() )
                 break;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/char/serial.c
--- a/xen/drivers/char/serial.c Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/char/serial.c Thu Apr 24 14:08:29 2008 -0600
@@ -15,6 +15,19 @@
 #include <xen/mm.h>
 #include <xen/serial.h>
 
+/* Never drop characters, even if the async transmit buffer fills. */
+/* #define SERIAL_NEVER_DROP_CHARS 1 */
+
+unsigned int serial_txbufsz = 16384;
+static void __init parse_serial_tx_buffer(const char *s)
+{
+    serial_txbufsz = max((unsigned int)parse_size_and_unit(s, NULL), 512u);
+}
+custom_param("serial_tx_buffer", parse_serial_tx_buffer);
+
+#define mask_serial_rxbuf_idx(_i) ((_i)&(serial_rxbufsz-1))
+#define mask_serial_txbuf_idx(_i) ((_i)&(serial_txbufsz-1))
+
 static struct serial_port com[2] = {
     { .rx_lock = SPIN_LOCK_UNLOCKED, .tx_lock = SPIN_LOCK_UNLOCKED }, 
     { .rx_lock = SPIN_LOCK_UNLOCKED, .tx_lock = SPIN_LOCK_UNLOCKED }
@@ -36,8 +49,8 @@ void serial_rx_interrupt(struct serial_p
             fn = port->rx_hi;
         else if ( !(c & 0x80) && (port->rx_lo != NULL) )
             fn = port->rx_lo;
-        else if ( (port->rxbufp - port->rxbufc) != SERIAL_RXBUFSZ )
-            port->rxbuf[MASK_SERIAL_RXBUF_IDX(port->rxbufp++)] = c;            
+        else if ( (port->rxbufp - port->rxbufc) != serial_rxbufsz )
+            port->rxbuf[mask_serial_rxbuf_idx(port->rxbufp++)] = c;            
     }
 
     spin_unlock_irqrestore(&port->rx_lock, flags);
@@ -72,7 +85,7 @@ void serial_tx_interrupt(struct serial_p
             if ( port->txbufc == port->txbufp )
                 break;
             port->driver->putc(
-                port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
+                port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
         }
     }
 
@@ -81,22 +94,24 @@ void serial_tx_interrupt(struct serial_p
 
 static void __serial_putc(struct serial_port *port, char c)
 {
-    int i;
-
     if ( (port->txbuf != NULL) && !port->sync )
     {
         /* Interrupt-driven (asynchronous) transmitter. */
-        if ( (port->txbufp - port->txbufc) == SERIAL_TXBUFSZ )
-        {
-            /* Buffer is full: we spin, but could alternatively drop chars. */
+#ifdef SERIAL_NEVER_DROP_CHARS
+        if ( (port->txbufp - port->txbufc) == serial_txbufsz )
+        {
+            /* Buffer is full: we spin waiting for space to appear. */
+            int i;
             while ( !port->driver->tx_empty(port) )
                 cpu_relax();
             for ( i = 0; i < port->tx_fifo_size; i++ )
                 port->driver->putc(
-                    port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
-            port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufp++)] = c;
-        }
-        else if ( ((port->txbufp - port->txbufc) == 0) &&
+                    port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
+            port->txbuf[mask_serial_txbuf_idx(port->txbufp++)] = c;
+            return;
+        }
+#endif
+        if ( ((port->txbufp - port->txbufc) == 0) &&
                   port->driver->tx_empty(port) )
         {
             /* Buffer and UART FIFO are both empty. */
@@ -105,7 +120,7 @@ static void __serial_putc(struct serial_
         else
         {
             /* Normal case: buffer the character. */
-            port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufp++)] = c;
+            port->txbuf[mask_serial_txbuf_idx(port->txbufp++)] = c;
         }
     }
     else if ( port->driver->tx_empty )
@@ -200,7 +215,7 @@ char serial_getc(int handle)
             
             if ( port->rxbufp != port->rxbufc )
             {
-                c = port->rxbuf[MASK_SERIAL_RXBUF_IDX(port->rxbufc++)];
+                c = port->rxbuf[mask_serial_rxbuf_idx(port->rxbufc++)];
                 spin_unlock_irqrestore(&port->rx_lock, flags);
                 break;
             }
@@ -336,7 +351,7 @@ void serial_start_sync(int handle)
             while ( !port->driver->tx_empty(port) )
                 cpu_relax();
             port->driver->putc(
-                port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
+                port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
         }
     }
 
@@ -364,9 +379,9 @@ int serial_tx_space(int handle)
 {
     struct serial_port *port;
     if ( handle == -1 )
-        return SERIAL_TXBUFSZ;
-    port = &com[handle & SERHND_IDX];
-    return SERIAL_TXBUFSZ - (port->txbufp - port->txbufc);
+        return serial_txbufsz;
+    port = &com[handle & SERHND_IDX];
+    return serial_txbufsz - (port->txbufp - port->txbufc);
 }
 
 void __devinit serial_init_preirq(void)
@@ -431,7 +446,7 @@ void serial_async_transmit(struct serial
     BUG_ON(!port->driver->tx_empty);
     if ( port->txbuf == NULL )
         port->txbuf = alloc_xenheap_pages(
-            get_order_from_bytes(SERIAL_TXBUFSZ));
+            get_order_from_bytes(serial_txbufsz));
 }
 
 /*
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_acpi.c
--- a/xen/drivers/passthrough/amd/iommu_acpi.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c  Thu Apr 24 14:08:29 2008 -0600
@@ -139,7 +139,7 @@ static int __init register_exclusion_ran
     iommu = find_iommu_for_device(bus, devfn);
     if ( !iommu )
     {
-        dprintk(XENLOG_ERR, "IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf);
+        amd_iov_error("IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf);
         return -ENODEV;
     }
     req = ivrs_mappings[bdf].dte_requestor_id;
@@ -221,7 +221,7 @@ static int __init parse_ivmd_device_sele
     bdf = ivmd_block->header.dev_id;
     if ( bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVMD Error: Invalid Dev_Id 0x%x\n", bdf);
+        amd_iov_error("IVMD Error: Invalid Dev_Id 0x%x\n", bdf);
         return -ENODEV;
     }
 
@@ -238,21 +238,18 @@ static int __init parse_ivmd_device_rang
     first_bdf = ivmd_block->header.dev_id;
     if ( first_bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVMD Error: "
-                "Invalid Range_First Dev_Id 0x%x\n", first_bdf);
+        amd_iov_error(
+            "IVMD Error: Invalid Range_First Dev_Id 0x%x\n", first_bdf);
         return -ENODEV;
     }
 
     last_bdf = ivmd_block->last_dev_id;
     if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) )
     {
-        dprintk(XENLOG_ERR, "IVMD Error: "
-                "Invalid Range_Last Dev_Id 0x%x\n", last_bdf);
-        return -ENODEV;
-    }
-
-    dprintk(XENLOG_ERR, " Dev_Id Range: 0x%x -> 0x%x\n",
-            first_bdf, last_bdf);
+        amd_iov_error(
+            "IVMD Error: Invalid Range_Last Dev_Id 0x%x\n", last_bdf);
+        return -ENODEV;
+    }
 
     for ( bdf = first_bdf, error = 0; (bdf <= last_bdf) && !error; bdf++ )
         error = register_exclusion_range_for_device(
@@ -272,8 +269,7 @@ static int __init parse_ivmd_device_iomm
                                     ivmd_block->cap_offset);
     if ( !iommu )
     {
-        dprintk(XENLOG_ERR,
-                "IVMD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
+        amd_iov_error("IVMD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
                 ivmd_block->header.dev_id, ivmd_block->cap_offset);
         return -ENODEV;
     }
@@ -290,7 +286,7 @@ static int __init parse_ivmd_block(struc
     if ( ivmd_block->header.length <
          sizeof(struct acpi_ivmd_block_header) )
     {
-        dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Length!\n");
+        amd_iov_error("IVMD Error: Invalid Block Length!\n");
         return -ENODEV;
     }
 
@@ -299,10 +295,9 @@ static int __init parse_ivmd_block(struc
     base = start_addr & PAGE_MASK;
     limit = (start_addr + mem_length - 1) & PAGE_MASK;
 
-    dprintk(XENLOG_INFO, "IVMD Block: Type 0x%x\n",
-            ivmd_block->header.type);
-    dprintk(XENLOG_INFO, " Start_Addr_Phys 0x%lx\n", start_addr);
-    dprintk(XENLOG_INFO, " Mem_Length 0x%lx\n", mem_length);
+    amd_iov_info("IVMD Block: Type 0x%x\n",ivmd_block->header.type);
+    amd_iov_info(" Start_Addr_Phys 0x%lx\n", start_addr);
+    amd_iov_info(" Mem_Length 0x%lx\n", mem_length);
 
     if ( get_field_from_byte(ivmd_block->header.flags,
                              AMD_IOMMU_ACPI_EXCLUSION_RANGE_MASK,
@@ -321,7 +316,7 @@ static int __init parse_ivmd_block(struc
     }
     else
     {
-        dprintk(KERN_ERR, "IVMD Error: Invalid Flag Field!\n");
+        amd_iov_error("IVMD Error: Invalid Flag Field!\n");
         return -ENODEV;
     }
 
@@ -344,7 +339,7 @@ static int __init parse_ivmd_block(struc
                                        base, limit, iw, ir);
 
     default:
-        dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Type!\n");
+        amd_iov_error("IVMD Error: Invalid Block Type!\n");
         return -ENODEV;
     }
 }
@@ -354,7 +349,7 @@ static u16 __init parse_ivhd_device_padd
 {
     if ( header_length < (block_length + pad_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
@@ -369,8 +364,7 @@ static u16 __init parse_ivhd_device_sele
     bdf = ivhd_device->header.dev_id;
     if ( bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
+        amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf);
         return 0;
     }
 
@@ -393,14 +387,14 @@ static u16 __init parse_ivhd_device_rang
     dev_length = sizeof(struct acpi_ivhd_device_range);
     if ( header_length < (block_length + dev_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
     if ( ivhd_device->range.trailer.type !=
          AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
+        amd_iov_error("IVHD Error: "
                 "Invalid Range: End_Type 0x%x\n",
                 ivhd_device->range.trailer.type);
         return 0;
@@ -409,21 +403,20 @@ static u16 __init parse_ivhd_device_rang
     first_bdf = ivhd_device->header.dev_id;
     if ( first_bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
+        amd_iov_error(
+            "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf);
         return 0;
     }
 
     last_bdf = ivhd_device->range.trailer.dev_id;
     if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
-        return 0;
-    }
-
-    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
-            first_bdf, last_bdf);
+        amd_iov_error(
+            "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
+        return 0;
+    }
+
+    amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf);
 
     /* override flags for range of devices */
     sys_mgt = get_field_from_byte(ivhd_device->header.flags,
@@ -444,28 +437,25 @@ static u16 __init parse_ivhd_device_alia
     dev_length = sizeof(struct acpi_ivhd_device_alias);
     if ( header_length < (block_length + dev_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
     bdf = ivhd_device->header.dev_id;
     if ( bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
+        amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf);
         return 0;
     }
 
     alias_id = ivhd_device->alias.dev_id;
     if ( alias_id >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Alias Dev_Id 0x%x\n", alias_id);
-        return 0;
-    }
-
-    dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id);
+        amd_iov_error("IVHD Error: Invalid Alias Dev_Id 0x%x\n", alias_id);
+        return 0;
+    }
+
+    amd_iov_info(" Dev_Id Alias: 0x%x\n", alias_id);
 
     /* override requestor_id and flags for device */
     ivrs_mappings[bdf].dte_requestor_id = alias_id;
@@ -490,15 +480,14 @@ static u16 __init parse_ivhd_device_alia
     dev_length = sizeof(struct acpi_ivhd_device_alias_range);
     if ( header_length < (block_length + dev_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
     if ( ivhd_device->alias_range.trailer.type !=
          AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
+        amd_iov_error("IVHD Error: "
                 "Invalid Range: End_Type 0x%x\n",
                 ivhd_device->alias_range.trailer.type);
         return 0;
@@ -507,30 +496,28 @@ static u16 __init parse_ivhd_device_alia
     first_bdf = ivhd_device->header.dev_id;
     if ( first_bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR,"IVHD Error: "
-                "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
+        amd_iov_error(
+            "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf);
         return 0;
     }
 
     last_bdf = ivhd_device->alias_range.trailer.dev_id;
     if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
+        amd_iov_error(
+            "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
         return 0;
     }
 
     alias_id = ivhd_device->alias_range.alias.dev_id;
     if ( alias_id >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Alias Dev_Id 0x%x\n", alias_id);
-        return 0;
-    }
-
-    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
-            first_bdf, last_bdf);
-    dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id);
+        amd_iov_error("IVHD Error: Invalid Alias Dev_Id 0x%x\n", alias_id);
+        return 0;
+    }
+
+    amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf);
+    amd_iov_info(" Dev_Id Alias: 0x%x\n", alias_id);
 
     /* override requestor_id and flags for range of devices */
     sys_mgt = get_field_from_byte(ivhd_device->header.flags,
@@ -555,16 +542,14 @@ static u16 __init parse_ivhd_device_exte
     dev_length = sizeof(struct acpi_ivhd_device_extended);
     if ( header_length < (block_length + dev_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
     bdf = ivhd_device->header.dev_id;
     if ( bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
+        amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf);
         return 0;
     }
 
@@ -587,15 +572,14 @@ static u16 __init parse_ivhd_device_exte
     dev_length = sizeof(struct acpi_ivhd_device_extended_range);
     if ( header_length < (block_length + dev_length) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Device_Entry Length!\n");
+        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
         return 0;
     }
 
     if ( ivhd_device->extended_range.trailer.type !=
          AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
+        amd_iov_error("IVHD Error: "
                 "Invalid Range: End_Type 0x%x\n",
                 ivhd_device->extended_range.trailer.type);
         return 0;
@@ -604,20 +588,20 @@ static u16 __init parse_ivhd_device_exte
     first_bdf = ivhd_device->header.dev_id;
     if ( first_bdf >= ivrs_bdf_entries )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
+        amd_iov_error(
+            "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf);
         return 0;
     }
 
     last_bdf = ivhd_device->extended_range.trailer.dev_id;
     if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: "
-                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
-        return 0;
-    }
-
-    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
+        amd_iov_error(
+            "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
+        return 0;
+    }
+
+    amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n",
             first_bdf, last_bdf);
 
     /* override flags for range of devices */
@@ -639,7 +623,7 @@ static int __init parse_ivhd_block(struc
     if ( ivhd_block->header.length <
          sizeof(struct acpi_ivhd_block_header) )
     {
-        dprintk(XENLOG_ERR, "IVHD Error: Invalid Block Length!\n");
+        amd_iov_error("IVHD Error: Invalid Block Length!\n");
         return -ENODEV;
     }
 
@@ -647,21 +631,16 @@ static int __init parse_ivhd_block(struc
                                     ivhd_block->cap_offset);
     if ( !iommu )
     {
-        dprintk(XENLOG_ERR,
-                "IVHD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
+        amd_iov_error("IVHD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
                 ivhd_block->header.dev_id, ivhd_block->cap_offset);
         return -ENODEV;
     }
 
-    dprintk(XENLOG_INFO, "IVHD Block:\n");
-    dprintk(XENLOG_INFO, " Cap_Offset 0x%x\n",
-            ivhd_block->cap_offset);
-    dprintk(XENLOG_INFO, " MMIO_BAR_Phys 0x%lx\n",
-            (unsigned long)ivhd_block->mmio_base);
-    dprintk(XENLOG_INFO, " PCI_Segment 0x%x\n",
-            ivhd_block->pci_segment);
-    dprintk(XENLOG_INFO, " IOMMU_Info 0x%x\n",
-            ivhd_block->iommu_info);
+    amd_iov_info("IVHD Block:\n");
+    amd_iov_info(" Cap_Offset 0x%x\n", ivhd_block->cap_offset);
+    amd_iov_info(" MMIO_BAR_Phys 0x%"PRIx64"\n",ivhd_block->mmio_base);
+    amd_iov_info( " PCI_Segment 0x%x\n", ivhd_block->pci_segment);
+    amd_iov_info( " IOMMU_Info 0x%x\n", ivhd_block->iommu_info);
 
     /* override IOMMU support flags */
     iommu->coherent = get_field_from_byte(ivhd_block->header.flags,
@@ -692,13 +671,10 @@ static int __init parse_ivhd_block(struc
         ivhd_device = (union acpi_ivhd_device *)
             ((u8 *)ivhd_block + block_length);
 
-        dprintk(XENLOG_INFO, "IVHD Device Entry:\n");
-        dprintk(XENLOG_INFO, " Type 0x%x\n",
-                ivhd_device->header.type);
-        dprintk(XENLOG_INFO, " Dev_Id 0x%x\n",
-                ivhd_device->header.dev_id);
-        dprintk(XENLOG_INFO, " Flags 0x%x\n",
-                ivhd_device->header.flags);
+        amd_iov_info( "IVHD Device Entry:\n");
+        amd_iov_info( " Type 0x%x\n", ivhd_device->header.type);
+        amd_iov_info( " Dev_Id 0x%x\n", ivhd_device->header.dev_id);
+        amd_iov_info( " Flags 0x%x\n", ivhd_device->header.flags);
 
         switch ( ivhd_device->header.type )
         {
@@ -741,8 +717,7 @@ static int __init parse_ivhd_block(struc
                 ivhd_block->header.length, block_length);
             break;
         default:
-            dprintk(XENLOG_ERR, "IVHD Error: "
-                    "Invalid Device Type!\n");
+            amd_iov_error("IVHD Error: Invalid Device Type!\n");
             dev_length = 0;
             break;
         }
@@ -774,46 +749,49 @@ static int __init parse_ivrs_block(struc
         return parse_ivmd_block(ivmd_block);
 
     default:
-        dprintk(XENLOG_ERR, "IVRS Error: Invalid Block Type!\n");
+        amd_iov_error("IVRS Error: Invalid Block Type!\n");
         return -ENODEV;
     }
 
     return 0;
 }
 
-void __init dump_acpi_table_header(struct acpi_table_header *table)
-{
+static void __init dump_acpi_table_header(struct acpi_table_header *table)
+{
+#ifdef AMD_IOV_DEBUG
     int i;
 
-    printk(XENLOG_INFO "AMD IOMMU: ACPI Table:\n");
-    printk(XENLOG_INFO " Signature ");
+    amd_iov_info("ACPI Table:\n");
+    amd_iov_info(" Signature ");
     for ( i = 0; i < ACPI_NAME_SIZE; i++ )
         printk("%c", table->signature[i]);
     printk("\n");
 
-    printk(" Length 0x%x\n", table->length);
-    printk(" Revision 0x%x\n", table->revision);
-    printk(" CheckSum 0x%x\n", table->checksum);
-
-    printk(" OEM_Id ");
+    amd_iov_info(" Length 0x%x\n", table->length);
+    amd_iov_info(" Revision 0x%x\n", table->revision);
+    amd_iov_info(" CheckSum 0x%x\n", table->checksum);
+
+    amd_iov_info(" OEM_Id ");
     for ( i = 0; i < ACPI_OEM_ID_SIZE; i++ )
         printk("%c", table->oem_id[i]);
     printk("\n");
 
-    printk(" OEM_Table_Id ");
+    amd_iov_info(" OEM_Table_Id ");
     for ( i = 0; i < ACPI_OEM_TABLE_ID_SIZE; i++ )
         printk("%c", table->oem_table_id[i]);
     printk("\n");
 
-    printk(" OEM_Revision 0x%x\n", table->oem_revision);
-
-    printk(" Creator_Id ");
+    amd_iov_info(" OEM_Revision 0x%x\n", table->oem_revision);
+
+    amd_iov_info(" Creator_Id ");
     for ( i = 0; i < ACPI_NAME_SIZE; i++ )
         printk("%c", table->asl_compiler_id[i]);
     printk("\n");
 
-    printk(" Creator_Revision 0x%x\n",
+    amd_iov_info(" Creator_Revision 0x%x\n",
            table->asl_compiler_revision);
+#endif
+
 }
 
 int __init parse_ivrs_table(unsigned long phys_addr, unsigned long size)
@@ -827,9 +805,7 @@ int __init parse_ivrs_table(unsigned lon
 
     BUG_ON(!table);
 
-#if 0
     dump_acpi_table_header(table);
-#endif
 
     /* validate checksum: sum of entire table == 0 */
     checksum = 0;
@@ -838,7 +814,7 @@ int __init parse_ivrs_table(unsigned lon
         checksum += raw_table[i];
     if ( checksum )
     {
-        dprintk(XENLOG_ERR, "IVRS Error: "
+        amd_iov_error("IVRS Error: "
                 "Invalid Checksum 0x%x\n", checksum);
         return -ENODEV;
     }
@@ -850,15 +826,15 @@ int __init parse_ivrs_table(unsigned lon
         ivrs_block = (struct acpi_ivrs_block_header *)
             ((u8 *)table + length);
 
-        dprintk(XENLOG_INFO, "IVRS Block:\n");
-        dprintk(XENLOG_INFO, " Type 0x%x\n", ivrs_block->type);
-        dprintk(XENLOG_INFO, " Flags 0x%x\n", ivrs_block->flags);
-        dprintk(XENLOG_INFO, " Length 0x%x\n", ivrs_block->length);
-        dprintk(XENLOG_INFO, " Dev_Id 0x%x\n", ivrs_block->dev_id);
+        amd_iov_info("IVRS Block:\n");
+        amd_iov_info(" Type 0x%x\n", ivrs_block->type);
+        amd_iov_info(" Flags 0x%x\n", ivrs_block->flags);
+        amd_iov_info(" Length 0x%x\n", ivrs_block->length);
+        amd_iov_info(" Dev_Id 0x%x\n", ivrs_block->dev_id);
 
         if ( table->length < (length + ivrs_block->length) )
         {
-            dprintk(XENLOG_ERR, "IVRS Error: "
+            amd_iov_error("IVRS Error: "
                     "Table Length Exceeded: 0x%x -> 0x%lx\n",
                     table->length,
                     (length + ivrs_block->length));
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_detect.c
--- a/xen/drivers/passthrough/amd/iommu_detect.c        Thu Apr 24 14:02:16 
2008 -0600
+++ b/xen/drivers/passthrough/amd/iommu_detect.c        Thu Apr 24 14:08:29 
2008 -0600
@@ -85,6 +85,45 @@ int __init get_iommu_last_downstream_bus
     return 0;
 }
 
+static int __init get_iommu_msi_capabilities(u8 bus, u8 dev, u8 func,
+            struct amd_iommu *iommu)
+{
+    int cap_ptr, cap_id;
+    u32 cap_header;
+    u16 control;
+    int count = 0;
+
+    cap_ptr = pci_conf_read8(bus, dev, func,
+            PCI_CAPABILITY_LIST);
+
+    while ( cap_ptr >= PCI_MIN_CAP_OFFSET &&
+        count < PCI_MAX_CAP_BLOCKS )
+    {
+        cap_ptr &= PCI_CAP_PTR_MASK;
+        cap_header = pci_conf_read32(bus, dev, func, cap_ptr);
+        cap_id = get_field_from_reg_u32(cap_header,
+                PCI_CAP_ID_MASK, PCI_CAP_ID_SHIFT);
+
+        if ( cap_id == PCI_CAP_ID_MSI )
+        {
+            iommu->msi_cap = cap_ptr;
+            break;
+        }
+        cap_ptr = get_field_from_reg_u32(cap_header,
+                PCI_CAP_NEXT_PTR_MASK, PCI_CAP_NEXT_PTR_SHIFT);
+        count++;
+    }
+
+    if ( !iommu->msi_cap )
+        return -ENODEV;
+
+    amd_iov_info("Found MSI capability block \n");
+    control = pci_conf_read16(bus, dev, func,
+            iommu->msi_cap + PCI_MSI_FLAGS);
+    iommu->maskbit = control & PCI_MSI_FLAGS_MASKBIT;
+    return 0;
+}
+
 int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
                                   struct amd_iommu *iommu)
 {
@@ -99,8 +138,7 @@ int __init get_iommu_capabilities(u8 bus
 
     if ( ((mmio_bar & 0x1) == 0) || (iommu->mmio_base_phys == 0) )
     {
-        dprintk(XENLOG_ERR ,
-                "AMD IOMMU: Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar);
+        amd_iov_error("Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar);
         return -ENODEV;
     }
 
@@ -133,6 +171,8 @@ int __init get_iommu_capabilities(u8 bus
     iommu->msi_number = get_field_from_reg_u32(
         misc_info, PCI_CAP_MSI_NUMBER_MASK, PCI_CAP_MSI_NUMBER_SHIFT);
 
+    get_iommu_msi_capabilities(bus, dev, func, iommu);
+
     return 0;
 }
 
@@ -176,24 +216,24 @@ static int __init scan_functions_for_iom
     int bus, int dev, iommu_detect_callback_ptr_t iommu_detect_callback)
 {
     int func, hdr_type;
-    int count, error = 0;
-
-    func = 0;
-    count = 1;
-    while ( VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
-                                                PCI_VENDOR_ID)) &&
-            !error && (func < count) )
+    int count = 1, error = 0;
+
+    for ( func = 0;
+          (func < count) && !error &&
+              VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
+                                                  PCI_VENDOR_ID));
+          func++ )
+
     {
         hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE);
 
-        if ( func == 0 && IS_PCI_MULTI_FUNCTION(hdr_type) )
+        if ( (func == 0) && IS_PCI_MULTI_FUNCTION(hdr_type) )
             count = PCI_MAX_FUNC_COUNT;
 
         if ( IS_PCI_TYPE0_HEADER(hdr_type) ||
              IS_PCI_TYPE1_HEADER(hdr_type) )
             error = scan_caps_for_iommu(bus, dev, func,
                                         iommu_detect_callback);
-        func++;
     }
 
     return error;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/amd/iommu_init.c  Thu Apr 24 14:08:29 2008 -0600
@@ -27,6 +27,7 @@
 #include "../pci_regs.h"
 
 extern int nr_amd_iommus;
+static struct amd_iommu *vector_to_iommu[NR_VECTORS];
 
 int __init map_iommu_mmio_region(struct amd_iommu *iommu)
 {
@@ -34,8 +35,7 @@ int __init map_iommu_mmio_region(struct 
 
     if ( nr_amd_iommus > MAX_AMD_IOMMUS )
     {
-        gdprintk(XENLOG_ERR,
-                 "IOMMU: nr_amd_iommus %d > MAX_IOMMUS\n", nr_amd_iommus);
+        amd_iov_error("nr_amd_iommus %d > MAX_IOMMUS\n", nr_amd_iommus);
         return -ENOMEM;
     }
 
@@ -109,6 +109,33 @@ void __init register_iommu_cmd_buffer_in
     writel(entry, iommu->mmio_base+IOMMU_CMD_BUFFER_BASE_HIGH_OFFSET);
 }
 
+void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu)
+{
+    u64 addr_64, addr_lo, addr_hi;
+    u32 power_of2_entries;
+    u32 entry;
+
+    addr_64 = (u64)virt_to_maddr(iommu->event_log.buffer);
+    addr_lo = addr_64 & DMA_32BIT_MASK;
+    addr_hi = addr_64 >> 32;
+
+    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
+                         IOMMU_EVENT_LOG_BASE_LOW_MASK,
+                         IOMMU_EVENT_LOG_BASE_LOW_SHIFT, &entry);
+    writel(entry, iommu->mmio_base + IOMMU_EVENT_LOG_BASE_LOW_OFFSET);
+
+    power_of2_entries = get_order_from_bytes(iommu->event_log.alloc_size) +
+                        IOMMU_EVENT_LOG_POWER_OF2_ENTRIES_PER_PAGE;
+
+    set_field_in_reg_u32((u32)addr_hi, 0,
+                        IOMMU_EVENT_LOG_BASE_HIGH_MASK,
+                        IOMMU_EVENT_LOG_BASE_HIGH_SHIFT, &entry);
+    set_field_in_reg_u32(power_of2_entries, entry,
+                        IOMMU_EVENT_LOG_LENGTH_MASK,
+                        IOMMU_EVENT_LOG_LENGTH_SHIFT, &entry);
+    writel(entry, iommu->mmio_base+IOMMU_EVENT_LOG_BASE_HIGH_OFFSET);
+}
+
 static void __init set_iommu_translation_control(struct amd_iommu *iommu,
                                                  int enable)
 {
@@ -179,10 +206,281 @@ static void __init register_iommu_exclus
     writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_LOW_OFFSET);
 }
 
+static void __init set_iommu_event_log_control(struct amd_iommu *iommu,
+            int enable)
+{
+    u32 entry;
+
+    entry = readl(iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+    set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
+                         IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_CONTROL_EVENT_LOG_ENABLE_MASK,
+                         IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT, &entry);
+    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+
+    set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
+                         IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_CONTROL_EVENT_LOG_INT_MASK,
+                         IOMMU_CONTROL_EVENT_LOG_INT_SHIFT, &entry);
+    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+
+    set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
+                         IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_CONTROL_COMP_WAIT_INT_MASK,
+                         IOMMU_CONTROL_COMP_WAIT_INT_SHIFT, &entry);
+    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+}
+
+static int amd_iommu_read_event_log(struct amd_iommu *iommu, u32 event[])
+{
+    u32 tail, head, *event_log;
+    int i;
+
+     BUG_ON( !iommu || !event );
+
+    /* make sure there's an entry in the log */
+    tail = get_field_from_reg_u32(
+                readl(iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET),
+                IOMMU_EVENT_LOG_TAIL_MASK,
+                IOMMU_EVENT_LOG_TAIL_SHIFT);
+    if ( tail != iommu->event_log_head )
+    {
+        /* read event log entry */
+        event_log = (u32 *)(iommu->event_log.buffer +
+                                        (iommu->event_log_head *
+                                        IOMMU_EVENT_LOG_ENTRY_SIZE));
+        for ( i = 0; i < IOMMU_EVENT_LOG_U32_PER_ENTRY; i++ )
+            event[i] = event_log[i];
+        if ( ++iommu->event_log_head == iommu->event_log.entries )
+            iommu->event_log_head = 0;
+
+        /* update head pointer */
+        set_field_in_reg_u32(iommu->event_log_head, 0,
+                             IOMMU_EVENT_LOG_HEAD_MASK,
+                             IOMMU_EVENT_LOG_HEAD_SHIFT, &head);
+        writel(head, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET);
+        return 0;
+    }
+
+    return -EFAULT;
+}
+
+static void amd_iommu_msi_data_init(struct amd_iommu *iommu, int vector)
+{
+    u32 msi_data;
+    u8 bus = (iommu->bdf >> 8) & 0xff;
+    u8 dev = PCI_SLOT(iommu->bdf & 0xff);
+    u8 func = PCI_FUNC(iommu->bdf & 0xff);
+
+    msi_data = MSI_DATA_TRIGGER_EDGE |
+        MSI_DATA_LEVEL_ASSERT |
+        MSI_DATA_DELIVERY_FIXED |
+        MSI_DATA_VECTOR(vector);
+
+    pci_conf_write32(bus, dev, func,
+        iommu->msi_cap + PCI_MSI_DATA_64, msi_data);
+}
+
+static void amd_iommu_msi_addr_init(struct amd_iommu *iommu, int phy_cpu)
+{
+
+    int bus = (iommu->bdf >> 8) & 0xff;
+    int dev = PCI_SLOT(iommu->bdf & 0xff);
+    int func = PCI_FUNC(iommu->bdf & 0xff);
+
+    u32 address_hi = 0;
+    u32 address_lo = MSI_ADDR_HEADER |
+            MSI_ADDR_DESTMODE_PHYS |
+            MSI_ADDR_REDIRECTION_CPU |
+            MSI_ADDR_DESTID_CPU(phy_cpu);
+
+    pci_conf_write32(bus, dev, func,
+        iommu->msi_cap + PCI_MSI_ADDRESS_LO, address_lo);
+    pci_conf_write32(bus, dev, func,
+        iommu->msi_cap + PCI_MSI_ADDRESS_HI, address_hi);
+}
+
+static void amd_iommu_msi_enable(struct amd_iommu *iommu, int flag)
+{
+    u16 control;
+    int bus = (iommu->bdf >> 8) & 0xff;
+    int dev = PCI_SLOT(iommu->bdf & 0xff);
+    int func = PCI_FUNC(iommu->bdf & 0xff);
+
+    control = pci_conf_read16(bus, dev, func,
+        iommu->msi_cap + PCI_MSI_FLAGS);
+    control &= ~(1);
+    if ( flag )
+        control |= flag;
+    pci_conf_write16(bus, dev, func,
+        iommu->msi_cap + PCI_MSI_FLAGS, control);
+}
+
+static void iommu_msi_unmask(unsigned int vector)
+{
+    unsigned long flags;
+    struct amd_iommu *iommu = vector_to_iommu[vector];
+
+    /* FIXME: do not support mask bits at the moment */
+    if ( iommu->maskbit )
+        return;
+
+    spin_lock_irqsave(&iommu->lock, flags);
+    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
+    spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void iommu_msi_mask(unsigned int vector)
+{
+    unsigned long flags;
+    struct amd_iommu *iommu = vector_to_iommu[vector];
+
+    /* FIXME: do not support mask bits at the moment */
+    if ( iommu->maskbit )
+        return;
+
+    spin_lock_irqsave(&iommu->lock, flags);
+    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
+    spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static unsigned int iommu_msi_startup(unsigned int vector)
+{
+    iommu_msi_unmask(vector);
+    return 0;
+}
+
+static void iommu_msi_end(unsigned int vector)
+{
+    iommu_msi_unmask(vector);
+    ack_APIC_irq();
+}
+
+static void iommu_msi_set_affinity(unsigned int vector, cpumask_t dest)
+{
+    struct amd_iommu *iommu = vector_to_iommu[vector];
+    amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
+}
+
+static struct hw_interrupt_type iommu_msi_type = {
+    .typename = "AMD_IOV_MSI",
+    .startup = iommu_msi_startup,
+    .shutdown = iommu_msi_mask,
+    .enable = iommu_msi_unmask,
+    .disable = iommu_msi_mask,
+    .ack = iommu_msi_mask,
+    .end = iommu_msi_end,
+    .set_affinity = iommu_msi_set_affinity,
+};
+
+static void parse_event_log_entry(u32 entry[])
+{
+    u16 domain_id, device_id;
+    u32 code;
+    u64 *addr;
+    char * event_str[] = {"ILLEGAL_DEV_TABLE_ENTRY",
+                                         "IO_PAGE_FALT",
+                                         "DEV_TABLE_HW_ERROR",
+                                         "PAGE_TABLE_HW_ERROR",
+                                         "ILLEGAL_COMMAND_ERROR",
+                                         "COMMAND_HW_ERROR",
+                                         "IOTLB_INV_TIMEOUT",
+                                         "INVALID_DEV_REQUEST"};
+
+    code = get_field_from_reg_u32(entry[1],
+                                           IOMMU_EVENT_CODE_MASK,
+                                           IOMMU_EVENT_CODE_SHIFT);
+
+    if ( (code > IOMMU_EVENT_INVALID_DEV_REQUEST)
+        || (code < IOMMU_EVENT_ILLEGAL_DEV_TABLE_ENTRY) )
+    {
+        amd_iov_error("Invalid event log entry!\n");
+        return;
+    }
+
+    if ( code == IOMMU_EVENT_IO_PAGE_FALT )
+    {
+        device_id = get_field_from_reg_u32(entry[0],
+                                           IOMMU_EVENT_DEVICE_ID_MASK,
+                                           IOMMU_EVENT_DEVICE_ID_SHIFT);
+        domain_id = get_field_from_reg_u32(entry[1],
+                                           IOMMU_EVENT_DOMAIN_ID_MASK,
+                                           IOMMU_EVENT_DOMAIN_ID_SHIFT);
+        addr= (u64*) (entry + 2);
+        printk(XENLOG_ERR "AMD_IOV: "
+            "%s: domain:%d, device id:0x%x, fault address:0x%"PRIx64"\n",
+            event_str[code-1], domain_id, device_id, *addr);
+    }
+}
+
+static void amd_iommu_page_fault(int vector, void *dev_id,
+                             struct cpu_user_regs *regs)
+{
+    u32  event[4];
+    unsigned long flags;
+    int ret = 0;
+    struct amd_iommu *iommu = dev_id;
+
+    spin_lock_irqsave(&iommu->lock, flags);
+    ret = amd_iommu_read_event_log(iommu, event);
+    spin_unlock_irqrestore(&iommu->lock, flags);
+
+    if ( ret != 0 )
+        return;
+    parse_event_log_entry(event);
+}
+
+static int set_iommu_interrupt_handler(struct amd_iommu *iommu)
+{
+    int vector, ret;
+    unsigned long flags;
+
+    vector = assign_irq_vector(AUTO_ASSIGN);
+    vector_to_iommu[vector] = iommu;
+
+    /* make irq == vector */
+    irq_vector[vector] = vector;
+    vector_irq[vector] = vector;
+
+    if ( !vector )
+    {
+        amd_iov_error("no vectors\n");
+        return 0;
+    }
+
+    irq_desc[vector].handler = &iommu_msi_type;
+    ret = request_irq(vector, amd_iommu_page_fault, 0, "dmar", iommu);
+    if ( ret )
+    {
+        amd_iov_error("can't request irq\n");
+        return 0;
+    }
+
+    spin_lock_irqsave(&iommu->lock, flags);
+
+    amd_iommu_msi_data_init (iommu, vector);
+    amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
+    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
+
+    spin_unlock_irqrestore(&iommu->lock, flags);
+
+    return vector;
+}
+
 void __init enable_iommu(struct amd_iommu *iommu)
 {
+    unsigned long flags;
+
+    set_iommu_interrupt_handler(iommu);
+
+    spin_lock_irqsave(&iommu->lock, flags);
+
     register_iommu_exclusion_range(iommu);
     set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
+    set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
     set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
-    printk("AMD IOMMU %d: Enabled\n", nr_amd_iommus);
-}
+
+    spin_unlock_irqrestore(&iommu->lock, flags);
+
+    printk("AMD_IOV: IOMMU %d Enabled.\n", nr_amd_iommus);
+}
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/amd/iommu_map.c   Thu Apr 24 14:08:29 2008 -0600
@@ -154,8 +154,7 @@ void flush_command_buffer(struct amd_iom
         }
         else
         {
-            dprintk(XENLOG_WARNING, "AMD IOMMU: Warning:"
-                    " ComWaitInt bit did not assert!\n");
+            amd_iov_warning("Warning: ComWaitInt bit did not assert!\n");
         }
     }
 }
@@ -402,10 +401,9 @@ int amd_iommu_map_page(struct domain *d,
     pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
     if ( pte == NULL )
     {
-        dprintk(XENLOG_ERR,
-                "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn);
+        amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
         spin_unlock_irqrestore(&hd->mapping_lock, flags);
-        return -EIO;
+        return -EFAULT;
     }
 
     set_page_table_entry_present((u32 *)pte, maddr, iw, ir);
@@ -439,10 +437,9 @@ int amd_iommu_unmap_page(struct domain *
     pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
     if ( pte == NULL )
     {
-        dprintk(XENLOG_ERR,
-                "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn);
+        amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
         spin_unlock_irqrestore(&hd->mapping_lock, flags);
-        return -EIO;
+        return -EFAULT;
     }
 
     /* mark PTE as 'page not present' */
@@ -479,9 +476,8 @@ int amd_iommu_reserve_domain_unity_map(
             hd->root_table, hd->paging_mode, phys_addr >> PAGE_SHIFT);
         if ( pte == NULL )
         {
-            dprintk(XENLOG_ERR,
-                    "AMD IOMMU: Invalid IO pagetable entry "
-                    "phys_addr = %lx\n", phys_addr);
+            amd_iov_error(
+            "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr);
             spin_unlock_irqrestore(&hd->mapping_lock, flags);
             return -EFAULT;
         }
@@ -528,8 +524,7 @@ int amd_iommu_sync_p2m(struct domain *d)
         pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
         if ( pte == NULL )
         {
-            dprintk(XENLOG_ERR,
-                    "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn);
+            amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
             spin_unlock_irqrestore(&hd->mapping_lock, flags);
             return -EFAULT;
         }
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Thu Apr 24 14:02:16 
2008 -0600
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Thu Apr 24 14:08:29 
2008 -0600
@@ -29,16 +29,11 @@ struct list_head amd_iommu_head;
 struct list_head amd_iommu_head;
 long amd_iommu_poll_comp_wait = COMPLETION_WAIT_DEFAULT_POLLING_COUNT;
 static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES;
-int nr_amd_iommus = 0;
-
-unsigned short ivrs_bdf_entries = 0;
-struct ivrs_mappings *ivrs_mappings = NULL;
-
-/* will set if amd-iommu HW is found */
-int amd_iommu_enabled = 0;
-
-static int enable_amd_iommu = 0;
-boolean_param("enable_amd_iommu", enable_amd_iommu);
+static long amd_iommu_event_log_entries = IOMMU_EVENT_LOG_DEFAULT_ENTRIES;
+int nr_amd_iommus;
+
+unsigned short ivrs_bdf_entries;
+struct ivrs_mappings *ivrs_mappings;
 
 static void deallocate_domain_page_tables(struct hvm_iommu *hd)
 {
@@ -73,25 +68,8 @@ static void __init deallocate_iommu_reso
 static void __init deallocate_iommu_resources(struct amd_iommu *iommu)
 {
     deallocate_iommu_table_struct(&iommu->dev_table);
-    deallocate_iommu_table_struct(&iommu->cmd_buffer);;
-}
-
-static void __init detect_cleanup(void)
-{
-    struct amd_iommu *iommu, *next;
-
-    list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list )
-    {
-        list_del(&iommu->list);
-        deallocate_iommu_resources(iommu);
-        xfree(iommu);
-    }
-
-    if ( ivrs_mappings )
-    {
-        xfree(ivrs_mappings);
-        ivrs_mappings = NULL;
-    }
+    deallocate_iommu_table_struct(&iommu->cmd_buffer);
+    deallocate_iommu_table_struct(&iommu->event_log);
 }
 
 static int __init allocate_iommu_table_struct(struct table_struct *table,
@@ -102,7 +80,7 @@ static int __init allocate_iommu_table_s
 
     if ( !table->buffer )
     {
-        dprintk(XENLOG_ERR, "AMD IOMMU: Error allocating %s\n", name);
+        amd_iov_error("Error allocating %s\n", name);
         return -ENOMEM;
     }
 
@@ -139,6 +117,20 @@ static int __init allocate_iommu_resourc
                                      "Command Buffer") != 0 )
         goto error_out;
 
+    /* allocate 'event log' in power of 2 increments of 4K */
+    iommu->event_log_head = 0;
+    iommu->event_log.alloc_size =
+        PAGE_SIZE << get_order_from_bytes(
+            PAGE_ALIGN(amd_iommu_event_log_entries *
+                        IOMMU_EVENT_LOG_ENTRY_SIZE));
+
+    iommu->event_log.entries =
+        iommu->event_log.alloc_size / IOMMU_EVENT_LOG_ENTRY_SIZE;
+
+    if ( allocate_iommu_table_struct(&iommu->event_log,
+                                     "Event Log") != 0 )
+        goto error_out;
+
     return 0;
 
  error_out:
@@ -153,7 +145,7 @@ int iommu_detect_callback(u8 bus, u8 dev
     iommu = (struct amd_iommu *) xmalloc(struct amd_iommu);
     if ( !iommu )
     {
-        dprintk(XENLOG_ERR, "AMD IOMMU: Error allocating amd_iommu\n");
+        amd_iov_error("Error allocating amd_iommu\n");
         return -ENOMEM;
     }
     memset(iommu, 0, sizeof(struct amd_iommu));
@@ -203,6 +195,7 @@ static int __init amd_iommu_init(void)
             goto error_out;
         register_iommu_dev_table_in_mmio_space(iommu);
         register_iommu_cmd_buffer_in_mmio_space(iommu);
+        register_iommu_event_log_in_mmio_space(iommu);
 
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
@@ -220,18 +213,14 @@ static int __init amd_iommu_init(void)
     }
 
     if ( acpi_table_parse(ACPI_IVRS, parse_ivrs_table) != 0 )
-        dprintk(XENLOG_INFO, "AMD IOMMU: Did not find IVRS table!\n");
+        amd_iov_error("Did not find IVRS table!\n");
 
     for_each_amd_iommu ( iommu )
     {
-        spin_lock_irqsave(&iommu->lock, flags);
         /* enable IOMMU translation services */
         enable_iommu(iommu);
         nr_amd_iommus++;
-        spin_unlock_irqrestore(&iommu->lock, flags);
-    }
-
-    amd_iommu_enabled = 1;
+    }
 
     return 0;
 
@@ -262,7 +251,7 @@ struct amd_iommu *find_iommu_for_device(
     return NULL;
 }
 
-void amd_iommu_setup_domain_device(
+static void amd_iommu_setup_domain_device(
     struct domain *domain, struct amd_iommu *iommu, int bdf)
 {
     void *dte;
@@ -288,12 +277,12 @@ void amd_iommu_setup_domain_device(
         sys_mgt = ivrs_mappings[req_id].dte_sys_mgt_enable;
         dev_ex = ivrs_mappings[req_id].dte_allow_exclusion;
         amd_iommu_set_dev_table_entry((u32 *)dte, root_ptr,
-                                      req_id, sys_mgt, dev_ex,
+                                      hd->domain_id, sys_mgt, dev_ex,
                                       hd->paging_mode);
 
         invalidate_dev_table_entry(iommu, req_id);
         flush_command_buffer(iommu);
-        dprintk(XENLOG_INFO, "AMD IOMMU: Set DTE req_id:%x, "
+        amd_iov_info("Enable DTE:0x%x, "
                 "root_ptr:%"PRIx64", domain_id:%d, paging_mode:%d\n",
                 req_id, root_ptr, hd->domain_id, hd->paging_mode);
 
@@ -301,9 +290,9 @@ void amd_iommu_setup_domain_device(
     }
 }
 
-void __init amd_iommu_setup_dom0_devices(void)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(dom0);
+static void amd_iommu_setup_dom0_devices(struct domain *d)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
     struct amd_iommu *iommu;
     struct pci_dev *pdev;
     int bus, dev, func;
@@ -333,80 +322,72 @@ void __init amd_iommu_setup_dom0_devices
                     find_iommu_for_device(bus, pdev->devfn) : NULL;
 
                 if ( iommu )
-                    amd_iommu_setup_domain_device(dom0, iommu, bdf);
+                    amd_iommu_setup_domain_device(d, iommu, bdf);
             }
         }
     }
 }
 
-int amd_iommu_detect(void)
-{
-    unsigned long i;
+int amd_iov_detect(void)
+{
     int last_bus;
-    struct amd_iommu *iommu;
-
-    if ( !enable_amd_iommu )
-    {
-        printk("AMD IOMMU: Disabled\n");
-        return 0;
-    }
+    struct amd_iommu *iommu, *next;
 
     INIT_LIST_HEAD(&amd_iommu_head);
 
     if ( scan_for_iommu(iommu_detect_callback) != 0 )
     {
-        dprintk(XENLOG_ERR, "AMD IOMMU: Error detection\n");
+        amd_iov_error("Error detection\n");
         goto error_out;
     }
 
     if ( !iommu_found() )
     {
-        printk("AMD IOMMU: Not found!\n");
-        return 0;
-    }
-    else
-    {
-        /* allocate 'ivrs mappings' table */
-        /* note: the table has entries to accomodate all IOMMUs */
-        last_bus = 0;
-        for_each_amd_iommu ( iommu )
-            if ( iommu->last_downstream_bus > last_bus )
-                last_bus = iommu->last_downstream_bus;
-
-        ivrs_bdf_entries = (last_bus + 1) *
-            IOMMU_DEV_TABLE_ENTRIES_PER_BUS;
-        ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries);
-
-        if ( !ivrs_mappings )
-        {
-            dprintk(XENLOG_ERR, "AMD IOMMU:"
-                    " Error allocating IVRS DevMappings table\n");
-            goto error_out;
-        }
-        memset(ivrs_mappings, 0,
-               ivrs_bdf_entries * sizeof(struct ivrs_mappings));
-    }
+        printk("AMD_IOV: IOMMU not found!\n");
+        goto error_out;
+    }
+
+    /* allocate 'ivrs mappings' table */
+    /* note: the table has entries to accommodate all IOMMUs */
+    last_bus = 0;
+    for_each_amd_iommu ( iommu )
+        if ( iommu->last_downstream_bus > last_bus )
+            last_bus = iommu->last_downstream_bus;
+
+    ivrs_bdf_entries = (last_bus + 1) *
+        IOMMU_DEV_TABLE_ENTRIES_PER_BUS;
+    ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries);
+    if ( ivrs_mappings == NULL )
+    {
+        amd_iov_error("Error allocating IVRS DevMappings table\n");
+        goto error_out;
+    }
+    memset(ivrs_mappings, 0,
+           ivrs_bdf_entries * sizeof(struct ivrs_mappings));
 
     if ( amd_iommu_init() != 0 )
     {
-        dprintk(XENLOG_ERR, "AMD IOMMU: Error initialization\n");
-        goto error_out;
-    }
-
-    if ( iommu_domain_init(dom0) != 0 )
-        goto error_out;
-
-    /* setup 1:1 page table for dom0 */
-    for ( i = 0; i < max_page; i++ )
-        amd_iommu_map_page(dom0, i, i);
-
-    amd_iommu_setup_dom0_devices();
+        amd_iov_error("Error initialization\n");
+        goto error_out;
+    }
+
     return 0;
 
  error_out:
-    detect_cleanup();
+    list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list )
+    {
+        list_del(&iommu->list);
+        deallocate_iommu_resources(iommu);
+        xfree(iommu);
+    }
+
+    if ( ivrs_mappings )
+    {
+        xfree(ivrs_mappings);
+        ivrs_mappings = NULL;
+    }
+
     return -ENODEV;
-
 }
 
 static int allocate_domain_resources(struct hvm_iommu *hd)
@@ -447,12 +428,10 @@ static int get_paging_mode(unsigned long
             return -ENOMEM;
     }
 
-    dprintk(XENLOG_INFO, "AMD IOMMU: paging mode = %d\n", level);
-
     return level;
 }
 
-int amd_iommu_domain_init(struct domain *domain)
+static int amd_iommu_domain_init(struct domain *domain)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
 
@@ -463,10 +442,18 @@ int amd_iommu_domain_init(struct domain 
         return -ENOMEM;
     }
 
-    if ( is_hvm_domain(domain) )
-        hd->paging_mode = IOMMU_PAGE_TABLE_LEVEL_4;
-    else
-        hd->paging_mode = get_paging_mode(max_page);
+    hd->paging_mode = is_hvm_domain(domain)?
+        IOMMU_PAGE_TABLE_LEVEL_4 : get_paging_mode(max_page);
+
+    if ( domain->domain_id == 0 )
+    {
+        unsigned long i; 
+        /* setup 1:1 page table for dom0 */
+        for ( i = 0; i < max_page; i++ )
+            amd_iommu_map_page(domain, i, i);
+
+        amd_iommu_setup_dom0_devices(domain);
+    }
 
     hd->domain_id = domain->domain_id;
 
@@ -490,7 +477,7 @@ static void amd_iommu_disable_domain_dev
         memset (dte, 0, IOMMU_DEV_TABLE_ENTRY_SIZE);
         invalidate_dev_table_entry(iommu, req_id);
         flush_command_buffer(iommu);
-        dprintk(XENLOG_INFO , "AMD IOMMU: disable DTE 0x%x,"
+        amd_iov_info("Disable DTE:0x%x,"
                 " domain_id:%d, paging_mode:%d\n",
                 req_id,  domain_hvm_iommu(domain)->domain_id,
                 domain_hvm_iommu(domain)->paging_mode);
@@ -525,7 +512,7 @@ static int reassign_device( struct domai
 
         if ( !iommu )
         {
-            gdprintk(XENLOG_ERR , "AMD IOMMU: fail to find iommu."
+            amd_iov_error("Fail to find iommu."
                      " %x:%x.%x cannot be assigned to domain %d\n", 
                      bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
             return -ENODEV;
@@ -540,8 +527,7 @@ static int reassign_device( struct domai
         spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
 
         amd_iommu_setup_domain_device(target, iommu, bdf);
-        gdprintk(XENLOG_INFO ,
-                 "AMD IOMMU: reassign %x:%x.%x domain %d -> domain %d\n",
+        amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                  source->domain_id, target->domain_id);
 
@@ -550,7 +536,7 @@ static int reassign_device( struct domai
     return 0;
 }
 
-int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
+static int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
 {
     int bdf = (bus << 8) | devfn;
     int req_id = ivrs_mappings[bdf].dte_requestor_id;
@@ -580,8 +566,7 @@ static void release_domain_devices(struc
     {
         pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
         pdev_flr(pdev->bus, pdev->devfn);
-        gdprintk(XENLOG_INFO ,
-                 "AMD IOMMU: release devices %x:%x.%x\n",
+        amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id,
                  pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
         reassign_device(d, dom0, pdev->bus, pdev->devfn);
     }
@@ -637,16 +622,13 @@ static void deallocate_iommu_page_tables
     hd ->root_table = NULL;
 }
 
-void amd_iommu_domain_destroy(struct domain *d)
-{
-    if ( !amd_iommu_enabled )
-        return;
-
+static void amd_iommu_domain_destroy(struct domain *d)
+{
     deallocate_iommu_page_tables(d);
     release_domain_devices(d);
 }
 
-void amd_iommu_return_device(
+static void amd_iommu_return_device(
     struct domain *s, struct domain *t, u8 bus, u8 devfn)
 {
     pdev_flr(bus, devfn);
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/iommu.c   Thu Apr 24 14:08:29 2008 -0600
@@ -18,6 +18,11 @@
 
 extern struct iommu_ops intel_iommu_ops;
 extern struct iommu_ops amd_iommu_ops;
+int intel_vtd_setup(void);
+int amd_iov_detect(void);
+
+int iommu_enabled = 1;
+boolean_param("iommu", iommu_enabled);
 
 int iommu_domain_init(struct domain *domain)
 {
@@ -134,3 +139,28 @@ void deassign_device(struct domain *d, u
 
     return hd->platform_ops->reassign_device(d, dom0, bus, devfn);
 }
+
+static int iommu_setup(void)
+{
+    int rc = -ENODEV;
+
+    if ( !iommu_enabled )
+        goto out;
+
+    switch ( boot_cpu_data.x86_vendor )
+    {
+    case X86_VENDOR_INTEL:
+        rc = intel_vtd_setup();
+        break;
+    case X86_VENDOR_AMD:
+        rc = amd_iov_detect();
+        break;
+    }
+
+    iommu_enabled = (rc == 0);
+
+ out:
+    printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
+    return rc;
+}
+__initcall(iommu_setup);
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/vtd/dmar.c        Thu Apr 24 14:08:29 2008 -0600
@@ -30,8 +30,7 @@
 #include "dmar.h"
 #include "../pci_regs.h"
 
-int vtd_enabled;
-boolean_param("vtd", vtd_enabled);
+int vtd_enabled = 1;
 
 #undef PREFIX
 #define PREFIX VTDPREFIX "ACPI DMAR:"
@@ -79,14 +78,9 @@ struct acpi_drhd_unit * ioapic_to_drhd(u
 struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id)
 {
     struct acpi_drhd_unit *drhd;
-    list_for_each_entry( drhd, &acpi_drhd_units, list ) {
-        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) ) {
-            dprintk(XENLOG_INFO VTDPREFIX,
-                    "ioapic_to_drhd: drhd->address = %lx\n",
-                    drhd->address);
+    list_for_each_entry( drhd, &acpi_drhd_units, list )
+        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) )
             return drhd;
-        }
-    }
     return NULL;
 }
 
@@ -94,15 +88,9 @@ struct iommu * ioapic_to_iommu(unsigned 
 {
     struct acpi_drhd_unit *drhd;
 
-    list_for_each_entry( drhd, &acpi_drhd_units, list ) {
-        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) ) {
-            dprintk(XENLOG_INFO VTDPREFIX,
-                    "ioapic_to_iommu: drhd->address = %lx\n",
-                    drhd->address);
+    list_for_each_entry( drhd, &acpi_drhd_units, list )
+        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) )
             return drhd->iommu;
-        }
-    }
-    dprintk(XENLOG_INFO VTDPREFIX, "returning NULL\n");
     return NULL;
 }
 
@@ -150,21 +138,11 @@ struct acpi_drhd_unit * acpi_find_matche
 
         if ( acpi_pci_device_match(drhd->devices,
                                    drhd->devices_cnt, dev) )
-        {
-            dprintk(XENLOG_INFO VTDPREFIX, 
-                    "acpi_find_matched_drhd_unit: drhd->address = %lx\n",
-                    drhd->address);
             return drhd;
-        }
     }
 
     if ( include_all_drhd )
-    {
-        dprintk(XENLOG_INFO VTDPREFIX, 
-                "acpi_find_matched_drhd_unit:include_all_drhd->addr = %lx\n",
-                include_all_drhd->address);
         return include_all_drhd;
-    }
 
     return NULL;
 }
@@ -174,11 +152,9 @@ struct acpi_rmrr_unit * acpi_find_matche
     struct acpi_rmrr_unit *rmrr;
 
     list_for_each_entry ( rmrr, &acpi_rmrr_units, list )
-    {
         if ( acpi_pci_device_match(rmrr->devices,
                                    rmrr->devices_cnt, dev) )
             return rmrr;
-    }
 
     return NULL;
 }
@@ -199,11 +175,7 @@ struct acpi_atsr_unit * acpi_find_matche
     }
 
     if ( all_ports_atsru )
-    {
-        dprintk(XENLOG_INFO VTDPREFIX,
-                "acpi_find_matched_atsr_unit: all_ports_atsru\n");
         return all_ports_atsru;;
-    }
 
     return NULL;
 }
@@ -604,22 +576,24 @@ int acpi_dmar_init(void)
 {
     int rc;
 
-    if ( !vtd_enabled )
-        return -ENODEV;
+    rc = -ENODEV;
+    if ( !iommu_enabled )
+        goto fail;
 
     if ( (rc = vtd_hw_check()) != 0 )
-        return rc;
+        goto fail;
 
     acpi_table_parse(ACPI_DMAR, acpi_parse_dmar);
 
+    rc = -ENODEV;
     if ( list_empty(&acpi_drhd_units) )
-    {
-        dprintk(XENLOG_ERR VTDPREFIX, "No DMAR devices found\n");
-        vtd_enabled = 0;
-        return -ENODEV;
-    }
+        goto fail;
 
     printk("Intel VT-d has been enabled\n");
 
     return 0;
-}
+
+ fail:
+    vtd_enabled = 0;
+    return -ENODEV;
+}
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/vtd/iommu.c       Thu Apr 24 14:08:29 2008 -0600
@@ -41,6 +41,9 @@ static int domid_bitmap_size;           
 static int domid_bitmap_size;           /* domain id bitmap size in bits */
 static unsigned long *domid_bitmap;     /* iommu domain id bitmap */
 
+static void setup_dom0_devices(struct domain *d);
+static void setup_dom0_rmrr(struct domain *d);
+
 #define DID_FIELD_WIDTH 16
 #define DID_HIGH_OFFSET 8
 static void context_set_domain_id(struct context_entry *context,
@@ -78,17 +81,12 @@ static struct intel_iommu *alloc_intel_i
     struct intel_iommu *intel;
 
     intel = xmalloc(struct intel_iommu);
-    if ( !intel )
-    {
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "Allocate intel_iommu failed.\n");
+    if ( intel == NULL )
         return NULL;
-    }
     memset(intel, 0, sizeof(struct intel_iommu));
 
     spin_lock_init(&intel->qi_ctrl.qinval_lock);
     spin_lock_init(&intel->qi_ctrl.qinval_poll_lock);
-
     spin_lock_init(&intel->ir_ctrl.iremap_lock);
 
     return intel;
@@ -96,68 +94,22 @@ static struct intel_iommu *alloc_intel_i
 
 static void free_intel_iommu(struct intel_iommu *intel)
 {
-    if ( intel )
-    {
-        xfree(intel);
-        intel = NULL;
-    }
+    xfree(intel);
 }
 
 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
 {
-    if ( !iommu )
-        return NULL;
-
-    if ( !iommu->intel )
-    {
-        iommu->intel = alloc_intel_iommu();
-        if ( !iommu->intel )
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "iommu_qi_ctrl: Allocate iommu->intel failed.\n");
-            return NULL;
-        }
-    }
-
-    return &(iommu->intel->qi_ctrl);
+    return iommu ? &iommu->intel->qi_ctrl : NULL;
 }
 
 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu)
 {
-    if ( !iommu )
-        return NULL;
-
-    if ( !iommu->intel )
-    {
-        iommu->intel = alloc_intel_iommu();
-        if ( !iommu->intel )
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "iommu_ir_ctrl: Allocate iommu->intel failed.\n");
-            return NULL;
-        }
-    }
-
-    return &(iommu->intel->ir_ctrl);
+    return iommu ? &iommu->intel->ir_ctrl : NULL;
 }
 
 struct iommu_flush *iommu_get_flush(struct iommu *iommu)
 {
-    if ( !iommu )
-        return NULL;
-
-    if ( !iommu->intel )
-    {
-        iommu->intel = alloc_intel_iommu();
-        if ( !iommu->intel )
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "iommu_get_flush: Allocate iommu->intel failed.\n");
-            return NULL;
-        }
-    }
-
-    return &(iommu->intel->flush);
+    return iommu ? &iommu->intel->flush : NULL;
 }
 
 unsigned int clflush_size;
@@ -276,11 +228,7 @@ static u64 addr_to_dma_page_maddr(struct
             dma_set_pte_addr(*pte, maddr);
             vaddr = map_vtd_domain_page(maddr);
             if ( !vaddr )
-            {
-                unmap_vtd_domain_page(parent);
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return 0;
-            }
+                break;
 
             /*
              * high level table always sets r/w, last level
@@ -294,14 +242,9 @@ static u64 addr_to_dma_page_maddr(struct
         {
             vaddr = map_vtd_domain_page(pte->val);
             if ( !vaddr )
-            {
-                unmap_vtd_domain_page(parent);
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return 0;
-            }
+                break;
         }
 
-        unmap_vtd_domain_page(parent);
         if ( level == 2 )
         {
             pte_maddr = pte->val & PAGE_MASK_4K;
@@ -309,11 +252,13 @@ static u64 addr_to_dma_page_maddr(struct
             break;
         }
 
+        unmap_vtd_domain_page(parent);
         parent = (struct dma_pte *)vaddr;
         vaddr = NULL;
         level--;
     }
 
+    unmap_vtd_domain_page(parent);
     spin_unlock_irqrestore(&hd->mapping_lock, flags);
     return pte_maddr;
 }
@@ -688,7 +633,7 @@ void dma_pte_free_pagetable(struct domai
     struct dma_pte *page, *pte;
     int total = agaw_to_level(hd->agaw);
     int level;
-    u32 tmp;
+    u64 tmp;
     u64 pg_maddr;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
@@ -709,7 +654,10 @@ void dma_pte_free_pagetable(struct domai
         {
             pg_maddr = dma_addr_level_page_maddr(domain, tmp, level);
             if ( pg_maddr == 0 )
-                return;
+            {
+                tmp += level_size(level);
+                continue;
+            }
             page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
             pte = page + address_level_offset(tmp, level);
             dma_clear_pte(*pte);
@@ -730,18 +678,11 @@ void dma_pte_free_pagetable(struct domai
     }
 }
 
-/* iommu handling */
 static int iommu_set_root_entry(struct iommu *iommu)
 {
     u32 cmd, sts;
     unsigned long flags;
-
-    if ( iommu == NULL )
-    {
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "iommu_set_root_entry: iommu == NULL\n");
-        return -EINVAL;
-    }
+    s_time_t start_time;
 
     if ( iommu->root_maddr != 0 )
     {
@@ -760,11 +701,14 @@ static int iommu_set_root_entry(struct i
     dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
 
     /* Make sure hardware complete it */
+    start_time = NOW();
     for ( ; ; )
     {
         sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
         if ( sts & DMA_GSTS_RTPS )
             break;
+        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
+            panic("DMAR hardware is malfunctional, please disable IOMMU\n");
         cpu_relax();
     }
 
@@ -777,6 +721,7 @@ static int iommu_enable_translation(stru
 {
     u32 sts;
     unsigned long flags;
+    s_time_t start_time;
 
     dprintk(XENLOG_INFO VTDPREFIX,
             "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
@@ -784,11 +729,14 @@ static int iommu_enable_translation(stru
     iommu->gcmd |= DMA_GCMD_TE;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
     /* Make sure hardware complete it */
+    start_time = NOW();
     for ( ; ; )
     {
         sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
         if ( sts & DMA_GSTS_TES )
             break;
+        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
+            panic("DMAR hardware is malfunctional, please disable IOMMU\n");
         cpu_relax();
     }
 
@@ -802,17 +750,21 @@ int iommu_disable_translation(struct iom
 {
     u32 sts;
     unsigned long flags;
+    s_time_t start_time;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     iommu->gcmd &= ~ DMA_GCMD_TE;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
 
     /* Make sure hardware complete it */
+    start_time = NOW();
     for ( ; ; )
     {
         sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
         if ( !(sts & DMA_GSTS_TES) )
             break;
+        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
+            panic("DMAR hardware is malfunctional, please disable IOMMU\n");
         cpu_relax();
     }
     spin_unlock_irqrestore(&iommu->register_lock, flags);
@@ -1039,69 +991,64 @@ int iommu_set_interrupt(struct iommu *io
     return vector;
 }
 
-struct iommu *iommu_alloc(void *hw_data)
-{
-    struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
+static int iommu_alloc(struct acpi_drhd_unit *drhd)
+{
     struct iommu *iommu;
 
     if ( nr_iommus > MAX_IOMMUS )
     {
         gdprintk(XENLOG_ERR VTDPREFIX,
                  "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
-        return NULL;
+        return -ENOMEM;
     }
 
     iommu = xmalloc(struct iommu);
-    if ( !iommu )
-        return NULL;
+    if ( iommu == NULL )
+        return -ENOMEM;
     memset(iommu, 0, sizeof(struct iommu));
 
+    iommu->intel = alloc_intel_iommu();
+    if ( iommu->intel == NULL )
+    {
+        xfree(iommu);
+        return -ENOMEM;
+    }
+
     set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
-    iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
-
-    printk("iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
-           iommu->reg, drhd->address);
-
+    iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
     nr_iommus++;
-
-    if ( !iommu->reg )
-    {
-        printk(KERN_ERR VTDPREFIX "IOMMU: can't mapping the region\n");
-        goto error;
-    }
 
     iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
     iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
 
-    printk("iommu_alloc: cap = %"PRIx64"\n",iommu->cap);
-    printk("iommu_alloc: ecap = %"PRIx64"\n", iommu->ecap);
-
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
 
-    iommu->intel = alloc_intel_iommu();
-
     drhd->iommu = iommu;
-    return iommu;
- error:
-    xfree(iommu);
-    return NULL;
-}
-
-static void free_iommu(struct iommu *iommu)
-{
-    if ( !iommu )
+    return 0;
+}
+
+static void iommu_free(struct acpi_drhd_unit *drhd)
+{
+    struct iommu *iommu = drhd->iommu;
+
+    if ( iommu == NULL )
         return;
+
     if ( iommu->root_maddr != 0 )
     {
         free_pgtable_maddr(iommu->root_maddr);
         iommu->root_maddr = 0;
     }
+
     if ( iommu->reg )
         iounmap(iommu->reg);
+
     free_intel_iommu(iommu->intel);
     free_irq(iommu->vector);
     xfree(iommu);
+
+    drhd->iommu = NULL;
 }
 
 #define guestwidth_to_adjustwidth(gaw) ({       \
@@ -1111,22 +1058,21 @@ static void free_iommu(struct iommu *iom
         agaw = 64;                              \
     agaw; })
 
-int intel_iommu_domain_init(struct domain *domain)
-{
-    struct hvm_iommu *hd = domain_hvm_iommu(domain);
+static int intel_iommu_domain_init(struct domain *d)
+{
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
     struct iommu *iommu = NULL;
     int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
-    int adjust_width, agaw;
+    int i, adjust_width, agaw;
     unsigned long sagaw;
     struct acpi_drhd_unit *drhd;
 
-    if ( !vtd_enabled || list_empty(&acpi_drhd_units) )
-        return 0;
-
-    for_each_drhd_unit ( drhd )
-        iommu = drhd->iommu ? : iommu_alloc(drhd);
-
-    /* calculate AGAW */
+    INIT_LIST_HEAD(&hd->pdev_list);
+
+    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+    iommu = drhd->iommu;
+
+    /* Calculate AGAW. */
     if ( guest_width > cap_mgaw(iommu->cap) )
         guest_width = cap_mgaw(iommu->cap);
     adjust_width = guestwidth_to_adjustwidth(guest_width);
@@ -1142,6 +1088,26 @@ int intel_iommu_domain_init(struct domai
             return -ENODEV;
     }
     hd->agaw = agaw;
+
+    if ( d->domain_id == 0 )
+    {
+        /* Set up 1:1 page table for dom0. */
+        for ( i = 0; i < max_page; i++ )
+            iommu_map_page(d, i, i);
+
+        setup_dom0_devices(d);
+        setup_dom0_rmrr(d);
+
+        iommu_flush_all();
+
+        for_each_drhd_unit ( drhd )
+        {
+            iommu = drhd->iommu;
+            if ( iommu_enable_translation(iommu) )
+                return -EIO;
+        }
+    }
+
     return 0;
 }
 
@@ -1153,28 +1119,15 @@ static int domain_context_mapping_one(
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct context_entry *context, *context_entries;
     unsigned long flags;
-    int ret = 0;
     u64 maddr;
 
     maddr = bus_to_context_maddr(iommu, bus);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
     context = &context_entries[devfn];
-    if ( !context )
+
+    if ( context_present(*context) )
     {
         unmap_vtd_domain_page(context_entries);
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "domain_context_mapping_one:context == NULL:"
-                 "bdf = %x:%x:%x\n",
-                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-        return -ENOMEM;
-    }
-
-    if ( context_present(*context) )
-    {
-        unmap_vtd_domain_page(context_entries);
-        gdprintk(XENLOG_WARNING VTDPREFIX,
-                 "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
-                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
         return 0;
     }
 
@@ -1203,12 +1156,6 @@ static int domain_context_mapping_one(
     context_set_present(*context);
     iommu_flush_cache_entry(iommu, context);
 
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
-             " hd->pgd_maddr=%"PRIx64"\n",
-             bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
-             context->hi, context->lo, hd->pgd_maddr);
-
     unmap_vtd_domain_page(context_entries);
 
     if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
@@ -1218,7 +1165,8 @@ static int domain_context_mapping_one(
     else
         iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
     spin_unlock_irqrestore(&iommu->lock, flags);
-    return ret;
+
+    return 0;
 }
 
 static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
@@ -1377,28 +1325,12 @@ static int domain_context_unmap_one(
     maddr = bus_to_context_maddr(iommu, bus);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
     context = &context_entries[devfn];
-    if ( !context )
+
+    if ( !context_present(*context) )
     {
         unmap_vtd_domain_page(context_entries);
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
-                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-        return -ENOMEM;
-    }
-
-    if ( !context_present(*context) )
-    {
-        unmap_vtd_domain_page(context_entries);
-        gdprintk(XENLOG_WARNING VTDPREFIX,
-                 "domain_context_unmap_one-%x:%x:%x- "
-                 "context NOT present:return\n",
-                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
         return 0;
     }
-
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "domain_context_unmap_one: bdf = %x:%x:%x\n",
-             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 
     spin_lock_irqsave(&iommu->lock, flags);
     context_clear_present(*context);
@@ -1431,24 +1363,12 @@ static int domain_context_unmap(
         sub_bus = pci_conf_read8(
             pdev->bus, PCI_SLOT(pdev->devfn),
             PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
-
-        gdprintk(XENLOG_INFO VTDPREFIX,
-                 "domain_context_unmap:BRIDGE:%x:%x:%x "
-                 "sec_bus=%x sub_bus=%x\n",
-                 pdev->bus, PCI_SLOT(pdev->devfn),
-                 PCI_FUNC(pdev->devfn), sec_bus, sub_bus);
         break;
     case DEV_TYPE_PCIe_ENDPOINT:
-        gdprintk(XENLOG_INFO VTDPREFIX,
-                 "domain_context_unmap:PCIe : bdf = %x:%x:%x\n",
-                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
         ret = domain_context_unmap_one(domain, iommu,
                                        (u8)(pdev->bus), (u8)(pdev->devfn));
         break;
     case DEV_TYPE_PCI:
-        gdprintk(XENLOG_INFO VTDPREFIX,
-                 "domain_context_unmap:PCI: bdf = %x:%x:%x\n",
-                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
         if ( pdev->bus == 0 )
             ret = domain_context_unmap_one(
                 domain, iommu,
@@ -1502,35 +1422,29 @@ void reassign_device_ownership(
     int status;
     unsigned long flags;
 
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "reassign_device-%x:%x:%x- source = %d target = %d\n",
-             bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
-             source->domain_id, target->domain_id);
-
     pdev_flr(bus, devfn);
 
     for_each_pdev( source, pdev )
-    {
-        if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
-            continue;
-
-        drhd = acpi_find_matched_drhd_unit(pdev);
-        iommu = drhd->iommu;
-        domain_context_unmap(source, iommu, pdev);
-
-        /* Move pci device from the source domain to target domain. */
-        spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
-        spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
-        list_move(&pdev->list, &target_hd->pdev_list);
-        spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
-        spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
-
-        status = domain_context_mapping(target, iommu, pdev);
-        if ( status != 0 )
-            gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
-
-        break;
-    }
+        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
+            goto found;
+
+    return;
+
+ found:
+    drhd = acpi_find_matched_drhd_unit(pdev);
+    iommu = drhd->iommu;
+    domain_context_unmap(source, iommu, pdev);
+
+    /* Move pci device from the source domain to target domain. */
+    spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
+    spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
+    list_move(&pdev->list, &target_hd->pdev_list);
+    spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
+    spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
+
+    status = domain_context_mapping(target, iommu, pdev);
+    if ( status != 0 )
+        gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
 }
 
 void return_devices_to_dom0(struct domain *d)
@@ -1541,9 +1455,6 @@ void return_devices_to_dom0(struct domai
     while ( !list_empty(&hd->pdev_list) )
     {
         pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
-        dprintk(XENLOG_INFO VTDPREFIX,
-                "return_devices_to_dom0: bdf = %x:%x:%x\n",
-                pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
         reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
     }
 
@@ -1600,7 +1511,7 @@ int intel_iommu_map_page(
         return 0;
 #endif
 
-    pg_maddr = addr_to_dma_page_maddr(d, gfn << PAGE_SHIFT_4K);
+    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K);
     if ( pg_maddr == 0 )
         return -ENOMEM;
     page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
@@ -1643,11 +1554,11 @@ int intel_iommu_unmap_page(struct domain
 }
 
 int iommu_page_mapping(struct domain *domain, paddr_t iova,
-                       void *hpa, size_t size, int prot)
+                       paddr_t hpa, size_t size, int prot)
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
-    unsigned long start_pfn, end_pfn;
+    u64 start_pfn, end_pfn;
     struct dma_pte *page = NULL, *pte = NULL;
     int index;
     u64 pg_maddr;
@@ -1657,9 +1568,8 @@ int iommu_page_mapping(struct domain *do
     if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
         return -EINVAL;
     iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
-    start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K);
-    end_pfn = (unsigned long)
-        ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K);
+    start_pfn = hpa >> PAGE_SHIFT_4K;
+    end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
     index = 0;
     while ( start_pfn < end_pfn )
     {
@@ -1668,7 +1578,7 @@ int iommu_page_mapping(struct domain *do
             return -ENOMEM;
         page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
         pte = page + (start_pfn & LEVEL_MASK);
-        dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
+        dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
         iommu_flush_cache_entry(iommu, pte);
         unmap_vtd_domain_page(page);
@@ -1727,7 +1637,7 @@ static int iommu_prepare_rmrr_dev(
     /* page table init */
     size = rmrr->end_address - rmrr->base_address + 1;
     ret = iommu_page_mapping(d, rmrr->base_address,
-                             (void *)rmrr->base_address, size,
+                             rmrr->base_address, size,
                              DMA_PTE_READ|DMA_PTE_WRITE);
     if ( ret )
         return ret;
@@ -1743,37 +1653,15 @@ static int iommu_prepare_rmrr_dev(
     return ret;
 }
 
-void __init setup_dom0_devices(void)
-{
-    struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
+static void setup_dom0_devices(struct domain *d)
+{
+    struct hvm_iommu *hd;
     struct acpi_drhd_unit *drhd;
     struct pci_dev *pdev;
     int bus, dev, func, ret;
     u32 l;
 
-#ifdef DEBUG_VTD_CONTEXT_ENTRY
-    for ( bus = 0; bus < 256; bus++ )
-    {
-        for ( dev = 0; dev < 32; dev++ )
-        { 
-            for ( func = 0; func < 8; func++ )
-            {
-                struct context_entry *context;
-                struct pci_dev device;
-
-                device.bus = bus; 
-                device.devfn = PCI_DEVFN(dev, func); 
-                drhd = acpi_find_matched_drhd_unit(&device);
-                context = device_to_context_entry(drhd->iommu,
-                                                  bus, PCI_DEVFN(dev, func));
-                if ( (context->lo != 0) || (context->hi != 0) )
-                    dprintk(XENLOG_INFO VTDPREFIX,
-                            "setup_dom0_devices-%x:%x:%x- context not 0\n",
-                            bus, dev, func);
-            }
-        }    
-    }        
-#endif
+    hd = domain_hvm_iommu(d);
 
     for ( bus = 0; bus < 256; bus++ )
     {
@@ -1792,18 +1680,13 @@ void __init setup_dom0_devices(void)
                 list_add_tail(&pdev->list, &hd->pdev_list);
 
                 drhd = acpi_find_matched_drhd_unit(pdev);
-                ret = domain_context_mapping(dom0, drhd->iommu, pdev);
+                ret = domain_context_mapping(d, drhd->iommu, pdev);
                 if ( ret != 0 )
                     gdprintk(XENLOG_ERR VTDPREFIX,
                              "domain_context_mapping failed\n");
             }
         }
     }
-
-    for_each_pdev ( dom0, pdev )
-        dprintk(XENLOG_INFO VTDPREFIX,
-                "setup_dom0_devices: bdf = %x:%x:%x\n",
-                pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 }
 
 void clear_fault_bits(struct iommu *iommu)
@@ -1850,13 +1733,6 @@ static int init_vtd_hw(void)
         flush->context = flush_context_reg;
         flush->iotlb = flush_iotlb_reg;
     }
-    return 0;
-}
-
-static int init_vtd2_hw(void)
-{
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
 
     for_each_drhd_unit ( drhd )
     {
@@ -1873,52 +1749,38 @@ static int init_vtd2_hw(void)
             dprintk(XENLOG_ERR VTDPREFIX,
                     "Interrupt Remapping hardware not found\n");
     }
-    return 0;
-}
-
-static int enable_vtd_translation(void)
-{
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
-
-    for_each_drhd_unit ( drhd )
-    {
-        iommu = drhd->iommu;
-        if ( iommu_enable_translation(iommu) )
-            return -EIO;
-    }
-    return 0;
-}
-
-static void setup_dom0_rmrr(void)
+
+    return 0;
+}
+
+static void setup_dom0_rmrr(struct domain *d)
 {
     struct acpi_rmrr_unit *rmrr;
     struct pci_dev *pdev;
     int ret;
 
     for_each_rmrr_device ( rmrr, pdev )
-        ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
+        ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
         if ( ret )
             gdprintk(XENLOG_ERR VTDPREFIX,
                      "IOMMU: mapping reserved region failed\n");
     end_for_each_rmrr_device ( rmrr, pdev )
 }
 
-int iommu_setup(void)
-{
-    struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
+int intel_vtd_setup(void)
+{
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
-    unsigned long i;
 
     if ( !vtd_enabled )
-        return 0;
+        return -ENODEV;
 
     spin_lock_init(&domid_bitmap_lock);
-    INIT_LIST_HEAD(&hd->pdev_list);
-
-    /* setup clflush size */
     clflush_size = get_clflush_size();
+
+    for_each_drhd_unit ( drhd )
+        if ( iommu_alloc(drhd) != 0 )
+            goto error;
 
     /* Allocate IO page directory page for the domain. */
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
@@ -1933,27 +1795,15 @@ int iommu_setup(void)
     memset(domid_bitmap, 0, domid_bitmap_size / 8);
     set_bit(0, domid_bitmap);
 
-    /* setup 1:1 page table for dom0 */
-    for ( i = 0; i < max_page; i++ )
-        iommu_map_page(dom0, i, i);
-
     init_vtd_hw();
-    setup_dom0_devices();
-    setup_dom0_rmrr();
-    iommu_flush_all();
-    enable_vtd_translation();
-    init_vtd2_hw();
 
     return 0;
 
  error:
-    printk("iommu_setup() failed\n");
     for_each_drhd_unit ( drhd )
-    {
-        iommu = drhd->iommu;
-        free_iommu(iommu);
-    }
-    return -EIO;
+        iommu_free(drhd);
+    vtd_enabled = 0;
+    return -ENOMEM;
 }
 
 /*
@@ -1979,10 +1829,6 @@ int intel_iommu_assign_device(struct dom
 
     if ( list_empty(&acpi_drhd_units) )
         return ret;
-
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "assign_device: bus = %x dev = %x func = %x\n",
-             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 
     reassign_device_ownership(dom0, d, bus, devfn);
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/utils.c
--- a/xen/drivers/passthrough/vtd/utils.c       Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/vtd/utils.c       Thu Apr 24 14:08:29 2008 -0600
@@ -60,10 +60,10 @@ int vtd_hw_check(void)
             dprintk(XENLOG_WARNING VTDPREFIX,
                     "***  vendor = %x device = %x revision = %x\n",
                     vendor, device, revision);
-            vtd_enabled = 0;
             return -ENODEV;
         }
     }
+
     return 0;
 }
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c     Thu Apr 24 14:08:29 2008 -0600
@@ -114,8 +114,6 @@ void hvm_dpci_isairq_eoi(struct domain *
                 if ( --dpci->mirq[i].pending == 0 )
                 {
                     spin_unlock(&dpci->dirq_lock);
-                    gdprintk(XENLOG_INFO VTDPREFIX,
-                             "hvm_dpci_isairq_eoi:: mirq = %x\n", i);
                     stop_timer(&dpci->hvm_timer[irq_to_vector(i)]);
                     pirq_guest_eoi(d, i);
                 }
@@ -130,8 +128,6 @@ void iommu_set_pgd(struct domain *d)
 {
     struct hvm_iommu *hd  = domain_hvm_iommu(d);
     unsigned long p2m_table;
-    int level = agaw_to_level(hd->agaw);
-    l3_pgentry_t *l3e;
 
     p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
 
@@ -153,12 +149,12 @@ void iommu_set_pgd(struct domain *d)
                 return;
             }
             pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
-            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
             unmap_domain_page(dpte);
             break;
         case VTD_PAGE_TABLE_LEVEL_4:
             pgd_mfn = _mfn(p2m_table);
-            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
             break;
         default:
             gdprintk(XENLOG_ERR VTDPREFIX,
@@ -173,6 +169,8 @@ void iommu_set_pgd(struct domain *d)
         int i;
         u64 pmd_maddr;
         unsigned long flags;
+        l3_pgentry_t *l3e;
+        int level = agaw_to_level(hd->agaw);
 
         spin_lock_irqsave(&hd->mapping_lock, flags);
         hd->pgd_maddr = alloc_pgtable_maddr();
@@ -236,6 +234,8 @@ void iommu_set_pgd(struct domain *d)
 
 #elif CONFIG_PAGING_LEVELS == 4
         mfn_t pgd_mfn;
+        l3_pgentry_t *l3e;
+        int level = agaw_to_level(hd->agaw);
 
         switch ( level )
         {
@@ -250,12 +250,12 @@ void iommu_set_pgd(struct domain *d)
             }
 
             pgd_mfn = _mfn(l3e_get_pfn(*l3e));
-            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
             unmap_domain_page(l3e);
             break;
         case VTD_PAGE_TABLE_LEVEL_4:
             pgd_mfn = _mfn(p2m_table);
-            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
             break;
         default:
             gdprintk(XENLOG_ERR VTDPREFIX,
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/amd-iommu.h
--- a/xen/include/asm-x86/amd-iommu.h   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/amd-iommu.h   Thu Apr 24 14:08:29 2008 -0600
@@ -28,10 +28,9 @@
 
 #define iommu_found()           (!list_empty(&amd_iommu_head))
 
-extern int amd_iommu_enabled;
 extern struct list_head amd_iommu_head;
 
-extern int __init amd_iommu_detect(void);
+extern int __init amd_iov_detect(void);
 
 struct table_struct {
     void *buffer;
@@ -79,6 +78,9 @@ struct amd_iommu {
     int exclusion_allow_all;
     uint64_t exclusion_base;
     uint64_t exclusion_limit;
+
+    int msi_cap;
+    int maskbit;
 };
 
 struct ivrs_mappings {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/bitops.h
--- a/xen/include/asm-x86/bitops.h      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/bitops.h      Thu Apr 24 14:08:29 2008 -0600
@@ -331,10 +331,9 @@ extern unsigned int __find_next_zero_bit
 extern unsigned int __find_next_zero_bit(
     const unsigned long *addr, unsigned int size, unsigned int offset);
 
-/* return index of first bit set in val or BITS_PER_LONG when no bit is set */
-static inline unsigned int __scanbit(unsigned long val)
-{
-    asm ( "bsf %1,%0" : "=r" (val) : "r" (val), "0" (BITS_PER_LONG) );
+static inline unsigned int __scanbit(unsigned long val, unsigned long max)
+{
+    asm ( "bsf %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max) );
     return (unsigned int)val;
 }
 
@@ -346,9 +345,9 @@ static inline unsigned int __scanbit(uns
  * Returns the bit-number of the first set bit, not the number of the byte
  * containing a bit.
  */
-#define find_first_bit(addr,size) \
-((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
-  (__scanbit(*(const unsigned long *)addr)) : \
+#define find_first_bit(addr,size)                               \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?       \
+  (__scanbit(*(const unsigned long *)addr, size)) :             \
   __find_first_bit(addr,size)))
 
 /**
@@ -357,9 +356,9 @@ static inline unsigned int __scanbit(uns
  * @offset: The bitnumber to start searching at
  * @size: The maximum size to search
  */
-#define find_next_bit(addr,size,off) \
-((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
-  ((off) + (__scanbit((*(const unsigned long *)addr) >> (off)))) : \
+#define find_next_bit(addr,size,off)                                     \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?                \
+  ((off) + (__scanbit((*(const unsigned long *)addr) >> (off), size))) : \
   __find_next_bit(addr,size,off)))
 
 /**
@@ -370,9 +369,9 @@ static inline unsigned int __scanbit(uns
  * Returns the bit-number of the first zero bit, not the number of the byte
  * containing a bit.
  */
-#define find_first_zero_bit(addr,size) \
-((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
-  (__scanbit(~*(const unsigned long *)addr)) : \
+#define find_first_zero_bit(addr,size)                          \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?       \
+  (__scanbit(~*(const unsigned long *)addr, size)) :            \
   __find_first_zero_bit(addr,size)))
 
 /**
@@ -381,9 +380,9 @@ static inline unsigned int __scanbit(uns
  * @offset: The bitnumber to start searching at
  * @size: The maximum size to search
  */
-#define find_next_zero_bit(addr,size,off) \
-((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
-  ((off)+(__scanbit(~(((*(const unsigned long *)addr)) >> (off))))) : \
+#define find_next_zero_bit(addr,size,off)                                   \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?                   \
+  ((off)+(__scanbit(~(((*(const unsigned long *)addr)) >> (off)), size))) : \
   __find_next_zero_bit(addr,size,off)))
 
 
@@ -391,8 +390,7 @@ static inline unsigned int __scanbit(uns
  * find_first_set_bit - find the first set bit in @word
  * @word: the word to search
  * 
- * Returns the bit-number of the first set bit. If no bits are set then the
- * result is undefined.
+ * Returns the bit-number of the first set bit. The input must *not* be zero.
  */
 static inline unsigned int find_first_set_bit(unsigned long word)
 {
@@ -401,26 +399,10 @@ static inline unsigned int find_first_se
 }
 
 /**
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static inline unsigned long ffz(unsigned long word)
-{
-    asm ( "bsf %1,%0"
-          :"=r" (word)
-          :"r" (~word));
-    return word;
-}
-
-/**
  * ffs - find first bit set
  * @x: the word to search
  *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
+ * This is defined the same way as the libc and compiler builtin ffs routines.
  */
 static inline int ffs(unsigned long x)
 {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h     Thu Apr 24 14:08:29 2008 -0600
@@ -139,6 +139,8 @@ int hvm_vcpu_initialise(struct vcpu *v);
 int hvm_vcpu_initialise(struct vcpu *v);
 void hvm_vcpu_destroy(struct vcpu *v);
 void hvm_vcpu_down(struct vcpu *v);
+int hvm_vcpu_cacheattr_init(struct vcpu *v);
+void hvm_vcpu_cacheattr_destroy(struct vcpu *v);
 
 void hvm_send_assist_req(struct vcpu *v);
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/hvm/support.h Thu Apr 24 14:08:29 2008 -0600
@@ -130,5 +130,7 @@ int hvm_set_cr0(unsigned long value);
 int hvm_set_cr0(unsigned long value);
 int hvm_set_cr3(unsigned long value);
 int hvm_set_cr4(unsigned long value);
+int hvm_msr_read_intercept(struct cpu_user_regs *regs);
+int hvm_msr_write_intercept(struct cpu_user_regs *regs);
 
 #endif /* __ASM_X86_HVM_SUPPORT_H__ */
diff -r 239b44eeb2d6 -r dc510776dd59 
xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h      Thu Apr 24 14:02:16 
2008 -0600
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h      Thu Apr 24 14:08:29 
2008 -0600
@@ -35,6 +35,9 @@
 /* IOMMU Command Buffer entries: in power of 2 increments, minimum of 256 */
 #define IOMMU_CMD_BUFFER_DEFAULT_ENTRIES       512
 
+/* IOMMU Event Log entries: in power of 2 increments, minimum of 256 */
+#define IOMMU_EVENT_LOG_DEFAULT_ENTRIES     512
+
 #define BITMAP_ENTRIES_PER_BYTE                8
 
 #define PTE_PER_TABLE_SHIFT            9
@@ -303,6 +306,11 @@
 #define IOMMU_EVENT_COMMAND_HW_ERROR           0x6
 #define IOMMU_EVENT_IOTLB_INV_TIMEOUT          0x7
 #define IOMMU_EVENT_INVALID_DEV_REQUEST                0x8
+
+#define IOMMU_EVENT_DOMAIN_ID_MASK           0x0000FFFF
+#define IOMMU_EVENT_DOMAIN_ID_SHIFT          0
+#define IOMMU_EVENT_DEVICE_ID_MASK           0x0000FFFF
+#define IOMMU_EVENT_DEVICE_ID_SHIFT          0
 
 /* Control Register */
 #define IOMMU_CONTROL_MMIO_OFFSET                      0x18
@@ -427,4 +435,33 @@
 #define IOMMU_IO_READ_ENABLED           1
 #define HACK_BIOS_SETTINGS                  0
 
+/* MSI interrupt */
+#define MSI_DATA_VECTOR_SHIFT       0
+#define MSI_DATA_VECTOR(v)      (((u8)v) << MSI_DATA_VECTOR_SHIFT)
+
+#define MSI_DATA_DELIVERY_SHIFT     8
+#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT)
+#define MSI_DATA_DELIVERY_LOWPRI    (1 << MSI_DATA_DELIVERY_SHIFT)
+
+#define MSI_DATA_LEVEL_SHIFT        14
+#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
+#define MSI_DATA_LEVEL_ASSERT   (1 << MSI_DATA_LEVEL_SHIFT)
+
+#define MSI_DATA_TRIGGER_SHIFT      15
+#define MSI_DATA_TRIGGER_EDGE   (0 << MSI_DATA_TRIGGER_SHIFT)
+#define  MSI_DATA_TRIGGER_LEVEL  (1 << MSI_DATA_TRIGGER_SHIFT)
+
+#define MSI_TARGET_CPU_SHIFT        12
+#define MSI_ADDR_HEADER         0xfee00000
+#define MSI_ADDR_DESTID_MASK        0xfff0000f
+#define MSI_ADDR_DESTID_CPU(cpu)    ((cpu) << MSI_TARGET_CPU_SHIFT)
+
+#define MSI_ADDR_DESTMODE_SHIFT     2
+#define MSI_ADDR_DESTMODE_PHYS  (0 << MSI_ADDR_DESTMODE_SHIFT)
+#define MSI_ADDR_DESTMODE_LOGIC (1 << MSI_ADDR_DESTMODE_SHIFT)
+
+#define MSI_ADDR_REDIRECTION_SHIFT  3
+#define MSI_ADDR_REDIRECTION_CPU    (0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
+
 #endif /* _ASM_X86_64_AMD_IOMMU_DEFS_H */
diff -r 239b44eeb2d6 -r dc510776dd59 
xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h     Thu Apr 24 14:02:16 
2008 -0600
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h     Thu Apr 24 14:08:29 
2008 -0600
@@ -35,6 +35,19 @@
 #define DMA_32BIT_MASK  0x00000000ffffffffULL
 #define PAGE_ALIGN(addr)    (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
 
+#ifdef AMD_IOV_DEBUG
+#define amd_iov_info(fmt, args...) \
+    printk(XENLOG_INFO "AMD_IOV: " fmt, ## args)
+#define amd_iov_warning(fmt, args...) \
+    printk(XENLOG_WARNING "AMD_IOV: " fmt, ## args)
+#define amd_iov_error(fmt, args...) \
+    printk(XENLOG_ERR "AMD_IOV: %s:%d: " fmt, __FILE__ , __LINE__ , ## args)
+#else
+#define amd_iov_info(fmt, args...)
+#define amd_iov_warning(fmt, args...)
+#define amd_iov_error(fmt, args...)
+#endif
+
 typedef int (*iommu_detect_callback_ptr_t)(
     u8 bus, u8 dev, u8 func, u8 cap_ptr);
 
@@ -49,6 +62,7 @@ void __init unmap_iommu_mmio_region(stru
 void __init unmap_iommu_mmio_region(struct amd_iommu *iommu);
 void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu);
 void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu);
+void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu);
 void __init enable_iommu(struct amd_iommu *iommu);
 
 /* mapping functions */
@@ -69,11 +83,6 @@ void invalidate_dev_table_entry(struct a
 /* send cmd to iommu */
 int send_iommu_command(struct amd_iommu *iommu, u32 cmd[]);
 void flush_command_buffer(struct amd_iommu *iommu);
-
-/* iommu domain funtions */
-int amd_iommu_domain_init(struct domain *domain);
-void amd_iommu_setup_domain_device(struct domain *domain,
-    struct amd_iommu *iommu, int bdf);
 
 /* find iommu for bdf */
 struct amd_iommu *find_iommu_for_device(int bus, int devfn);
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/hvm/vcpu.h    Thu Apr 24 14:08:29 2008 -0600
@@ -83,7 +83,16 @@ struct hvm_vcpu {
      */
     unsigned long       mmio_gva;
     unsigned long       mmio_gpfn;
+    /* Callback into x86_emulate when emulating FPU/MMX/XMM instructions. */
+    void (*fpu_exception_callback)(void *, struct cpu_user_regs *);
+    void *fpu_exception_callback_arg;
+    /* We may read up to m128 as a number of device-model transactions. */
+    paddr_t mmio_large_read_pa;
+    uint8_t mmio_large_read[16];
+    unsigned int mmio_large_read_bytes;
+    /* We may write up to m128 as a number of device-model transactions. */
+    paddr_t mmio_large_write_pa;
+    unsigned int mmio_large_write_bytes;
 };
 
 #endif /* __ASM_X86_HVM_VCPU_H__ */
-
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/mtrr.h
--- a/xen/include/asm-x86/mtrr.h        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/mtrr.h        Thu Apr 24 14:08:29 2008 -0600
@@ -11,13 +11,6 @@
 #define MTRR_TYPE_WRBACK     6
 #define MTRR_NUM_TYPES       7
 #define MEMORY_NUM_TYPES     MTRR_NUM_TYPES
-
-#define MTRR_PHYSMASK_VALID_BIT  11
-#define MTRR_PHYSMASK_SHIFT      12
-
-#define MTRR_PHYSBASE_TYPE_MASK  0xff   /* lowest 8 bits */
-#define MTRR_PHYSBASE_SHIFT      12
-#define MTRR_VCNT            8
 
 #define NORMAL_CACHE_MODE          0
 #define NO_FILL_CACHE_MODE         2
@@ -58,7 +51,6 @@ struct mtrr_state {
        u64       mtrr_cap;
        /* ranges in var MSRs are overlapped or not:0(no overlapped) */
        bool_t    overlapped;
-       bool_t    is_initialized;
 };
 
 extern void mtrr_save_fixed_ranges(void *);
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/asm-x86/paging.h      Thu Apr 24 14:08:29 2008 -0600
@@ -83,12 +83,14 @@ struct shadow_paging_mode {
                                             unsigned long new,
                                             unsigned int bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);
+#ifdef __i386__
     int           (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va,
                                             unsigned long old_lo, 
                                             unsigned long old_hi, 
                                             unsigned long new_lo,
                                             unsigned long new_hi,
                                             struct sh_emulate_ctxt *sh_ctxt);
+#endif
     mfn_t         (*make_monitor_table    )(struct vcpu *v);
     void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
     int           (*guess_wrmap           )(struct vcpu *v, 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/public/xsm/acm.h
--- a/xen/include/public/xsm/acm.h      Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/public/xsm/acm.h      Thu Apr 24 14:08:29 2008 -0600
@@ -91,7 +91,7 @@
  * whenever the interpretation of the related
  * policy's data structure changes
  */
-#define ACM_POLICY_VERSION 3
+#define ACM_POLICY_VERSION 4
 #define ACM_CHWALL_VERSION 1
 #define ACM_STE_VERSION  1
 
@@ -131,6 +131,10 @@ typedef uint16_t domaintype_t;
 /* high-16 = version, low-16 = check magic */
 #define ACM_MAGIC  0x0001debc
 
+/* size of the SHA1 hash identifying the XML policy from which the
+   binary policy was created */
+#define ACM_SHA1_HASH_SIZE    20
+
 /* each offset in bytes from start of the struct they
  * are part of */
 
@@ -160,6 +164,7 @@ struct acm_policy_buffer {
     uint32_t secondary_policy_code;
     uint32_t secondary_buffer_offset;
     struct acm_policy_version xml_pol_version; /* add in V3 */
+    uint8_t xml_policy_hash[ACM_SHA1_HASH_SIZE]; /* added in V4 */
 };
 
 
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/xen/iommu.h   Thu Apr 24 14:08:29 2008 -0600
@@ -27,9 +27,8 @@
 #include <public/domctl.h>
 
 extern int vtd_enabled;
-extern int amd_iommu_enabled;
+extern int iommu_enabled;
 
-#define iommu_enabled ( amd_iommu_enabled || vtd_enabled )
 #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
 #define domain_vmx_iommu(d)     (&d->arch.hvm_domain.hvm_iommu.vmx_iommu)
 
@@ -72,7 +71,6 @@ struct iommu {
     struct intel_iommu *intel;
 };
 
-int iommu_setup(void);
 int iommu_domain_init(struct domain *d);
 void iommu_domain_destroy(struct domain *d);
 int device_assigned(u8 bus, u8 devfn);
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/xen/serial.h
--- a/xen/include/xen/serial.h  Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/xen/serial.h  Thu Apr 24 14:08:29 2008 -0600
@@ -16,12 +16,10 @@ void serial_set_rx_handler(int handle, s
 void serial_set_rx_handler(int handle, serial_rx_fn fn);
 
 /* Number of characters we buffer for a polling receiver. */
-#define SERIAL_RXBUFSZ 32
-#define MASK_SERIAL_RXBUF_IDX(_i) ((_i)&(SERIAL_RXBUFSZ-1))
+#define serial_rxbufsz 32
 
 /* Number of characters we buffer for an interrupt-driven transmitter. */
-#define SERIAL_TXBUFSZ 16384
-#define MASK_SERIAL_TXBUF_IDX(_i) ((_i)&(SERIAL_TXBUFSZ-1))
+extern unsigned int serial_txbufsz;
 
 struct uart_driver;
 
@@ -39,7 +37,7 @@ struct serial_port {
     /* Receiver callback functions (asynchronous receivers). */
     serial_rx_fn        rx_lo, rx_hi, rx;
     /* Receive data buffer (polling receivers). */
-    char                rxbuf[SERIAL_RXBUFSZ];
+    char                rxbuf[serial_rxbufsz];
     unsigned int        rxbufp, rxbufc;
     /* Serial I/O is concurrency-safe. */
     spinlock_t          rx_lock, tx_lock;
diff -r 239b44eeb2d6 -r dc510776dd59 xen/include/xsm/acm/acm_core.h
--- a/xen/include/xsm/acm/acm_core.h    Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/include/xsm/acm/acm_core.h    Thu Apr 24 14:08:29 2008 -0600
@@ -34,6 +34,7 @@ struct acm_binary_policy {
     u16 primary_policy_code;
     u16 secondary_policy_code;
     struct acm_policy_version xml_pol_version;
+    u8 xml_policy_hash[ACM_SHA1_HASH_SIZE];
 };
 
 struct chwall_binary_policy {
diff -r 239b44eeb2d6 -r dc510776dd59 xen/tools/Makefile
--- a/xen/tools/Makefile        Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/tools/Makefile        Thu Apr 24 14:08:29 2008 -0600
@@ -4,12 +4,12 @@ include $(XEN_ROOT)/Config.mk
 
 .PHONY: default
 default:
-       $(MAKE) -C figlet
+       [ -d figlet ] && $(MAKE) -C figlet
        $(MAKE) symbols
 
 .PHONY: clean
 clean:
-       $(MAKE) -C figlet clean
+       [ -d figlet ] && $(MAKE) -C figlet clean
        rm -f *.o symbols
 
 symbols: symbols.c
diff -r 239b44eeb2d6 -r dc510776dd59 xen/tools/figlet/figlet.c
--- a/xen/tools/figlet/figlet.c Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/tools/figlet/figlet.c Thu Apr 24 14:08:29 2008 -0600
@@ -1488,18 +1488,7 @@ static void myputchar(unsigned char c)
 
     putc(c, stderr);
 
-    if ( nr_chars == 0 )
-        putchar('"');
-
-    putchar('\\');
-    putchar('0' + ((c>>6)&7));
-    putchar('0' + ((c>>3)&7));
-    putchar('0' + ((c>>0)&7));
-
-    if ( c == '\n' )
-        startline = 1;
-
-    if ( ++nr_chars == 18 ) 
+    if ( nr_chars == 18 ) 
     {
         nr_chars = 0;
         putchar('"');
@@ -1507,6 +1496,17 @@ static void myputchar(unsigned char c)
         putchar('\\');
         putchar('\n');
     }
+
+    if ( nr_chars++ == 0 )
+        putchar('"');
+
+    putchar('\\');
+    putchar('0' + ((c>>6)&7));
+    putchar('0' + ((c>>3)&7));
+    putchar('0' + ((c>>0)&7));
+
+    if ( c == '\n' )
+        startline = 1;
 }
 
 void putstring(string)
diff -r 239b44eeb2d6 -r dc510776dd59 xen/xsm/acm/acm_policy.c
--- a/xen/xsm/acm/acm_policy.c  Thu Apr 24 14:02:16 2008 -0600
+++ b/xen/xsm/acm/acm_policy.c  Thu Apr 24 14:08:29 2008 -0600
@@ -156,6 +156,10 @@ _acm_update_policy(void *buf, u32 buf_si
            &pol->xml_pol_version,
            sizeof(acm_bin_pol.xml_pol_version));
 
+    memcpy(&acm_bin_pol.xml_policy_hash,
+           pol->xml_policy_hash,
+           sizeof(acm_bin_pol.xml_policy_hash));
+
     if ( acm_primary_ops->is_default_policy() &&
          acm_secondary_ops->is_default_policy() )
         require_update = 0;
@@ -257,6 +261,10 @@ acm_get_policy(XEN_GUEST_HANDLE_64(void)
     memcpy(&bin_pol->xml_pol_version,
            &acm_bin_pol.xml_pol_version,
            sizeof(struct acm_policy_version));
+
+    memcpy(&bin_pol->xml_policy_hash,
+           &acm_bin_pol.xml_policy_hash,
+           sizeof(acm_bin_pol.xml_policy_hash));
 
     ret = acm_dump_policy_reference(
                policy_buffer + be32_to_cpu(bin_pol->policy_reference_offset),

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>