WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 30 Jan 2009 06:40:10 -0800
Delivery-date: Fri, 30 Jan 2009 06:41:55 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1233115605 -32400
# Node ID 79f259a26a11cb57617982ce3bc829cdd76fff46
# Parent  4fd4dcf2f8916ab4656911a76e52fc6b1ad42c2f
# Parent  31983c30c460fb405b4fc6ab8e2ae49ada2cfec5
merge with xen-unstable.hg
---
 tools/firmware/rombios/32bitgateway.h          |   18 
 xen/arch/ia64/tools/p2m_foreign/Makefile       |   51 
 xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c  |  233 ----
 xen/include/public/elfstructs.h                |  527 ----------
 xen/include/public/libelf.h                    |  265 -----
 Config.mk                                      |   15 
 buildconfigs/mk.linux-2.6-common               |    4 
 docs/check_pkgs                                |   12 
 docs/misc/dump-core-format.txt                 |   13 
 docs/misc/vtd.txt                              |   29 
 stubdom/Makefile                               |    1 
 stubdom/README                                 |    5 
 tools/Rules.mk                                 |    5 
 tools/blktap/drivers/Makefile                  |    6 
 tools/console/Makefile                         |    4 
 tools/examples/xmexample.hvm                   |   33 
 tools/firmware/Makefile                        |    4 
 tools/firmware/Rules.mk                        |    8 
 tools/firmware/hvmloader/32bitbios_support.c   |   32 
 tools/firmware/hvmloader/Makefile              |    4 
 tools/firmware/hvmloader/acpi/Makefile         |   18 
 tools/firmware/hvmloader/acpi/build.c          |   56 -
 tools/firmware/hvmloader/acpi/dsdt.asl         |    2 
 tools/firmware/hvmloader/acpi/dsdt.c           |    8 
 tools/firmware/hvmloader/cacheattr.c           |   24 
 tools/firmware/hvmloader/config.h              |   32 
 tools/firmware/hvmloader/hvmloader.c           |  206 ++-
 tools/firmware/hvmloader/mp_tables.c           |    4 
 tools/firmware/hvmloader/smbios.c              |   29 
 tools/firmware/hvmloader/smp.c                 |    2 
 tools/firmware/hvmloader/util.c                |  185 +--
 tools/firmware/hvmloader/util.h                |   17 
 tools/firmware/rombios/32bit/32bitbios.c       |   41 
 tools/firmware/rombios/32bit/Makefile          |   15 
 tools/firmware/rombios/32bit/pmm.c             |  531 ++++++++++
 tools/firmware/rombios/32bit/rombios_compat.h  |    4 
 tools/firmware/rombios/32bit/tcgbios/Makefile  |   14 
 tools/firmware/rombios/32bit/tcgbios/tcgbios.c |   18 
 tools/firmware/rombios/32bitgateway.c          |  459 ++------
 tools/firmware/rombios/32bitprotos.h           |   63 -
 tools/firmware/rombios/Makefile                |    2 
 tools/firmware/rombios/rombios.c               |   88 +
 tools/firmware/rombios/tcgbios.c               |  168 ---
 tools/firmware/vgabios/vbe.c                   |   18 
 tools/firmware/vgabios/vbe.h                   |   64 -
 tools/firmware/vgabios/vbetables-gen.c         |   41 
 tools/firmware/vgabios/vgabios.c               |    6 
 tools/flask/libflask/Makefile                  |    1 
 tools/flask/loadpolicy/Makefile                |    6 
 tools/fs-back/Makefile                         |    4 
 tools/include/Makefile                         |    3 
 tools/include/xen-foreign/reference.size       |    2 
 tools/libaio/src/Makefile                      |    2 
 tools/libfsimage/Rules.mk                      |    4 
 tools/libfsimage/common/Makefile               |    3 
 tools/libxc/Makefile                           |    5 
 tools/libxc/xc_core.c                          |   68 +
 tools/libxc/xc_core.h                          |   10 
 tools/libxc/xc_core_ia64.c                     |   17 
 tools/libxc/xc_core_x86.c                      |   81 +
 tools/libxc/xc_core_x86.h                      |   13 
 tools/libxc/xc_dom.h                           |    2 
 tools/libxc/xc_domain.c                        |   30 
 tools/libxc/xc_elf.h                           |    2 
 tools/libxc/xc_hvm_build.c                     |  189 +--
 tools/libxc/xc_private.c                       |   10 
 tools/libxc/xc_ptrace_core.c                   |    4 
 tools/libxc/xenctrl.h                          |    6 
 tools/misc/Makefile                            |    4 
 tools/misc/xenpm.c                             |  894 ++++++++++-------
 tools/pygrub/Makefile                          |    4 
 tools/python/Makefile                          |    3 
 tools/python/xen/lowlevel/xc/xc.c              |   10 
 tools/python/xen/util/oshelp.py                |    2 
 tools/python/xen/xend/XendConfig.py            |   12 
 tools/python/xen/xend/XendDPCI.py              |    7 
 tools/python/xen/xend/XendDomain.py            |    4 
 tools/python/xen/xend/XendDomainInfo.py        |   23 
 tools/python/xen/xend/balloon.py               |    4 
 tools/python/xen/xend/image.py                 |   13 
 tools/python/xen/xend/server/pciif.py          |   20 
 tools/python/xen/xend/server/relocate.py       |    2 
 tools/python/xen/xm/create.dtd                 |    7 
 tools/python/xen/xm/create.py                  |   54 -
 tools/python/xen/xm/main.py                    |   34 
 tools/python/xen/xm/xenapi_create.py           |   12 
 tools/tests/blowfish.mk                        |    8 
 tools/vnet/libxutil/Makefile                   |    7 
 tools/vtpm/Makefile                            |    2 
 tools/vtpm/Rules.mk                            |    6 
 tools/vtpm_manager/Rules.mk                    |    6 
 tools/xcutils/Makefile                         |    7 
 tools/xcutils/readnotes.c                      |    2 
 tools/xenmon/Makefile                          |    4 
 tools/xenpmd/Makefile                          |    4 
 tools/xenstat/libxenstat/Makefile              |    4 
 tools/xenstat/xentop/Makefile                  |    4 
 tools/xenstore/Makefile                        |    7 
 tools/xentrace/Makefile                        |    5 
 xen/Rules.mk                                   |   39 
 xen/arch/ia64/Makefile                         |    6 
 xen/arch/ia64/Rules.mk                         |   17 
 xen/arch/ia64/xen/domain.c                     |    2 
 xen/arch/ia64/xen/irq.c                        |    2 
 xen/arch/ia64/xen/machine_kexec.c              |    1 
 xen/arch/ia64/xen/mm.c                         |   99 -
 xen/arch/ia64/xen/xensetup.c                   |    5 
 xen/arch/x86/Makefile                          |    7 
 xen/arch/x86/Rules.mk                          |   16 
 xen/arch/x86/acpi/cpu_idle.c                   |    9 
 xen/arch/x86/acpi/power.c                      |    1 
 xen/arch/x86/apic.c                            |    4 
 xen/arch/x86/boot/Makefile                     |    3 
 xen/arch/x86/boot/mkelf32.c                    |    2 
 xen/arch/x86/bzimage.c                         |  242 ++++
 xen/arch/x86/cpu/common.c                      |   37 
 xen/arch/x86/cpu/mcheck/mce_intel.c            |   43 
 xen/arch/x86/domain.c                          |   13 
 xen/arch/x86/domain_build.c                    |  157 ++-
 xen/arch/x86/hvm/hvm.c                         |   15 
 xen/arch/x86/hvm/mtrr.c                        |   20 
 xen/arch/x86/hvm/vmsi.c                        |    2 
 xen/arch/x86/hvm/vmx/vmcs.c                    |   34 
 xen/arch/x86/io_apic.c                         |   14 
 xen/arch/x86/irq.c                             |   22 
 xen/arch/x86/machine_kexec.c                   |    3 
 xen/arch/x86/microcode.c                       |   58 -
 xen/arch/x86/microcode_amd.c                   |  265 ++---
 xen/arch/x86/microcode_intel.c                 |   18 
 xen/arch/x86/mm.c                              |  527 +++++-----
 xen/arch/x86/mm/Makefile                       |    2 
 xen/arch/x86/mm/hap/Makefile                   |    2 
 xen/arch/x86/mm/hap/hap.c                      |    2 
 xen/arch/x86/mm/hap/p2m-ept.c                  |   61 -
 xen/arch/x86/mm/p2m.c                          |   61 -
 xen/arch/x86/mm/shadow/Makefile                |    2 
 xen/arch/x86/mm/shadow/common.c                |   15 
 xen/arch/x86/mm/shadow/multi.c                 |    7 
 xen/arch/x86/mm/shadow/private.h               |   15 
 xen/arch/x86/msi.c                             |   15 
 xen/arch/x86/nmi.c                             |    2 
 xen/arch/x86/oprofile/nmi_int.c                |   42 
 xen/arch/x86/oprofile/op_model_p4.c            |    4 
 xen/arch/x86/physdev.c                         |   20 
 xen/arch/x86/setup.c                           |  114 +-
 xen/arch/x86/smpboot.c                         |   12 
 xen/arch/x86/tboot.c                           |   12 
 xen/arch/x86/time.c                            |    7 
 xen/arch/x86/traps.c                           |   28 
 xen/arch/x86/x86_32/machine_kexec.c            |    3 
 xen/arch/x86/x86_32/mm.c                       |   15 
 xen/arch/x86/x86_64/Makefile                   |   13 
 xen/arch/x86/x86_64/compat/mm.c                |   14 
 xen/arch/x86/x86_64/machine_kexec.c            |    9 
 xen/arch/x86/x86_64/mm.c                       |  126 +-
 xen/common/Makefile                            |   10 
 xen/common/compat/Makefile                     |    4 
 xen/common/compat/memory.c                     |   69 -
 xen/common/inflate.c                           | 1303 +++++++++++++++++++++++++
 xen/common/kexec.c                             |    1 
 xen/common/keyhandler.c                        |    4 
 xen/common/libelf/libelf-dominfo.c             |   15 
 xen/common/libelf/libelf-private.h             |    4 
 xen/common/memory.c                            |   79 -
 xen/common/page_alloc.c                        |  133 +-
 xen/common/xenoprof.c                          |   63 -
 xen/drivers/acpi/pmstat.c                      |   10 
 xen/drivers/acpi/reboot.c                      |    4 
 xen/drivers/cpufreq/utility.c                  |   30 
 xen/drivers/passthrough/amd/iommu_init.c       |   36 
 xen/drivers/passthrough/amd/iommu_intr.c       |   20 
 xen/drivers/passthrough/amd/iommu_map.c        |  158 +--
 xen/drivers/passthrough/amd/pci_amd_iommu.c    |  143 +-
 xen/drivers/passthrough/io.c                   |  123 +-
 xen/drivers/passthrough/iommu.c                |    1 
 xen/drivers/passthrough/vtd/dmar.c             |   18 
 xen/drivers/passthrough/vtd/iommu.c            |   17 
 xen/drivers/passthrough/vtd/iommu.h            |    3 
 xen/drivers/video/vga.c                        |    2 
 xen/include/Makefile                           |    2 
 xen/include/asm-ia64/domain.h                  |    6 
 xen/include/asm-x86/config.h                   |    8 
 xen/include/asm-x86/domain.h                   |    7 
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h  |   38 
 xen/include/asm-x86/hvm/vmx/vmcs.h             |    9 
 xen/include/asm-x86/hvm/vmx/vmx.h              |    3 
 xen/include/asm-x86/irq.h                      |    9 
 xen/include/asm-x86/microcode.h                |   49 
 xen/include/asm-x86/mm.h                       |  105 --
 xen/include/asm-x86/mtrr.h                     |    8 
 xen/include/asm-x86/paging.h                   |    7 
 xen/include/asm-x86/processor.h                |    3 
 xen/include/asm-x86/smp.h                      |    1 
 xen/include/asm-x86/time.h                     |    1 
 xen/include/public/domctl.h                    |    1 
 xen/include/public/elfnote.h                   |   13 
 xen/include/public/hvm/hvm_info_table.h        |   28 
 xen/include/public/memory.h                    |   43 
 xen/include/public/xen.h                       |    4 
 xen/include/xen/elf.h                          |    2 
 xen/include/xen/elfstructs.h                   |  527 ++++++++++
 xen/include/xen/hvm/iommu.h                    |    2 
 xen/include/xen/hvm/irq.h                      |   11 
 xen/include/xen/iommu.h                        |    1 
 xen/include/xen/libelf.h                       |  271 +++++
 xen/include/xen/mm.h                           |    4 
 xen/include/xen/sched.h                        |    6 
 xen/include/xen/xenoprof.h                     |    7 
 xen/include/xlat.lst                           |    2 
 xen/include/xsm/xsm.h                          |   12 
 xen/xsm/dummy.c                                |   11 
 xen/xsm/flask/hooks.c                          |   21 
 212 files changed, 6397 insertions(+), 4382 deletions(-)

diff -r 4fd4dcf2f891 -r 79f259a26a11 Config.mk
--- a/Config.mk Wed Jan 28 12:22:58 2009 +0900
+++ b/Config.mk Wed Jan 28 13:06:45 2009 +0900
@@ -1,7 +1,7 @@
 # -*- mode: Makefile; -*-
 
 # A debug build of Xen and tools?
-debug ?= n
+debug ?= y ## TEMPORARILY ENABLED
 
 XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
                          -e s/i86pc/x86_32/ -e s/amd64/x86_64/)
@@ -38,6 +38,15 @@ cc-option = $(shell if test -z "`$(1) $(
 cc-option = $(shell if test -z "`$(1) $(2) -S -o /dev/null -xc \
               /dev/null 2>&1`"; then echo "$(2)"; else echo "$(3)"; fi ;)
 
+# cc-option-add: Add an option to compilation flags, but only if supported.
+# Usage: $(call cc-option-add CFLAGS,CC,-march=winchip-c6)
+cc-option-add = $(eval $(call cc-option-add-closure,$(1),$(2),$(3)))
+define cc-option-add-closure
+    ifneq ($$(call cc-option,$$($(2)),$(3),n),n)
+        $(1) += $(3)
+    endif
+endef
+
 # cc-ver: Check compiler is at least specified version. Return boolean 'y'/'n'.
 # Usage: ifeq ($(call cc-ver,$(CC),0x030400),y)
 cc-ver = $(shell if [ $$((`$(1) -dumpversion | awk -F. \
@@ -84,8 +93,8 @@ CFLAGS += -Wall -Wstrict-prototypes
 # result of any casted expression causes a warning.
 CFLAGS += -Wno-unused-value
 
-HOSTCFLAGS += $(call cc-option,$(HOSTCC),-Wdeclaration-after-statement,)
-CFLAGS     += $(call cc-option,$(CC),-Wdeclaration-after-statement,)
+$(call cc-option-add,HOSTCFLAGS,HOSTCC,-Wdeclaration-after-statement)
+$(call cc-option-add,CFLAGS,CC,-Wdeclaration-after-statement)
 
 LDFLAGS += $(foreach i, $(EXTRA_LIB), -L$(i)) 
 CFLAGS += $(foreach i, $(EXTRA_INCLUDES), -I$(i))
diff -r 4fd4dcf2f891 -r 79f259a26a11 buildconfigs/mk.linux-2.6-common
--- a/buildconfigs/mk.linux-2.6-common  Wed Jan 28 12:22:58 2009 +0900
+++ b/buildconfigs/mk.linux-2.6-common  Wed Jan 28 13:06:45 2009 +0900
@@ -100,10 +100,10 @@ endif
 endif
        $(__NONINT_CONFIG) $(MAKE) -C $(LINUX_SRCDIR) ARCH=$(LINUX_ARCH) 
oldconfig O=$$(/bin/pwd)/$(LINUX_DIR)
        @set -e ; if [ ! -f $(LINUX_DIR)/Makefile ] ; then \
-           echo "***********************************"; \
+           echo "==================================="; \
            echo "oldconfig did not create a Makefile"; \
            echo "Generating $(LINUX_DIR)/Makefile   "; \
-           echo "***********************************"; \
+           echo "==================================="; \
            ( echo "# Automatically generated: don't edit"; \
              echo ""; \
              echo "VERSION = 2"; \
diff -r 4fd4dcf2f891 -r 79f259a26a11 docs/check_pkgs
--- a/docs/check_pkgs   Wed Jan 28 12:22:58 2009 +0900
+++ b/docs/check_pkgs   Wed Jan 28 13:06:45 2009 +0900
@@ -2,12 +2,12 @@ silent_which ()
 silent_which ()
 {
         which $1 1>/dev/null 2>/dev/null || {
-                echo "*************************************************"
-                echo "*************************************************"
-                echo "* WARNING: Package '$1' is required"
-                echo "*          to build Xen documentation"
-                echo "*************************************************"
-                echo "*************************************************"
+                echo "================================================="
+                echo "================================================="
+                echo "= WARNING: Package '$1' is required"
+                echo "=          to build Xen documentation"
+                echo "================================================="
+                echo "================================================="
         }
         which $1 1>/dev/null 2>/dev/null
 }
diff -r 4fd4dcf2f891 -r 79f259a26a11 docs/misc/dump-core-format.txt
--- a/docs/misc/dump-core-format.txt    Wed Jan 28 12:22:58 2009 +0900
+++ b/docs/misc/dump-core-format.txt    Wed Jan 28 13:06:45 2009 +0900
@@ -30,8 +30,13 @@ The elf header members are set as follow
         e_ident[EI_OSABI] = ELFOSABI_SYSV = 0
         e_type = ET_CORE = 4
 ELFCLASS64 is always used independent of architecture.
-e_ident[EI_DATA] and e_flags are set according to the dumping system's
-architecture. Other members are set as usual.
+e_ident[EI_DATA] is set as follows
+  For x86 PV domain case, it is set according to the guest configuration
+  (i.e. if guest is 32bit it is set to EM_386 even when the dom0 is 64 bit.)
+  For other domain case (x86 HVM domain case and ia64 domain case),
+  it is set according to the dumping system's architecture.
+e_flags is set according to the dumping system's architecture.
+Other members are set as usual.
 
 Sections
 --------
@@ -241,3 +246,7 @@ Currently only (major, minor) = (0, 1) i
   The format version isn't bumped because analysis tools can distinguish it.
 - .xen_ia64_mapped_regs section was made only for ia64 PV domain.
   In case of IA64 HVM domain, this section doesn't exist.
+- elf header e_ident[EI_DATA]
+  On x86 PV domain case, it is set according to the guest configuration.
+  I.e. in the 32-on-64 case, the file will be set to EM_386 instead of EM_X86_64.
+  This is the same as 32-on-32 case, so there is no impact on analysis tools.
diff -r 4fd4dcf2f891 -r 79f259a26a11 docs/misc/vtd.txt
--- a/docs/misc/vtd.txt Wed Jan 28 12:22:58 2009 +0900
+++ b/docs/misc/vtd.txt Wed Jan 28 13:06:45 2009 +0900
@@ -38,6 +38,30 @@ Add "msi=1" option in kernel line of hos
 Add "msi=1" option in kernel line of host grub.
 
 
+MSI-INTx translation for passthrough devices in HVM
+---------------------------------------------------
+
+If the assigned device uses a physical IRQ that is shared by more than
+one device among multiple domains, there may be significant impact on
+device performance. Unfortunately, this is quite a common case if the
+IO-APIC (INTx) IRQ is used. MSI can avoid this issue, but was only
+available if the guest enables it.
+
+With MSI-INTx translation turned on, Xen enables device MSI if it's
+available, regardless of whether the guest uses INTx or MSI. If the
+guest uses INTx IRQ, Xen will inject a translated INTx IRQ to guest's
+virtual ioapic whenever an MSI message is received. This reduces the
+interrupt sharing of the system. If the guest OS enables MSI or MSI-X,
+the translation is automatically turned off.
+
+To enable or disable MSI-INTx translation globally, add "pci_msitranslate"
+in the config file:
+       pci_msitranslate = 1         (default is 1)
+
+To override for a specific device:
+       pci = [ '01:00.0,msitranslate=0', '03:00.0' ]
+
+
 Caveat on Conventional PCI Device Passthrough
 ---------------------------------------------
 
@@ -79,6 +103,11 @@ 2 virtual PCI slots (6~7) are reserved i
  3. Attach a PCI device to the guest by the physical BDF and desired virtual 
slot(optional). Following command would insert the physical device into guest's 
virtual slot 7
 
        [root@vt-vtd ~]# xm pci-attach HVMDomainVtd 0:2:0.0 7
+
+    To specify options for the device, use -o or --options=. Following command 
would disable MSI-INTx translation for the device
+
+       [root@vt-vtd ~]# xm pci-attach -o msitranslate=0 0:2:0.0 7
+
 
 VTd hotplug usage model:
 ------------------------
diff -r 4fd4dcf2f891 -r 79f259a26a11 stubdom/Makefile
--- a/stubdom/Makefile  Wed Jan 28 12:22:58 2009 +0900
+++ b/stubdom/Makefile  Wed Jan 28 13:06:45 2009 +0900
@@ -194,6 +194,7 @@ mk-headers-$(XEN_TARGET_ARCH): ioemu/lin
           ln -sf $(addprefix ../../,$(wildcard 
$(XEN_ROOT)/xen/include/public/*.h)) include/xen && \
           ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/public/,arch-ia64 
arch-x86 hvm io xsm) include/xen && \
           ( [ -h include/xen/sys ] || ln -sf 
../../$(XEN_ROOT)/tools/include/xen-sys/MiniOS include/xen/sys ) && \
+          ( [ -h include/xen/libelf ] || ln -sf 
../../$(XEN_ROOT)/tools/include/xen/libelf include/xen/libelf ) && \
          mkdir -p include/xen-foreign && \
          ln -sf $(addprefix ../../,$(wildcard 
$(XEN_ROOT)/tools/include/xen-foreign/*)) include/xen-foreign/ && \
          $(MAKE) -C include/xen-foreign/ && \
diff -r 4fd4dcf2f891 -r 79f259a26a11 stubdom/README
--- a/stubdom/README    Wed Jan 28 12:22:58 2009 +0900
+++ b/stubdom/README    Wed Jan 28 13:06:45 2009 +0900
@@ -55,6 +55,11 @@ sdl = 0
   - In hvmconfig-dm, set an sdl vfb:
 
 vfb = [ 'type=sdl' ]
+
+    by default qemu will use sdl together with opengl for rendering, if
+    you do not want qemu to use opengl then also pass opengl=0:
+
+vfb = [ 'type=sdl, opengl=0' ]
 
 * Using a VNC server in the stub domain
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/Rules.mk
--- a/tools/Rules.mk    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/Rules.mk    Wed Jan 28 13:06:45 2009 +0900
@@ -29,6 +29,10 @@ X11_LDPATH = -L/usr/X11R6/$(LIBLEAFDIR)
 
 CFLAGS += -D__XEN_TOOLS__
 
+# Get gcc to generate the dependencies for us.
+CFLAGS += -MMD -MF .$(@F).d
+DEPS = .*.d
+
 # Enable implicit LFS support *and* explicit LFS names.
 CFLAGS  += $(shell getconf LFS_CFLAGS)
 CFLAGS  += -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
@@ -59,4 +63,3 @@ subdirs-all subdirs-clean subdirs-instal
 
 subdir-all-% subdir-clean-% subdir-install-%: .phony
        $(MAKE) -C $* $(patsubst subdir-%-$*,%,$@)
-
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/blktap/drivers/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -13,16 +13,12 @@ CFLAGS   += -I $(LIBAIO_DIR)
 CFLAGS   += -I $(LIBAIO_DIR)
 CFLAGS   += -D_GNU_SOURCE
 
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS      = .*.d
-
 ifeq ($(shell . ./check_gcrypt),"yes")
 CFLAGS += -DUSE_GCRYPT
 CRYPT_LIB := -lgcrypt
 else
 CRYPT_LIB := -lcrypto
-$(warning *** libgcrypt not installed: falling back to libcrypto ***)
+$(warning === libgcrypt not installed: falling back to libcrypto ===)
 endif
 
 LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib 
-lblktap
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/console/Makefile
--- a/tools/console/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/console/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -16,7 +16,7 @@ all: $(BIN)
 
 .PHONY: clean
 clean:
-       $(RM) *.a *.so *.o *.rpm $(BIN)
+       $(RM) *.a *.so *.o *.rpm $(BIN) $(DEPS)
        $(RM) client/*.o daemon/*.o
 
 xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c))
@@ -33,3 +33,5 @@ install: $(BIN)
        $(INSTALL_PROG) xenconsoled $(DESTDIR)/$(SBINDIR)
        $(INSTALL_DIR) $(DESTDIR)$(PRIVATE_BINDIR)
        $(INSTALL_PROG) xenconsole $(DESTDIR)$(PRIVATE_BINDIR)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/examples/xmexample.hvm      Wed Jan 28 13:06:45 2009 +0900
@@ -288,6 +288,39 @@ serial='pty'
 #  'x' -> we don't care (do not check)
 #  's' -> the bit must be the same as on the host that started this VM
 
+#-----------------------------------------------------------------------------
+#   Configure passthrough PCI{,-X,e} devices:
+#
+#   pci=[ '[SSSS:]BB:DD.F[,option1[,option2[...]]]', ... ]
+#
+#   [SSSS]:BB:DD.F  "bus segment:bus:device.function"(1) of the device to
+#                   be assigned, bus segment is optional. All fields are
+#                   in hexadecimal and no field should be longer than that
+#                   as shown in the pattern. Successful assignment may need
+#                   certain hardware support and additional configurations
+#                   (e.g. VT-d, see docs/misc/vtd.txt for more details).
+#
+#       (1) bus segment is sometimes also referred to as the PCI "domain",
+#           not to be confused with Xen domain.
+#
+#
+#   optionN         per-device options in "key=val" format. Current
+#                   available options are:
+#                   - msitranslate=0|1
+#                      per-device override of pci_msitranslate, see below
+#
+#pci=[ '07:00.0', '07:00.1' ]
+
+#   MSI-INTx translation for MSI capable devices:
+#
+#   If it's set, Xen will enable MSI for the device that supports it even
+# if the guest doesn't use MSI. In that case, an IO-APIC type interrupt will
+# be injected to the guest every time a corresponding MSI message is
+# received.
+#   If the guest enables MSI or MSI-X, the translation is automatically
+# turned off.
+# 
+#pci_msitranslate=1
 
 #-----------------------------------------------------------------------------
 #   Configure PVSCSI devices:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/Makefile
--- a/tools/firmware/Makefile   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/Makefile   Wed Jan 28 13:06:45 2009 +0900
@@ -15,10 +15,10 @@ SUBDIRS += hvmloader
 .PHONY: all
 all:
        @set -e; if [ $$((`( bcc -v 2>&1 | grep version || echo 0.0.0 ) | cut 
-d' ' -f 3 | awk -F. '{ printf "0x%02x%02x%02x", $$1, $$2, $$3}'`)) -lt 
$$((0x00100e)) ] ; then \
-       echo "***********************************************************"; \
+       echo "==========================================================="; \
        echo "Require dev86 package version >= 0.16.14 to build firmware!"; \
        echo "(visit http://www.cix.co.uk/~mayday for more information)"; \
-       echo "***********************************************************"; \
+       echo "==========================================================="; \
        else \
        $(MAKE) subdirs-$@; \
        fi
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/Rules.mk
--- a/tools/firmware/Rules.mk   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/Rules.mk   Wed Jan 28 13:06:45 2009 +0900
@@ -2,7 +2,7 @@ override XEN_TARGET_ARCH = x86_32
 override XEN_TARGET_ARCH = x86_32
 
 # User-supplied CFLAGS are not useful here.
-CFLAGS :=
+CFLAGS =
 
 include $(XEN_ROOT)/tools/Rules.mk
 
@@ -13,9 +13,9 @@ CFLAGS += -Werror
 CFLAGS += -Werror
 
 # Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS += $(call cc-option,$(CC),-nopie,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
 
 # Extra CFLAGS suitable for an embedded type of environment.
 CFLAGS += -fno-builtin -msoft-float
diff -r 4fd4dcf2f891 -r 79f259a26a11 
tools/firmware/hvmloader/32bitbios_support.c
--- a/tools/firmware/hvmloader/32bitbios_support.c      Wed Jan 28 12:22:58 
2009 +0900
+++ b/tools/firmware/hvmloader/32bitbios_support.c      Wed Jan 28 13:06:45 
2009 +0900
@@ -32,15 +32,13 @@
 
 #include "../rombios/32bit/32bitbios_flat.h"
 
-static void relocate_32bitbios(char *elfarray, uint32_t elfarraysize)
+static uint32_t relocate_32bitbios(char *elfarray, uint32_t elfarraysize)
 {
     Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfarray;
     Elf32_Shdr *shdr = (Elf32_Shdr *)&elfarray[ehdr->e_shoff];
-    char *secstrings = &elfarray[shdr[ehdr->e_shstrndx].sh_offset];
-    char *jump_table;
     uint32_t reloc_off, reloc_size;
     char *highbiosarea;
-    int i, jump_sec_idx = 0;
+    int i;
 
     /*
      * Step 1. General elf cleanup, and compute total relocation size.
@@ -50,13 +48,6 @@ static void relocate_32bitbios(char *elf
     {
         /* By default all section data points into elf image data array. */
         shdr[i].sh_addr = (Elf32_Addr)&elfarray[shdr[i].sh_offset];
-
-        if ( !strcmp(".biosjumptable", secstrings + shdr[i].sh_name) )
-        {
-            /* We do not relocate the BIOS jump table to high memory. */
-            shdr[i].sh_flags &= ~SHF_ALLOC;
-            jump_sec_idx = i;
-        }
 
         /* Fix up a corner case of address alignment. */
         if ( shdr[i].sh_addralign == 0 )
@@ -76,7 +67,7 @@ static void relocate_32bitbios(char *elf
      */
     reloc_size = reloc_off;
     printf("%d bytes of ROMBIOS high-memory extensions:\n", reloc_size);
-    highbiosarea = (char *)(long)e820_malloc(reloc_size, 0);
+    highbiosarea = mem_alloc(reloc_size, 0);
     BUG_ON(highbiosarea == NULL);
     printf("  Relocating to 0x%x-0x%x ... ",
            (uint32_t)&highbiosarea[0],
@@ -148,21 +139,12 @@ static void relocate_32bitbios(char *elf
         }
     }
 
-    /* Step 5. Find the ROMBIOS jump-table stub and copy in the real table. */
-    for ( jump_table = (char *)ROMBIOS_BEGIN;
-          jump_table != (char *)ROMBIOS_END;
-          jump_table++ )
-        if ( !strncmp(jump_table, "___JMPT", 7) )
-            break;
-    BUG_ON(jump_table == NULL);
-    BUG_ON(jump_sec_idx == 0);
-    memcpy(jump_table, (char *)shdr[jump_sec_idx].sh_addr,
-           shdr[jump_sec_idx].sh_size);
+    printf("done\n");
 
-    printf("done\n");
+    return (uint32_t)highbiosarea;
 }
 
-void highbios_setup(void)
+uint32_t highbios_setup(void)
 {
-    relocate_32bitbios((char *)highbios_array, sizeof(highbios_array));
+    return relocate_32bitbios((char *)highbios_array, sizeof(highbios_array));
 }
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/Makefile
--- a/tools/firmware/hvmloader/Makefile Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/Makefile Wed Jan 28 13:06:45 2009 +0900
@@ -58,4 +58,6 @@ roms.h: ../rombios/BIOS-bochs-latest ../
 .PHONY: clean
 clean: subdirs-clean
        rm -f roms.h acpi.h
-       rm -f hvmloader hvmloader.tmp *.o
+       rm -f hvmloader hvmloader.tmp *.o $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/Makefile
--- a/tools/firmware/hvmloader/acpi/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/acpi/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -22,10 +22,6 @@ H_SRC = $(wildcard *.h)
 H_SRC = $(wildcard *.h)
 OBJS  = $(patsubst %.c,%.o,$(C_SRC))
 
-IASL_VER = acpica-unix-20080729
-#IASL_URL = http://acpica.org/download/$(IASL_VER).tar.gz
-IASL_URL = $(XEN_EXTFILES_URL)/$(IASL_VER).tar.gz
-
 CFLAGS += -I. -I.. $(CFLAGS_include)
 
 vpath iasl $(PATH)
@@ -46,15 +42,11 @@ dsdt.c: dsdt.asl
 
 iasl:
        @echo
-       @echo "ACPI ASL compiler(iasl) is needed"
-       @echo "Download Intel ACPI CA"
-       @echo "If wget failed, please download and compile manually from"
+       @echo "ACPI ASL compiler (iasl) is needed"
+       @echo "Download and install Intel ACPI CA from"
        @echo "http://acpica.org/downloads/";
        @echo 
-       wget $(IASL_URL)
-       tar xzf $(IASL_VER).tar.gz
-       make -C $(IASL_VER)/compiler
-       $(INSTALL_PROG) $(IASL_VER)/compiler/iasl $(DESTDIR)$(BINDIR)/iasl
+       @exit 1
 
 acpi.a: $(OBJS)
        $(AR) rc $@ $(OBJS)
@@ -63,6 +55,8 @@ acpi.a: $(OBJS)
        $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
 
 clean:
-       rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz
+       rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz $(DEPS)
 
 install: all
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/build.c
--- a/tools/firmware/hvmloader/acpi/build.c     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/acpi/build.c     Wed Jan 28 13:06:45 2009 +0900
@@ -48,48 +48,9 @@ static void set_checksum(
     p[checksum_offset] = -sum;
 }
 
-static int uart_exists(uint16_t uart_base)
-{
-    uint16_t ier = uart_base + 1;
-    uint8_t a, b, c;
-
-    a = inb(ier);
-    outb(ier, 0);
-    b = inb(ier);
-    outb(ier, 0xf);
-    c = inb(ier);
-    outb(ier, a);
-
-    return ((b == 0) && (c == 0xf));
-}
-
-static int hpet_exists(unsigned long hpet_base)
-{
-    uint32_t hpet_id = *(uint32_t *)hpet_base;
-    return ((hpet_id >> 16) == 0x8086);
-}
-
 static uint8_t battery_port_exists(void)
 {
     return (inb(0x88) == 0x1F);
-}
-
-static int construct_bios_info_table(uint8_t *buf)
-{
-    struct bios_info *bios_info = (struct bios_info *)buf;
-
-    memset(bios_info, 0, sizeof(*bios_info));
-
-    bios_info->com1_present = uart_exists(0x3f8);
-    bios_info->com2_present = uart_exists(0x2f8);
-
-    bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
-
-    bios_info->pci_min = PCI_MEMBASE;
-    bios_info->pci_len = PCI_MEMSIZE;
-    bios_info->xen_pfiob = 0xdead;
-
-    return align16(sizeof(*bios_info));
 }
 
 static int construct_madt(struct acpi_20_madt *madt)
@@ -150,7 +111,7 @@ static int construct_madt(struct acpi_20
     offset += sizeof(*io_apic);
 
     lapic = (struct acpi_20_madt_lapic *)(io_apic + 1);
-    for ( i = 0; i < get_vcpu_nr(); i++ )
+    for ( i = 0; i < hvm_info->nr_vcpus; i++ )
     {
         memset(lapic, 0, sizeof(*lapic));
         lapic->type    = ACPI_PROCESSOR_LOCAL_APIC;
@@ -199,9 +160,10 @@ static int construct_secondary_tables(ui
     struct acpi_20_tcpa *tcpa;
     static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001};
     uint16_t *tis_hdr;
+    void *lasa;
 
     /* MADT. */
-    if ( (get_vcpu_nr() > 1) || get_apic_mode() )
+    if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )
     {
         madt = (struct acpi_20_madt *)&buf[offset];
         offset += construct_madt(madt);
@@ -246,11 +208,11 @@ static int construct_secondary_tables(ui
         tcpa->header.oem_revision = ACPI_OEM_REVISION;
         tcpa->header.creator_id   = ACPI_CREATOR_ID;
         tcpa->header.creator_revision = ACPI_CREATOR_REVISION;
-        tcpa->lasa = e820_malloc(ACPI_2_0_TCPA_LAML_SIZE, 0);
-        if ( tcpa->lasa )
-        {
+        if ( (lasa = mem_alloc(ACPI_2_0_TCPA_LAML_SIZE, 0)) != NULL )
+        {
+            tcpa->lasa = virt_to_phys(lasa);
             tcpa->laml = ACPI_2_0_TCPA_LAML_SIZE;
-            memset((char *)(unsigned long)tcpa->lasa, 0, tcpa->laml);
+            memset(lasa, 0, tcpa->laml);
             set_checksum(tcpa,
                          offsetof(struct acpi_header, checksum),
                          tcpa->header.length);
@@ -348,9 +310,7 @@ static void __acpi_build_tables(uint8_t 
     buf = (uint8_t *)ACPI_PHYSICAL_ADDRESS;
     offset = 0;
 
-    offset += construct_bios_info_table(&buf[offset]);
     rsdp = (struct acpi_20_rsdp *)&buf[offset];
-
     memcpy(rsdp, &Rsdp, sizeof(struct acpi_20_rsdp));
     offset += align16(sizeof(struct acpi_20_rsdp));
     rsdp->rsdt_address = (unsigned long)rsdt;
@@ -376,7 +336,7 @@ void acpi_build_tables(void)
     memset(buf, 0, high_sz);
 
     /* Allocate data area and set up ACPI tables there. */
-    buf = (uint8_t *)e820_malloc(high_sz, 0);
+    buf = mem_alloc(high_sz, 0);
     __acpi_build_tables(buf, &low_sz, &high_sz);
 
     printf(" - Lo data: %08lx-%08lx\n"
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/dsdt.asl
--- a/tools/firmware/hvmloader/acpi/dsdt.asl    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/acpi/dsdt.asl    Wed Jan 28 13:06:45 2009 +0900
@@ -86,7 +86,7 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 
 
     Scope (\_SB)
     {
-       /* ACPI_PHYSICAL_ADDRESS == 0xEA000 */
+       /* BIOS_INFO_PHYSICAL_ADDRESS == 0xEA000 */
        OperationRegion(BIOS, SystemMemory, 0xEA000, 16)
        Field(BIOS, ByteAcc, NoLock, Preserve) {
            UAR1, 1,
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/acpi/dsdt.c
--- a/tools/firmware/hvmloader/acpi/dsdt.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/acpi/dsdt.c      Wed Jan 28 13:06:45 2009 +0900
@@ -1,11 +1,11 @@
 /*
  * 
  * Intel ACPI Component Architecture
- * ASL Optimizing Compiler version 20080729 [Dec 25 2008]
+ * ASL Optimizing Compiler version 20081204 [Jan 23 2009]
  * Copyright (C) 2000 - 2008 Intel Corporation
  * Supports ACPI Specification Revision 3.0a
  * 
- * Compilation of "dsdt.asl" - Thu Dec 25 17:00:32 2008
+ * Compilation of "dsdt.asl" - Fri Jan 23 14:30:29 2009
  * 
  * C source code output
  *
@@ -13,10 +13,10 @@ unsigned char AmlCode[] =
 unsigned char AmlCode[] =
 {
     0x44,0x53,0x44,0x54,0x5E,0x11,0x00,0x00,  /* 00000000    "DSDT^..." */
-    0x02,0xD1,0x58,0x65,0x6E,0x00,0x00,0x00,  /* 00000008    "..Xen..." */
+    0x02,0xEB,0x58,0x65,0x6E,0x00,0x00,0x00,  /* 00000008    "..Xen..." */
     0x48,0x56,0x4D,0x00,0x00,0x00,0x00,0x00,  /* 00000010    "HVM....." */
     0x00,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C,  /* 00000018    "....INTL" */
-    0x29,0x07,0x08,0x20,0x08,0x50,0x4D,0x42,  /* 00000020    ").. .PMB" */
+    0x04,0x12,0x08,0x20,0x08,0x50,0x4D,0x42,  /* 00000020    "... .PMB" */
     0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C,  /* 00000028    "S....PML" */
     0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31,  /* 00000030    "N...IOB1" */
     0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08,  /* 00000038    "..IOL1.." */
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/cacheattr.c
--- a/tools/firmware/hvmloader/cacheattr.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/cacheattr.c      Wed Jan 28 13:06:45 2009 +0900
@@ -88,11 +88,25 @@ void cacheattr_init(void)
     nr_var_ranges = (uint8_t)mtrr_cap;
     if ( nr_var_ranges != 0 )
     {
-        /* A single UC range covering PCI space. */
-        wrmsr(MSR_MTRRphysBase(0), PCI_MEMBASE);
-        wrmsr(MSR_MTRRphysMask(0),
-              ((uint64_t)(int32_t)PCI_MEMBASE & addr_mask) | (1u << 11));
-        printf("var MTRRs ... ");
+        unsigned long base = pci_mem_start, size;
+        int i;
+
+        for ( i = 0; (base != pci_mem_end) && (i < nr_var_ranges); i++ )
+        {
+            size = PAGE_SIZE;
+            while ( !(base & size) )
+                size <<= 1;
+            while ( ((base + size) < base) || ((base + size) > pci_mem_end) )
+                size >>= 1;
+
+            wrmsr(MSR_MTRRphysBase(i), base);
+            wrmsr(MSR_MTRRphysMask(i),
+                  (~(uint64_t)(size-1) & addr_mask) | (1u << 11));
+
+            base += size;
+        }
+
+        printf("var MTRRs [%d/%d] ... ", i, nr_var_ranges);
     }
 
     wrmsr(MSR_MTRRdefType, mtrr_def);
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/config.h
--- a/tools/firmware/hvmloader/config.h Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/config.h Wed Jan 28 13:06:45 2009 +0900
@@ -1,5 +1,8 @@
 #ifndef __HVMLOADER_CONFIG_H__
 #define __HVMLOADER_CONFIG_H__
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE  (1ul << PAGE_SHIFT)
 
 #define IOAPIC_BASE_ADDRESS 0xfec00000
 #define IOAPIC_ID           0x01
@@ -11,8 +14,14 @@
 #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
 #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
 
-#define PCI_MEMBASE         0xf0000000
-#define PCI_MEMSIZE         0x0c000000
+/* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
+#define PCI_MEM_START       0xf0000000
+#define PCI_MEM_END         0xfc000000
+extern unsigned long pci_mem_start, pci_mem_end;
+
+/* We reserve 16MB for special BIOS mappings, etc. */
+#define RESERVED_MEMBASE    0xfc000000
+#define RESERVED_MEMSIZE    0x01000000
 
 #define ROMBIOS_SEG            0xF000
 #define ROMBIOS_BEGIN          0x000F0000
@@ -21,16 +30,17 @@
 #define ROMBIOS_END            (ROMBIOS_BEGIN + ROMBIOS_SIZE)
 
 /* Memory map. */
+#define SCRATCH_PHYSICAL_ADDRESS      0x00010000
 #define HYPERCALL_PHYSICAL_ADDRESS    0x00080000
 #define VGABIOS_PHYSICAL_ADDRESS      0x000C0000
 #define OPTIONROM_PHYSICAL_ADDRESS    0x000C8000
 #define OPTIONROM_PHYSICAL_END        0x000EA000
-#define ACPI_PHYSICAL_ADDRESS         0x000EA000
+#define BIOS_INFO_PHYSICAL_ADDRESS    0x000EA000
+#define ACPI_PHYSICAL_ADDRESS         0x000EA020
 #define E820_PHYSICAL_ADDRESS         0x000EA100
 #define SMBIOS_PHYSICAL_ADDRESS       0x000EB000
 #define SMBIOS_MAXIMUM_SIZE           0x00005000
 #define ROMBIOS_PHYSICAL_ADDRESS      0x000F0000
-#define SCRATCH_PHYSICAL_ADDRESS      0x00010000
 
 /* Offsets from E820_PHYSICAL_ADDRESS. */
 #define E820_NR_OFFSET                0x0
@@ -39,12 +49,16 @@
 /* Xen Platform Device */
 #define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */
 
+/* Located at BIOS_INFO_PHYSICAL_ADDRESS. */
 struct bios_info {
-    uint8_t  com1_present:1;
-    uint8_t  com2_present:1;
-    uint8_t  hpet_present:1;
-    uint32_t pci_min, pci_len;
-    uint16_t xen_pfiob;
+    uint8_t  com1_present:1;    /* 0[0] - System has COM1? */
+    uint8_t  com2_present:1;    /* 0[1] - System has COM2? */
+    uint8_t  hpet_present:1;    /* 0[2] - System has HPET? */
+    uint32_t pci_min, pci_len;  /* 4, 8 - PCI I/O hole boundaries */
+    uint32_t bios32_entry;      /* 12   - Entry point for 32-bit BIOS */
+    uint16_t xen_pfiob;         /* 16   - Xen platform device I/O ports */
 };
+#define BIOSINFO_OFF_bios32_entry 12
+#define BIOSINFO_OFF_xen_pfiob    16
 
 #endif /* __HVMLOADER_CONFIG_H__ */
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/hvmloader.c
--- a/tools/firmware/hvmloader/hvmloader.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/hvmloader.c      Wed Jan 28 13:06:45 2009 +0900
@@ -31,6 +31,7 @@
 #include "option_rom.h"
 #include <xen/version.h>
 #include <xen/hvm/params.h>
+#include <xen/memory.h>
 
 asm (
     "    .text                       \n"
@@ -99,6 +100,9 @@ asm (
     "    .text                       \n"
     );
 
+unsigned long pci_mem_start = PCI_MEM_START;
+unsigned long pci_mem_end = PCI_MEM_END;
+
 static enum { VGA_none, VGA_std, VGA_cirrus } virtual_vga = VGA_none;
 
 static void init_hypercalls(void)
@@ -148,16 +152,14 @@ static void apic_setup(void)
 
 static void pci_setup(void)
 {
-    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd;
+    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
     uint16_t class, vendor_id, device_id;
     unsigned int bar, pin, link, isa_irq;
 
     /* Resources assignable to PCI devices via BARs. */
     struct resource {
         uint32_t base, max;
-    } *resource;
-    struct resource mem_resource = { PCI_MEMBASE, PCI_MEMBASE + PCI_MEMSIZE };
-    struct resource io_resource  = { 0xc000, 0x10000 };
+    } *resource, mem_resource, io_resource;
 
     /* Create a list of device BARs in descending order of size. */
     struct bars {
@@ -248,6 +250,10 @@ static void pci_setup(void)
             bars[i].bar_reg = bar_reg;
             bars[i].bar_sz  = bar_sz;
 
+            if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
+                 PCI_BASE_ADDRESS_SPACE_MEMORY )
+                mmio_total += bar_sz;
+
             nr_bars++;
 
             /* Skip the upper-half of the address for a 64-bit BAR. */
@@ -276,6 +282,28 @@ static void pci_setup(void)
         pci_writew(devfn, PCI_COMMAND, cmd);
     }
 
+    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
+            ((pci_mem_start << 1) != 0) )
+        pci_mem_start <<= 1;
+
+    while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
+    {
+        struct xen_add_to_physmap xatp;
+        if ( hvm_info->high_mem_pgend == 0 )
+            hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT);
+        xatp.domid = DOMID_SELF;
+        xatp.space = XENMAPSPACE_gmfn;
+        xatp.idx   = --hvm_info->low_mem_pgend;
+        xatp.gpfn  = hvm_info->high_mem_pgend++;
+        if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+            BUG();
+    }
+
+    mem_resource.base = pci_mem_start;
+    mem_resource.max = pci_mem_end;
+    io_resource.base = 0xc000;
+    io_resource.max = 0x10000;
+
     /* Assign iomem and ioport resources in descending order of size. */
     for ( i = 0; i < nr_bars; i++ )
     {
@@ -488,22 +516,13 @@ static int pci_load_option_roms(uint32_t
 /* Replace possibly erroneous memory-size CMOS fields with correct values. */
 static void cmos_write_memory_size(void)
 {
-    struct e820entry *map = E820;
-    int i, nr = *E820_NR;
-    uint32_t base_mem = 640, ext_mem = 0, alt_mem = 0;
-
-    for ( i = 0; i < nr; i++ )
-        if ( (map[i].addr >= 0x100000) && (map[i].type == E820_RAM) )
-            break;
-
-    if ( i != nr )
-    {
-        alt_mem = ext_mem = map[i].addr + map[i].size;
-        ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0;
-        if ( ext_mem > 0xffff )
-            ext_mem = 0xffff;
-        alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0;
-    }
+    uint32_t base_mem = 640, ext_mem, alt_mem;
+
+    alt_mem = ext_mem = hvm_info->low_mem_pgend << PAGE_SHIFT;
+    ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0;
+    if ( ext_mem > 0xffff )
+        ext_mem = 0xffff;
+    alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0;
 
     /* All BIOSes: conventional memory (CMOS *always* reports 640kB). */
     cmos_outb(0x15, (uint8_t)(base_mem >> 0));
@@ -520,25 +539,23 @@ static void cmos_write_memory_size(void)
     cmos_outb(0x35, (uint8_t)( alt_mem >> 8));
 }
 
-static uint16_t init_xen_platform_io_base(void)
-{
-    struct bios_info *bios_info = (struct bios_info *)ACPI_PHYSICAL_ADDRESS;
+static uint16_t xen_platform_io_base(void)
+{
     uint32_t devfn, bar_data;
     uint16_t vendor_id, device_id;
 
-    bios_info->xen_pfiob = 0;
-
     for ( devfn = 0; devfn < 128; devfn++ )
     {
         vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
         device_id = pci_readw(devfn, PCI_DEVICE_ID);
-        if ( (vendor_id != 0x5853) || (device_id != 0x0001) )
-            continue;
-        bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0);
-        bios_info->xen_pfiob = bar_data & PCI_BASE_ADDRESS_IO_MASK;
-    }
-
-    return bios_info->xen_pfiob;
+        if ( (vendor_id == 0x5853) && (device_id == 0x0001) )
+        {
+            bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0);
+            return bar_data & PCI_BASE_ADDRESS_IO_MASK;
+        }
+    }
+
+    return 0;
 }
 
 /*
@@ -548,27 +565,80 @@ static uint16_t init_xen_platform_io_bas
  */
 static void init_vm86_tss(void)
 {
-    uint32_t tss;
+    void *tss;
     struct xen_hvm_param p;
 
-    tss = e820_malloc(128, 128);
-    memset((char *)tss, 0, 128);
+    tss = mem_alloc(128, 128);
+    memset(tss, 0, 128);
     p.domid = DOMID_SELF;
     p.index = HVM_PARAM_VM86_TSS;
-    p.value = tss;
+    p.value = virt_to_phys(tss);
     hypercall_hvm_op(HVMOP_set_param, &p);
-    printf("vm86 TSS at %08x\n", tss);
-}
-
-/*
- * Copy the E820 table provided by the HVM domain builder into the correct
- * place in the memory map we share with the rombios.
- */
-static void copy_e820_table(void)
-{
-    uint8_t nr = *(uint8_t *)(HVM_E820_PAGE + HVM_E820_NR_OFFSET);
-    BUG_ON(nr > 16);
-    memcpy(E820, (char *)HVM_E820_PAGE + HVM_E820_OFFSET, nr * sizeof(*E820));
+    printf("vm86 TSS at %08lx\n", virt_to_phys(tss));
+}
+
+/* Create an E820 table based on memory parameters provided in hvm_info. */
+static void build_e820_table(void)
+{
+    struct e820entry *e820 = E820;
+    unsigned int nr = 0;
+
+    /* 0x0-0x9FC00: Ordinary RAM. */
+    e820[nr].addr = 0x0;
+    e820[nr].size = 0x9FC00;
+    e820[nr].type = E820_RAM;
+    nr++;
+
+    /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */
+    e820[nr].addr = 0x9FC00;
+    e820[nr].size = 0x400;
+    e820[nr].type = E820_RESERVED;
+    nr++;
+
+    /*
+     * Following regions are standard regions of the PC memory map.
+     * They are not covered by e820 regions. OSes will not use them as RAM.
+     * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
+     * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
+     * TODO: free pages which turn out to be unused.
+     */
+
+    /*
+     * 0xE0000-0x0F0000: PC-specific area. We place various tables here.
+     * 0xF0000-0x100000: System BIOS.
+     * TODO: free pages which turn out to be unused.
+     */
+    e820[nr].addr = 0xE0000;
+    e820[nr].size = 0x20000;
+    e820[nr].type = E820_RESERVED;
+    nr++;
+
+    /* Low RAM goes here. Reserve space for special pages. */
+    BUG_ON((hvm_info->low_mem_pgend << PAGE_SHIFT) < (2u << 20));
+    e820[nr].addr = 0x100000;
+    e820[nr].size = (hvm_info->low_mem_pgend << PAGE_SHIFT) - e820[nr].addr;
+    e820[nr].type = E820_RAM;
+    nr++;
+
+    /*
+     * Explicitly reserve space for special pages.
+     * This space starts at RESERVED_MEMBASE and extends to cover various
+     * fixed hardware mappings (e.g., LAPIC, IOAPIC, default SVGA framebuffer).
+     */
+    e820[nr].addr = RESERVED_MEMBASE;
+    e820[nr].size = (uint32_t)-e820[nr].addr;
+    e820[nr].type = E820_RESERVED;
+    nr++;
+
+    if ( hvm_info->high_mem_pgend )
+    {
+        e820[nr].addr = ((uint64_t)1 << 32);
+        e820[nr].size =
+            ((uint64_t)hvm_info->high_mem_pgend << PAGE_SHIFT) - e820[nr].addr;
+        e820[nr].type = E820_RAM;
+        nr++;
+    }
+
     *E820_NR = nr;
 }
 
@@ -576,16 +646,17 @@ int main(void)
 {
     int option_rom_sz = 0, vgabios_sz = 0, etherboot_sz = 0;
     int rombios_sz, smbios_sz;
-    uint32_t etherboot_phys_addr, option_rom_phys_addr, vga_ram = 0;
-    uint16_t xen_pfiob;
+    uint32_t etherboot_phys_addr, option_rom_phys_addr, bios32_addr;
+    struct bios_info *bios_info;
 
     printf("HVM Loader\n");
 
-    copy_e820_table();
-
     init_hypercalls();
 
     printf("CPU speed is %u MHz\n", get_cpu_mhz());
+
+    apic_setup();
+    pci_setup();
 
     smp_initialise();
 
@@ -599,12 +670,9 @@ int main(void)
     if ( rombios_sz > 0x10000 )
         rombios_sz = 0x10000;
     memcpy((void *)ROMBIOS_PHYSICAL_ADDRESS, rombios, rombios_sz);
-    highbios_setup();
-
-    apic_setup();
-    pci_setup();
-
-    if ( (get_vcpu_nr() > 1) || get_apic_mode() )
+    bios32_addr = highbios_setup();
+
+    if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )
         create_mp_tables();
 
     switch ( virtual_vga )
@@ -626,12 +694,6 @@ int main(void)
         break;
     }
 
-    if ( virtual_vga != VGA_none )
-    {
-        vga_ram = e820_malloc(8 << 20, 4096);
-        printf("VGA RAM at %08x\n", vga_ram);
-    }
-
     etherboot_phys_addr = VGABIOS_PHYSICAL_ADDRESS + vgabios_sz;
     if ( etherboot_phys_addr < OPTIONROM_PHYSICAL_ADDRESS )
         etherboot_phys_addr = OPTIONROM_PHYSICAL_ADDRESS;
@@ -640,7 +702,7 @@ int main(void)
     option_rom_phys_addr = etherboot_phys_addr + etherboot_sz;
     option_rom_sz = pci_load_option_roms(option_rom_phys_addr);
 
-    if ( get_acpi_enabled() )
+    if ( hvm_info->acpi_enabled )
     {
         printf("Loading ACPI ...\n");
         acpi_build_tables();
@@ -672,9 +734,17 @@ int main(void)
                ROMBIOS_PHYSICAL_ADDRESS,
                ROMBIOS_PHYSICAL_ADDRESS + rombios_sz - 1);
 
-    xen_pfiob = init_xen_platform_io_base();
-    if ( xen_pfiob && vga_ram )
-        outl(xen_pfiob + 4, vga_ram);
+    build_e820_table();
+
+    bios_info = (struct bios_info *)BIOS_INFO_PHYSICAL_ADDRESS;
+    memset(bios_info, 0, sizeof(*bios_info));
+    bios_info->com1_present = uart_exists(0x3f8);
+    bios_info->com2_present = uart_exists(0x2f8);
+    bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
+    bios_info->pci_min = pci_mem_start;
+    bios_info->pci_len = pci_mem_end - pci_mem_start;
+    bios_info->bios32_entry = bios32_addr;
+    bios_info->xen_pfiob = xen_platform_io_base();
 
     printf("Invoking ROMBIOS ...\n");
     return 0;
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/mp_tables.c
--- a/tools/firmware/hvmloader/mp_tables.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/mp_tables.c      Wed Jan 28 13:06:45 2009 +0900
@@ -155,7 +155,7 @@ static void fill_mp_config_table(struct 
     int vcpu_nr, i;
     uint8_t checksum;
 
-    vcpu_nr = get_vcpu_nr();
+    vcpu_nr = hvm_info->nr_vcpus;
 
     /* fill in the MP configuration table signature, "PCMP" */
     mpct->signature[0] = 'P';
@@ -317,7 +317,7 @@ void create_mp_tables(void)
     char *p;
     int vcpu_nr, i, length;
 
-    vcpu_nr = get_vcpu_nr();
+    vcpu_nr = hvm_info->nr_vcpus;
 
     printf("Creating MP tables ...\n");
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/smbios.c
--- a/tools/firmware/hvmloader/smbios.c Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/smbios.c Wed Jan 28 13:06:45 2009 +0900
@@ -118,8 +118,9 @@ write_smbios_tables(void *start,
     do_struct(smbios_type_16_init(p, memsize, nr_mem_devs));
     for ( i = 0; i < nr_mem_devs; i++ )
     {
-        uint32_t dev_memsize = ((i == (nr_mem_devs - 1))
-                                ? (memsize & 0x3fff) : 0x4000);
+        uint32_t dev_memsize = 0x4000; /* all but last covers 16GB */
+        if ( (i == (nr_mem_devs - 1)) && ((memsize & 0x3fff) != 0) )
+            dev_memsize = memsize & 0x3fff; /* last dev is <16GB */
         do_struct(smbios_type_17_init(p, dev_memsize, i));
         do_struct(smbios_type_19_init(p, dev_memsize, i));
         do_struct(smbios_type_20_init(p, dev_memsize, i));
@@ -143,28 +144,18 @@ static uint64_t
 static uint64_t
 get_memsize(void)
 {
-    struct e820entry *map = E820;
-    uint8_t num_entries = *E820_NR;
-    uint64_t memsize = 0;
-    int i;
-
-    /*
-     * Walk through e820map, ignoring any entries that aren't marked
-     * as usable or reserved.
-     */
-    for ( i = 0; i < num_entries; i++ )
-    {
-        if ( (map->type == E820_RAM) || (map->type == E820_RESERVED) )
-            memsize += map->size;
-        map++;
-    }
+    uint64_t sz;
+
+    sz = (uint64_t)hvm_info->low_mem_pgend << PAGE_SHIFT;
+    if ( hvm_info->high_mem_pgend )
+        sz += (hvm_info->high_mem_pgend << PAGE_SHIFT) - (1ull << 32);
 
     /*
      * Round up to the nearest MB.  The user specifies domU pseudo-physical 
      * memory in megabytes, so not doing this could easily lead to reporting 
      * one less MB than the user specified.
      */
-    return (memsize + (1 << 20) - 1) >> 20;
+    return (sz + (1ul << 20) - 1) >> 20;
 }
 
 int
@@ -229,7 +220,7 @@ hvm_write_smbios_tables(void)
 
     /* SCRATCH_PHYSICAL_ADDRESS is a safe large memory area for scratch. */
     len = write_smbios_tables((void *)SCRATCH_PHYSICAL_ADDRESS,
-                              get_vcpu_nr(), get_memsize(),
+                              hvm_info->nr_vcpus, get_memsize(),
                               uuid, xen_version_str,
                               xen_major_version, xen_minor_version);
     if ( len > SMBIOS_MAXIMUM_SIZE )
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/smp.c
--- a/tools/firmware/hvmloader/smp.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/smp.c    Wed Jan 28 13:06:45 2009 +0900
@@ -121,7 +121,7 @@ static void boot_cpu(unsigned int cpu)
 
 void smp_initialise(void)
 {
-    unsigned int i, nr_cpus = get_vcpu_nr();
+    unsigned int i, nr_cpus = hvm_info->nr_vcpus;
 
     memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start);
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/util.c
--- a/tools/firmware/hvmloader/util.c   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/util.c   Wed Jan 28 13:06:45 2009 +0900
@@ -25,7 +25,6 @@
 #include <stdint.h>
 #include <xen/xen.h>
 #include <xen/memory.h>
-#include <xen/hvm/hvm_info_table.h>
 
 void wrmsr(uint32_t idx, uint64_t v)
 {
@@ -304,63 +303,63 @@ uuid_to_string(char *dest, uint8_t *uuid
     *p = '\0';
 }
 
-static void e820_collapse(void)
-{
-    int i = 0;
-    struct e820entry *ent = E820;
-
-    while ( i < (*E820_NR-1) )
-    {
-        if ( (ent[i].type == ent[i+1].type) &&
-             ((ent[i].addr + ent[i].size) == ent[i+1].addr) )
-        {
-            ent[i].size += ent[i+1].size;
-            memcpy(&ent[i+1], &ent[i+2], (*E820_NR-i-2) * sizeof(*ent));
-            (*E820_NR)--;
-        }
-        else
-        {
-            i++;
-        }
-    }
-}
-
-uint32_t e820_malloc(uint32_t size, uint32_t align)
-{
-    uint32_t addr;
-    int i;
-    struct e820entry *ent = E820;
+void *mem_alloc(uint32_t size, uint32_t align)
+{
+    static uint32_t reserve = RESERVED_MEMBASE - 1;
+    static int over_allocated;
+    struct xen_add_to_physmap xatp;
+    struct xen_memory_reservation xmr;
+    xen_pfn_t mfn;
+    uint32_t s, e;
 
     /* Align to at least one kilobyte. */
     if ( align < 1024 )
         align = 1024;
 
-    for ( i = *E820_NR - 1; i >= 0; i-- )
-    {
-        addr = (ent[i].addr + ent[i].size - size) & ~(align-1);
-        if ( (ent[i].type != E820_RAM) || /* not ram? */
-             (addr < ent[i].addr) ||      /* too small or starts above 4gb? */
-             ((addr + size) < addr) )     /* ends above 4gb? */
-            continue;
-
-        if ( addr != ent[i].addr )
-        {
-            memmove(&ent[i+1], &ent[i], (*E820_NR-i) * sizeof(*ent));
-            (*E820_NR)++;
-            ent[i].size = addr - ent[i].addr;
-            ent[i+1].addr = addr;
-            ent[i+1].size -= ent[i].size;
-            i++;
-        }
-
-        ent[i].type = E820_RESERVED;
-
-        e820_collapse();
-
-        return addr;
-    }
-
-    return 0;
+    s = (reserve + align) & ~(align - 1);
+    e = s + size - 1;
+
+    BUG_ON((e < s) || (e >> PAGE_SHIFT) >= hvm_info->reserved_mem_pgstart);
+
+    while ( (reserve >> PAGE_SHIFT) != (e >> PAGE_SHIFT) )
+    {
+        reserve += PAGE_SIZE;
+        mfn = reserve >> PAGE_SHIFT;
+
+        /* Try to allocate a brand new page in the reserved area. */
+        if ( !over_allocated )
+        {
+            xmr.domid = DOMID_SELF;
+            xmr.mem_flags = 0;
+            xmr.extent_order = 0;
+            xmr.nr_extents = 1;
+            set_xen_guest_handle(xmr.extent_start, &mfn);
+            if ( hypercall_memory_op(XENMEM_populate_physmap, &xmr) == 1 )
+                continue;
+            over_allocated = 1;
+        }
+
+        /* Otherwise, relocate a page from the ordinary RAM map. */
+        if ( hvm_info->high_mem_pgend )
+        {
+            xatp.idx = --hvm_info->high_mem_pgend;
+            if ( xatp.idx == (1ull << (32 - PAGE_SHIFT)) )
+                hvm_info->high_mem_pgend = 0;
+        }
+        else
+        {
+            xatp.idx = --hvm_info->low_mem_pgend;
+        }
+        xatp.domid = DOMID_SELF;
+        xatp.space = XENMAPSPACE_gmfn;
+        xatp.gpfn  = mfn;
+        if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+            BUG();
+    }
+
+    reserve = e;
+
+    return (void *)(unsigned long)s;
 }
 
 uint32_t ioapic_read(uint32_t reg)
@@ -543,30 +542,35 @@ void __bug(char *file, int line)
         asm volatile ( "ud2" );
 }
 
-static int validate_hvm_info(struct hvm_info_table *t)
-{
-    char signature[] = "HVM INFO";
+static void validate_hvm_info(struct hvm_info_table *t)
+{
     uint8_t *ptr = (uint8_t *)t;
     uint8_t sum = 0;
     int i;
 
-    /* strncmp(t->signature, "HVM INFO", 8) */
-    for ( i = 0; i < 8; i++ )
-    {
-        if ( signature[i] != t->signature[i] )
-        {
-            printf("Bad hvm info signature\n");
-            return 0;
-        }
+    if ( strncmp(t->signature, "HVM INFO", 8) )
+    {
+        printf("Bad hvm info signature\n");
+        BUG();
+    }
+
+    if ( t->length < sizeof(struct hvm_info_table) )
+    {
+        printf("Bad hvm info length\n");
+        BUG();
     }
 
     for ( i = 0; i < t->length; i++ )
         sum += ptr[i];
 
-    return (sum == 0);
-}
-
-static struct hvm_info_table *get_hvm_info_table(void)
+    if ( sum != 0 )
+    {
+        printf("Bad hvm info checksum\n");
+        BUG();
+    }
+}
+
+struct hvm_info_table *get_hvm_info_table(void)
 {
     static struct hvm_info_table *table;
     struct hvm_info_table *t;
@@ -576,33 +580,11 @@ static struct hvm_info_table *get_hvm_in
 
     t = (struct hvm_info_table *)HVM_INFO_PADDR;
 
-    if ( !validate_hvm_info(t) )
-    {
-        printf("Bad hvm info table\n");
-        return NULL;
-    }
+    validate_hvm_info(t);
 
     table = t;
 
     return table;
-}
-
-int get_vcpu_nr(void)
-{
-    struct hvm_info_table *t = get_hvm_info_table();
-    return (t ? t->nr_vcpus : 1);
-}
-
-int get_acpi_enabled(void)
-{
-    struct hvm_info_table *t = get_hvm_info_table();
-    return (t ? t->acpi_enabled : 1);
-}
-
-int get_apic_mode(void)
-{
-    struct hvm_info_table *t = get_hvm_info_table();
-    return (t ? t->apic_mode : 1);
 }
 
 uint16_t get_cpu_mhz(void)
@@ -645,6 +627,27 @@ uint16_t get_cpu_mhz(void)
 
     cpu_mhz = (uint16_t)(((uint32_t)cpu_khz + 500) / 1000);
     return cpu_mhz;
+}
+
+int uart_exists(uint16_t uart_base)
+{
+    uint16_t ier = uart_base + 1;
+    uint8_t a, b, c;
+
+    a = inb(ier);
+    outb(ier, 0);
+    b = inb(ier);
+    outb(ier, 0xf);
+    c = inb(ier);
+    outb(ier, a);
+
+    return ((b == 0) && (c == 0xf));
+}
+
+int hpet_exists(unsigned long hpet_base)
+{
+    uint32_t hpet_id = *(uint32_t *)hpet_base;
+    return ((hpet_id >> 16) == 0x8086);
 }
 
 /*
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/hvmloader/util.h
--- a/tools/firmware/hvmloader/util.h   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/hvmloader/util.h   Wed Jan 28 13:06:45 2009 +0900
@@ -3,6 +3,7 @@
 
 #include <stdarg.h>
 #include <stdint.h>
+#include <xen/hvm/hvm_info_table.h>
 
 #undef offsetof
 #define offsetof(t, m) ((unsigned long)&((t *)0)->m)
@@ -56,6 +57,10 @@ void pci_write(uint32_t devfn, uint32_t 
 /* Get CPU speed in MHz. */
 uint16_t get_cpu_mhz(void);
 
+/* Hardware detection. */
+int uart_exists(uint16_t uart_base);
+int hpet_exists(unsigned long hpet_base);
+
 /* Do cpuid instruction, with operation 'idx' */
 void cpuid(uint32_t idx, uint32_t *eax, uint32_t *ebx,
            uint32_t *ecx, uint32_t *edx);
@@ -103,9 +108,8 @@ static inline void cpu_relax(void)
 })
 
 /* HVM-builder info. */
-int get_vcpu_nr(void);
-int get_acpi_enabled(void);
-int get_apic_mode(void);
+struct hvm_info_table *get_hvm_info_table(void);
+#define hvm_info (get_hvm_info_table())
 
 /* String and memory functions */
 int strcmp(const char *cs, const char *ct);
@@ -131,11 +135,12 @@ int printf(const char *fmt, ...) __attri
 int printf(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 int vprintf(const char *fmt, va_list ap);
 
-/* Reserve a RAM region in the e820 table. */
-uint32_t e820_malloc(uint32_t size, uint32_t align);
+/* Allocate memory in a reserved region below 4GB. */
+void *mem_alloc(uint32_t size, uint32_t align);
+#define virt_to_phys(v) ((unsigned long)(v))
 
 /* Prepare the 32bit BIOS */
-void highbios_setup(void);
+uint32_t highbios_setup(void);
 
 /* Miscellaneous. */
 void cacheattr_init(void);
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/32bitbios.c
--- a/tools/firmware/rombios/32bit/32bitbios.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bit/32bitbios.c  Wed Jan 28 13:06:45 2009 +0900
@@ -19,35 +19,16 @@
  *
  * Author: Stefan Berger <stefanb@xxxxxxxxxx>
  */
+
 #include "rombios_compat.h"
+
+asm (
+    "    .text                       \n"
+    "     movzwl %bx,%eax            \n"
+    "     jmp *jumptable(,%eax,4)    \n"
+    "    .data                       \n"
+    "jumptable:                      \n"
+#define X(idx, ret, fn, args...) " .long "#fn"\n"
 #include "32bitprotos.h"
-
-/*
-   the jumptable that will be copied into the rombios in the 0xf000 segment
-   for every function that is to be called from the lower BIOS, make an entry
-   here.
- */
-#define TABLE_ENTRY(idx, func) [idx] = (uint32_t)func
-uint32_t jumptable[IDX_LAST+1] __attribute__((section (".biosjumptable"))) =
-{
-       TABLE_ENTRY(IDX_TCPA_ACPI_INIT, tcpa_acpi_init),
-       TABLE_ENTRY(IDX_TCPA_EXTEND_ACPI_LOG, tcpa_extend_acpi_log),
-
-       TABLE_ENTRY(IDX_TCGINTERRUPTHANDLER, TCGInterruptHandler),
-
-       TABLE_ENTRY(IDX_TCPA_CALLING_INT19H, tcpa_calling_int19h),
-       TABLE_ENTRY(IDX_TCPA_RETURNED_INT19H, tcpa_returned_int19h),
-       TABLE_ENTRY(IDX_TCPA_ADD_EVENT_SEPARATORS, tcpa_add_event_separators),
-       TABLE_ENTRY(IDX_TCPA_WAKE_EVENT, tcpa_wake_event),
-       TABLE_ENTRY(IDX_TCPA_ADD_BOOTDEVICE, tcpa_add_bootdevice),
-       TABLE_ENTRY(IDX_TCPA_START_OPTION_ROM_SCAN, tcpa_start_option_rom_scan),
-       TABLE_ENTRY(IDX_TCPA_OPTION_ROM, tcpa_option_rom),
-       TABLE_ENTRY(IDX_TCPA_IPL, tcpa_ipl),
-       TABLE_ENTRY(IDX_TCPA_MEASURE_POST, tcpa_measure_post),
-
-       TABLE_ENTRY(IDX_TCPA_INITIALIZE_TPM, tcpa_initialize_tpm),
-
-       TABLE_ENTRY(IDX_GET_S3_WAKING_VECTOR, get_s3_waking_vector),
-
-       TABLE_ENTRY(IDX_LAST       , 0)     /* keep last */
-};
+#undef X
+    );
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/Makefile
--- a/tools/firmware/rombios/32bit/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bit/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -1,14 +1,11 @@ XEN_ROOT = ../../../..
 XEN_ROOT = ../../../..
 include $(XEN_ROOT)/tools/firmware/Rules.mk
 
-SOURCES = util.c
 TARGET = 32bitbios_flat.h
 
-CFLAGS += $(CFLAGS_include) -I.. -DGCC_PROTOS
+CFLAGS += $(CFLAGS_include) -I..
 
 SUBDIRS = tcgbios
-
-MODULES = tcgbios/tcgbiosext.o
 
 .PHONY: all
 all: subdirs-all
@@ -16,9 +13,12 @@ all: subdirs-all
 
 .PHONY: clean
 clean: subdirs-clean
-       rm -rf *.o $(TARGET)
+       rm -rf *.o $(TARGET) $(DEPS)
 
-$(TARGET): 32bitbios.o $(MODULES) util.o
+$(TARGET): 32bitbios_all.o
+       sh mkhex highbios_array 32bitbios_all.o > $@
+
+32bitbios_all.o: 32bitbios.o tcgbios/tcgbiosext.o util.o pmm.o
        $(LD) $(LDFLAGS_DIRECT) -s -r $^ -o 32bitbios_all.o
        @nm 32bitbios_all.o |                                \
          egrep '^ +U ' >/dev/null && {                      \
@@ -26,4 +26,5 @@ clean: subdirs-clean
            nm -u 32bitbios_all.o;                           \
            exit 11;                                         \
          } || :
-       sh mkhex highbios_array 32bitbios_all.o > $@
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/pmm.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/firmware/rombios/32bit/pmm.c        Wed Jan 28 13:06:45 2009 +0900
@@ -0,0 +1,531 @@
+/*
+ *  pmm.c - POST(Power On Self Test) Memory Manager
+ *  according to the specification described in
+ *  http://www.phoenix.com/NR/rdonlyres/873A00CF-33AC-4775-B77E-08E7B9754993/0/specspmm101.pdf
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (C) 2009 FUJITSU LIMITED
+ *
+ *  Author: Kouya Shimura <kouya@xxxxxxxxxxxxxx>
+ */
+
+/*
+ * Algorithm:
+ *
+ * This is not a fast storage allocator but a simple one.  There is no
+ * segregated management by block size and it does nothing special to
+ * avoid fragmentation.
+ *
+ * The allocation algorithm is first-fit. All memory blocks are
+ * managed by a linear singly linked list in address order
+ * (i.e. there is no backward pointer). It searches for the first available
+ * block of equal or larger size from the head (lowest address) of the memory
+ * heap. A larger block is split into two blocks unless one side
+ * would become too small.
+ * 
+ * For de-allocation, the specified block is just marked as available
+ * and nothing else is done. Thus, fragmentation will occur. Contiguous
+ * available blocks are coalesced during the search phase of a later
+ * block allocation.
+ *
+ * The following is an abstract of this algorithm. The actual code
+ * looks complicated on account of alignment and checking the handle.
+ *
+ *     static memblk_t *
+ *     alloc(heap_t *heap, uint32_t size)
+ *     {
+ *         static memblk_t *mb;
+ *         for_each_memblk(heap, mb) // search memory blocks
+ *             if (memblk_is_avail(mb))
+ *             {
+ *                 collect_avail_memblks(heap, mb);
+ *                 if (size <= memblk_bufsize(mb))
+ *                 {
+ *                     split_memblk(mb, size);
+ *                     set_inuse(mb);
+ *                     return mb;
+ *                 }
+ *             }
+ *         return NULL;
+ *     }
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <../hvmloader/config.h>
+#include <../hvmloader/e820.h>
+#include "util.h"
+
+#define DEBUG_PMM 0
+
+#define ASSERT(_expr, _action)                                  \
+    if (!(_expr)) {                                             \
+        printf("ASSERTION FAIL: %s %s:%d %s()\n",               \
+               __STRING(_expr), __FILE__, __LINE__, __func__);  \
+        _action;                                                \
+    } else
+
+#if DEBUG_PMM
+# define PMM_DEBUG(format, p...) printf("PMM " format, ##p)
+#else
+# define PMM_DEBUG(format, p...)
+#endif
+
+struct pmmAllocArgs {
+    uint16_t function;
+    uint32_t length;
+    uint32_t handle;
+    uint16_t flags;
+} __attribute__ ((packed));
+
+struct pmmFindArgs {
+    uint16_t function;
+    uint32_t handle;
+} __attribute__ ((packed));
+
+struct pmmDeallocateArgs {
+    uint16_t function;
+    uint32_t buffer;
+} __attribute__ ((packed));
+
+#define PMM_FUNCTION_ALLOCATE   0
+#define PMM_FUNCTION_FIND       1         
+#define PMM_FUNCTION_DEALLOC    2
+
+#define PARAGRAPH_LENGTH        16  // unit of length
+
+#define PMM_HANDLE_ANONYMOUS    0xffffffff
+
+#define PMM_FLAGS_MEMORY_TYPE_MASK      0x0003
+#define PMM_FLAGS_MEMORY_INVALID        0
+#define PMM_FLAGS_MEMORY_CONVENTIONAL   1  // 0 to 1MB
+#define PMM_FLAGS_MEMORY_EXTENDED       2  // 1MB to 4GB
+#define PMM_FLAGS_MEMORY_ANY            3  // whichever is available
+#define PMM_FLAGS_ALIGINMENT            0x0004
+
+/* Error code */
+#define PMM_ENOMEM      (0)     // Out of memory, duplicate handle
+#define PMM_EINVAL      (-1)    // Invalid argument
+
+#define ALIGN_UP(addr, size)    (((addr)+((size)-1))&(~((size)-1)))
+#define ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
+
+typedef struct memblk {
+    uint32_t magic;      // inuse or available
+    struct memblk *next; // points the very next of this memblk
+    uint32_t handle;     // identifier of this block
+    uint32_t __fill;     // for 16byte alignment, not used
+    uint8_t buffer[0];
+} memblk_t;
+
+typedef struct heap {
+    memblk_t *head;     // start address of heap
+    memblk_t *end;      // end address of heap
+} heap_t;
+
+#define HEAP_NOT_INITIALIZED    (memblk_t *)-1
+#define HEAP_ALIGNMENT          16
+
+/*
+ * PMM handles two memory heaps, the caller chooses either.
+ *
+ * - conventional memory (below 1MB)
+ *    In HVM, the area is fixed. 0x00010000-0x0007FFFF
+ *    (from SCRATCH_PHYSICAL_ADDRESS to HYPERCALL_PHYSICAL_ADDRESS)
+ *
+ * - extended memory (start at 1MB, below 4GB)
+ *    In HVM, the area starts at memory address 0x00100000.
+ *    The end address is variable. We read low RAM address from e820 table.
+ *
+ * The following struct must be located in the data segment since bss
+ * in 32bitbios is not relocated.
+ */
+static struct {
+    heap_t heap;     // conventional memory
+    heap_t ext_heap; // extended memory
+} pmm_data = { {HEAP_NOT_INITIALIZED, NULL}, {NULL, NULL} };
+
+/* These values are private use, not a spec in PMM */
+#define MEMBLK_MAGIC_INUSE   0x2A4D4D50  // 'PMM*'
+#define MEMBLK_MAGIC_AVAIL   0x5F4D4D50  // 'PMM_'
+
+#define memblk_is_inuse(_mb)  ((_mb)->magic == MEMBLK_MAGIC_INUSE)
+#define memblk_is_avail(_mb)  ((_mb)->magic == MEMBLK_MAGIC_AVAIL)
+
+static void set_inuse(memblk_t *mb, uint32_t handle)
+{
+    mb->magic = MEMBLK_MAGIC_INUSE;
+    mb->handle = handle;
+}
+
+static void set_avail(memblk_t *mb)
+{
+    mb->magic = MEMBLK_MAGIC_AVAIL;
+    mb->handle = PMM_HANDLE_ANONYMOUS;
+}
+
+#define MEMBLK_HEADER_SIZE   ((int)(&((memblk_t *)0)->buffer))
+#define MIN_MEMBLK_SIZE      (MEMBLK_HEADER_SIZE + PARAGRAPH_LENGTH)
+
+#define memblk_size(_mb)     ((void *)((_mb)->next) - (void *)(_mb))
+#define memblk_buffer(_mb)   ((uint32_t)(&(_mb)->buffer))
+#define memblk_bufsize(_mb)  (memblk_size(_mb) - MEMBLK_HEADER_SIZE)
+
+#define buffer_memblk(_buf)  (memblk_t *)((_buf) - MEMBLK_HEADER_SIZE)
+
+#define memblk_loop_mbondition(_h, _mb) \
+    (((_mb) < (_h)->end) && (/* avoid infinite loop */ (_mb) < (_mb)->next))
+
+#define for_each_memblk(_h, _mb)                \
+    for ((_mb) = (_h)->head;                    \
+         memblk_loop_mbondition(_h, _mb);       \
+         (_mb) = (_mb)->next)
+
+#define for_remain_memblk(_h, _mb)              \
+    for (;                                      \
+         memblk_loop_mbondition(_h, _mb);       \
+         (_mb) = (_mb)->next)
+
+/*
+ *                                       <-size->
+ *    +==================+======+       +========+========+======+
+ *    |      avail       |      |       | avail  | avail  |      |
+ *    |      memblk      |memblk|...    | memblk | memblk |memblk|...
+ *    +==================+======+   =>  +========+========+======+
+ *    ^ |                ^ |    ^         |      ^ |      ^ |    ^
+ *    | |next            | |next|         |next  | |next  | |next|
+ *    | \________________/ \____/         \______/ \______/ \____/
+ *    |                                          ^
+ *    |                                          |
+ *    mb                                         +- sb(return value)
+ */
+static memblk_t *
+split_memblk(memblk_t *mb, uint32_t size)
+{
+    memblk_t *sb = (void *)memblk_buffer(mb) + size;
+
+    /* Only split if the remaining fragment is big enough. */
+    if ( (memblk_bufsize(mb) - size) < MIN_MEMBLK_SIZE)
+        return mb;
+
+    sb->next = mb->next;
+    set_avail(sb);
+
+    mb->next = sb;
+    return sb;
+}
+
+/*
+ *    +======+======+======+======+       +=================+======+
+ *    |avail |avail |avail |inuse |       |      avail      |inuse |   
+ *    |memblk|memblk|memblk|memblk|...    |      memblk     |memblk|...
+ *    +======+======+======+======+   =>  +=================+======+
+ *    ^ |    ^ |    ^ |    ^ |    ^         |               ^ |    ^
+ *    | |next| |next| |next| |next|         |next           | |next|
+ *    | \____/ \____/ \____/ \____/         \_______________/ \____/
+ *    |
+ *    mb
+ */
+static void
+collect_avail_memblks(heap_t *heap, memblk_t *mb)
+{
+    memblk_t *nb = mb->next;
+
+    for_remain_memblk ( heap, nb )
+        if ( memblk_is_inuse(nb) )
+            break;
+    mb->next = nb;
+}
+
+static void
+pmm_init_heap(heap_t *heap, uint32_t from_addr, uint32_t to_addr)
+{
+    memblk_t *mb = (memblk_t *)ALIGN_UP(from_addr, HEAP_ALIGNMENT);
+
+    mb->next = (memblk_t *)ALIGN_DOWN(to_addr, HEAP_ALIGNMENT);
+    set_avail(mb);
+
+    heap->head = mb;
+    heap->end = mb->next;
+}
+
+static void
+pmm_initalize(void)
+{
+    int i, e820_nr = *E820_NR;
+    struct e820entry *e820 = E820;
+
+    /* Extended memory: RAM below 4GB, 0x100000-0xXXXXXXXX */
+    for ( i = 0; i < e820_nr; i++ )
+    {
+        if ( (e820[i].type == E820_RAM) && (e820[i].addr >= 0x00100000) )
+        {
+            pmm_init_heap(&pmm_data.ext_heap, e820[i].addr, 
+                          e820[i].addr + e820[i].size);
+            break;
+        }
+    }
+
+    /* Conventional memory: RAM below 1MB, 0x10000-0x7FFFF */
+    pmm_init_heap(&pmm_data.heap, SCRATCH_PHYSICAL_ADDRESS,
+                  HYPERCALL_PHYSICAL_ADDRESS);
+}
+
+static uint32_t
+pmm_max_avail_length(heap_t *heap)
+{
+    memblk_t *mb;
+    uint32_t size, max = 0;
+
+    for_each_memblk ( heap, mb )
+    {
+        if ( !memblk_is_avail(mb) )
+            continue;
+        collect_avail_memblks(heap, mb);
+        size = memblk_bufsize(mb);
+        if ( size > max )
+            max = size;
+    }
+
+    return (max / PARAGRAPH_LENGTH);
+}
+
+static memblk_t *
+first_fit(heap_t *heap, uint32_t size, uint32_t handle, uint32_t flags)
+{
+    memblk_t *mb;
+    int32_t align = 0;
+
+    if ( flags & PMM_FLAGS_ALIGINMENT )
+        align = ((size ^ (size - 1)) >> 1) + 1;
+
+    for_each_memblk ( heap, mb )
+    {
+        if ( memblk_is_avail(mb) )
+        {
+            collect_avail_memblks(heap, mb);
+
+            if ( align )
+            {
+                uint32_t addr = memblk_buffer(mb);
+                uint32_t offset = ALIGN_UP(addr, align) - addr;
+
+                if ( offset > 0 )
+                {
+                    ASSERT(offset >= MEMBLK_HEADER_SIZE, continue);
+
+                    if ( (offset + size) > memblk_bufsize(mb) )
+                        continue;
+
+                    mb = split_memblk(mb, offset - MEMBLK_HEADER_SIZE);
+                    return mb;
+                }
+            }
+
+            if ( size <= memblk_bufsize(mb) )
+                return mb;
+        }
+        else
+        {
+            ASSERT(memblk_is_inuse(mb), return NULL);
+
+            /* Duplication check for handle. */
+            if ( (handle != PMM_HANDLE_ANONYMOUS) && (mb->handle == handle) )
+                return NULL;
+        }
+    }
+
+    return NULL;
+}
+
+static memblk_t *
+pmm_find_handle(heap_t *heap, uint32_t handle)
+{
+    memblk_t *mb;
+
+    if ( handle == PMM_HANDLE_ANONYMOUS )
+        return NULL;
+
+    for_each_memblk ( heap, mb )
+        if ( mb->handle == handle )
+            return mb;
+
+    return NULL;
+}
+
+/*
+ * allocate a memory block of the specified type and size, and returns
+ * the address of the memory block.
+ *
+ * The handle is a client-specified identifier to be associated with
+ * the allocated memory block. A handle of 0xFFFFFFFF indicates that no
+ * identifier should be associated with the block. Such a memory block
+ * is known as an "anonymous" memory block and cannot be found using
+ * the pmmFind function. If a specified handle for a requested memory
+ * block is already used in a currently allocated memory block, the
+ * error value of 0x00000000 is returned.
+ *
+ * If length is 0x00000000, no memory is allocated and the value
+ * returned is the size of the largest memory block available for the
+ * memory type specified in the flags parameter. The alignment bit in
+ * the flags register is ignored when calculating the largest memory
+ * block available.
+ *
+ * If a specified handle for a requested memory block is already used
+ * in a currently allocated memory block, the error value of
+ * 0x00000000 is returned.
+ * 
+ * A return value of 0x00000000 indicates that an error occurred and
+ * no memory has been allocated. 
+ */
+static uint32_t
+pmmAllocate(uint32_t length, uint32_t handle, uint16_t flags)
+{
+    heap_t *heap;
+    memblk_t *mb;
+    uint32_t size;
+
+    switch ( flags & PMM_FLAGS_MEMORY_TYPE_MASK )
+    {
+    case PMM_FLAGS_MEMORY_CONVENTIONAL:
+        heap = &pmm_data.heap;
+        break;
+
+    case PMM_FLAGS_MEMORY_EXTENDED:
+    case PMM_FLAGS_MEMORY_ANY: /* XXX: ignore conventional memory for now */
+        heap = &pmm_data.ext_heap;
+        break;
+
+    default:
+        return PMM_EINVAL;
+    }
+
+    /* return the largest memory block available */
+    if ( length == 0 )
+        return pmm_max_avail_length(heap);
+
+    size = length * PARAGRAPH_LENGTH;
+    mb = first_fit(heap, size, handle, flags);
+
+    if ( mb == NULL )
+        return PMM_ENOMEM;
+
+    /* duplication check for handle */
+    if ( handle != PMM_HANDLE_ANONYMOUS )
+    {
+        memblk_t *nb = mb->next;
+
+        for_remain_memblk(heap, nb)
+            if (nb->handle == handle)
+                return PMM_ENOMEM;
+    }
+
+    split_memblk(mb, size);
+    set_inuse(mb, handle);
+
+    return memblk_buffer(mb);
+}
+
+/*
+ * returns the address of the memory block associated with the
+ * specified handle.  
+ *
+ * A return value of 0x00000000 indicates that the handle does not
+ * correspond to a currently allocated memory block.
+ */
+static uint32_t
+pmmFind(uint32_t handle)
+{
+    memblk_t *mb;
+
+    if ( handle == PMM_HANDLE_ANONYMOUS )
+        return 0;
+
+    mb = pmm_find_handle(&pmm_data.heap, handle);
+    if ( mb == NULL )
+        mb = pmm_find_handle(&pmm_data.ext_heap, handle);
+
+    return mb ? memblk_buffer(mb) : 0;
+}
+
+/* 
+ * frees the specified memory block that was previously allocated by
+ * pmmAllocate.
+ *
+ * If the memory block was deallocated correctly, the return value is
+ * 0x00000000. If there was an error, the return value is non-zero.
+ */
+static uint32_t
+pmmDeallocate(uint32_t buffer)
+{
+    memblk_t *mb = buffer_memblk(buffer);
+
+    if ( !memblk_is_inuse(mb) )
+        return PMM_EINVAL;
+
+    set_avail(mb);
+    return 0;
+}
+
+
+union pmm_args {
+    uint16_t function;
+    struct pmmAllocArgs alloc;
+    struct pmmFindArgs find;
+    struct pmmDeallocateArgs dealloc;
+} __attribute__ ((packed));
+
+/*
+ * entry function of all PMM services.
+ *
+ * Values returned to the caller are placed in the DX:AX register
+ * pair. The flags and all registers, other than DX and AX, are
+ * preserved across calls to PMM services.
+ */
+uint32_t
+pmm(void *argp)
+{
+    union pmm_args *ap = argp;
+    uint32_t ret = PMM_EINVAL;
+
+    if ( pmm_data.heap.head == HEAP_NOT_INITIALIZED )
+        pmm_initalize();
+
+    switch ( ap->function )
+    {
+    case PMM_FUNCTION_ALLOCATE:
+        ret = pmmAllocate(ap->alloc.length, ap->alloc.handle, ap->alloc.flags);
+        PMM_DEBUG("Alloc length=%x handle=%x flags=%x ret=%x\n", 
+                  ap->alloc.length, ap->alloc.handle, ap->alloc.flags, ret);
+        break;
+
+    case PMM_FUNCTION_FIND:
+        ret = pmmFind(ap->find.handle);
+        PMM_DEBUG("Find handle=%x ret=%x\n", ap->find.handle, ret);
+        break;
+
+    case PMM_FUNCTION_DEALLOC:
+        ret = pmmDeallocate(ap->dealloc.buffer);
+        PMM_DEBUG("Dealloc buffer=%x ret=%x\n", ap->dealloc.buffer, ret);
+        break;
+
+    default:
+        PMM_DEBUG("Invalid function:%d\n", ap->function);
+        break;
+    }
+
+    return ret;
+}
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/rombios_compat.h
--- a/tools/firmware/rombios/32bit/rombios_compat.h     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bit/rombios_compat.h     Wed Jan 28 13:06:45 2009 +0900
@@ -89,4 +89,8 @@ static inline void write_byte(Bit16u seg
        *addr = val;
 }
 
+#define X(idx, ret, fn, args...) ret fn (args);
+#include "32bitprotos.h"
+#undef X
+
 #endif
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/tcgbios/Makefile
--- a/tools/firmware/rombios/32bit/tcgbios/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bit/tcgbios/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -2,17 +2,17 @@ include $(XEN_ROOT)/tools/firmware/Rules
 include $(XEN_ROOT)/tools/firmware/Rules.mk
 
 TARGET  = tcgbiosext.o
-FILES   = tcgbios tpm_drivers
-OBJECTS = $(foreach f,$(FILES),$(f).o)
 
-CFLAGS += $(CFLAGS_include) -I.. -I../.. -DGCC_PROTOS
+CFLAGS += $(CFLAGS_include) -I.. -I../..
 
-.PHONY: all clean
-
+.PHONY: all
 all: $(TARGET)
 
+.PHONY: clean
 clean:
-       rm -rf *.o $(TARGET)
+       rm -rf *.o $(TARGET) $(DEPS)
 
-$(TARGET): $(OBJECTS)
+$(TARGET): tcgbios.o tpm_drivers.o
        $(LD) $(LDFLAGS_DIRECT) -r $^ -o $@
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bit/tcgbios/tcgbios.c
--- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c    Wed Jan 28 13:06:45 2009 +0900
@@ -26,7 +26,6 @@
 
 #include "util.h"
 #include "tcgbios.h"
-#include "32bitprotos.h"
 
 /* local structure and variables */
 struct ptti_cust {
@@ -259,6 +258,10 @@ uint8_t acpi_validate_entry(struct acpi_
 }
 
 
+/*
+   initialize the TCPA ACPI subsystem; find the ACPI tables and determine
+   where the TCPA table is.
+ */
 void tcpa_acpi_init(void)
 {
        struct acpi_20_rsdt *rsdt;
@@ -313,6 +316,16 @@ static void tcpa_reset_acpi_log(void)
 }
 
 
+/*
+ * Extend the ACPI log with the given entry by copying the
+ * entry data into the log.
+ * Input
+ *  Pointer to the structure to be copied into the log
+ *
+ * Output:
+ *  lower 16 bits of return code contain entry number
+ *  if entry number is '0', then upper 16 bits contain error code.
+ */
 uint32_t tcpa_extend_acpi_log(uint32_t entry_ptr)
 {
        uint32_t res = 0;
@@ -622,7 +635,8 @@ void tcpa_wake_event()
 }
 
 /*
- * add the boot device to the measurement log
+ * Add a measurement regarding the boot device (CDRom, Floppy, HDD) to
+ * the list of measurements.
  */
 void tcpa_add_bootdevice(uint32_t bootcd, uint32_t bootdrv)
 {
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bitgateway.c
--- a/tools/firmware/rombios/32bitgateway.c     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bitgateway.c     Wed Jan 28 13:06:45 2009 +0900
@@ -19,8 +19,10 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
  * Copyright (C) IBM Corporation, 2006
+ * Copyright (c) 2008, Citrix Systems, Inc.
  *
  * Author: Stefan Berger <stefanb@xxxxxxxxxx>
+ * Author: Keir Fraser <keir.fraser@xxxxxxxxxx>
  */
 
 /*
@@ -34,389 +36,142 @@
  *  (4 bytes) even for uint16_t, so casting to 32bit from bcc is a good idea.
  */
 
-#define SEGMENT_OFFSET  0xf0000
-#define REAL_MODE_CODE_SEGMENT  0xf000
+/* At most 32 bytes in argument list to a 32-bit function. */
+#define MAX_ARG_BYTES 32
 
-#define START_PM_CODE  USE32
-#define END_PM_CODE    USE16
+#define REAL_MODE_CODE_OFFSET  0xf0000
 
-/* definition of used code/data segment descriptors */
-#define PM_NORMAL_CS (gdt_entry_pm_cs       - gdt_base)
+/* Definitions of code/data segment descriptors. */
+#define PM_32BIT_CS  (gdt_entry_pm_32bit_cs - gdt_base)
 #define PM_16BIT_CS  (gdt_entry_pm_16bit_cs - gdt_base)
 #define PM_32BIT_DS  (gdt_entry_pm_32bit_ds - gdt_base)
+#define PM_16BIT_DS  (gdt_entry_pm_16bit_ds - gdt_base)
 
-  ASM_START
+    .align 16
+gdt_base:
+    .word 0,0
+    .byte 0,0,0,0
+gdt_entry_pm_32bit_cs:
+    .word 0xffff, 0x0000
+    .byte 0x00, 0x9b, 0xcf, 0x00
+gdt_entry_pm_16bit_cs:
+    .word 0xffff, 0x0000
+    .byte REAL_MODE_CODE_OFFSET >> 16, 0x9b, 0x0, 0x0
+gdt_entry_pm_32bit_ds:
+    .word 0xffff, 0x0000
+    .byte 0x0, 0x93, 0xcf, 0x0
+gdt_entry_pm_16bit_ds:
+    .word 0xffff, 0x0000
+    .byte 0x0, 0x93, 0x0, 0x0
+gdt_entry_end:
 
-    ; Switch into protected mode to allow access to 32 bit addresses.
-    ; This function allows switching into protected mode.
-    ; (the specs says big real mode, but that will not work)
+protmode_gdtdesc:
+    .word (gdt_entry_end - gdt_base) - 1
+    .long gdt_base | REAL_MODE_CODE_OFFSET
+
+realmode_gdtdesc:
+    .word 0xffff
+    .long 0x0
+
+Upcall:
+    ; Do an upcall into 32 bit space
     ;
-    ; preserves all registers and prepares cs, ds, es, ss for usage
-    ; in protected mode; while in prot.mode interrupts remain disabled
-switch_to_protmode:
+    ; Input:
+    ; bx: index of function to call
+    ; Output:
+    ; dx, ax: 32 bit result of call (even if 'void' is expected)
+
+    ; Save caller state, stack frame offsets listed below
+#define esp_off     0
+#define ss_off      4
+#define es_off      6
+#define ds_off      8
+#define flags_off   10
+#define retaddr_off 12
+#define args_off    14
+    pushf
     cli
+    push ds
+    push es
+    push ss
+    push esp
 
-    ; have to fix the stack for proper return address in 32 bit mode
-    push WORD #(REAL_MODE_CODE_SEGMENT>>12)    ;extended return address
-    push bp                                    ;pop@A1
-    mov bp, sp
-    push eax                                   ;pop@A2
-    mov eax, 2[bp]                             ; fix return address
-    rol eax, #16
-    mov 2[bp], eax
-
-    mov eax, esp
-    ror eax, #16                               ; hi(esp)
-
-    push bx                                    ; preserve before function call
-    push cx
-    push dx
-
-    push ax                                    ; prepare stack for
-    push es                                    ; call
-    push ds
-    push cs
-    push ss
-    call _store_segment_registers
-    add sp, #10                                        ; pop ax,es-ss
-
-    pop dx                                     ; restore after function call
-    pop cx
-    pop bx
-
-    ; calculate protected-mode esp from ss:sp
+    ; Calculate protected-mode esp from ss:sp
     and esp, #0xffff
     xor eax, eax
     mov ax, ss
-    rol eax, #4
-    add eax, esp
-    mov esp, eax
+    shl eax, #4
+    add esp, eax
 
+    ; Switch to protected mode
     seg cs
-    lgdt my_gdtdesc                            ; switch to own table
-
+    lgdt protmode_gdtdesc
     mov eax, cr0
-    or al, #0x1                                ; protected mode 'on'
+    or al, #0x1  ; protected mode on
     mov cr0, eax
-
-    jmpf DWORD (SEGMENT_OFFSET | switch_to_protmode_goon_1), #PM_NORMAL_CS
-
-    START_PM_CODE
-
-switch_to_protmode_goon_1:
-    mov ax, #PM_32BIT_DS                       ; 32 bit segment that allows
-    mov ds, ax                                 ; to reach all 32 bit
-    mov es, ax                                 ; addresses
+    jmpf DWORD (REAL_MODE_CODE_OFFSET|upcall1), #PM_32BIT_CS
+upcall1:
+    USE32
+    mov ax, #PM_32BIT_DS
+    mov ds, ax
+    mov es, ax
     mov ss, ax
 
-    pop eax                                    ;@A2
-    pop bp                                     ;@A1
-    ret
+    ; Marshal arguments and call 32-bit function
+    mov ecx, #MAX_ARG_BYTES/4
+upcall2:
+    push MAX_ARG_BYTES-4+args_off[esp]
+    loop upcall2
+    mov eax, [BIOS_INFO_PHYSICAL_ADDRESS + BIOSINFO_OFF_bios32_entry]
+    call eax
+    add esp, #MAX_ARG_BYTES
+    mov ecx, eax  ; Result in ecx
 
-    END_PM_CODE
-
-
-
-    .align 16
-gdt_base:
-    ; see Intel SW Dev. Manuals section 3.4.5, Volume 3 for meaning of bits
-    .word 0,0
-    .byte 0,0,0,0
-
-gdt_entry_pm_cs:
-    ; 32 bit code segment for protected mode
-    .word 0xffff, 0x0000
-    .byte 0x00, 0x9a, 0xcf, 0x00
-
-gdt_entry_pm_16bit_cs:
-    ; temp. 16 bit code segment used while in protected mode
-    .word 0xffff, 0x0000
-    .byte SEGMENT_OFFSET >> 16, 0x9a, 0x0, 0x0
-
-gdt_entry_pm_32bit_ds:
-    ; (32 bit) data segment (r/w) reaching all possible areas in 32bit memory
-    ; 4kb granularity
-    .word 0xffff, 0x0000
-    .byte 0x0, 0x92, 0xcf, 0x0
-gdt_entry_end:
-
-my_gdtdesc:
-    .word (gdt_entry_end - gdt_base) - 1
-    .long gdt_base | SEGMENT_OFFSET
-
-
-realmode_gdtdesc:                              ;to be used in real mode
-    .word 0xffff
-    .long 0x0
-
-
-
-switch_to_realmode:
-    ; Implementation of switching from protected mode to real mode
-    ; prepares cs, es, ds, ss to be used in real mode
-    ; spills   eax
-    START_PM_CODE
-
-    ; need to fix up the stack to return in 16 bit mode
-    ; currently the 32 bit return address is on the stack
-    pop eax
-    push ax
-
-    push bx                                    ;pop@1
-    push si                                    ;pop@2
-
-    call _ebda_ss_offset32                     ; get the offset of the ss
-    mov bx, ax                                 ; entry within the ebda.
-
-    jmpf switch_to_realmode_goon_1, #PM_16BIT_CS
-
-    END_PM_CODE
-
-switch_to_realmode_goon_1:
-    mov eax, cr0
-    and al, #0xfe                              ; protected mode 'off'
-    mov cr0, eax
-
-    jmpf switch_to_realmode_goon_2, #REAL_MODE_CODE_SEGMENT
-
-switch_to_realmode_goon_2:
-
-    ; get orig. 'ss' without using the stack (no 'call'!)
-    xor eax, eax                       ; clear upper 16 bits (and lower)
-    mov ax, #0x40                      ; where is the ebda located?
-    mov ds, ax
-    mov si, #0xe
-    seg ds
-    mov ax, [si]                       ; ax = segment of ebda
-
-    mov ds, ax                         ; segment of ebda
-    seg ds
-    mov ax, [bx]                       ; stack segment - bx has been set above
-    mov ss, ax
-
-    ; from esp and ss calculate real-mode sp
-    rol eax, #4
+    ; Restore real-mode stack pointer
+    xor eax, eax
+    mov ax, ss_off[esp]
+    mov bx, ax    ; Real-mode ss in bx
+    shl eax, 4
     sub esp, eax
 
-    push dx                            ;preserve before call(s)
-    push cx
-    push bx
-
-    call _get_register_ds              ; get orig. 'ds'
+    ; Return to real mode
+    jmpf upcall3, #PM_16BIT_CS
+upcall3:
+    USE16
+    mov ax, #PM_16BIT_DS
     mov ds, ax
-    call _get_register_es              ; get orig. 'es'
     mov es, ax
-    call _get_register_esp_hi          ; fix the upper 16 bits of esp
-    ror esp, #16
-    mov sp, ax
-    rol esp, #16
-
-    pop bx
-    pop cx
-    pop dx
-
+    mov ss, ax
+    mov eax, cr0
+    and al, #0xfe ; protected mode off
+    mov cr0, eax
+    jmpf upcall4, #REAL_MODE_CODE_OFFSET>>4
+upcall4:
     seg cs
     lgdt realmode_gdtdesc
 
-    sti                                                ; allow interrupts
+    ; Restore real-mode ss
+    mov ss, bx
 
-    pop si                                     ;@2
-    pop bx                                     ;@1
+    ; Convert result into dx:ax format
+    mov eax, ecx
+    ror eax, #16
+    mov dx, ax
+    ror eax, #16
 
+    ; Restore caller state and return
+    pop esp
+    pop bx ; skip ss
+    pop es
+    pop ds
+    popf
     ret
 
-    ASM_END
-
-/*
- * Helper function to get the offset of the reg_ss within the ebda struct
- * Only 'C' can tell the offset.
- */
-Bit16u
-ebda_ss_offset32()
-{
-    ASM_START
-    START_PM_CODE                              // need to have this
-    ASM_END                                    // compiled for protected mode
-    return &EbdaData->upcall.reg_ss;           // 'C' knows the offset!
-    ASM_START
-    END_PM_CODE
-    ASM_END
-}
-
-/*
- * Two often-used functions
- */
-Bit16u
-read_word_from_ebda(offset)
-    Bit16u offset;
-{
-       Bit16u ebda_seg = read_word(0x0040, 0x000E);
-       return read_word(ebda_seg, offset);
-}
-
-Bit32u
-read_dword_from_ebda(offset)
-    Bit16u offset;
-{
-       Bit16u ebda_seg = read_word(0x0040, 0x000E);
-       return read_dword(ebda_seg, offset);
-}
-
-/*
- * Store registers in the EBDA; used to keep the registers'
- * content in a well-defined place during protected mode execution
- */
-  void
-store_segment_registers(ss, cs, ds, es, esp_hi)
-  Bit16u ss, cs, ds, es, esp_hi;
-{
-       Bit16u ebda_seg = read_word(0x0040, 0x000E);
-       write_word(ebda_seg, &EbdaData->upcall.reg_ss, ss);
-       write_word(ebda_seg, &EbdaData->upcall.reg_cs, cs);
-       write_word(ebda_seg, &EbdaData->upcall.reg_ds, ds);
-       write_word(ebda_seg, &EbdaData->upcall.reg_es, es);
-       write_word(ebda_seg, &EbdaData->upcall.esp_hi, esp_hi);
-}
-
-
-  void
-store_returnaddress(retaddr)
-   Bit16u retaddr;
-{
-       Bit16u ebda_seg = read_word(0x0040, 0x000E);
-       write_word(ebda_seg, &EbdaData->upcall.retaddr, retaddr);
-}
-
-Bit16u
-get_returnaddress()
-{
-       return read_word_from_ebda(&EbdaData->upcall.retaddr);
-}
-
-/*
- * get the segment register 'cs' value from the EBDA
- */
-Bit16u
-get_register_cs()
-{
-       return read_word_from_ebda(&EbdaData->upcall.reg_cs);
-}
-
-/*
- * get the segment register 'ds' value from the EBDA
- */
-Bit16u
-get_register_ds()
-{
-       return read_word_from_ebda(&EbdaData->upcall.reg_ds);
-}
-
-/*
- * get the segment register 'es' value from the EBDA
- */
-Bit16u
-get_register_es()
-{
-       return read_word_from_ebda(&EbdaData->upcall.reg_es);
-}
-
-/*
- * get the upper 16 bits of the esp from the EBDA
- */
-Bit16u
-get_register_esp_hi()
-{
-       return read_word_from_ebda(&EbdaData->upcall.esp_hi);
-}
-
-
-
-/********************************************************/
-
-
-ASM_START
-
-Upcall:
-       ; do the upcall into 32 bit space
-       ; clear the stack frame so that 32 bit space sees all the parameters
-       ; on the stack as if they were prepared for it
-       ; ---> take the 16 bit return address off the stack and remember it
-       ;
-       ; Input:
-       ; bx: index of function to call
-       ; Ouput:
-       ; dx, ax: 32 bit result of call (even if 'void' is expected)
-
-       push bp                         ;pop @1
-       mov bp, sp
-       push si                         ;pop @2
-
-       mov ax, 2[bp]                   ; 16 bit return address
-       push ax
-       call _store_returnaddress       ; store away
-       pop ax
-
-       ; XXX GDT munging requires ROM to be writable!
-       call _enable_rom_write_access
-
-       rol bx, #2
-       mov si, #jmptable
-       seg cs
-       mov eax, dword ptr [si+bx]      ; address to call from table
-
-       pop si                          ;@2
-       pop bp                          ;@1
-
-       add sp, #2                      ; remove 16bit return address from stack
-
-       call switch_to_protmode
-       START_PM_CODE
-
-       call eax                        ; call 32bit function
-       push eax                        ; preserve result
-
-       call switch_to_realmode         ; back to realmode
-       END_PM_CODE
-
-       pop eax                         ; get result
-
-       push word 0x0000                ; placeholder for 16 bit return address
-       push bp
-       mov bp,sp
-       push eax                        ; preserve work register
-
-       call _disable_rom_write_access
-
-       call _get_returnaddress
-       mov 2[bp], ax                   ; 16bit return address onto stack
-
-       pop eax
-       pop bp
-
-       ror eax, #16                    ; result into dx/ax
-       mov dx, ax                      ; hi(res) -> dx
-       ror eax, #16
-
-       ret
-
-
-/* macro for functions to declare their call into 32bit space */
 MACRO DoUpcall
-       mov bx, #?1
-       jmp Upcall
+    mov bx, #?1
+    jmp Upcall
 MEND
 
-
-ASM_END
-
+#define X(idx, ret, fn, args...) _ ## fn: DoUpcall(idx)
 #include "32bitprotos.h"
-#include "32bitgateway.h"
-
-#include "tcgbios.c"
-
-Bit32u get_s3_waking_vector()
-{
-       ASM_START
-       DoUpcall(IDX_GET_S3_WAKING_VECTOR)
-       ASM_END
-}
+#undef X
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bitgateway.h
--- a/tools/firmware/rombios/32bitgateway.h     Wed Jan 28 12:22:58 2009 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-#ifndef GATEWAY
-#define GATEWAY
-
-#include "32bitprotos.h"
-
-void test_gateway();
-
-/* extension for the EBDA */
-typedef struct {
-  Bit16u reg_ss;
-  Bit16u reg_cs;
-  Bit16u reg_ds;
-  Bit16u reg_es;
-  Bit16u esp_hi;
-  Bit16u retaddr;
-} upcall_t;
-
-#endif
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/32bitprotos.h
--- a/tools/firmware/rombios/32bitprotos.h      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/32bitprotos.h      Wed Jan 28 13:06:45 2009 +0900
@@ -1,47 +1,16 @@
-#ifndef PROTOS_HIGHBIOS
-#define PROTOS_HIGHBIOS
-
-/* shared include file for bcc and gcc */
-
-/* bcc does not like 'enum' */
-#define IDX_TCGINTERRUPTHANDLER            0
-#define IDX_TCPA_ACPI_INIT                 1
-#define IDX_TCPA_EXTEND_ACPI_LOG           2
-#define IDX_TCPA_CALLING_INT19H            3
-#define IDX_TCPA_RETURNED_INT19H           4
-#define IDX_TCPA_ADD_EVENT_SEPARATORS      5
-#define IDX_TCPA_WAKE_EVENT                6
-#define IDX_TCPA_ADD_BOOTDEVICE            7
-#define IDX_TCPA_START_OPTION_ROM_SCAN     8
-#define IDX_TCPA_OPTION_ROM                9
-#define IDX_TCPA_IPL                       10
-#define IDX_TCPA_INITIALIZE_TPM            11
-#define IDX_TCPA_MEASURE_POST              12
-#define IDX_GET_S3_WAKING_VECTOR           13
-#define IDX_LAST                           14 /* keep last! */
-
-#ifdef GCC_PROTOS
-  #define PARMS(x...) x
-#else
-  /* bcc doesn't want any parameter types in prototypes */
-  #define PARMS(x...)
-#endif
-
-Bit32u TCGInterruptHandler( PARMS(pushad_regs_t *regs, Bit32u esds, Bit32u flags_ptr));
-
-void tcpa_acpi_init( PARMS(void) );
-Bit32u tcpa_extend_acpi_log( PARMS(Bit32u entry_ptr) );
-void tcpa_calling_int19h( PARMS(void) );
-void tcpa_returned_int19h( PARMS(void) );
-void tcpa_add_event_separators( PARMS(void) );
-void tcpa_wake_event( PARMS(void) );
-void tcpa_add_bootdevice( PARMS(Bit32u bootcd, Bit32u bootdrv) );
-void tcpa_start_option_rom_scan( PARMS(void) );
-void tcpa_option_rom( PARMS(Bit32u seg) );
-void tcpa_ipl( PARMS(Bit32u bootcd,Bit32u seg,Bit32u off,Bit32u count) );
-void tcpa_measure_post( PARMS(Bit32u from, Bit32u to) );
-Bit32u tcpa_initialize_tpm( PARMS(Bit32u physpres) );
-
-Bit32u get_s3_waking_vector( PARMS(void) );
-
-#endif
+X(0,  Bit32u, TCGInterruptHandler,
+  pushad_regs_t *regs, Bit32u esds, Bit32u flags_ptr)
+X(1,  void,   tcpa_acpi_init, void)
+X(2,  Bit32u, tcpa_extend_acpi_log, Bit32u entry_ptr)
+X(3,  void,   tcpa_calling_int19h,void)
+X(4,  void,   tcpa_returned_int19h, void)
+X(5,  void,   tcpa_add_event_separators, void)
+X(6,  void,   tcpa_wake_event, void)
+X(7,  void,   tcpa_add_bootdevice, Bit32u bootcd, Bit32u bootdrv)
+X(8,  void,   tcpa_start_option_rom_scan, void)
+X(9,  void,   tcpa_option_rom, Bit32u seg)
+X(10, void,   tcpa_ipl, Bit32u bootcd, Bit32u seg, Bit32u off, Bit32u count)
+X(11, void,   tcpa_measure_post, Bit32u from, Bit32u to)
+X(12, Bit32u, tcpa_initialize_tpm, Bit32u physpres)
+X(13, Bit32u, get_s3_waking_vector, void)
+X(14, Bit32u, pmm, void *argp)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/Makefile
--- a/tools/firmware/rombios/Makefile   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/Makefile   Wed Jan 28 13:06:45 2009 +0900
@@ -13,6 +13,7 @@ clean: subdirs-clean
        rm -f  as86-sym.txt ld86-sym.txt 
        rm -f  rombios*.txt rombios*.sym usage biossums
        rm -f  BIOS-bochs-*
+       rm -f  $(DEPS)
 
 BIOS-bochs-latest: rombios.c biossums 32bitgateway.c tcgbios.c
        gcc -DBX_SMP_PROCESSORS=1 -E -P $< > _rombios_.c
@@ -27,3 +28,4 @@ biossums: biossums.c
 biossums: biossums.c
        gcc -o biossums biossums.c
 
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/rombios.c  Wed Jan 28 13:06:45 2009 +0900
@@ -161,6 +161,8 @@
 
 #define BX_TCGBIOS       0   /* main switch for TCG BIOS ext. */
 
+#define BX_PMM           1   /* POST Memory Manager */
+
 #define BX_MAX_ATA_INTERFACES   4
 #define BX_MAX_ATA_DEVICES      (BX_MAX_ATA_INTERFACES*2)
 
@@ -726,7 +728,9 @@ typedef struct {
     } cdemu_t;
 #endif // BX_ELTORITO_BOOT
 
-#include "32bitgateway.h"
+#define X(idx, ret, fn, arg...) ret fn ();
+#include "32bitprotos.h"
+#undef X
 
   // for access to EBDA area
   //     The EBDA structure should conform to
@@ -752,8 +756,6 @@ typedef struct {
     // El Torito Emulation data
     cdemu_t cdemu;
 #endif // BX_ELTORITO_BOOT
-
-    upcall_t upcall;
     } ebda_data_t;
 
   #define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1
@@ -1416,31 +1418,24 @@ fixup_base_mem_in_k()
   write_word(0x40, 0x13, base_mem >> 10);
 }
 
-void
-set_rom_write_access(action)
-  Bit16u action;
-{
-    Bit16u off = (Bit16u)&((struct bios_info *)0)->xen_pfiob;
 ASM_START
-    mov si,.set_rom_write_access.off[bp]
+_rom_write_access_control:
     push ds
-    mov ax,#(ACPI_PHYSICAL_ADDRESS >> 4)
+    mov ax,#(BIOS_INFO_PHYSICAL_ADDRESS >> 4)
     mov ds,ax
-    mov dx,[si]
+    mov ax,[BIOSINFO_OFF_xen_pfiob]
     pop ds
-    mov ax,.set_rom_write_access.action[bp]
-    out dx,al
+    ret
 ASM_END
-}
 
 void enable_rom_write_access()
 {
-    set_rom_write_access(0);
+    outb(rom_write_access_control(), 0);
 }
 
 void disable_rom_write_access()
 {
-    set_rom_write_access(PFFLAG_ROM_LOCK);
+    outb(rom_write_access_control(), PFFLAG_ROM_LOCK);
 }
     
 #endif /* HVMASSIST */
@@ -2054,7 +2049,10 @@ print_bios_banner()
   "rombios32 "
 #endif
 #if BX_TCGBIOS
-  "TCG-enabled"
+  "TCG-enabled "
+#endif
+#if BX_PMM
+  "PMM "
 #endif
   "\n\n");
 }
@@ -9499,8 +9497,9 @@ use16 386
 
 #endif
 
+#include "32bitgateway.c"
 ASM_END
-#include "32bitgateway.c"
+#include "tcgbios.c"
 ASM_START
 
 ;--------------------
@@ -10355,6 +10354,48 @@ rombios32_gdt:
   dw 0xffff, 0, 0x9300, 0x0000 ; 16 bit data segment base=0x0 limit=0xffff
 #endif // BX_ROMBIOS32
 
+#if BX_PMM
+; according to POST Memory Manager Specification Version 1.01
+.align 16
+pmm_structure:
+  db 0x24,0x50,0x4d,0x4d ;; "$PMM" signature
+  db 0x01 ;; revision
+  db 16 ;; length
+  db (-((pmm_entry_point>>8)+pmm_entry_point+0x20f))&0xff;; checksum
+  dw pmm_entry_point,0xf000 ;; far call entrypoint
+  db 0,0,0,0,0 ;; reserved
+
+pmm_entry_point:
+  pushf
+  pushad
+; Calculate protected-mode address of PMM function args
+  xor  eax, eax
+  mov  ax, sp
+  xor  ebx, ebx
+  mov  bx, ss
+  shl  ebx, 4
+  lea  ebx, [eax+ebx+38] ;; ebx=(ss<<4)+sp+4(far call)+2(pushf)+32(pushad)
+  push ebx
+;
+; Stack layout at this point:
+;
+;        : +0x0    +0x2    +0x4    +0x6    +0x8    +0xa    +0xc    +0xe
+; -----------------------------------------------------------------------
+; sp     : [&arg1         ][edi           ][esi           ][ebp           ]
+; sp+0x10: [esp           ][ebx           ][edx           ][ecx           ]
+; sp+0x20: [eax           ][flags ][ip    ][cs    ][arg1  ][arg2, ...
+;
+  call _pmm
+  mov  bx, sp
+SEG SS
+  mov  [bx+0x20], ax
+SEG SS
+  mov  [bx+0x18], dx
+  pop  ebx
+  popad
+  popf
+  retf
+#endif // BX_PMM
 
 ; parallel port detection: base address in DX, index in BX, timeout in CL
 detect_parport:
@@ -10447,7 +10488,9 @@ rom_scan:
   ;;   3         ROM initialization entry point (FAR CALL)
 
 #if BX_TCGBIOS
+  push ax
   call _tcpa_start_option_rom_scan    /* specs: 3.2.3.3 + 10.4.3 */
+  pop ax
 #endif
 
 rom_scan_loop:
@@ -11790,15 +11833,6 @@ static Bit8u vgafont8[128*8]=
 #ifdef HVMASSIST
 ASM_START
 
-// space for addresses in 32bit BIOS area; currently 256/4 entries
-// are allocated
-.org 0xcb00
-jmptable:
-db 0x5F, 0x5F, 0x5F, 0x4A, 0x4D, 0x50, 0x54 ;; ___JMPT
-dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;;  64 bytes
-dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 128 bytes
-dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 192 bytes
-
 //
 // MP Tables
 // just carve out some blank space for HVMLOADER to write the MP tables to
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/rombios/tcgbios.c
--- a/tools/firmware/rombios/tcgbios.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/rombios/tcgbios.c  Wed Jan 28 13:06:45 2009 +0900
@@ -25,162 +25,6 @@
   Support for TCPA ACPI logging
  ******************************************************************/
 
-/*
- * Extend the ACPI log with the given entry by copying the
- * entry data into the log.
- * Input
- *  Pointer to the structure to be copied into the log
- *
- * Output:
- *  lower 16 bits of return code contain entry number
- *  if entry number is '0', then upper 16 bits contain error code.
- */
-Bit32u tcpa_extend_acpi_log(entry_ptr)
-    Bit32u entry_ptr;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_EXTEND_ACPI_LOG)
-       ASM_END
-}
-
-
-/*
-   initialize the TCPA ACPI subsystem; find the ACPI tables and determine
-   where the TCPA table is.
- */
- void
-tcpa_acpi_init()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_ACPI_INIT)
-       ASM_END
-}
-
-
-/*
- * Add measurement to log about call of int 19h
- */
- void
-tcpa_calling_int19h()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_CALLING_INT19H)
-       ASM_END
-}
-
-/*
- * Add measurement to log about retuning from int 19h
- */
- void
-tcpa_returned_int19h()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_RETURNED_INT19H)
-       ASM_END
-}
-
-/*
- * Add event separators for PCRs 0 to 7; specs 8.2.3
- */
- void
-tcpa_add_event_separators()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_ADD_EVENT_SEPARATORS)
-       ASM_END
-}
-
-
-/*
- * Add a wake event to the log
- */
- void
-tcpa_wake_event()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_WAKE_EVENT)
-       ASM_END
-}
-
-
-/*
- * Add measurement to the log about option rom scan
- * 10.4.3 : action 14
- */
- void
-tcpa_start_option_rom_scan()
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_START_OPTION_ROM_SCAN)
-       ASM_END
-}
-
-
-/*
- * Add measurement to the log about an option rom
- */
- void
-tcpa_option_rom(seg)
-    Bit32u seg;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_OPTION_ROM)
-       ASM_END
-}
-
-/*
- * Add a measurement regarding the boot device (CDRom, Floppy, HDD) to
- * the list of measurements.
- */
-void
- tcpa_add_bootdevice(bootcd, bootdrv)
-  Bit32u bootcd;
-  Bit32u bootdrv;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_ADD_BOOTDEVICE)
-       ASM_END
-}
-
-/*
- * Add a measurement to the log in support of 8.2.5.3
- * Creates two log entries
- *
- * Input parameter:
- *  seg    : segment where the IPL data are located
- */
- void
- tcpa_ipl(bootcd,seg,off,count)
-    Bit32u bootcd;
-    Bit32u seg;
-    Bit32u off;
-    Bit32u count;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_IPL)
-       ASM_END
-}
-
-
-Bit32u
-tcpa_initialize_tpm(physpres)
-  Bit32u physpres;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_INITIALIZE_TPM)
-       ASM_END
-}
-
-void
-tcpa_measure_post(from, to)
-   Bit32u from;
-   Bit32u to;
-{
-       ASM_START
-       DoUpcall(IDX_TCPA_MEASURE_POST)
-       ASM_END
-}
-
 ASM_START
 MACRO POST_MEASURE
        push word #0x000f
@@ -205,18 +49,6 @@ tcpa_do_measure_POSTs()
        POST_MEASURE(timer_tick_post, int76_handler)
 
        ret
-       ASM_END
-}
-
-Bit32u
-TCGInterruptHandler(regs_ptr, es, ds, flags_ptr)
-   Bit32u regs_ptr;
-   Bit16u es;
-   Bit16u ds;
-   Bit32u flags_ptr;
-{
-       ASM_START
-       DoUpcall(IDX_TCGINTERRUPTHANDLER)
        ASM_END
 }
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vbe.c
--- a/tools/firmware/vgabios/vbe.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/vgabios/vbe.c      Wed Jan 28 13:06:45 2009 +0900
@@ -37,8 +37,6 @@
 
 #include "vbe.h"
 #include "vbetables.h"
-
-#define VBE_TOTAL_VIDEO_MEMORY_DIV_64K (VBE_DISPI_TOTAL_VIDEO_MEMORY_MB*1024/64)
 
 // The current OEM Software Revision of this VBE Bios
 #define VBE_OEM_SOFTWARE_REV 0x0002;
@@ -821,7 +819,8 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
         vbe_info_block.VideoModePtr_Off= DI + 34;
 
         // VBE Total Memory (in 64b blocks)
-        vbe_info_block.TotalMemory = VBE_TOTAL_VIDEO_MEMORY_DIV_64K;
+        outw(VBE_DISPI_IOPORT_INDEX, VBE_DISPI_INDEX_VIDEO_MEMORY_64K);
+        vbe_info_block.TotalMemory = inw(VBE_DISPI_IOPORT_DATA);
 
         if (vbe2_info)
        {
@@ -846,7 +845,8 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
         do
         {
                 if ((cur_info->info.XResolution <= dispi_get_max_xres()) &&
-                    (cur_info->info.BitsPerPixel <= dispi_get_max_bpp())) {
+                    (cur_info->info.BitsPerPixel <= dispi_get_max_bpp()) &&
+                    (cur_info->info.XResolution * cur_info->info.XResolution * cur_info->info.BitsPerPixel <= vbe_info_block.TotalMemory << 19 )) {
 #ifdef DEBUG
                   printf("VBE found mode %x => %x\n", cur_info->mode,cur_mode);
 #endif
@@ -855,7 +855,7 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
                   cur_ptr+=2;
                 } else {
 #ifdef DEBUG
-                  printf("VBE mode %x (xres=%x / bpp=%02x) not supported by display\n", cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel);
+                  printf("VBE mode %x (xres=%x / bpp=%02x) not supported \n", cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel);
 #endif
                 }
                 cur_info++;
@@ -913,7 +913,13 @@ Bit16u *AX;Bit16u CX; Bit16u ES;Bit16u D
                   info.WinFuncPtr = 0xC0000000UL;
                   *(Bit16u *)&(info.WinFuncPtr) = (Bit16u)(dispi_set_bank_farcall);
                 }
-                
+                outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_LFB_ADDRESS_H);
+                info.PhysBasePtr = inw(VBE_DISPI_IOPORT_DATA);
+                info.PhysBasePtr = info.PhysBasePtr << 16;
+#if 0                                  
+                outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_LFB_ADDRESS_L);
+                info.PhysBasePtr |= inw(VBE_DISPI_IOPORT_DATA);
+#endif                                                         
                 result = 0x4f;
         }
         else
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vbe.h
--- a/tools/firmware/vgabios/vbe.h      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/vgabios/vbe.h      Wed Jan 28 13:06:45 2009 +0900
@@ -275,39 +275,41 @@ typedef struct ModeInfoListItem
 //        like 0xE0000000
 
 
-  #define VBE_DISPI_BANK_ADDRESS          0xA0000
-  #define VBE_DISPI_BANK_SIZE_KB          64
+  #define VBE_DISPI_BANK_ADDRESS           0xA0000
+  #define VBE_DISPI_BANK_SIZE_KB           64
   
-  #define VBE_DISPI_MAX_XRES              1024
-  #define VBE_DISPI_MAX_YRES              768
+  #define VBE_DISPI_MAX_XRES               2560
+  #define VBE_DISPI_MAX_YRES               1600
   
-  #define VBE_DISPI_IOPORT_INDEX          0x01CE
-  #define VBE_DISPI_IOPORT_DATA           0x01CF
+  #define VBE_DISPI_IOPORT_INDEX           0x01CE
+  #define VBE_DISPI_IOPORT_DATA            0x01CF
   
-  #define VBE_DISPI_INDEX_ID              0x0
-  #define VBE_DISPI_INDEX_XRES            0x1
-  #define VBE_DISPI_INDEX_YRES            0x2
-  #define VBE_DISPI_INDEX_BPP             0x3
-  #define VBE_DISPI_INDEX_ENABLE          0x4
-  #define VBE_DISPI_INDEX_BANK            0x5
-  #define VBE_DISPI_INDEX_VIRT_WIDTH      0x6
-  #define VBE_DISPI_INDEX_VIRT_HEIGHT     0x7
-  #define VBE_DISPI_INDEX_X_OFFSET        0x8
-  #define VBE_DISPI_INDEX_Y_OFFSET        0x9
-      
-  #define VBE_DISPI_ID0                   0xB0C0
-  #define VBE_DISPI_ID1                   0xB0C1
-  #define VBE_DISPI_ID2                   0xB0C2
-  #define VBE_DISPI_ID3                   0xB0C3
-  #define VBE_DISPI_ID4                   0xB0C4
-  
-  #define VBE_DISPI_DISABLED              0x00
-  #define VBE_DISPI_ENABLED               0x01
-  #define VBE_DISPI_GETCAPS               0x02
-  #define VBE_DISPI_8BIT_DAC              0x20
-  #define VBE_DISPI_LFB_ENABLED           0x40
-  #define VBE_DISPI_NOCLEARMEM            0x80
-  
-  #define VBE_DISPI_LFB_PHYSICAL_ADDRESS  0xE0000000
+  #define VBE_DISPI_INDEX_ID               0x0
+  #define VBE_DISPI_INDEX_XRES             0x1
+  #define VBE_DISPI_INDEX_YRES             0x2
+  #define VBE_DISPI_INDEX_BPP              0x3
+  #define VBE_DISPI_INDEX_ENABLE           0x4
+  #define VBE_DISPI_INDEX_BANK             0x5
+  #define VBE_DISPI_INDEX_VIRT_WIDTH       0x6
+  #define VBE_DISPI_INDEX_VIRT_HEIGHT      0x7
+  #define VBE_DISPI_INDEX_X_OFFSET         0x8
+  #define VBE_DISPI_INDEX_Y_OFFSET         0x9
+  #define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa
+  #define VBE_DISPI_INDEX_LFB_ADDRESS_H    0xb
+  #define VBE_DISPI_INDEX_LFB_ADDRESS_L    0xc
+
+  #define VBE_DISPI_LFB_PHYSICAL_ADDRESS   0xE0000000
+  #define VBE_DISPI_ID0                    0xB0C0
+  #define VBE_DISPI_ID1                    0xB0C1
+  #define VBE_DISPI_ID2                    0xB0C2
+  #define VBE_DISPI_ID3                    0xB0C3
+  #define VBE_DISPI_ID4                    0xB0C4
+
+  #define VBE_DISPI_DISABLED               0x00
+  #define VBE_DISPI_ENABLED                0x01
+  #define VBE_DISPI_GETCAPS                0x02
+  #define VBE_DISPI_8BIT_DAC               0x20
+  #define VBE_DISPI_LFB_ENABLED            0x40
+  #define VBE_DISPI_NOCLEARMEM             0x80
 
 #endif
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vbetables-gen.c
--- a/tools/firmware/vgabios/vbetables-gen.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/vgabios/vbetables-gen.c    Wed Jan 28 13:06:45 2009 +0900
@@ -2,7 +2,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 
-#define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 8
+#define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 16
 
 typedef struct {
     int width;
@@ -42,19 +42,40 @@ ModeInfo modes[] = {
 { 1600, 1200, 24                      , 0x11F},
 
       /* BOCHS/PLE, 86 'own' mode numbers */
-{ 320, 200, 32                        , 0x140},
-{ 640, 400, 32                        , 0x141},
-{ 640, 480, 32                        , 0x142},
-{ 800, 600, 32                        , 0x143},
-{ 1024, 768, 32                       , 0x144},
-{ 1280, 1024, 32                      , 0x145},
-{ 320, 200, 8                           , 0x146},
-{ 1600, 1200, 32                      , 0x147},
-{ 1152, 864, 8                      , 0x148},
+{ 320, 200, 32                       , 0x140},
+{ 640, 400, 32                       , 0x141},
+{ 640, 480, 32                       , 0x142},
+{ 800, 600, 32                       , 0x143},
+{ 1024, 768, 32                      , 0x144},
+{ 1280, 1024, 32                     , 0x145},
+{ 320, 200, 8                        , 0x146},
+{ 1600, 1200, 32                     , 0x147},
+{ 1152, 864, 8                       , 0x148},
 { 1152, 864, 15                      , 0x149},
 { 1152, 864, 16                      , 0x14a},
 { 1152, 864, 24                      , 0x14b},
 { 1152, 864, 32                      , 0x14c},
+{ 1280, 800, 16                      , 0x178},
+{ 1280, 800, 24                      , 0x179},
+{ 1280, 800, 32                      , 0x17a},
+{ 1280, 960, 16                      , 0x17b},
+{ 1280, 960, 24                      , 0x17c},
+{ 1280, 960, 32                      , 0x17d},
+{ 1440, 900, 16                      , 0x17e},
+{ 1440, 900, 24                      , 0x17f},
+{ 1440, 900, 32                      , 0x180},
+{ 1400, 1050, 16                     , 0x181},
+{ 1400, 1050, 24                     , 0x182},
+{ 1400, 1050, 32                     , 0x183},
+{ 1680, 1050, 16                     , 0x184},
+{ 1680, 1050, 24                     , 0x185},
+{ 1680, 1050, 32                     , 0x186},
+{ 1920, 1200, 16                     , 0x187},
+{ 1920, 1200, 24                     , 0x188},
+{ 1920, 1200, 32                     , 0x189},
+{ 2560, 1600, 16                     , 0x18a},
+{ 2560, 1600, 24                     , 0x18b},
+{ 2560, 1600, 32                     , 0x18c},
 { 0, },
 };
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/firmware/vgabios/vgabios.c
--- a/tools/firmware/vgabios/vgabios.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/firmware/vgabios/vgabios.c  Wed Jan 28 13:06:45 2009 +0900
@@ -3811,9 +3811,9 @@ void printf(s)
         for (i=0; i<format_width; i++) {
           nibble = (arg >> (4 * digit)) & 0x000f;
           if (nibble <= 9)
-            outb(0x0500, nibble + '0');
+            outb(0xe9, nibble + '0');
           else
-            outb(0x0500, (nibble - 10) + 'A');
+            outb(0xe9, (nibble - 10) + 'A');
           digit--;
           }
         in_format = 0;
@@ -3823,7 +3823,7 @@ void printf(s)
       //  }
       }
     else {
-      outb(0x0500, c);
+      outb(0xe9, c);
       }
     s ++;
     }
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/flask/libflask/Makefile
--- a/tools/flask/libflask/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/flask/libflask/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -16,7 +16,6 @@ CFLAGS   += $(INCLUDES) -I./include -I$(
 # Get gcc to generate the dependencies for us.
 CFLAGS   += -Wp,-MD,.$(@F).d
 LDFLAGS  += -L.
-DEPS     = .*.d
 
 LIB_OBJS := $(patsubst %.c,%.o,$(SRCS))
 PIC_OBJS := $(patsubst %.c,%.opic,$(SRCS))
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/flask/loadpolicy/Makefile
--- a/tools/flask/loadpolicy/Makefile   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/flask/loadpolicy/Makefile   Wed Jan 28 13:06:45 2009 +0900
@@ -7,9 +7,6 @@ LIBFLASK_ROOT = $(XEN_ROOT)/tools/flask/
 
 PROFILE=#-pg
 BASECFLAGS=-Wall -g -Werror
-# Make gcc generate dependencies.
-BASECFLAGS += -Wp,-MD,.$(@F).d
-PROG_DEP = .*.d
 BASECFLAGS+= $(PROFILE)
 #BASECFLAGS+= -I$(XEN_ROOT)/tools
 BASECFLAGS+= $(CFLAGS_libxenctrl)
@@ -39,7 +36,7 @@ clean:
 clean: 
        rm -f *.o *.opic *.so
        rm -f $(CLIENTS)
-       $(RM) $(PROG_DEP)
+       $(RM) $(DEPS)
 
 .PHONY: print-dir
 print-dir:
@@ -54,7 +51,7 @@ install: all
        $(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
        $(INSTALL_PROG) $(CLIENTS) $(DESTDIR)$(SBINDIR)
 
--include $(PROG_DEP)
+-include $(DEPS)
 
 # never delete any intermediate files.
 .SECONDARY:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/fs-back/Makefile
--- a/tools/fs-back/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/fs-back/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -12,10 +12,6 @@ CFLAGS   += $(CFLAGS_libxenstore)
 CFLAGS   += $(CFLAGS_libxenstore)
 CFLAGS   += $(INCLUDES) -I.
 CFLAGS   += -D_GNU_SOURCE
-
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS      = .*.d
 
 LIBS      := -L. -L.. -L../lib
 LIBS      += $(LDFLAGS_libxenctrl)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/include/Makefile
--- a/tools/include/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/include/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -10,11 +10,12 @@ xen-foreign:
 
 xen/.dir:
        @rm -rf xen
-       mkdir xen
+       mkdir -p xen/libelf
        ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen
        ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen
        ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen
        ln -sf ../xen-sys/$(XEN_OS) xen/sys
+       ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/xen/,libelf.h elfstructs.h) xen/libelf/
        ln -s ../xen-foreign xen/foreign
        touch $@
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/include/xen-foreign/reference.size
--- a/tools/include/xen-foreign/reference.size  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/include/xen-foreign/reference.size  Wed Jan 28 13:06:45 2009 +0900
@@ -1,7 +1,7 @@
 
 structs                   |  x86_32  x86_64    ia64
 
-start_info                |    1104    1152    1152
+start_info                |    1112    1168    1168
 trap_info                 |       8      16       -
 pt_fpreg                  |       -       -      16
 cpu_user_regs             |      68     200       -
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libaio/src/Makefile
--- a/tools/libaio/src/Makefile Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libaio/src/Makefile Wed Jan 28 13:06:45 2009 +0900
@@ -6,7 +6,7 @@ libdir=$(prefix)/lib
 libdir=$(prefix)/lib
 
 ARCH := $(shell uname -m | sed -e s/i.86/i386/)
-CFLAGS := -nostdlib -nostartfiles -Wall -I. -g -fomit-frame-pointer -O2 -fPIC
+CFLAGS = -nostdlib -nostartfiles -Wall -I. -g -fomit-frame-pointer -O2 -fPIC
 SO_CFLAGS=-shared $(CFLAGS)
 L_CFLAGS=$(CFLAGS)
 LINK_FLAGS=
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libfsimage/Rules.mk
--- a/tools/libfsimage/Rules.mk Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libfsimage/Rules.mk Wed Jan 28 13:06:45 2009 +0900
@@ -1,8 +1,6 @@ include $(XEN_ROOT)/tools/Rules.mk
 include $(XEN_ROOT)/tools/Rules.mk
 
-DEPS = .*.d
-
-CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror -Wp,-MD,.$(@F).d
+CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror
 LDFLAGS += -L../common/
 
 PIC_OBJS := $(patsubst %.c,%.opic,$(LIB_SRCS-y))
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libfsimage/common/Makefile
--- a/tools/libfsimage/common/Makefile  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libfsimage/common/Makefile  Wed Jan 28 13:06:45 2009 +0900
@@ -3,9 +3,6 @@ include $(XEN_ROOT)/tools/Rules.mk
 
 MAJOR = 1.0
 MINOR = 0
-
-CFLAGS += -Werror -Wp,-MD,.$(@F).d
-DEPS = .*.d
 
 LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS
 LDFLAGS-$(CONFIG_Linux) = -Wl,mapfile-GNU
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/Makefile      Wed Jan 28 13:06:45 2009 +0900
@@ -1,7 +1,7 @@ XEN_ROOT = ../..
 XEN_ROOT = ../..
 include $(XEN_ROOT)/tools/Rules.mk
 
-MAJOR    = 3.2
+MAJOR    = 3.4
 MINOR    = 0
 
 CTRL_SRCS-y       :=
@@ -62,10 +62,7 @@ CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE
 # libraries.
 #CFLAGS   += -DVALGRIND -O0 -ggdb3
 
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
 LDFLAGS  += -L.
-DEPS     = .*.d
 
 CTRL_LIB_OBJS := $(patsubst %.c,%.o,$(CTRL_SRCS-y))
 CTRL_PIC_OBJS := $(patsubst %.c,%.opic,$(CTRL_SRCS-y))
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_core.c     Wed Jan 28 13:06:45 2009 +0900
@@ -57,9 +57,6 @@
 
 /* number of pages to write at a time */
 #define DUMP_INCREMENT (4 * 1024)
-
-/* Don't yet support cross-address-size core dump */
-#define guest_width (sizeof (unsigned long))
 
 /* string table */
 struct xc_core_strtab {
@@ -240,7 +237,7 @@ xc_core_ehdr_init(Elf64_Ehdr *ehdr)
     ehdr->e_ident[EI_ABIVERSION] = EV_CURRENT;
 
     ehdr->e_type = ET_CORE;
-    ehdr->e_machine = ELF_ARCH_MACHINE;
+    /* e_machine will be filled in later */
     ehdr->e_version = EV_CURRENT;
     ehdr->e_entry = 0;
     ehdr->e_phoff = 0;
@@ -359,7 +356,8 @@ elfnote_dump_core_header(
 }
 
 static int
-elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle)
+elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle,
+                         unsigned int guest_width)
 {
     int sts;
     struct elfnote elfnote;
@@ -371,6 +369,12 @@ elfnote_dump_xen_version(void *args, dum
     elfnote.descsz = sizeof(xen_version);
     elfnote.type = XEN_ELFNOTE_DUMPCORE_XEN_VERSION;
     elfnote_fill_xen_version(xc_handle, &xen_version);
+    if (guest_width < sizeof(unsigned long))
+    {
+        // 32 bit elf file format differs in pagesize's alignment
+        char *p = (char *)&xen_version.pagesize;
+        memmove(p - 4, p, sizeof(xen_version.pagesize));
+    }
     sts = dump_rtn(args, (char*)&elfnote, sizeof(elfnote));
     if ( sts != 0 )
         return sts;
@@ -396,6 +400,24 @@ elfnote_dump_format_version(void *args, 
     return dump_rtn(args, (char*)&format_version, sizeof(format_version));
 }
 
+static int
+get_guest_width(int xc_handle,
+                uint32_t domid,
+                unsigned int *guest_width)
+{
+    DECLARE_DOMCTL;
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = domid;
+    domctl.cmd = XEN_DOMCTL_get_address_size;
+
+    if ( do_domctl(xc_handle, &domctl) != 0 )
+        return 1;
+        
+    *guest_width = domctl.u.address_size.size / 8;
+    return 0;
+}
+
 int
 xc_domain_dumpcore_via_callback(int xc_handle,
                                 uint32_t domid,
@@ -403,7 +425,8 @@ xc_domain_dumpcore_via_callback(int xc_h
                                 dumpcore_rtn_t dump_rtn)
 {
     xc_dominfo_t info;
-    shared_info_t *live_shinfo = NULL;
+    shared_info_any_t *live_shinfo = NULL;
+    unsigned int guest_width; 
 
     int nr_vcpus = 0;
     char *dump_mem, *dump_mem_start = NULL;
@@ -437,6 +460,12 @@ xc_domain_dumpcore_via_callback(int xc_h
     uint16_t strtab_idx;
     struct xc_core_section_headers *sheaders = NULL;
     Elf64_Shdr *shdr;
+ 
+    if ( get_guest_width(xc_handle, domid, &guest_width) != 0 )
+    {
+        PERROR("Could not get address size for domain");
+        return sts;
+    }
 
     xc_core_arch_context_init(&arch_ctxt);
     if ( (dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL )
@@ -500,7 +529,7 @@ xc_domain_dumpcore_via_callback(int xc_h
             goto out;
         }
 
-        sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo,
+        sts = xc_core_arch_map_p2m(xc_handle, guest_width, &info, live_shinfo,
                                    &p2m, &p2m_size);
         if ( sts != 0 )
             goto out;
@@ -676,6 +705,7 @@ xc_domain_dumpcore_via_callback(int xc_h
     /* write out elf header */
     ehdr.e_shnum = sheaders->num;
     ehdr.e_shstrndx = strtab_idx;
+    ehdr.e_machine = ELF_ARCH_MACHINE;
     sts = dump_rtn(args, (char*)&ehdr, sizeof(ehdr));
     if ( sts != 0 )
         goto out;
@@ -697,7 +727,7 @@ xc_domain_dumpcore_via_callback(int xc_h
         goto out;
 
     /* elf note section: xen version */
-    sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle);
+    sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle, guest_width);
     if ( sts != 0 )
         goto out;
 
@@ -757,9 +787,21 @@ xc_domain_dumpcore_via_callback(int xc_h
 
             if ( !auto_translated_physmap )
             {
-                gmfn = p2m[i];
-                if ( gmfn == INVALID_P2M_ENTRY )
-                    continue;
+                if ( guest_width >= sizeof(unsigned long) )
+                {
+                    if ( guest_width == sizeof(unsigned long) )
+                        gmfn = p2m[i];
+                    else
+                        gmfn = ((uint64_t *)p2m)[i];
+                    if ( gmfn == INVALID_P2M_ENTRY )
+                        continue;
+                }
+                else
+                {
+                    gmfn = ((uint32_t *)p2m)[i];
+                    if ( gmfn == (uint32_t)INVALID_P2M_ENTRY )
+                       continue;
+                }
 
                 p2m_array[j].pfn = i;
                 p2m_array[j].gmfn = gmfn;
@@ -802,7 +844,7 @@ copy_done:
         /* When live dump-mode (-L option) is specified,
          * guest domain may reduce memory. pad with zero pages.
          */
-        IPRINTF("j (%ld) != nr_pages (%ld)", j , nr_pages);
+        IPRINTF("j (%ld) != nr_pages (%ld)", j, nr_pages);
         memset(dump_mem_start, 0, PAGE_SIZE);
         for (; j < nr_pages; j++) {
             sts = dump_rtn(args, dump_mem_start, PAGE_SIZE);
@@ -891,7 +933,7 @@ xc_domain_dumpcore(int xc_handle,
     struct dump_args da;
     int sts;
 
-    if ( (da.fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0 )
+    if ( (da.fd = open(corename, O_CREAT|O_RDWR|O_TRUNC, S_IWUSR|S_IRUSR)) < 0 )
     {
         PERROR("Could not open corefile %s", corename);
         return -errno;
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core.h
--- a/tools/libxc/xc_core.h     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_core.h     Wed Jan 28 13:06:45 2009 +0900
@@ -23,7 +23,7 @@
 
 #include "xen/version.h"
 #include "xg_private.h"
-#include "xen/elfstructs.h"
+#include "xen/libelf/elfstructs.h"
 
 /* section names */
 #define XEN_DUMPCORE_SEC_NOTE                   ".note.Xen"
@@ -136,12 +136,12 @@ struct xc_core_arch_context;
 struct xc_core_arch_context;
 int xc_core_arch_memory_map_get(int xc_handle,
                                 struct xc_core_arch_context *arch_ctxt,
-                                xc_dominfo_t *info, shared_info_t *live_shinfo,
+                                xc_dominfo_t *info, shared_info_any_t *live_shinfo,
                                 xc_core_memory_map_t **mapp,
                                 unsigned int *nr_entries);
-int xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
-                         shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
-                         unsigned long *pfnp);
+int xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width,
+                         xc_dominfo_t *info, shared_info_any_t *live_shinfo,
+                         xen_pfn_t **live_p2m, unsigned long *pfnp);
 
 
 #if defined (__i386__) || defined (__x86_64__)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core_ia64.c
--- a/tools/libxc/xc_core_ia64.c        Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_core_ia64.c        Wed Jan 28 13:06:45 2009 +0900
@@ -68,7 +68,7 @@ xc_core_arch_auto_translated_physmap(con
 /* see setup_guest() @ xc_linux_build.c */
 static int
 memory_map_get_old_domu(int xc_handle, xc_dominfo_t *info,
-                        shared_info_t *live_shinfo,
+                        shared_info_any_t *live_shinfo,
                         xc_core_memory_map_t **mapp, unsigned int *nr_entries)
 {
     xc_core_memory_map_t *map = NULL;
@@ -96,7 +96,7 @@ out:
 /* see setup_guest() @ xc_ia64_hvm_build.c */
 static int
 memory_map_get_old_hvm(int xc_handle, xc_dominfo_t *info, 
-                       shared_info_t *live_shinfo,
+                       shared_info_any_t *live_shinfo,
                        xc_core_memory_map_t **mapp, unsigned int *nr_entries)
 {
     const xc_core_memory_map_t gfw_map[] = {
@@ -155,7 +155,7 @@ out:
 
 static int
 memory_map_get_old(int xc_handle, xc_dominfo_t *info, 
-                   shared_info_t *live_shinfo,
+                   shared_info_any_t *live_shinfo,
                    xc_core_memory_map_t **mapp, unsigned int *nr_entries)
 {
     if ( info->hvm )
@@ -170,7 +170,8 @@ int
 int
 xc_core_arch_memory_map_get(int xc_handle,
                             struct xc_core_arch_context *arch_ctxt,
-                            xc_dominfo_t *info, shared_info_t *live_shinfo,
+                            xc_dominfo_t *info,
+                            shared_info_any_t *live_shinfo,
                             xc_core_memory_map_t **mapp,
                             unsigned int *nr_entries)
 {
@@ -190,8 +191,8 @@ xc_core_arch_memory_map_get(int xc_handl
     }
 
     /* copy before use in case someone updating them */
-    if (xc_ia64_copy_memmap(xc_handle, info->domid, live_shinfo, &memmap_info,
-                            NULL)) {
+    if (xc_ia64_copy_memmap(xc_handle, info->domid, &live_shinfo->s,
+                            &memmap_info, NULL)) {
         goto old;
     }
 
@@ -235,8 +236,8 @@ old:
 }
 
 int
-xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
-                     shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
+xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info,
+                     shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m,
                      unsigned long *pfnp)
 {
     /*
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core_x86.c
--- a/tools/libxc/xc_core_x86.c Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_core_x86.c Wed Jan 28 13:06:45 2009 +0900
@@ -20,9 +20,25 @@
 
 #include "xg_private.h"
 #include "xc_core.h"
-
-/* Don't yet support cross-address-size core dump */
-#define guest_width (sizeof (unsigned long))
+#include "xc_e820.h"
+
+#define GET_FIELD(_p, _f) ((guest_width==8) ? ((_p)->x64._f) : ((_p)->x32._f))
+
+#ifndef MAX
+#define MAX(_a, _b) ((_a) >= (_b) ? (_a) : (_b))
+#endif
+
+int
+xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt,
+                              unsigned long pfn)
+{
+    if ((pfn >= 0xa0 && pfn < 0xc0) /* VGA hole */
+        || (pfn >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT)
+            && pfn < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */
+        return 0;
+    return 1;
+}
+
 
 static int nr_gpfns(int xc_handle, domid_t domid)
 {
@@ -37,7 +53,7 @@ xc_core_arch_auto_translated_physmap(con
 
 int
 xc_core_arch_memory_map_get(int xc_handle, struct xc_core_arch_context *unused,
-                            xc_dominfo_t *info, shared_info_t *live_shinfo,
+                            xc_dominfo_t *info, shared_info_any_t *live_shinfo,
                             xc_core_memory_map_t **mapp,
                             unsigned int *nr_entries)
 {
@@ -60,17 +76,22 @@ xc_core_arch_memory_map_get(int xc_handl
 }
 
 int
-xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
-                     shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
+xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info,
+                     shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m,
                      unsigned long *pfnp)
 {
     /* Double and single indirect references to the live P2M table */
     xen_pfn_t *live_p2m_frame_list_list = NULL;
     xen_pfn_t *live_p2m_frame_list = NULL;
+    /* Copies of the above. */
+    xen_pfn_t *p2m_frame_list_list = NULL;
+    xen_pfn_t *p2m_frame_list = NULL;
+
     uint32_t dom = info->domid;
     unsigned long p2m_size = nr_gpfns(xc_handle, info->domid);
     int ret = -1;
     int err;
+    int i;
 
     if ( p2m_size < info->nr_pages  )
     {
@@ -80,17 +101,36 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
 
     live_p2m_frame_list_list =
         xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
-                             live_shinfo->arch.pfn_to_mfn_frame_list_list);
+                             GET_FIELD(live_shinfo, arch.pfn_to_mfn_frame_list_list));
 
     if ( !live_p2m_frame_list_list )
     {
         PERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
         goto out;
     }
+
+    /* Get a local copy of the live_P2M_frame_list_list */
+    if ( !(p2m_frame_list_list = malloc(PAGE_SIZE)) )
+    {
+        ERROR("Couldn't allocate p2m_frame_list_list array");
+        goto out;
+    }
+    memcpy(p2m_frame_list_list, live_p2m_frame_list_list, PAGE_SIZE);
+
+    /* Canonicalize guest's unsigned long vs ours */
+    if ( guest_width > sizeof(unsigned long) )
+        for ( i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++ )
+            if ( i < PAGE_SIZE/guest_width )
+                p2m_frame_list_list[i] = ((uint64_t *)p2m_frame_list_list)[i];
+            else
+                p2m_frame_list_list[i] = 0;
+    else if ( guest_width < sizeof(unsigned long) )
+        for ( i = PAGE_SIZE/sizeof(unsigned long) - 1; i >= 0; i-- )
+            p2m_frame_list_list[i] = ((uint32_t *)p2m_frame_list_list)[i];
 
     live_p2m_frame_list =
         xc_map_foreign_pages(xc_handle, dom, PROT_READ,
-                             live_p2m_frame_list_list,
+                             p2m_frame_list_list,
                              P2M_FLL_ENTRIES);
 
     if ( !live_p2m_frame_list )
@@ -99,8 +139,25 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
         goto out;
     }
 
+    /* Get a local copy of the live_P2M_frame_list */
+    if ( !(p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) )
+    {
+        ERROR("Couldn't allocate p2m_frame_list array");
+        goto out;
+    }
+    memset(p2m_frame_list, 0, P2M_TOOLS_FL_SIZE);
+    memcpy(p2m_frame_list, live_p2m_frame_list, P2M_GUEST_FL_SIZE);
+
+    /* Canonicalize guest's unsigned long vs ours */
+    if ( guest_width > sizeof(unsigned long) )
+        for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+            p2m_frame_list[i] = ((uint64_t *)p2m_frame_list)[i];
+    else if ( guest_width < sizeof(unsigned long) )
+        for ( i = P2M_FL_ENTRIES - 1; i >= 0; i-- )
+            p2m_frame_list[i] = ((uint32_t *)p2m_frame_list)[i];
+
     *live_p2m = xc_map_foreign_pages(xc_handle, dom, PROT_READ,
-                                    live_p2m_frame_list,
+                                    p2m_frame_list,
                                     P2M_FL_ENTRIES);
 
     if ( !*live_p2m )
@@ -121,6 +178,12 @@ out:
 
     if ( live_p2m_frame_list )
         munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
+
+    if ( p2m_frame_list_list )
+        free(p2m_frame_list_list);
+
+    if ( p2m_frame_list )
+        free(p2m_frame_list);
 
     errno = err;
     return ret;
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_core_x86.h
--- a/tools/libxc/xc_core_x86.h Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_core_x86.h Wed Jan 28 13:06:45 2009 +0900
@@ -21,15 +21,8 @@
 #ifndef XC_CORE_X86_H
 #define XC_CORE_X86_H
 
-#if defined(__i386__) || defined(__x86_64__)
 #define ELF_ARCH_DATA           ELFDATA2LSB
-#if defined (__i386__)
-# define ELF_ARCH_MACHINE       EM_386
-#else
-# define ELF_ARCH_MACHINE       EM_X86_64
-#endif
-#endif /* __i386__ or __x86_64__ */
-
+#define ELF_ARCH_MACHINE       (guest_width == 8 ? EM_X86_64 : EM_386)
 
 struct xc_core_arch_context {
     /* nothing */
@@ -40,8 +33,10 @@ struct xc_core_arch_context {
 #define xc_core_arch_context_get(arch_ctxt, ctxt, xc_handle, domid) \
                                                                 (0)
 #define xc_core_arch_context_dump(arch_ctxt, args, dump_rtn)    (0)
-#define xc_core_arch_gpfn_may_present(arch_ctxt, i)             (1)
 
+int
+xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt,
+                              unsigned long pfn);
 static inline int
 xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, 
                               struct xc_core_section_headers *sheaders,
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_dom.h
--- a/tools/libxc/xc_dom.h      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_dom.h      Wed Jan 28 13:06:45 2009 +0900
@@ -1,4 +1,4 @@
-#include <xen/libelf.h>
+#include <xen/libelf/libelf.h>
 
 #define INVALID_P2M_ENTRY   ((xen_pfn_t)-1)
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_domain.c   Wed Jan 28 13:06:45 2009 +0900
@@ -531,33 +531,6 @@ int xc_domain_memory_populate_physmap(in
         DPRINTF("Failed allocation for dom %d: %ld extents of order %d\n",
                 domid, nr_extents, extent_order);
         errno = EBUSY;
-        err = -1;
-    }
-
-    return err;
-}
-
-int xc_domain_memory_translate_gpfn_list(int xc_handle,
-                                         uint32_t domid,
-                                         unsigned long nr_gpfns,
-                                         xen_pfn_t *gpfn_list,
-                                         xen_pfn_t *mfn_list)
-{
-    int err;
-    struct xen_translate_gpfn_list translate_gpfn_list = {
-        .domid    = domid,
-        .nr_gpfns = nr_gpfns,
-    };
-    set_xen_guest_handle(translate_gpfn_list.gpfn_list, gpfn_list);
-    set_xen_guest_handle(translate_gpfn_list.mfn_list, mfn_list);
-
-    err = xc_memory_op(xc_handle, XENMEM_translate_gpfn_list, &translate_gpfn_list);
-
-    if ( err != 0 )
-    {
-        DPRINTF("Failed translation for dom %d (%ld PFNs)\n",
-                domid, nr_gpfns);
-        errno = -err;
         err = -1;
     }
 
@@ -958,7 +931,8 @@ int xc_domain_bind_pt_irq(
     bind->hvm_domid = domid;
     bind->irq_type = irq_type;
     bind->machine_irq = machine_irq;
-    if ( irq_type == PT_IRQ_TYPE_PCI )
+    if ( irq_type == PT_IRQ_TYPE_PCI ||
+         irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
     {
         bind->u.pci.bus = bus;
         bind->u.pci.device = device;    
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_elf.h
--- a/tools/libxc/xc_elf.h      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_elf.h      Wed Jan 28 13:06:45 2009 +0900
@@ -1,1 +1,1 @@
-#include <xen/elfstructs.h>
+#include <xen/libelf/elfstructs.h>
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_hvm_build.c        Wed Jan 28 13:06:45 2009 +0900
@@ -15,100 +15,55 @@
 #include <xen/foreign/x86_64.h>
 #include <xen/hvm/hvm_info_table.h>
 #include <xen/hvm/params.h>
-#include "xc_e820.h"
-
-#include <xen/libelf.h>
+#include <xen/hvm/e820.h>
+
+#include <xen/libelf/libelf.h>
 
 #define SUPERPAGE_PFN_SHIFT  9
 #define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)
 
-#define SCRATCH_PFN 0xFFFFF
-
-#define SPECIALPAGE_GUARD    0
-#define SPECIALPAGE_BUFIOREQ 1
-#define SPECIALPAGE_XENSTORE 2
-#define SPECIALPAGE_IOREQ    3
-#define SPECIALPAGE_IDENT_PT 4
+#define SPECIALPAGE_BUFIOREQ 0
+#define SPECIALPAGE_XENSTORE 1
+#define SPECIALPAGE_IOREQ    2
+#define SPECIALPAGE_IDENT_PT 3
+#define SPECIALPAGE_SHINFO   4
 #define NR_SPECIAL_PAGES     5
-
-static void build_e820map(void *e820_page, unsigned long long mem_size)
-{
-    struct e820entry *e820entry =
-        (struct e820entry *)(((unsigned char *)e820_page) + HVM_E820_OFFSET);
-    unsigned long long extra_mem_size = 0;
-    unsigned char nr_map = 0;
-
-    /*
-     * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
-     * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
-     * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
-     */
-    if ( mem_size > HVM_BELOW_4G_RAM_END )
-    {
-        extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
-        mem_size = HVM_BELOW_4G_RAM_END;
-    }
-
-    /* 0x0-0x9FC00: Ordinary RAM. */
-    e820entry[nr_map].addr = 0x0;
-    e820entry[nr_map].size = 0x9FC00;
-    e820entry[nr_map].type = E820_RAM;
-    nr_map++;
-
-    /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */
-    e820entry[nr_map].addr = 0x9FC00;
-    e820entry[nr_map].size = 0x400;
-    e820entry[nr_map].type = E820_RESERVED;
-    nr_map++;
-
-    /*
-     * Following regions are standard regions of the PC memory map.
-     * They are not covered by e820 regions. OSes will not use as RAM.
-     * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
-     * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
-     * TODO: hvmloader should free pages which turn out to be unused.
-     */
-
-    /*
-     * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here.
-     *                   We *cannot* mark as E820_ACPI, for two reasons:
-     *                    1. ACPI spec. says that E820_ACPI regions below
-     *                       16MB must clip INT15h 0x88 and 0xe801 queries.
-     *                       Our rombios doesn't do this.
-     *                    2. The OS is allowed to reclaim ACPI memory after
-     *                       parsing the tables. But our FACS is in this
-     *                       region and it must not be reclaimed (it contains
-     *                       the ACPI global lock!).
-     * 0xF0000-0x100000: System BIOS.
-     * TODO: hvmloader should free pages which turn out to be unused.
-     */
-    e820entry[nr_map].addr = 0xE0000;
-    e820entry[nr_map].size = 0x20000;
-    e820entry[nr_map].type = E820_RESERVED;
-    nr_map++;
-
-    /* Low RAM goes here. Reserve space for special pages. */
-    e820entry[nr_map].addr = 0x100000;
-    e820entry[nr_map].size = (mem_size - 0x100000 -
-                              PAGE_SIZE * NR_SPECIAL_PAGES);
-    e820entry[nr_map].type = E820_RAM;
-    nr_map++;
-
-    /* Explicitly reserve space for special pages (excluding guard page). */
-    e820entry[nr_map].addr = mem_size - PAGE_SIZE * (NR_SPECIAL_PAGES - 1);
-    e820entry[nr_map].size = PAGE_SIZE * (NR_SPECIAL_PAGES - 1);
-    e820entry[nr_map].type = E820_RESERVED;
-    nr_map++;
-
-    if ( extra_mem_size )
-    {
-        e820entry[nr_map].addr = (1ULL << 32);
-        e820entry[nr_map].size = extra_mem_size;
-        e820entry[nr_map].type = E820_RAM;
-        nr_map++;
-    }
-
-    *(((unsigned char *)e820_page) + HVM_E820_NR_OFFSET) = nr_map;
+#define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x))
+
+static void build_hvm_info(void *hvm_info_page, uint64_t mem_size)
+{
+    struct hvm_info_table *hvm_info = (struct hvm_info_table *)
+        (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
+    uint64_t lowmem_end = mem_size, highmem_end = 0;
+    uint8_t sum;
+    int i;
+
+    if ( lowmem_end > HVM_BELOW_4G_RAM_END )
+    {
+        highmem_end = lowmem_end + (1ull<<32) - HVM_BELOW_4G_RAM_END;
+        lowmem_end = HVM_BELOW_4G_RAM_END;
+    }
+
+    memset(hvm_info_page, 0, PAGE_SIZE);
+
+    /* Fill in the header. */
+    strncpy(hvm_info->signature, "HVM INFO", 8);
+    hvm_info->length = sizeof(struct hvm_info_table);
+
+    /* Sensible defaults: these can be overridden by the caller. */
+    hvm_info->acpi_enabled = 1;
+    hvm_info->apic_mode = 1;
+    hvm_info->nr_vcpus = 1;
+
+    /* Memory parameters. */
+    hvm_info->low_mem_pgend = lowmem_end >> PAGE_SHIFT;
+    hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT;
+    hvm_info->reserved_mem_pgstart = special_pfn(0);
+
+    /* Finish with the checksum. */
+    for ( i = 0, sum = 0; i < hvm_info->length; i++ )
+        sum += ((uint8_t *)hvm_info)[i];
+    hvm_info->checksum = -sum;
 }
 
 static int loadelfimage(
@@ -153,10 +108,10 @@ static int setup_guest(int xc_handle,
     unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
     unsigned long target_pages = (unsigned long)target << (20 - PAGE_SHIFT);
     unsigned long pod_pages = 0;
-    unsigned long special_page_nr, entry_eip, cur_pages;
+    unsigned long entry_eip, cur_pages;
     struct xen_add_to_physmap xatp;
     struct shared_info *shared_info;
-    void *e820_page;
+    void *hvm_info_page;
     uint32_t *ident_pt;
     struct elf_binary elf;
     uint64_t v_start, v_end;
@@ -289,23 +244,22 @@ static int setup_guest(int xc_handle,
     if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 )
         goto error_out;
 
-    if ( (e820_page = xc_map_foreign_range(
+    if ( (hvm_info_page = xc_map_foreign_range(
               xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              HVM_E820_PAGE >> PAGE_SHIFT)) == NULL )
-        goto error_out;
-    memset(e820_page, 0, PAGE_SIZE);
-    build_e820map(e820_page, v_end);
-    munmap(e820_page, PAGE_SIZE);
+              HVM_INFO_PFN)) == NULL )
+        goto error_out;
+    build_hvm_info(hvm_info_page, v_end);
+    munmap(hvm_info_page, PAGE_SIZE);
 
     /* Map and initialise shared_info page. */
     xatp.domid = dom;
     xatp.space = XENMAPSPACE_shared_info;
     xatp.idx   = 0;
-    xatp.gpfn  = SCRATCH_PFN;
+    xatp.gpfn  = special_pfn(SPECIALPAGE_SHINFO);
     if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) ||
          ((shared_info = xc_map_foreign_range(
              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-             SCRATCH_PFN)) == NULL) )
+             special_pfn(SPECIALPAGE_SHINFO))) == NULL) )
         goto error_out;
     memset(shared_info, 0, PAGE_SIZE);
     /* NB. evtchn_upcall_mask is unused: leave as zero. */
@@ -313,31 +267,28 @@ static int setup_guest(int xc_handle,
            sizeof(shared_info->evtchn_mask));
     munmap(shared_info, PAGE_SIZE);
 
-    special_page_nr = (((v_end > HVM_BELOW_4G_RAM_END)
-                        ? (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT)
-                        : (v_end >> PAGE_SHIFT))
-                       - NR_SPECIAL_PAGES);
-
-    /* Paranoia: clean special pages. */
+    /* Allocate and clear special pages. */
     for ( i = 0; i < NR_SPECIAL_PAGES; i++ )
-        if ( xc_clear_domain_page(xc_handle, dom, special_page_nr + i) )
+    {
+        xen_pfn_t pfn = special_pfn(i);
+        if ( i == SPECIALPAGE_SHINFO )
+            continue;
+        rc = xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0, 0, &pfn);
+        if ( rc != 0 )
+        {
+            PERROR("Could not allocate %d'th special page.\n", i);
             goto error_out;
-
-    /* Free the guard page that separates low RAM from special pages. */
-    rc = xc_domain_memory_decrease_reservation(
-        xc_handle, dom, 1, 0, &page_array[special_page_nr]);
-    if ( rc != 0 )
-    {
-        PERROR("Could not deallocate guard page for HVM guest.\n");
-        goto error_out;
+        }
+        if ( xc_clear_domain_page(xc_handle, dom, special_pfn(i)) )
+            goto error_out;
     }
 
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
-                     special_page_nr + SPECIALPAGE_XENSTORE);
+                     special_pfn(SPECIALPAGE_XENSTORE));
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
-                     special_page_nr + SPECIALPAGE_BUFIOREQ);
+                     special_pfn(SPECIALPAGE_BUFIOREQ));
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
-                     special_page_nr + SPECIALPAGE_IOREQ);
+                     special_pfn(SPECIALPAGE_IOREQ));
 
     /*
      * Identity-map page table is required for running with CR0.PG=0 when
@@ -345,14 +296,14 @@ static int setup_guest(int xc_handle,
      */
     if ( (ident_pt = xc_map_foreign_range(
               xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              special_page_nr + SPECIALPAGE_IDENT_PT)) == NULL )
+              special_pfn(SPECIALPAGE_IDENT_PT))) == NULL )
         goto error_out;
     for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
         ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
                        _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
     munmap(ident_pt, PAGE_SIZE);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
-                     (special_page_nr + SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
+                     special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
 
     /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
     entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_private.c  Wed Jan 28 13:06:45 2009 +0900
@@ -307,13 +307,6 @@ int xc_memory_op(int xc_handle,
             goto out1;
         }
         break;
-    case XENMEM_remove_from_physmap:
-        if ( lock_pages(arg, sizeof(struct xen_remove_from_physmap)) )
-        {
-            PERROR("Could not lock");
-            goto out1;
-        }
-        break;
     case XENMEM_current_reservation:
     case XENMEM_maximum_reservation:
     case XENMEM_maximum_gpfn:
@@ -354,9 +347,6 @@ int xc_memory_op(int xc_handle,
         break;
     case XENMEM_add_to_physmap:
         unlock_pages(arg, sizeof(struct xen_add_to_physmap));
-        break;
-    case XENMEM_remove_from_physmap:
-        unlock_pages(arg, sizeof(struct xen_remove_from_physmap));
         break;
     case XENMEM_current_reservation:
     case XENMEM_maximum_reservation:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xc_ptrace_core.c      Wed Jan 28 13:06:45 2009 +0900
@@ -540,7 +540,9 @@ xc_waitdomain_core_elf(
                              XEN_ELFNOTE_DUMPCORE_XEN_VERSION,
                              (void**)&xen_version) < 0)
         goto out;
-    if (xen_version->xen_version.pagesize != PAGE_SIZE)
+    /* shifted case covers 32 bit FV guest core file created on 64 bit Dom0 */
+    if (xen_version->xen_version.pagesize != PAGE_SIZE &&
+        (xen_version->xen_version.pagesize >> 32) != PAGE_SIZE)
         goto out;
 
     /* .note.Xen: format_version */
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/libxc/xenctrl.h     Wed Jan 28 13:06:45 2009 +0900
@@ -628,12 +628,6 @@ int xc_domain_memory_populate_physmap(in
                                       unsigned int mem_flags,
                                       xen_pfn_t *extent_start);
 
-int xc_domain_memory_translate_gpfn_list(int xc_handle,
-                                         uint32_t domid,
-                                         unsigned long nr_gpfns,
-                                         xen_pfn_t *gpfn_list,
-                                         xen_pfn_t *mfn_list);
-
 int xc_domain_memory_set_pod_target(int xc_handle,
                                     uint32_t domid,
                                     uint64_t target_pages,
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/misc/Makefile
--- a/tools/misc/Makefile       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/misc/Makefile       Wed Jan 28 13:06:45 2009 +0900
@@ -47,7 +47,7 @@ install: build
 
 .PHONY: clean
 clean:
-       $(RM) *.o $(TARGETS) *~
+       $(RM) *.o $(TARGETS) *~ $(DEPS)
        set -e; for d in $(SUBDIRS); do $(MAKE) -C $$d clean; done
 
 %.o: %.c $(HDRS) Makefile
@@ -55,3 +55,5 @@ clean:
 
 xenperf xenpm: %: %.o Makefile
        $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDFLAGS_libxenctrl)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/misc/xenpm.c
--- a/tools/misc/xenpm.c        Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/misc/xenpm.c        Wed Jan 28 13:06:45 2009 +0900
@@ -21,83 +21,56 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <string.h>
 #include <getopt.h>
 #include <errno.h>
+#include <signal.h>
 
 #include <xenctrl.h>
 #include <inttypes.h>
+#include <sys/time.h>
 
 #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
+static int xc_fd;
+static int max_cpu_nr;
 
 /* help message */
 void show_help(void)
 {
     fprintf(stderr,
-            "Usage:\n"
-            "       xenpm get-cpuidle-states [cpuid]: list cpu idle information on CPU cpuid or all CPUs.\n"
-            "       xenpm get-cpufreq-states [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n"
-            "       xenpm get-cpufreq-para [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n"
-            "       xenpm set-scaling-maxfreq <cpuid> <HZ>: set max cpu frequency <HZ> on CPU <cpuid>.\n"
-            "       xenpm set-scaling-minfreq <cpuid> <HZ>: set min cpu frequency <HZ> on CPU <cpuid>.\n"
-            "       xenpm set-scaling-governor <cpuid> <name>: set scaling governor on CPU <cpuid>.\n"
-            "       xenpm set-scaling-speed <cpuid> <num>: set scaling speed on CPU <cpuid>.\n"
-            "       xenpm set-sampling-rate <cpuid> <num>: set sampling rate on CPU <cpuid>.\n"
-            "       xenpm set-up-threshold <cpuid> <num>: set up threshold on CPU <cpuid>.\n");
-}
-
+            "xen power management control tool\n\n"
+            "usage: xenpm <command> [args]\n\n"
+            "xenpm command list:\n\n"
+            " get-cpuidle-states    [cpuid]       list cpu idle info of CPU <cpuid> or all\n"
+            " get-cpufreq-states    [cpuid]       list cpu freq info of CPU <cpuid> or all\n"
+            " get-cpufreq-para      [cpuid]       list cpu freq parameter of CPU <cpuid> or all\n"
+            " set-scaling-maxfreq   [cpuid] <HZ>  set max cpu frequency <HZ> on CPU <cpuid>\n"
+            "                                     or all CPUs\n"
+            " set-scaling-minfreq   [cpuid] <HZ>  set min cpu frequency <HZ> on CPU <cpuid>\n"
+            "                                     or all CPUs\n"
+            " set-scaling-speed     [cpuid] <num> set scaling speed on CPU <cpuid> or all\n"
+            "                                     it is used in userspace governor.\n"
+            " set-scaling-governor  [cpuid] <gov> set scaling governor on CPU <cpuid> or all\n"
+            "                                     as userspace/performance/powersave/ondemand\n"
+            " set-sampling-rate     [cpuid] <num> set sampling rate on CPU <cpuid> or all\n"
+            "                                     it is used in ondemand governor.\n"
+            " set-up-threshold      [cpuid] <num> set up threshold on CPU <cpuid> or all\n"
+            "                                     it is used in ondemand governor.\n"
+            " start                               start collect Cx/Px statistics,\n"
+            "                                     output after CTRL-C or SIGINT.\n"
+            );
+}
 /* wrapper function */
-int help_func(int xc_fd, int cpuid, uint32_t value)
+void help_func(int argc, char *argv[])
 {
     show_help();
-    return 0;
-}
-
-/* show cpu idle information on CPU cpuid */
-static int show_cx_cpuid(int xc_fd, int cpuid)
-{
-    int i, ret = 0;
-    int max_cx_num = 0;
-    struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo;
-
-    ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num);
-    if ( ret )
-    {
-        if ( errno == ENODEV )
-        {
-            fprintf(stderr, "Xen cpuidle is not enabled!\n");
-            return -ENODEV;
-        }
-        else
-        {
-            fprintf(stderr, "[CPU%d] failed to get max C-state\n", cpuid);
-            return -EINVAL;
-        }
-    }
-
-    cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
-    if ( !cxstat->triggers )
-    {
-        fprintf(stderr, "[CPU%d] failed to malloc for C-states triggers\n", cpuid);
-        return -ENOMEM;
-    }
-    cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
-    if ( !cxstat->residencies )
-    {
-        fprintf(stderr, "[CPU%d] failed to malloc for C-states residencies\n", cpuid);
-        free(cxstat->triggers);
-        return -ENOMEM;
-    }
-
-    ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat);
-    if( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to get C-states statistics "
-                "information\n", cpuid);
-        free(cxstat->triggers);
-        free(cxstat->residencies);
-        return -EINVAL;
-    }
+}
+
+static void print_cxstat(int cpuid, struct xc_cx_stat *cxstat)
+{
+    int i;
 
     printf("cpu id               : %d\n", cpuid);
     printf("total C-states       : %d\n", cxstat->nr);
@@ -110,88 +83,87 @@ static int show_cx_cpuid(int xc_fd, int 
         printf("                       residency  [%020"PRIu64" ms]\n",
                cxstat->residencies[i]/1000000UL);
     }
-
-    free(cxstat->triggers);
-    free(cxstat->residencies);
-
     printf("\n");
+}
+
+/* show cpu idle information on CPU cpuid */
+static int get_cxstat_by_cpuid(int xc_fd, int cpuid, struct xc_cx_stat *cxstat)
+{
+    int ret = 0;
+    int max_cx_num = 0;
+
+    ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num);
+    if ( ret )
+        return errno;
+
+    if ( !cxstat )
+        return -EINVAL;
+
+    cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
+    if ( !cxstat->triggers )
+        return -ENOMEM;
+    cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
+    if ( !cxstat->residencies )
+    {
+        free(cxstat->triggers);
+        return -ENOMEM;
+    }
+
+    ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat);
+    if( ret )
+    {
+        int temp = errno;
+        free(cxstat->triggers);
+        free(cxstat->residencies);
+        cxstat->triggers = NULL;
+        cxstat->residencies = NULL;
+        return temp;
+    }
+
     return 0;
 }
 
-int cxstates_func(int xc_fd, int cpuid, uint32_t value)
+static int show_cxstat_by_cpuid(int xc_fd, int cpuid)
 {
     int ret = 0;
-    xc_physinfo_t physinfo = { 0 };
-
-    if ( cpuid < 0 )
-    {
-        /* show cxstates on all cpu */
-        ret = xc_physinfo(xc_fd, &physinfo);
-        if ( ret )
-        {
-            fprintf(stderr, "failed to get the processor information\n");
-        }
-        else
-        {
-            int i;
-            for ( i = 0; i < physinfo.nr_cpus; i++ )
-            {
-                if ( (ret = show_cx_cpuid(xc_fd, i)) == -ENODEV )
-                    break;
-            }
-        }
-    }
-    else
-        ret = show_cx_cpuid(xc_fd, cpuid);
-
-    return ret;
-}
-
-/* show cpu frequency information on CPU cpuid */
-static int show_px_cpuid(int xc_fd, int cpuid)
-{
-    int i, ret = 0;
-    int max_px_num = 0;
-    struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo;
-
-    ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num);
+    struct xc_cx_stat cxstatinfo;
+
+    ret = get_cxstat_by_cpuid(xc_fd, cpuid, &cxstatinfo);
     if ( ret )
-    {
-        if ( errno == ENODEV )
-        {
-            printf("Xen cpufreq is not enabled!\n");
-            return -ENODEV;
-        }
-        else
-        {
-            fprintf(stderr, "[CPU%d] failed to get max P-state\n", cpuid);
-            return -EINVAL;
-        }
-    }
-
-    pxstat->trans_pt = malloc(max_px_num * max_px_num *
-                              sizeof(uint64_t));
-    if ( !pxstat->trans_pt )
-    {
-        fprintf(stderr, "[CPU%d] failed to malloc for P-states transition table\n", cpuid);
-        return -ENOMEM;
-    }
-    pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
-    if ( !pxstat->pt )
-    {
-        fprintf(stderr, "[CPU%d] failed to malloc for P-states table\n", cpuid);
-        free(pxstat->trans_pt);
-        return -ENOMEM;
-    }
-
-    ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat);
-    if( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to get P-states statistics information\n", cpuid);
-        free(pxstat->trans_pt);
-        free(pxstat->pt);
-        return -ENOMEM;
-    }
+        return ret;
+
+    print_cxstat(cpuid, &cxstatinfo);
+
+    free(cxstatinfo.triggers);
+    free(cxstatinfo.residencies);
+    return 0;
+}
+
+void cxstat_func(int argc, char *argv[])
+{
+    int cpuid = -1;
+
+    if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
+        cpuid = -1;
+
+    if ( cpuid >= max_cpu_nr )
+        cpuid = -1;
+
+    if ( cpuid < 0 )
+    {
+        /* show cxstates on all cpus */
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( show_cxstat_by_cpuid(xc_fd, i) == -ENODEV )
+                break;
+    }
+    else
+        show_cxstat_by_cpuid(xc_fd, cpuid);
+}
+
+static void print_pxstat(int cpuid, struct xc_px_stat *pxstat)
+{
+    int i;
 
     printf("cpu id               : %d\n", cpuid);
     printf("total P-states       : %d\n", pxstat->total);
@@ -211,40 +183,233 @@ static int show_px_cpuid(int xc_fd, int 
         printf("                       residency  [%020"PRIu64" ms]\n",
                pxstat->pt[i].residency/1000000UL);
     }
-
-    free(pxstat->trans_pt);
-    free(pxstat->pt);
-
     printf("\n");
+}
+
+/* show cpu frequency information on CPU cpuid */
+static int get_pxstat_by_cpuid(int xc_fd, int cpuid, struct xc_px_stat *pxstat)
+{
+    int ret = 0;
+    int max_px_num = 0;
+
+    ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num);
+    if ( ret )
+        return errno;
+
+    if ( !pxstat)
+        return -EINVAL;
+
+    pxstat->trans_pt = malloc(max_px_num * max_px_num *
+                              sizeof(uint64_t));
+    if ( !pxstat->trans_pt )
+        return -ENOMEM;
+    pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
+    if ( !pxstat->pt )
+    {
+        free(pxstat->trans_pt);
+        return -ENOMEM;
+    }
+
+    ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat);
+    if( ret )
+    {
+        int temp = errno;
+        free(pxstat->trans_pt);
+        free(pxstat->pt);
+        pxstat->trans_pt = NULL;
+        pxstat->pt = NULL;
+        return temp;
+    }
+
     return 0;
 }
 
-int pxstates_func(int xc_fd, int cpuid, uint32_t value)
+static int show_pxstat_by_cpuid(int xc_fd, int cpuid)
 {
     int ret = 0;
-    xc_physinfo_t physinfo = { 0 };
-
-    if ( cpuid < 0 )
-    {
-        ret = xc_physinfo(xc_fd, &physinfo);
-        if ( ret )
+    struct xc_px_stat pxstatinfo;
+
+    ret = get_pxstat_by_cpuid(xc_fd, cpuid, &pxstatinfo);
+    if ( ret )
+        return ret;
+
+    print_pxstat(cpuid, &pxstatinfo);
+
+    free(pxstatinfo.trans_pt);
+    free(pxstatinfo.pt);
+    return 0;
+}
+
+void pxstat_func(int argc, char *argv[])
+{
+    int cpuid = -1;
+
+    if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
+        cpuid = -1;
+
+    if ( cpuid >= max_cpu_nr )
+        cpuid = -1;
+
+    if ( cpuid < 0 )
+    {
+        /* show pxstates on all cpus */
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( show_pxstat_by_cpuid(xc_fd, i) == -ENODEV )
+                break;
+    }
+    else
+        show_pxstat_by_cpuid(xc_fd, cpuid);
+}
+
+static uint64_t usec_start, usec_end;
+static struct xc_cx_stat *cxstat, *cxstat_start, *cxstat_end;
+static struct xc_px_stat *pxstat, *pxstat_start, *pxstat_end;
+static uint64_t *sum, *sum_cx, *sum_px;
+
+static void signal_int_handler(int signo)
+{
+    int i, j;
+    struct timeval tv;
+    int cx_cap = 0, px_cap = 0;
+
+    if ( gettimeofday(&tv, NULL) == -1 )
+    {
+        fprintf(stderr, "failed to get timeofday\n");
+        return ;
+    }
+    usec_end = tv.tv_sec * 1000000UL + tv.tv_usec;
+
+    if ( get_cxstat_by_cpuid(xc_fd, 0, NULL) != -ENODEV )
+    {
+        cx_cap = 1;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( !get_cxstat_by_cpuid(xc_fd, i, &cxstat_end[i]) )
+                for ( j = 0; j < cxstat_end[i].nr; j++ )
+                    sum_cx[i] += cxstat_end[i].residencies[j] -
+                                 cxstat_start[i].residencies[j];
+    }
+
+    if ( get_pxstat_by_cpuid(xc_fd, 0, NULL) != -ENODEV )
+    {
+        px_cap = 1;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( !get_pxstat_by_cpuid(xc_fd, i , &pxstat_end[i]) )
+                for ( j = 0; j < pxstat_end[i].total; j++ )
+                    sum_px[i] += pxstat_end[i].pt[j].residency -
+                                 pxstat_start[i].pt[j].residency;
+    }
+
+    printf("Elapsed time (ms): %"PRIu64"\n", (usec_end - usec_start) / 1000UL);
+    for ( i = 0; i < max_cpu_nr; i++ )
+    {
+        uint64_t temp;
+        printf("CPU%d:\n\tresidency\tpercentage\n", i);
+        if ( cx_cap )
         {
-            fprintf(stderr, "failed to get the processor information\n");
-        }
-        else
-        {
-            int i;
-            for ( i = 0; i < physinfo.nr_cpus; i++ )
+            for ( j = 0; j < cxstat_end[i].nr; j++ )
             {
-                if ( (ret = show_px_cpuid(xc_fd, i)) == -ENODEV )
-                    break;
+                if ( sum_cx[i] > 0 )
+                {
+                    temp = cxstat_end[i].residencies[j] -
+                           cxstat_start[i].residencies[j];
+                    printf("  C%d\t%"PRIu64" ms\t%.2f%%\n", j,
+                           temp / 1000000UL, 100UL * temp / (double)sum_cx[i]);
+                }
             }
         }
-    }
-    else
-        ret = show_px_cpuid(xc_fd, cpuid);
-
-    return ret;
+        if ( px_cap )
+        {
+            for ( j = 0; j < pxstat_end[i].total; j++ )
+            {
+                if ( sum_px[i] > 0 )
+                {
+                    temp = pxstat_end[i].pt[j].residency -
+                           pxstat_start[i].pt[j].residency;
+                    printf("  P%d\t%"PRIu64" ms\t%.2f%%\n", j,
+                           temp / 1000000UL, 100UL * temp / (double)sum_px[i]);
+                }
+            }
+        }
+        printf("\n");
+    }
+
+    /* some clean up and then exits */
+    for ( i = 0; i < 2 * max_cpu_nr; i++ )
+    {
+        free(cxstat[i].triggers);
+        free(cxstat[i].residencies);
+        free(pxstat[i].trans_pt);
+        free(pxstat[i].pt);
+    }
+    free(cxstat);
+    free(pxstat);
+    free(sum);
+    xc_interface_close(xc_fd);
+    exit(0);
+}
+
+void start_gather_func(int argc, char *argv[])
+{
+    int i;
+    struct timeval tv;
+
+    if ( gettimeofday(&tv, NULL) == -1 )
+    {
+        fprintf(stderr, "failed to get timeofday\n");
+        return ;
+    }
+    usec_start = tv.tv_sec * 1000000UL + tv.tv_usec;
+
+    sum = malloc(sizeof(uint64_t) * 2 * max_cpu_nr);
+    if ( sum == NULL )
+        return ;
+    cxstat = malloc(sizeof(struct xc_cx_stat) * 2 * max_cpu_nr);
+    if ( cxstat == NULL )
+    {
+        free(sum);
+        return ;
+    }
+    pxstat = malloc(sizeof(struct xc_px_stat) * 2 * max_cpu_nr);
+    if ( pxstat == NULL )
+    {
+        free(sum);
+        free(cxstat);
+        return ;
+    }
+    memset(sum, 0, sizeof(uint64_t) * 2 * max_cpu_nr);
+    memset(cxstat, 0, sizeof(struct xc_cx_stat) * 2 * max_cpu_nr);
+    memset(pxstat, 0, sizeof(struct xc_px_stat) * 2 * max_cpu_nr);
+    sum_cx = sum;
+    sum_px = sum + max_cpu_nr;
+    cxstat_start = cxstat;
+    cxstat_end = cxstat + max_cpu_nr;
+    pxstat_start = pxstat;
+    pxstat_end = pxstat + max_cpu_nr;
+
+    if ( get_cxstat_by_cpuid(xc_fd, 0, NULL) == -ENODEV &&
+         get_pxstat_by_cpuid(xc_fd, 0, NULL) == -ENODEV )
+    {
+        fprintf(stderr, "Xen cpu idle and frequency is disabled!\n");
+        return ;
+    }
+
+    for ( i = 0; i < max_cpu_nr; i++ )
+    {
+        get_cxstat_by_cpuid(xc_fd, i, &cxstat_start[i]);
+        get_pxstat_by_cpuid(xc_fd, i, &pxstat_start[i]);
+    }
+
+    if (signal(SIGINT, signal_int_handler) == SIG_ERR)
+    {
+        fprintf(stderr, "failed to set signal int handler\n");
+        free(sum);
+        free(pxstat);
+        free(cxstat);
+        return ;
+    }
+
+    pause();
 }
 
 /* print out parameters about cpu frequency */
@@ -294,7 +459,8 @@ static void print_cpufreq_para(int cpuid
 
     printf("scaling_avail_freq   :");
     for ( i = 0; i < p_cpufreq->freq_num; i++ )
-        if ( p_cpufreq->scaling_available_frequencies[i] == p_cpufreq->scaling_cur_freq )
+        if ( p_cpufreq->scaling_available_frequencies[i] ==
+             p_cpufreq->scaling_cur_freq )
             printf(" *%d", p_cpufreq->scaling_available_frequencies[i]);
         else
             printf(" %d", p_cpufreq->scaling_available_frequencies[i]);
@@ -308,7 +474,7 @@ static void print_cpufreq_para(int cpuid
 }
 
 /* show cpu frequency parameters information on CPU cpuid */
-static int show_cpufreq_para_cpuid(int xc_fd, int cpuid)
+static int show_cpufreq_para_by_cpuid(int xc_fd, int cpuid)
 {
     int ret = 0;
     struct xc_get_cpufreq_para cpufreq_para, *p_cpufreq = &cpufreq_para;
@@ -381,159 +547,221 @@ out:
     return ret;
 }
 
-int cpufreq_para_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret = 0;
-    xc_physinfo_t physinfo = { 0 };
-
-    if ( cpuid < 0 )
-    {
-        ret = xc_physinfo(xc_fd, &physinfo);
-        if ( ret )
+void cpufreq_para_func(int argc, char *argv[])
+{
+    int cpuid = -1;
+
+    if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
+        cpuid = -1;
+
+    if ( cpuid >= max_cpu_nr )
+        cpuid = -1;
+
+    if ( cpuid < 0 )
+    {
+        /* show cpu freqency information on all cpus */
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( show_cpufreq_para_by_cpuid(xc_fd, i) == -ENODEV )
+                break;
+    }
+    else
+        show_cpufreq_para_by_cpuid(xc_fd, cpuid);
+}
+
+void scaling_max_freq_func(int argc, char *argv[])
+{
+    int cpuid = -1, freq = -1;
+
+    if ( (argc >= 2 && (sscanf(argv[1], "%d", &freq) != 1 ||
+                        sscanf(argv[0], "%d", &cpuid) != 1)) ||
+         (argc == 1 && sscanf(argv[0], "%d", &freq) != 1 ) ||
+         argc == 0 )
+    {
+        fprintf(stderr, "failed to set scaling max freq\n");
+        return ;
+    }
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_para(xc_fd, i, SCALING_MAX_FREQ, freq) )
+                fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, freq) )
+            fprintf(stderr, "failed to set scaling max freq\n");
+    }
+}
+
+void scaling_min_freq_func(int argc, char *argv[])
+{
+    int cpuid = -1, freq = -1;
+
+    if ( (argc >= 2 && (sscanf(argv[1], "%d", &freq) != 1 ||
+                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+         (argc == 1 && sscanf(argv[0], "%d", &freq) != 1 ) ||
+         argc == 0 )
+    {
+        fprintf(stderr, "failed to set scaling min freq\n");
+        return ;
+    }
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_para(xc_fd, i, SCALING_MIN_FREQ, freq) )
+                fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, freq) )
+            fprintf(stderr, "failed to set scaling min freq\n");
+    }
+}
+
+void scaling_speed_func(int argc, char *argv[])
+{
+    int cpuid = -1, speed = -1;
+
+    if ( (argc >= 2 && (sscanf(argv[1], "%d", &speed) != 1 ||
+                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+         (argc == 1 && sscanf(argv[0], "%d", &speed) != 1 ) ||
+         argc == 0 )
+    {
+        fprintf(stderr, "failed to set scaling speed\n");
+        return ;
+    }
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_para(xc_fd, i, SCALING_SETSPEED, speed) )
+                fprintf(stderr, "[CPU%d] failed to set scaling speed\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, speed) )
+            fprintf(stderr, "failed to set scaling speed\n");
+    }
+}
+
+void scaling_sampling_rate_func(int argc, char *argv[])
+{
+    int cpuid = -1, rate = -1;
+
+    if ( (argc >= 2 && (sscanf(argv[1], "%d", &rate) != 1 ||
+                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+         (argc == 1 && sscanf(argv[0], "%d", &rate) != 1 ) ||
+         argc == 0 )
+    {
+        fprintf(stderr, "failed to set scaling sampling rate\n");
+        return ;
+    }
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_para(xc_fd, i, SAMPLING_RATE, rate) )
+                fprintf(stderr,
+                        "[CPU%d] failed to set scaling sampling rate\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, rate) )
+            fprintf(stderr, "failed to set scaling sampling rate\n");
+    }
+}
+
+void scaling_up_threshold_func(int argc, char *argv[])
+{
+    int cpuid = -1, threshold = -1;
+
+    if ( (argc >= 2 && (sscanf(argv[1], "%d", &threshold) != 1 ||
+                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+         (argc == 1 && sscanf(argv[0], "%d", &threshold) != 1 ) ||
+         argc == 0 )
+    {
+        fprintf(stderr, "failed to set up scaling threshold\n");
+        return ;
+    }
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_para(xc_fd, i, UP_THRESHOLD, threshold) )
+                fprintf(stderr,
+                        "[CPU%d] failed to set up scaling threshold\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, threshold) )
+            fprintf(stderr, "failed to set up scaling threshold\n");
+    }
+}
+
+void scaling_governor_func(int argc, char *argv[])
+{
+    int cpuid = -1;
+    char *name = NULL;
+
+    if ( argc >= 2 )
+    {
+        name = strdup(argv[1]);
+        if ( name == NULL )
+            goto out;
+        if ( sscanf(argv[0], "%d", &cpuid) != 1 )
         {
-            fprintf(stderr, "failed to get the processor information\n");
+            free(name);
+            goto out;
         }
-        else
-        {
-            int i;
-            for ( i = 0; i < physinfo.nr_cpus; i++ )
-            {
-                if ( (ret = show_cpufreq_para_cpuid(xc_fd, i)) == -ENODEV )
-                    break;
-            }
-        }
-    }
-    else
-        ret = show_cpufreq_para_cpuid(xc_fd, cpuid);
-
-    return ret;
-}
-
-int scaling_max_freq_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret = 0;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, value);
-    if ( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", cpuid);
-    }
-
-    return ret;
-}
-
-int scaling_min_freq_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, value);
-    if ( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", cpuid);
-    }
-
-    return ret;
-}
-
-int scaling_speed_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, value);
-    if ( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to set scaling speed\n", cpuid);
-    }
-
-    return ret;
-}
-
-int scaling_sampling_rate_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, value);
-    if ( ret ) 
-    {
-        fprintf(stderr, "[CPU%d] failed to set scaling sampling rate\n", cpuid);
-    }
-
-    return ret;
-}
-
-int scaling_up_threshold_func(int xc_fd, int cpuid, uint32_t value)
-{
-    int ret;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, value);
-    if ( ret )
-    {
-        fprintf(stderr, "[CPU%d] failed to set scaling threshold\n", cpuid);
-    }
-
-    return ret;
-}
-
-int scaling_governor_func(int xc_fd, int cpuid, char *name)
-{
-    int ret = 0;
-
-    if ( cpuid < 0 )
-    {
-        show_help();
-        return -EINVAL;
-    }
-
-    ret = xc_set_cpufreq_gov(xc_fd, cpuid, name);
-    if ( ret )
-    {
-        fprintf(stderr, "failed to set cpufreq governor to %s\n", name);
-    }
-
-    return ret;
+    }
+    else if ( argc > 0 )
+    {
+        name = strdup(argv[0]);
+        if ( name == NULL )
+            goto out;
+    }
+    else
+        goto out;
+
+    if ( cpuid < 0 )
+    {
+        int i;
+        for ( i = 0; i < max_cpu_nr; i++ )
+            if ( xc_set_cpufreq_gov(xc_fd, i, name) )
+                fprintf(stderr, "[CPU%d] failed to set governor name\n", i);
+    }
+    else
+    {
+        if ( xc_set_cpufreq_gov(xc_fd, cpuid, name) )
+            fprintf(stderr, "failed to set governor name\n");
+    }
+
+    free(name);
+    return ;
+out:
+    fprintf(stderr, "failed to set governor name\n");
 }
 
 struct {
     const char *name;
-    int (*function)(int xc_fd, int cpuid, uint32_t value);
+    void (*function)(int argc, char *argv[]);
 } main_options[] = {
     { "help", help_func },
-    { "get-cpuidle-states", cxstates_func },
-    { "get-cpufreq-states", pxstates_func },
+    { "get-cpuidle-states", cxstat_func },
+    { "get-cpufreq-states", pxstat_func },
+    { "start", start_gather_func },
     { "get-cpufreq-para", cpufreq_para_func },
     { "set-scaling-maxfreq", scaling_max_freq_func },
     { "set-scaling-minfreq", scaling_min_freq_func },
-    { "set-scaling-governor", NULL },
+    { "set-scaling-governor", scaling_governor_func },
     { "set-scaling-speed", scaling_speed_func },
     { "set-sampling-rate", scaling_sampling_rate_func },
     { "set-up-threshold", scaling_up_threshold_func },
@@ -541,38 +769,37 @@ struct {
 
 int main(int argc, char *argv[])
 {
-    int i, ret = -EINVAL;
-    int xc_fd;
-    int cpuid = -1;
-    uint32_t value = 0;
+    int i, ret = 0;
+    xc_physinfo_t physinfo = { 0 };
     int nr_matches = 0;
     int matches_main_options[ARRAY_SIZE(main_options)];
 
     if ( argc < 2 )
     {
         show_help();
-        return ret;
-    }
-
-    if ( argc > 2 )
-    {
-        if ( sscanf(argv[2], "%d", &cpuid) != 1 )
-            cpuid = -1;
+        return 0;
     }
 
     xc_fd = xc_interface_open();
     if ( xc_fd < 0 )
     {
         fprintf(stderr, "failed to get the handler\n");
-    }
-
+        return 0;
+    }
+
+    ret = xc_physinfo(xc_fd, &physinfo);
+    if ( ret )
+    {
+        fprintf(stderr, "failed to get the processor information\n");
+        xc_interface_close(xc_fd);
+        return 0;
+    }
+    max_cpu_nr = physinfo.nr_cpus;
+
+    /* calculate how many options match with user's input */
     for ( i = 0; i < ARRAY_SIZE(main_options); i++ )
-    {
         if ( !strncmp(main_options[i].name, argv[1], strlen(argv[1])) )
-        {
             matches_main_options[nr_matches++] = i;
-        }
-    }
 
     if ( nr_matches > 1 )
     {
@@ -582,27 +809,12 @@ int main(int argc, char *argv[])
         fprintf(stderr, "\n");
     }
     else if ( nr_matches == 1 )
-    {
-        if ( !strcmp("set-scaling-governor", main_options[matches_main_options[0]].name) )
-        {
-            char *name = strdup(argv[3]);
-            ret = scaling_governor_func(xc_fd, cpuid, name);
-            free(name);
-        }
-        else
-        {
-            if ( argc > 3 )
-            {
-                if ( sscanf(argv[3], "%d", &value) != 1 )
-                    value = 0;
-            }
-            ret = main_options[matches_main_options[0]].function(xc_fd, cpuid, value);
-        }
-    }
+        /* dispatch to the corresponding function handler */
+        main_options[matches_main_options[0]].function(argc - 2, argv + 2);
     else
         show_help();
 
     xc_interface_close(xc_fd);
-    return ret;
-}
-
+    return 0;
+}
+
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/pygrub/Makefile
--- a/tools/pygrub/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/pygrub/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -22,4 +22,6 @@ endif
 
 .PHONY: clean
 clean:
-       rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out
+       rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/Makefile
--- a/tools/python/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -85,3 +85,6 @@ test:
 .PHONY: clean
 clean:
        rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/auxbin.pyc
+       rm -f $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Jan 28 13:06:45 2009 +0900
@@ -903,26 +903,24 @@ static PyObject *pyxc_hvm_build(XcObject
     if ( target == -1 )
         target = memsize;
 
-    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, target, image) != 0 )
+    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
+                                 target, image) != 0 )
         return pyxc_error_to_exception();
 
 #if !defined(__ia64__)
-    /* Set up the HVM info table. */
+    /* Fix up the HVM info table. */
     va_map = xc_map_foreign_range(self->xc_handle, dom, XC_PAGE_SIZE,
                                   PROT_READ | PROT_WRITE,
                                   HVM_INFO_PFN);
     if ( va_map == NULL )
         return PyErr_SetFromErrno(xc_error_obj);
     va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
-    memset(va_hvm, 0, sizeof(*va_hvm));
-    strncpy(va_hvm->signature, "HVM INFO", 8);
-    va_hvm->length       = sizeof(struct hvm_info_table);
     va_hvm->acpi_enabled = acpi;
     va_hvm->apic_mode    = apic;
     va_hvm->nr_vcpus     = vcpus;
     for ( i = 0, sum = 0; i < va_hvm->length; i++ )
         sum += ((uint8_t *)va_hvm)[i];
-    va_hvm->checksum = -sum;
+    va_hvm->checksum -= sum;
     munmap(va_map, XC_PAGE_SIZE);
 #endif
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/util/oshelp.py
--- a/tools/python/xen/util/oshelp.py   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/util/oshelp.py   Wed Jan 28 13:06:45 2009 +0900
@@ -5,7 +5,7 @@ def fcntl_setfd_cloexec(file, bool):
         f = fcntl.fcntl(file, fcntl.F_GETFD)
         if bool: f |= fcntl.FD_CLOEXEC
         else: f &= ~fcntl.FD_CLOEXEC
-        fcntl.fcntl(file, fcntl.F_SETFD)
+        fcntl.fcntl(file, fcntl.F_SETFD, f)
 
 def waitstatus_description(st):
         if os.WIFEXITED(st):
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/XendConfig.py       Wed Jan 28 13:06:45 2009 +0900
@@ -149,6 +149,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
     'opengl': int,
     'soundhw': str,
     'stdvga': int,
+    'videoram': int,
     'usb': int,
     'usbdevice': str,
     'hpet': int,
@@ -166,6 +167,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
     'guest_os_type': str,
     'hap': int,
     'xen_extended_power_mgmt': int,
+    'pci_msitranslate': int,
 }
 
 # Xen API console 'other_config' keys.
@@ -1247,6 +1249,11 @@ class XendConfig(dict):
                         'PPCI': ppci_uuid,
                         'hotplug_slot': pci_dev.get('vslot', 0)
                     }
+
+                    dpci_opts = pci_dev.get('opts')
+                    if dpci_opts and len(dpci_opts) > 0:
+                        dpci_record['options'] = dpci_opts
+
                     XendDPCI(dpci_uuid, dpci_record)
 
                 target['devices'][pci_devs_uuid] = (dev_type,
@@ -1762,6 +1769,11 @@ class XendConfig(dict):
                         'PPCI': ppci_uuid,
                         'hotplug_slot': pci_dev.get('vslot', 0)
                     }
+
+                    dpci_opts = pci_dev.get('opts')
+                    if dpci_opts and len(dpci_opts) > 0:
+                        dpci_record['options'] = dpci_opts
+
                     XendDPCI(dpci_uuid, dpci_record)
 
                 self['devices'][dev_uuid] = (dev_type,
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendDPCI.py
--- a/tools/python/xen/xend/XendDPCI.py Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/XendDPCI.py Wed Jan 28 13:06:45 2009 +0900
@@ -41,7 +41,8 @@ class XendDPCI(XendBase):
                   'virtual_name',
                   'VM',
                   'PPCI',
-                  'hotplug_slot']
+                  'hotplug_slot',
+                  'options']
         return XendBase.getAttrRO() + attrRO
 
     def getAttrRW(self):
@@ -119,6 +120,8 @@ class XendDPCI(XendBase):
         self.VM = record['VM']
         self.PPCI = record['PPCI']
         self.hotplug_slot = record['hotplug_slot']
+        if 'options' in record.keys():
+            self.options = record['options']
 
     def destroy(self):
         xendom = XendDomain.instance()
@@ -152,3 +155,5 @@ class XendDPCI(XendBase):
     def get_hotplug_slot(self):
         return self.hotplug_slot
 
+    def get_options(self):
+        return self.options
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/XendDomain.py       Wed Jan 28 13:06:45 2009 +0900
@@ -423,7 +423,7 @@ class XendDomain:
                     log.exception("Unable to recreate domain")
                     try:
                         xc.domain_pause(domid)
-                        do_FLR(domid)
+                        XendDomainInfo.do_FLR(domid)
                         xc.domain_destroy(domid)
                     except:
                         log.exception("Hard destruction of domain failed: %d" %
@@ -1264,7 +1264,7 @@ class XendDomain:
         else:
             try:
                 xc.domain_pause(int(domid))
-                do_FLR(int(domid))
+                XendDomainInfo.do_FLR(int(domid))
                 val = xc.domain_destroy(int(domid))
             except ValueError:
                 raise XendInvalidDomain(domid)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py   Wed Jan 28 13:06:45 2009 +0900
@@ -696,10 +696,17 @@ class XendDomainInfo:
                     " assigned to other domain.' \
                     )% (pci_device.name, self.domid, pci_str))
 
-        bdf_str = "%s:%s:%s.%s@%s" % (new_dev['domain'],
+        opts = ''
+        if 'opts' in new_dev and len(new_dev['opts']) > 0:
+            config_opts = new_dev['opts']
+            config_opts = map(lambda (x, y): x+'='+y, config_opts)
+            opts = ',' + reduce(lambda x, y: x+','+y, config_opts)
+
+        bdf_str = "%s:%s:%s.%s%s@%s" % (new_dev['domain'],
                 new_dev['bus'],
                 new_dev['slot'],
                 new_dev['func'],
+                opts,
                 new_dev['vslt'])
         self.image.signalDeviceModel('pci-ins', 'pci-inserted', bdf_str)
 
@@ -1192,7 +1199,7 @@ class XendDomainInfo:
 
         if self.domid >= 0:
             if target > memory_cur:
-                balloon.free( (target-memory_cur)*1024 )
+                balloon.free((target - memory_cur) * 1024, self)
             self.storeVm("memory", target)
             self.storeDom("memory/target", target << 10)
             xc.domain_set_target_mem(self.domid,
@@ -2234,7 +2241,11 @@ class XendDomainInfo:
         xc.domain_max_vcpus(self.domid, int(self.info['VCPUs_max']))
 
         # Test whether the devices can be assigned with VT-d
-        pci_str = str(self.info["platform"].get("pci"))
+        pci = self.info["platform"].get("pci")
+        pci_str = ''
+        if pci and len(pci) > 0:
+            pci = map(lambda x: x[0:4], pci)  # strip options 
+            pci_str = str(pci)
         if hvm and pci_str:
             bdf = xc.test_assign_device(self.domid, pci_str)
             if bdf != 0:
@@ -3527,6 +3538,11 @@ class XendDomainInfo:
 
         dpci_uuid = uuid.createString()
 
+        dpci_opts = []
+        opts_dict = xenapi_pci.get('options')
+        for k in opts_dict.keys():
+            dpci_opts.append([k, opts_dict[k]])
+
         # Convert xenapi to sxp
         ppci = XendAPIStore.get(xenapi_pci.get('PPCI'), 'PPCI')
 
@@ -3538,6 +3554,7 @@ class XendDomainInfo:
                     ['slot', '0x%02x' % ppci.get_slot()],
                     ['func', '0x%1x' % ppci.get_func()],
                     ['vslt', '0x%02x' % xenapi_pci.get('hotplug_slot')],
+                    ['opts', dpci_opts],
                     ['uuid', dpci_uuid]
                 ],
                 ['state', 'Initialising']
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/balloon.py  Wed Jan 28 13:06:45 2009 +0900
@@ -67,7 +67,7 @@ def get_dom0_target_alloc():
         raise VmError('Failed to query target memory allocation of dom0.')
     return kb
 
-def free(need_mem ,self):
+def free(need_mem, dominfo):
     """Balloon out memory from the privileged domain so that there is the
     specified required amount (in KiB) free.
     """
@@ -130,7 +130,7 @@ def free(need_mem ,self):
         if physinfo['nr_nodes'] > 1 and retries == 0:
             oldnode = -1
             waitscrub = 1
-            vcpus = self.info['cpus'][0]
+            vcpus = dominfo.info['cpus'][0]
             for vcpu in vcpus:
                 nodenum = 0
                 for node in physinfo['node_to_cpu']:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/image.py    Wed Jan 28 13:06:45 2009 +0900
@@ -264,6 +264,10 @@ class ImageHandler:
             # skip vnc init if nographic is set
             ret.append('-nographic')
             return ret
+
+        vram = str(vmConfig['platform'].get('videoram',4))
+        ret.append('-videoram')
+        ret.append(vram)
 
         vnc_config = {}
         has_vnc = int(vmConfig['platform'].get('vnc', 0)) != 0
@@ -833,6 +837,7 @@ class IA64_HVM_ImageHandler(HVMImageHand
     def configure(self, vmConfig):
         HVMImageHandler.configure(self, vmConfig)
         self.vhpt = int(vmConfig['platform'].get('vhpt',  0))
+        self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024
 
     def buildDomain(self):
         xc.nvram_init(self.vm.getName(), self.vm.getDomid())
@@ -847,8 +852,8 @@ class IA64_HVM_ImageHandler(HVMImageHand
         # buffer io page, buffer pio page and memmap info page
         extra_pages = 1024 + 5
         mem_kb += extra_pages * page_kb
-        # Add 8 MiB overhead for QEMU's video RAM.
-        return mem_kb + 8192
+        mem_kb += self.vramsize
+        return mem_kb
 
     def getRequiredInitialReservation(self):
         return self.vm.getMemoryTarget()
@@ -882,6 +887,7 @@ class X86_HVM_ImageHandler(HVMImageHandl
     def configure(self, vmConfig):
         HVMImageHandler.configure(self, vmConfig)
         self.pae = int(vmConfig['platform'].get('pae',  0))
+        self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024
 
     def buildDomain(self):
         xc.hvm_set_param(self.vm.getDomid(), HVM_PARAM_PAE_ENABLED, self.pae)
@@ -890,8 +896,7 @@ class X86_HVM_ImageHandler(HVMImageHandl
         return rc
 
     def getRequiredAvailableMemory(self, mem_kb):
-        # Add 8 MiB overhead for QEMU's video RAM.
-        return mem_kb + 8192
+        return mem_kb + self.vramsize
 
     def getRequiredInitialReservation(self):
         return self.vm.getMemoryTarget()
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/server/pciif.py     Wed Jan 28 13:06:45 2009 +0900
@@ -75,6 +75,12 @@ class PciController(DevController):
             slot = parse_hex(pci_config.get('slot', 0))
             func = parse_hex(pci_config.get('func', 0))            
 
+            opts = pci_config.get('opts', '')
+            if len(opts) > 0:
+                opts = map(lambda (x, y): x+'='+y, opts)
+                opts = reduce(lambda x, y: x+','+y, opts)
+                back['opts-%i' % pcidevid] = opts
+
             vslt = pci_config.get('vslt')
             if vslt is not None:
                 vslots = vslots + vslt + ";"
@@ -89,6 +95,9 @@ class PciController(DevController):
 
         back['num_devs']=str(pcidevid)
         back['uuid'] = config.get('uuid','')
+        if 'pci_msitranslate' in self.vm.info['platform']:
+            back['msitranslate']=str(self.vm.info['platform']['pci_msitranslate'])
+
         return (0, back, {})
 
 
@@ -108,6 +117,9 @@ class PciController(DevController):
                 dev = back['dev-%i' % i]
                 state = states[i]
                 uuid = back['uuid-%i' %i]
+                opts = ''
+                if 'opts-%i' % i in back:
+                    opts = back['opts-%i' % i]
             except:
                 raise XendError('Error reading config')
 
@@ -129,6 +141,8 @@ class PciController(DevController):
                 self.writeBackend(devid, 'state-%i' % (num_olddevs + i),
                                   str(xenbusState['Initialising']))
                 self.writeBackend(devid, 'uuid-%i' % (num_olddevs + i), uuid)
+                if len(opts) > 0:
+                    self.writeBackend(devid, 'opts-%i' % (num_olddevs + i), opts)
                 self.writeBackend(devid, 'num_devs', str(num_olddevs + i + 1))
 
                 # Update vslots
@@ -540,6 +554,9 @@ class PciController(DevController):
                 self.removeBackend(devid, 'vdev-%i' % i)
                 self.removeBackend(devid, 'state-%i' % i)
                 self.removeBackend(devid, 'uuid-%i' % i)
+                tmpopts = self.readBackend(devid, 'opts-%i' % i)
+                if tmpopts is not None:
+                    self.removeBackend(devid, 'opts-%i' % i)
             else:
                 if new_num_devs != i:
                     tmpdev = self.readBackend(devid, 'dev-%i' % i)
@@ -556,6 +573,9 @@ class PciController(DevController):
                     tmpuuid = self.readBackend(devid, 'uuid-%i' % i)
                     self.writeBackend(devid, 'uuid-%i' % new_num_devs, tmpuuid)
                     self.removeBackend(devid, 'uuid-%i' % i)
+                    tmpopts = self.readBackend(devid, 'opts-%i' % i)
+                    if tmpopts is not None:
+                        self.removeBackend(devid, 'opts-%i' % i)
                 new_num_devs = new_num_devs + 1
 
         self.writeBackend(devid, 'num_devs', str(new_num_devs))
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py  Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xend/server/relocate.py  Wed Jan 28 13:06:45 2009 +0900
@@ -122,6 +122,8 @@ class RelocationProtocol(protocol.Protoc
         if self.transport:
             self.send_reply(["ready", name])
             p2cread, p2cwrite = os.pipe()
+            from xen.util import oshelp
+            oshelp.fcntl_setfd_cloexec(p2cwrite, True)
             threading.Thread(target=connection.SSLSocketServerConnection.recv2fd,
                              args=(self.transport.sock, p2cwrite)).start()
             try:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/create.dtd
--- a/tools/python/xen/xm/create.dtd    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xm/create.dtd    Wed Jan 28 13:06:45 2009 +0900
@@ -82,11 +82,12 @@
 <!ELEMENT vtpm   (name*)>
 <!ATTLIST vtpm   backend         CDATA #REQUIRED>
 
-<!ELEMENT pci    EMPTY>
+<!ELEMENT pci    (pci_opt*)>
 <!ATTLIST pci    domain          CDATA #REQUIRED
                  bus             CDATA #REQUIRED
                  slot            CDATA #REQUIRED
                  func            CDATA #REQUIRED
+                 opts_str        CDATA #IMPLIED
                  vslt            CDATA #IMPLIED>
 
 <!ELEMENT vscsi  EMPTY>
@@ -138,6 +139,10 @@
 <!ATTLIST vcpu_param key   CDATA #REQUIRED
                      value CDATA #REQUIRED>
 
+<!ELEMENT pci_opt    EMPTY>
+<!ATTLIST pci_opt    key   CDATA #REQUIRED
+                     value CDATA #REQUIRED>
+
 <!ELEMENT other_config EMPTY>
 <!ATTLIST other_config key   CDATA #REQUIRED
                        value CDATA #REQUIRED>
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xm/create.py     Wed Jan 28 13:06:45 2009 +0900
@@ -318,11 +318,14 @@ gopts.var('disk', val='phy:DEV,VDEV,MODE
           backend driver domain to use for the disk.
           The option may be repeated to add more than one disk.""")
 
-gopts.var('pci', val='BUS:DEV.FUNC',
+gopts.var('pci', val='BUS:DEV.FUNC[,msitranslate=0|1]',
           fn=append_value, default=[],
           use="""Add a PCI device to a domain, using given params (in hex).
-         For example 'pci=c0:02.1'.
-         The option may be repeated to add more than one pci device.""")
+          For example 'pci=c0:02.1'.
+          If msitranslate is set, MSI-INTx translation is enabled if possible.
+          Guest that doesn't support MSI will get IO-APIC type IRQs
+          translated from physical MSI, HVM only. Default is 1.
+          The option may be repeated to add more than one pci device.""")
 
 gopts.var('vscsi', val='PDEV,VDEV[,DOM]',
           fn=append_value, default=[],
@@ -523,9 +526,9 @@ gopts.var('vncunused', val='',
           use="""Try to find an unused port for the VNC server.
           Only valid when vnc=1.""")
 
-gopts.var('videoram', val='',
-          fn=set_value, default=None,
-          use="""Maximum amount of videoram PV guest can allocate
+gopts.var('videoram', val='MEMORY',
+          fn=set_int, default=4,
+          use="""Maximum amount of videoram a guest can allocate
           for frame buffer.""")
 
 gopts.var('sdl', val='',
@@ -587,6 +590,11 @@ gopts.var('suppress_spurious_page_faults
 gopts.var('suppress_spurious_page_faults', val='yes|no',
           fn=set_bool, default=None,
           use="""Do not inject spurious page faults into this guest""")
+
+gopts.var('pci_msitranslate', val='TRANSLATE',
+          fn=set_int, default=1,
+          use="""Global PCI MSI-INTx translation flag (0=disable;
+          1=enable.""")
 
 def err(msg):
     """Print an error to stderr and exit.
@@ -667,9 +675,23 @@ def configure_pci(config_devs, vals):
     """Create the config for pci devices.
     """
     config_pci = []
-    for (domain, bus, slot, func) in vals.pci:
-        config_pci.append(['dev', ['domain', domain], ['bus', bus], \
-                        ['slot', slot], ['func', func]])
+    for (domain, bus, slot, func, opts) in vals.pci:
+        config_pci_opts = []
+        d = comma_sep_kv_to_dict(opts)
+
+        def f(k):
+            if k not in ['msitranslate']:
+                err('Invalid pci option: ' + k)
+
+            config_pci_opts.append([k, d[k]])
+
+        config_pci_bdf = ['dev', ['domain', domain], ['bus', bus], \
+                          ['slot', slot], ['func', func]]
+        map(f, d.keys())
+        if len(config_pci_opts)>0:
+            config_pci_bdf.append(['opts', config_pci_opts])
+
+        config_pci.append(config_pci_bdf)
 
     if len(config_pci)>0:
         config_pci.insert(0, 'pci')
@@ -862,12 +884,12 @@ def configure_hvm(config_image, vals):
     """Create the config for HVM devices.
     """
     args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
-             'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
+             'localtime', 'serial', 'stdvga', 'videoram', 'isa', 'nographic', 'soundhw',
              'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
              'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
              'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet',
              'guest_os_type', 'hap', 'opengl', 'cpuid', 'cpuid_check',
-             'viridian', 'xen_extended_power_mgmt' ]
+             'viridian', 'xen_extended_power_mgmt', 'pci_msitranslate' ]
 
     for a in args:
         if a in vals.__dict__ and vals.__dict__[a] is not None:
@@ -991,14 +1013,18 @@ def preprocess_pci(vals):
         pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" + \
                 r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \
                 r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + \
-                r"(?P<func>[0-7])$", pci_dev_str)
+                r"(?P<func>[0-7])" + \
+                r"(,(?P<opts>.*))?$", pci_dev_str)
         if pci_match!=None:
-            pci_dev_info = pci_match.groupdict('0')
+            pci_dev_info = pci_match.groupdict('')
+            if pci_dev_info['domain']=='':
+                pci_dev_info['domain']='0'
             try:
                 pci.append( ('0x'+pci_dev_info['domain'], \
                         '0x'+pci_dev_info['bus'], \
                         '0x'+pci_dev_info['slot'], \
-                        '0x'+pci_dev_info['func']))
+                        '0x'+pci_dev_info['func'], \
+                        pci_dev_info['opts']))
             except IndexError:
                 err('Error in PCI slot syntax "%s"'%(pci_dev_str))
     vals.pci = pci
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xm/main.py       Wed Jan 28 13:06:45 2009 +0900
@@ -187,7 +187,7 @@ SUBCOMMAND_HELP = {
     'vnet-delete'   :  ('<VnetId>', 'Delete a Vnet.'),
     'vnet-list'     :  ('[-l|--long]', 'List Vnets.'),
     'vtpm-list'     :  ('<Domain> [--long]', 'List virtual TPM devices.'),
-    'pci-attach'    :  ('<Domain> <domain:bus:slot.func> [virtual slot]',
+    'pci-attach'    :  ('[-o|--options=<opt>] <Domain> <domain:bus:slot.func> [virtual slot]',
                         'Insert a new pass-through pci device.'),
     'pci-detach'    :  ('<Domain> <domain:bus:slot.func>',
                         'Remove a domain\'s pass-through pci device.'),
@@ -2428,7 +2428,7 @@ def xm_network_attach(args):
             vif.append(vif_param)
         server.xend.domain.device_create(dom, vif)
 
-def parse_pci_configuration(args, state):
+def parse_pci_configuration(args, state, opts = ''):
     dom = args[0]
     pci_dev_str = args[1]
     if len(args) == 3:
@@ -2443,12 +2443,17 @@ def parse_pci_configuration(args, state)
     if pci_match == None:
         raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt))
     pci_dev_info = pci_match.groupdict('0')
+
     try:
-        pci.append(['dev', ['domain', '0x'+ pci_dev_info['domain']], \
+        pci_bdf =['dev', ['domain', '0x'+ pci_dev_info['domain']], \
                 ['bus', '0x'+ pci_dev_info['bus']],
                 ['slot', '0x'+ pci_dev_info['slot']],
                 ['func', '0x'+ pci_dev_info['func']],
-                ['vslt', '0x%x' % int(vslt, 16)]])
+                ['vslt', '0x%x' % int(vslt, 16)]]
+        if len(opts) > 0:
+            pci_bdf.append(['opts', opts])
+        pci.append(pci_bdf)
+
     except:
         raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt))
     pci.append(['state', state])
@@ -2456,8 +2461,22 @@ def parse_pci_configuration(args, state)
     return (dom, pci)
 
 def xm_pci_attach(args):
-    arg_check(args, 'pci-attach', 2, 3)
-    (dom, pci) = parse_pci_configuration(args, 'Initialising')
+    config_pci_opts = []
+    (options, params) = getopt.gnu_getopt(args, 'o:', ['options='])
+    for (k, v) in options:
+        if k in ('-o', '--options'):
+            if len(v.split('=')) != 2:
+                err("Invalid pci attach option: %s" % v)
+                usage('pci-attach')
+            config_pci_opts.append(v.split('='))
+
+    n = len([i for i in params if i != '--'])
+    if n < 2 or n > 3:
+        err("Invalid argument for 'xm pci-attach'")
+        usage('pci-attach')
+
+    (dom, pci) = parse_pci_configuration(params, 'Initialising',
+                     config_pci_opts)
 
     if serverType == SERVER_XEN_API:
 
@@ -2480,7 +2499,8 @@ def xm_pci_attach(args):
         dpci_record = {
             "VM":           get_single_vm(dom),
             "PPCI":         target_ref,
-            "hotplug_slot": vslt
+            "hotplug_slot": vslt,
+            "options":      dict(config_pci_opts)
         }
         server.xenapi.DPCI.create(dpci_record)
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/python/xen/xm/xenapi_create.py      Wed Jan 28 13:06:45 2009 +0900
@@ -533,7 +533,10 @@ class xenapi_create:
             "PPCI":
                 target_ref,
             "hotplug_slot":
-                int(pci.attributes["func"].value, 16)
+                int(pci.attributes["func"].value, 16),
+            "options":
+                get_child_nodes_as_dict(pci,
+                  "pci_opt", "key", "value")
         }
 
         return server.xenapi.DPCI.create(dpci_record)
@@ -931,6 +934,12 @@ class sxp2xml:
                     = get_child_by_name(dev_sxp, "func", "0")
                 pci.attributes["vslt"] \
                     = get_child_by_name(dev_sxp, "vslt", "0")
+                for opt in get_child_by_name(dev_sxp, "opts", ""):
+                    if len(opt) > 0:
+                        pci_opt = document.createElement("pci_opt")
+                        pci_opt.attributes["key"] = opt[0]
+                        pci_opt.attributes["value"] = opt[1]
+                        pci.appendChild(pci_opt)
 
                 pcis.append(pci)
 
@@ -1032,6 +1041,7 @@ class sxp2xml:
             'vhpt',
             'guest_os_type',
             'hap',
+            'pci_msitranslate',
         ]
 
         platform_configs = []
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/tests/blowfish.mk
--- a/tools/tests/blowfish.mk   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/tests/blowfish.mk   Wed Jan 28 13:06:45 2009 +0900
@@ -1,13 +1,13 @@
 
 override XEN_TARGET_ARCH = x86_32
 XEN_ROOT = ../..
-CFLAGS :=
+CFLAGS =
 include $(XEN_ROOT)/tools/Rules.mk
 
 # Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS += $(call cc-option,$(CC),-nopie,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
 
 CFLAGS += -fno-builtin -msoft-float
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vnet/libxutil/Makefile
--- a/tools/vnet/libxutil/Makefile      Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/vnet/libxutil/Makefile      Wed Jan 28 13:06:45 2009 +0900
@@ -24,13 +24,10 @@ LIB_OBJS := $(LIB_SRCS:.c=.o)
 LIB_OBJS := $(LIB_SRCS:.c=.o)
 PIC_OBJS := $(LIB_SRCS:.c=.opic)
 
-CFLAGS   += -Werror -fno-strict-aliasing $(call cc-option,$(CC),-fgnu89-inline,)
+$(call cc-option-add,CFLAGS,CC,-fgnu89-inline)
+CFLAGS   += -Werror -fno-strict-aliasing
 CFLAGS   += -O3
 #CFLAGS   += -g
-
-# Get gcc to generate the dependencies for us.
-CFLAGS   += -Wp,-MD,.$(@F).d
-DEPS     = .*.d
 
 MAJOR    := 3.0
 MINOR    := 0
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vtpm/Makefile
--- a/tools/vtpm/Makefile       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/vtpm/Makefile       Wed Jan 28 13:06:45 2009 +0900
@@ -89,6 +89,6 @@ build_sub:
                        $(MAKE) -C $(TPM_EMULATOR_DIR); \
                fi \
        else \
-               echo "*** Unable to build VTPMs. libgmp could not be found."; \
+               echo "=== Unable to build VTPMs. libgmp could not be found."; \
        fi
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vtpm/Rules.mk
--- a/tools/vtpm/Rules.mk       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/vtpm/Rules.mk       Wed Jan 28 13:06:45 2009 +0900
@@ -11,11 +11,6 @@ TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
 # General compiler flags
 CFLAGS   = -Werror -g3 -I.
 
-# For generating dependencies
-CFLAGS += -Wp,-MD,.$(@F).d
-
-DEP_FILES      = .*.d
-
 # Generic project files
 HDRS   = $(wildcard *.h)
 SRCS   = $(wildcard *.c)
@@ -26,7 +21,7 @@ OBJS  = $(patsubst %.c,%.o,$(SRCS))
 
 $(OBJS): $(SRCS)
 
--include $(DEP_FILES)
+-include $(DEPS)
 
 BUILD_EMULATOR = y
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/vtpm_manager/Rules.mk
--- a/tools/vtpm_manager/Rules.mk       Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/vtpm_manager/Rules.mk       Wed Jan 28 13:06:45 2009 +0900
@@ -11,11 +11,6 @@ TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
 # General compiler flags
 CFLAGS = -Werror -g3 -I.
 
-# For generating dependencies
-CFLAGS += -Wp,-MD,.$(@F).d
-
-DEP_FILES      = .*.d
-
 # Generic project files
 HDRS   = $(wildcard *.h)
 SRCS   = $(wildcard *.c)
@@ -26,7 +21,7 @@ OBJS  = $(patsubst %.c,%.o,$(SRCS))
 
 $(OBJS): $(SRCS)
 
--include $(DEP_FILES)
+-include $(FILES)
 
 # Make sure these are just rules
 .PHONY : all build install clean
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xcutils/Makefile
--- a/tools/xcutils/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xcutils/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -13,10 +13,6 @@ include $(XEN_ROOT)/tools/Rules.mk
 
 CFLAGS += -Werror
 CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) $(CFLAGS_libxenstore)
-
-# Make gcc generate dependencies.
-CFLAGS += -Wp,-MD,.$(@F).d
-PROG_DEP = .*.d
 
 PROGRAMS = xc_restore xc_save readnotes lsevtchn
 
@@ -40,6 +36,6 @@ install: build
 .PHONY: clean
 clean:
        $(RM) *.o $(PROGRAMS)
-       $(RM) $(PROG_DEP)
+       $(RM) $(DEPS)
 
--include $(PROG_DEP)
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xcutils/readnotes.c
--- a/tools/xcutils/readnotes.c Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xcutils/readnotes.c Wed Jan 28 13:06:45 2009 +0900
@@ -13,7 +13,7 @@
 #include <xg_private.h>
 #include <xc_dom.h> /* gunzip bits */
 
-#include <xen/libelf.h>
+#include <xen/libelf/libelf.h>
 
 static void print_string_note(const char *prefix, struct elf_binary *elf,
                              const elf_note *note)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenmon/Makefile
--- a/tools/xenmon/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xenmon/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -38,10 +38,12 @@ install: build
 
 .PHONY: clean
 clean:
-       rm -f $(BIN)
+       rm -f $(BIN) $(DEPS)
 
 
 %: %.c Makefile
        $(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
 xentrace_%: %.c Makefile
        $(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenpmd/Makefile
--- a/tools/xenpmd/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xenpmd/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -17,4 +17,6 @@ install: all
 
 .PHONY: clean
 clean:
-       $(RM) -f $(BIN)
+       $(RM) -f $(BIN) $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenstat/libxenstat/Makefile
--- a/tools/xenstat/libxenstat/Makefile Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xenstat/libxenstat/Makefile Wed Jan 28 13:06:45 2009 +0900
@@ -155,4 +155,6 @@ endif
 .PHONY: clean
 clean:
        rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS-y) \
-             $(BINDINGS) $(BINDINGSRC)
+             $(BINDINGS) $(BINDINGSRC) $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenstat/xentop/Makefile
--- a/tools/xenstat/xentop/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xenstat/xentop/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -37,4 +37,6 @@ endif
 
 .PHONY: clean
 clean:
-       rm -f xentop xentop.o
+       rm -f xentop xentop.o $(DEPS)
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xenstore/Makefile   Wed Jan 28 13:06:45 2009 +0900
@@ -7,10 +7,6 @@ CFLAGS += -Werror
 CFLAGS += -Werror
 CFLAGS += -I.
 CFLAGS += $(CFLAGS_libxenctrl)
-
-# Make gcc generate dependencies.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEP    = .*.d
 
 CLIENTS := xenstore-exists xenstore-list xenstore-read xenstore-rm xenstore-chmod
 CLIENTS += xenstore-write xenstore-ls
@@ -82,7 +78,7 @@ clean:
        rm -f xenstored xs_random xs_stress xs_crashme
        rm -f xs_tdb_dump xenstore-control
        rm -f xenstore $(CLIENTS)
-       $(RM) $(DEP)
+       $(RM) $(DEPS)
 
 .PHONY: TAGS
 TAGS:
@@ -113,7 +109,7 @@ install: all
        $(INSTALL_DATA) xs.h $(DESTDIR)$(INCLUDEDIR)
        $(INSTALL_DATA) xs_lib.h $(DESTDIR)$(INCLUDEDIR)
 
--include $(DEP)
+-include $(DEPS)
 
 # never delete any intermediate files.
 .SECONDARY:
diff -r 4fd4dcf2f891 -r 79f259a26a11 tools/xentrace/Makefile
--- a/tools/xentrace/Makefile   Wed Jan 28 12:22:58 2009 +0900
+++ b/tools/xentrace/Makefile   Wed Jan 28 13:06:45 2009 +0900
@@ -46,9 +46,12 @@ install: build
 
 .PHONY: clean
 clean:
-       $(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN)
+       $(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN) $(DEPS)
 
 %: %.c $(HDRS) Makefile
        $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
 xentrace_%: %.c $(HDRS) Makefile
        $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+-include $(DEPS)
+
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/Rules.mk
--- a/xen/Rules.mk      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/Rules.mk      Wed Jan 28 13:06:45 2009 +0900
@@ -23,9 +23,6 @@ ifeq ($(perfc_arrays),y)
 ifeq ($(perfc_arrays),y)
 perfc := y
 endif
-ifeq ($(frame_pointer),y)
-CFLAGS := $(shell echo $(CFLAGS) | sed -e 's/-f[^ ]*omit-frame-pointer//g')
-endif
 
 # Set ARCH/SUBARCH appropriately.
 override TARGET_SUBARCH  := $(XEN_TARGET_ARCH)
@@ -34,20 +31,7 @@ override TARGET_ARCH     := $(shell echo
 
 TARGET := $(BASEDIR)/xen
 
-HDRS := $(wildcard *.h)
-HDRS += $(wildcard $(BASEDIR)/include/xen/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/xen/hvm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/public/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/public/*/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/compat/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h)
-
 include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk
-
-# Do not depend on auto-generated header files.
-AHDRS := $(filter-out %/include/xen/compile.h,$(HDRS))
-HDRS  := $(filter-out %/asm-offsets.h,$(AHDRS))
 
 # Note that link order matters!
 ALL_OBJS-y               += $(BASEDIR)/common/built_in.o
@@ -77,15 +61,18 @@ AFLAGS-y                += -D__ASSEMBLY_
 
 ALL_OBJS := $(ALL_OBJS-y)
 
-CFLAGS   := $(strip $(CFLAGS) $(CFLAGS-y))
+# Get gcc to generate the dependencies for us.
+CFLAGS-y += -MMD -MF .$(@F).d
+DEPS = .*.d
+
+CFLAGS += $(CFLAGS-y)
 
 # Most CFLAGS are safe for assembly files:
 #  -std=gnu{89,99} gets confused by #-prefixed end-of-line comments
-AFLAGS   := $(strip $(AFLAGS) $(AFLAGS-y))
-AFLAGS   += $(patsubst -std=gnu%,,$(CFLAGS))
+AFLAGS += $(AFLAGS-y) $(filter-out -std=gnu%,$(CFLAGS))
 
 # LDFLAGS are only passed directly to $(LD)
-LDFLAGS  := $(strip $(LDFLAGS) $(LDFLAGS_DIRECT))
+LDFLAGS += $(LDFLAGS_DIRECT)
 
 include Makefile
 
@@ -115,19 +102,21 @@ FORCE:
 
 .PHONY: clean
 clean:: $(addprefix _clean_, $(subdir-all))
-       rm -f *.o *~ core
+       rm -f *.o *~ core $(DEPS)
 _clean_%/: FORCE
        $(MAKE) -f $(BASEDIR)/Rules.mk -C $* clean
 
-%.o: %.c $(HDRS) Makefile
+%.o: %.c Makefile
        $(CC) $(CFLAGS) -c $< -o $@
 
-%.o: %.S $(AHDRS) Makefile
+%.o: %.S Makefile
        $(CC) $(AFLAGS) -c $< -o $@
 
-%.i: %.c $(HDRS) Makefile
+%.i: %.c Makefile
        $(CPP) $(CFLAGS) $< -o $@
 
 # -std=gnu{89,99} gets confused by # as an end-of-line comment marker
-%.s: %.S $(AHDRS) Makefile
+%.s: %.S Makefile
        $(CPP) $(AFLAGS) $< -o $@
+
+-include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/Makefile    Wed Jan 28 13:06:45 2009 +0900
@@ -29,11 +29,11 @@ subdir-y += linux-xen
 # Headers do not depend on auto-generated header, but object files do.
 $(ALL_OBJS): $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h
 
-asm-offsets.s: asm-offsets.c $(HDRS) \
+asm-offsets.s: asm-offsets.c \
     $(BASEDIR)/include/asm-ia64/.offsets.h.stamp 
        $(CC) $(CFLAGS) -DGENERATE_ASM_OFFSETS -DIA64_TASK_SIZE=0 -S -o $@ $<
 
-asm-xsi-offsets.s: asm-xsi-offsets.c $(HDRS)
+asm-xsi-offsets.s: asm-xsi-offsets.c
        $(CC) $(CFLAGS) -S -o $@ $<
 
 $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h: asm-xsi-offsets.s
@@ -61,7 +61,7 @@ asm-xsi-offsets.s: asm-xsi-offsets.c $(H
        touch $@
 
 # I'm sure a Makefile wizard would know a better way to do this
-xen.lds.s: xen/xen.lds.S $(HDRS)
+xen.lds.s: xen/xen.lds.S
        $(CC) -E $(CPPFLAGS) -P -DXEN $(AFLAGS) \
                -o xen.lds.s xen/xen.lds.S
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/Rules.mk
--- a/xen/arch/ia64/Rules.mk    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/Rules.mk    Wed Jan 28 13:06:45 2009 +0900
@@ -72,19 +72,4 @@ CFLAGS += -DCONFIG_XEN_IA64_DISABLE_OPTV
 CFLAGS += -DCONFIG_XEN_IA64_DISABLE_OPTVFAULT
 endif
 
-LDFLAGS := -g
-
-# Additionnal IA64 include dirs.
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/sn/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/linux/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/sn/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/linux/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm-generic/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/byteorder/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/hvm/*.h)
-
-HDRS := $(filter-out %/include/asm-ia64/asm-xsi-offsets.h,$(HDRS))
+LDFLAGS = -g
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/tools/p2m_foreign/Makefile
--- a/xen/arch/ia64/tools/p2m_foreign/Makefile  Wed Jan 28 12:22:58 2009 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-#
-# xen/arch/ia64/tools/p2m_foreign
-#
-# Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
-#                    VA Linux Systems Japan K.K.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-XEN_ROOT       = ../../../../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-CFLAGS += -Werror -ggdb3
-CFLAGS += -I$(XEN_LIBXC) -I$(XEN_XENSTORE)
-
-# Make gcc generate dependencies.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEPS = .*.d
-
-PROGRAMS = p2m_foreign
-LDLIBS   = -L$(XEN_LIBXC) -L$(XEN_XENSTORE) -lxenguest -lxenctrl
-
-.PHONY: all
-all: build
-
-.PHONY: build
-build: $(PROGRAMS)
-
-$(PROGRAMS): %: %.o
-       $(CC) $(CFLAGS) $^ $(LDLIBS) -o $@
-
-
-.PHONY: install
-install:
-
-.PHONY: clean
-clean:
-       $(RM) *.o $(PROGRAMS)
-       $(RM) $(DEPS)
-
--include $(DEPS)
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c
--- a/xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c     Wed Jan 28 12:22:58 2009 +0900
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,233 +0,0 @@
-/*
- * Foreign p2m exposure test.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- */
-
-#include <sys/mman.h>
-#include <err.h>
-#include <errno.h>
-#include <assert.h>
-
-#include <xc_private.h>
-#include <xenctrl.h>
-#include <xenguest.h>
-#include <xc_efi.h>
-#include <ia64/xc_ia64.h>
-
-#if 1
-# define printd(fmt, args...)  printf(fmt, ##args)
-#else
-# define printd(fmt, args...)  ((void)0)
-#endif
-
-/* xc_memory_op() in xc_private.c doesn't support translate_gpfn_list */
-static int
-__xc_memory_op(int xc_handle, int cmd, void *arg)
-{
-       DECLARE_HYPERCALL;
-       struct xen_translate_gpfn_list* translate = arg;
-
-       xen_ulong_t* gpfns;
-       xen_ulong_t* mfns;
-       size_t len;
-
-       long ret = -EINVAL;
-
-       hypercall.op     = __HYPERVISOR_memory_op;
-       hypercall.arg[0] = (unsigned long)cmd;
-       hypercall.arg[1] = (unsigned long)arg;
-
-       assert(cmd == XENMEM_translate_gpfn_list);
-
-       get_xen_guest_handle(gpfns, translate->gpfn_list);
-       get_xen_guest_handle(mfns, translate->mfn_list);
-       len = sizeof(gpfns[0]) * translate->nr_gpfns;
-       if (lock_pages(translate, sizeof(*translate)) ||
-           lock_pages(gpfns, len) ||
-           lock_pages(mfns, len))
-               goto out;
-
-       ret = do_xen_hypercall(xc_handle, &hypercall);
-
-out:
-       unlock_pages(mfns, len);
-       unlock_pages(gpfns, len);
-       unlock_pages(translate, sizeof(*translate));
-
-       return ret;
-}
-
-int
-xc_translate_gpfn_list(int xc_handle, uint32_t domid, xen_ulong_t nr_gpfns,
-                      xen_ulong_t* gpfns, xen_ulong_t* mfns)
-{
-       struct xen_translate_gpfn_list translate = {
-               .domid = domid,
-               .nr_gpfns = nr_gpfns,
-       };
-       set_xen_guest_handle(translate.gpfn_list, gpfns);
-       set_xen_guest_handle(translate.mfn_list, mfns);
-
-       return __xc_memory_op(xc_handle,
-                             XENMEM_translate_gpfn_list, &translate);
-}
-
-int
-main(int argc, char** argv)
-{
-       uint32_t domid;
-       int xc_handle;
-
-       xc_dominfo_t info;
-       shared_info_t* shinfo;
-
-       unsigned long map_size;
-       xen_ia64_memmap_info_t* memmap_info;
-       struct xen_ia64_p2m_table p2m_table;
-
-       char* p;
-       char* start;
-       char* end;
-       xen_ulong_t nr_gpfns;
-
-       xen_ulong_t* gpfns;
-       xen_ulong_t* mfns;
-
-       unsigned long i;
-
-       if (argc != 2)
-               errx(EXIT_FAILURE, "usage: %s <domid>", argv[0]);
-       domid = atol(argv[1]);
-
-       printd("xc_interface_open()\n");
-       xc_handle = xc_interface_open();
-       if (xc_handle < 0)
-               errx(EXIT_FAILURE, "can't open control interface");
-
-       printd("xc_domain_getinfo\n");
-       if (xc_domain_getinfo(xc_handle, domid, 1, &info) != 1)
-               errx(EXIT_FAILURE, "Could not get info for domain");
-
-
-       printd("shared info\n");
-       shinfo = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                     PROT_READ, info.shared_info_frame);
-       if (shinfo == NULL)
-               errx(EXIT_FAILURE, "can't map shared info");
-
-       printd("memmap_info\n");
-       map_size = PAGE_SIZE * shinfo->arch.memmap_info_num_pages;
-       memmap_info = xc_map_foreign_range(xc_handle, info.domid,
-                                          map_size, PROT_READ,
-                                          shinfo->arch.memmap_info_pfn);
-       if (memmap_info == NULL)
-               errx(EXIT_FAILURE, "can't map memmap_info");
-
-#if 1
-       start = (char*)&memmap_info->memdesc;
-       end = start + memmap_info->efi_memmap_size;
-       i = 0;
-       for (p = start; p < end; p += memmap_info->efi_memdesc_size) {
-               efi_memory_desc_t* md = (efi_memory_desc_t*)p;
-               printd("%ld [0x%lx, 0x%lx) 0x%lx pages\n",
-                      i, md->phys_addr,
-                      md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
-                      md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT));
-               i++;
-       }
-#endif
-
-
-       printd("p2m map\n");
-       if (xc_ia64_p2m_map(&p2m_table, xc_handle, domid, memmap_info, 0) < 0)
-               errx(EXIT_FAILURE, "can't map foreign p2m table");
-       printd("p2m map done\n");
-
-       start = (char*)&memmap_info->memdesc;
-       end = start + memmap_info->efi_memmap_size;
-       nr_gpfns = 0;
-       i = 0;
-       for (p = start; p < end; p += memmap_info->efi_memdesc_size) {
-               efi_memory_desc_t* md = (efi_memory_desc_t*)p;
-               if ( md->type != EFI_CONVENTIONAL_MEMORY ||
-                    md->attribute != EFI_MEMORY_WB ||
-                    md->num_pages == 0 )
-                       continue;
-
-               printd("%ld [0x%lx, 0x%lx) 0x%lx pages\n",
-                      i, md->phys_addr,
-                      md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
-                      md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT));
-               nr_gpfns += md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT);
-               i++;
-       }
-
-       printd("total 0x%lx gpfns\n", nr_gpfns);
-       gpfns = malloc(sizeof(gpfns[0]) * nr_gpfns);
-       mfns = malloc(sizeof(mfns[0]) * nr_gpfns);
-       if (gpfns == NULL || mfns == NULL)
-               err(EXIT_FAILURE, "can't allocate memory for gpfns/mfns");
-
-       i = 0;
-       for (p = start; p < end; p += memmap_info->efi_memdesc_size) {
-               efi_memory_desc_t* md = (efi_memory_desc_t*)p;
-               unsigned long j;
-               if ( md->type != EFI_CONVENTIONAL_MEMORY ||
-                    md->attribute != EFI_MEMORY_WB ||
-                    md->num_pages == 0 )
-                       continue;
-
-               for (j = 0;
-                    j < md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT);
-                    j++) {
-                       gpfns[i] = (md->phys_addr >> PAGE_SHIFT) + j;
-                       i++;
-               }
-       }
-       for (i = 0; i < nr_gpfns; i++)
-               mfns[i] = INVALID_MFN;
-
-       printd("issue translate gpfn list hypercall. "
-              "this may take a while\n");
-       if (xc_translate_gpfn_list(xc_handle,
-                                  domid, nr_gpfns, gpfns, mfns) < 0)
-               err(EXIT_FAILURE, "translate gpfn list hypercall failure");
-       printd("translate gpfn list hypercall done\n");
-
-       printd("checking p2m table\n");
-       for (i = 0; i < nr_gpfns; i++) {
-               unsigned long mfn_by_translated = mfns[i];
-               unsigned long mfn_by_p2m =
-                       xc_ia64_p2m_mfn(&p2m_table, gpfns[i]);
-               if (mfn_by_translated != mfn_by_p2m &&
-                   !(mfn_by_translated == 0 && mfn_by_p2m == INVALID_MFN)) {
-                       printf("ERROR! i 0x%lx gpfn "
-                              "0x%lx trnslated 0x%lx p2m 0x%lx\n",
-                              i, gpfns[i], mfn_by_translated, mfn_by_p2m);
-               }
-       }
-       printd("checking p2m table done\n");
-
-       xc_ia64_p2m_unmap(&p2m_table);
-       munmap(memmap_info, map_size);
-       munmap(shinfo, PAGE_SIZE);
-
-       return EXIT_SUCCESS;
-}
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/xen/domain.c        Wed Jan 28 13:06:45 2009 +0900
@@ -31,7 +31,7 @@
 #include <xen/event.h>
 #include <xen/console.h>
 #include <xen/version.h>
-#include <public/libelf.h>
+#include <xen/libelf.h>
 #include <asm/pgalloc.h>
 #include <asm/offsets.h>  /* for IA64_THREAD_INFO_SIZE */
 #include <asm/vcpu.h>   /* for function declarations */
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c   Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/xen/irq.c   Wed Jan 28 13:06:45 2009 +0900
@@ -402,7 +402,7 @@ void __do_IRQ_guest(int irq)
        }
 }
 
-int pirq_acktype(int irq)
+static int pirq_acktype(int irq)
 {
     irq_desc_t *desc = &irq_desc[irq];
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/machine_kexec.c
--- a/xen/arch/ia64/xen/machine_kexec.c Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/xen/machine_kexec.c Wed Jan 28 13:06:45 2009 +0900
@@ -195,6 +195,7 @@ int machine_kexec_get(xen_kexec_range_t 
 
 void arch_crash_save_vmcoreinfo(void)
 {
+    VMCOREINFO_SYMBOL(xenheap_phys_end);
        VMCOREINFO_SYMBOL(dom_xen);
        VMCOREINFO_SYMBOL(dom_io);
        VMCOREINFO_SYMBOL(xen_pstart);
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/xen/mm.c    Wed Jan 28 13:06:45 2009 +0900
@@ -3246,9 +3246,9 @@ int get_page_type(struct page_info *page
     return 1;
 }
 
-int memory_is_conventional_ram(paddr_t p)
-{
-    return (efi_mem_type(p) == EFI_CONVENTIONAL_MEMORY);
+int page_is_conventional_ram(unsigned long mfn)
+{
+    return (efi_mem_type(pfn_to_paddr(mfn)) == EFI_CONVENTIONAL_MEMORY);
 }
 
 
@@ -3295,38 +3295,39 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
 
             spin_unlock(&d->grant_table->lock);
             break;
-        case XENMAPSPACE_mfn:
-        {
-            if ( get_page_from_pagenr(xatp.idx, d) ) {
-                struct xen_ia64_memmap_info memmap_info;
-                efi_memory_desc_t md;
-                int ret;
-
-                mfn = xatp.idx;
-                page = mfn_to_page(mfn);
-
-                memmap_info.efi_memmap_size = sizeof(md);
-                memmap_info.efi_memdesc_size = sizeof(md);
-                memmap_info.efi_memdesc_version =
-                    EFI_MEMORY_DESCRIPTOR_VERSION;
-
-                md.type = EFI_CONVENTIONAL_MEMORY;
-                md.pad = 0;
-                md.phys_addr = xatp.gpfn << PAGE_SHIFT;
-                md.virt_addr = 0;
-                md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT);
-                md.attribute = EFI_MEMORY_WB;
-
-                ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md);
-                if (ret != 0) {
-                    put_page(page);
-                    rcu_unlock_domain(d);
-                    gdprintk(XENLOG_DEBUG,
-                             "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n",
-                             __func__, __LINE__,
-                             d->domain_id, xatp.gpfn, xatp.idx, ret);
-                    return ret;
-                }
+        case XENMAPSPACE_gmfn: {
+            struct xen_ia64_memmap_info memmap_info;
+            efi_memory_desc_t md;
+            int ret;
+
+            xatp.idx = gmfn_to_mfn(d, xatp.idx);
+            if ( !get_page_from_pagenr(xatp.idx, d) )
+                break;
+
+            mfn = xatp.idx;
+            page = mfn_to_page(mfn);
+
+            memmap_info.efi_memmap_size = sizeof(md);
+            memmap_info.efi_memdesc_size = sizeof(md);
+            memmap_info.efi_memdesc_version =
+                EFI_MEMORY_DESCRIPTOR_VERSION;
+
+            md.type = EFI_CONVENTIONAL_MEMORY;
+            md.pad = 0;
+            md.phys_addr = xatp.gpfn << PAGE_SHIFT;
+            md.virt_addr = 0;
+            md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT);
+            md.attribute = EFI_MEMORY_WB;
+
+            ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md);
+            if (ret != 0) {
+                put_page(page);
+                rcu_unlock_domain(d);
+                gdprintk(XENLOG_DEBUG,
+                         "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n",
+                         __func__, __LINE__,
+                         d->domain_id, xatp.gpfn, xatp.idx, ret);
+                return ret;
             }
             break;
         }
@@ -3377,34 +3378,6 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
 
         break;
     }
-
-    case XENMEM_remove_from_physmap:
-    {
-        struct xen_remove_from_physmap xrfp;
-        unsigned long mfn;
-        struct domain *d;
-
-        if ( copy_from_guest(&xrfp, arg, 1) )
-            return -EFAULT;
-
-        rc = rcu_lock_target_domain_by_id(xrfp.domid, &d);
-        if ( rc != 0 )
-            return rc;
-
-        domain_lock(d);
-
-        mfn = gmfn_to_mfn(d, xrfp.gpfn);
-
-        if ( mfn_valid(mfn) )
-            guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0);
-
-        domain_unlock(d);
-
-        rcu_unlock_domain(d);
-
-        break;
-    }
-
 
     case XENMEM_machine_memory_map:
     {
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/ia64/xen/xensetup.c      Wed Jan 28 13:06:45 2009 +0900
@@ -747,8 +747,3 @@ int xen_in_range(paddr_t start, paddr_t 
 
     return start < end;
 }
-
-int tboot_in_range(paddr_t start, paddr_t end)
-{
-    return 0;
-}
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/Makefile     Wed Jan 28 13:06:45 2009 +0900
@@ -53,6 +53,7 @@ obj-y += crash.o
 obj-y += crash.o
 obj-y += tboot.o
 obj-y += hpet.o
+obj-y += bzimage.o
 
 obj-$(crash_debug) += gdbstub.o
 
@@ -78,10 +79,10 @@ ALL_OBJS := $(BASEDIR)/arch/x86/boot/bui
            $(@D)/.$(@F).1.o -o $@
        rm -f $(@D)/.$(@F).[0-9]*
 
-asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
+asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c
        $(CC) $(CFLAGS) -S -o $@ $<
 
-xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS)
+xen.lds: $(TARGET_SUBARCH)/xen.lds.S
        $(CC) -P -E -Ui386 $(AFLAGS) -o $@ $<
 
 boot/mkelf32: boot/mkelf32.c
@@ -90,4 +91,4 @@ boot/mkelf32: boot/mkelf32.c
 .PHONY: clean
 clean::
        rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
-       rm -f $(BASEDIR)/.xen-syms.[0-9]*
+       rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/Rules.mk
--- a/xen/arch/x86/Rules.mk     Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/Rules.mk     Wed Jan 28 13:06:45 2009 +0900
@@ -26,9 +26,9 @@ CFLAGS += -msoft-float
 CFLAGS += -msoft-float
 
 # Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS += $(call cc-option,$(CC),-nopie,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
@@ -45,16 +45,12 @@ CFLAGS += -mno-red-zone -fpic -fno-reord
 CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks
 CFLAGS += -fno-asynchronous-unwind-tables
 # -fvisibility=hidden reduces -fpic cost, if it's available
-CFLAGS += $(call cc-option,$(CC),-fvisibility=hidden,)
-CFLAGS := $(subst -fvisibility=hidden,-DGCC_HAS_VISIBILITY_ATTRIBUTE,$(CFLAGS))
+ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n)
+CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
+endif
 x86_32 := n
 x86_64 := y
 endif
 
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/svm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/vmx/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/mach-*/*.h)
-
 # Require GCC v3.4+ (to avoid issues with alignment constraints in Xen headers)
 $(call cc-ver-check,CC,0x030400,"Xen requires at least gcc-3.4")
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/acpi/cpu_idle.c      Wed Jan 28 13:06:45 2009 +0900
@@ -50,11 +50,6 @@
 
 #define DEBUG_PM_CX
 
-#define US_TO_PM_TIMER_TICKS(t)     ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
-#define PM_TIMER_TICKS_TO_US(t)     ((t * 1000) / (PM_TIMER_FREQUENCY / 1000))
-#define C2_OVERHEAD         4   /* 1us (3.579 ticks per us) */
-#define C3_OVERHEAD         4   /* 1us (3.579 ticks per us) */
-
 static void (*lapic_timer_off)(void);
 static void (*lapic_timer_on)(void);
 
@@ -366,7 +361,7 @@ static void acpi_processor_idle(void)
     cx->usage++;
     if ( sleep_ticks > 0 )
     {
-        power->last_residency = PM_TIMER_TICKS_TO_US(sleep_ticks);
+        power->last_residency = acpi_pm_tick_to_ns(sleep_ticks) / 1000UL;
         cx->time += sleep_ticks;
     }
 
@@ -611,7 +606,7 @@ static void set_cx(
     cx->latency  = xen_cx->latency;
     cx->power    = xen_cx->power;
     
-    cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+    cx->latency_ticks = ns_to_acpi_pm_tick(cx->latency * 1000UL);
     cx->target_residency = cx->latency * latency_factor;
     if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 )
         acpi_power->safe_state = cx;
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/acpi/power.c Wed Jan 28 13:06:45 2009 +0900
@@ -221,6 +221,7 @@ static int enter_state(u32 state)
 
  enable_cpu:
     cpufreq_add_cpu(0);
+    microcode_resume_cpu(0);
     enable_nonboot_cpus();
     thaw_domains();
     spin_unlock(&pm_lock);
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/apic.c       Wed Jan 28 13:06:45 2009 +0900
@@ -40,7 +40,7 @@
 /*
  * Knob to control our willingness to enable the local APIC.
  */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
 
 /*
  * Debug level
@@ -742,7 +742,7 @@ static void __init lapic_disable(char *s
 static void __init lapic_disable(char *str)
 {
     enable_local_apic = -1;
-    clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+    setup_clear_cpu_cap(X86_FEATURE_APIC);
 }
 custom_param("nolapic", lapic_disable);
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/boot/Makefile
--- a/xen/arch/x86/boot/Makefile        Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/boot/Makefile        Wed Jan 28 13:06:45 2009 +0900
@@ -1,4 +1,1 @@ obj-y += head.o
 obj-y += head.o
-
-head.o: head.S $(TARGET_SUBARCH).S trampoline.S mem.S video.S \
-       cmdline.S edd.S wakeup.S
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/boot/mkelf32.c
--- a/xen/arch/x86/boot/mkelf32.c       Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/boot/mkelf32.c       Wed Jan 28 13:06:45 2009 +0900
@@ -25,7 +25,7 @@
 #define s16 int16_t
 #define s32 int32_t
 #define s64 int64_t
-#include "../../../include/public/elfstructs.h"
+#include "../../../include/xen/elfstructs.h"
 
 #define DYNAMICALLY_FILLED   0
 #define RAW_OFFSET         128
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/bzimage.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/bzimage.c    Wed Jan 28 13:06:45 2009 +0900
@@ -0,0 +1,242 @@
+#include <xen/cache.h>
+#include <xen/errno.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/string.h>
+#include <xen/types.h>
+
+#define HEAPORDER 3
+
+static unsigned char *window;
+#define memptr long
+static memptr free_mem_ptr;
+static memptr free_mem_end_ptr;
+
+#define WSIZE           0x80000000
+
+static unsigned char    *inbuf;
+static unsigned         insize;
+
+/* Index of next byte to be processed in inbuf: */
+static unsigned         inptr;
+
+/* Bytes in output buffer: */
+static unsigned         outcnt;
+
+#define OF(args)        args
+#define STATIC          static
+
+#define memzero(s, n)   memset((s), 0, (n))
+
+typedef unsigned char   uch;
+typedef unsigned short  ush;
+typedef unsigned long   ulg;
+
+#define INIT __init
+
+#define get_byte()      (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+
+/* Diagnostic functions */
+#ifdef DEBUG
+#  define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0)
+#  define Trace(x)      do { fprintf x; } while (0)
+#  define Tracev(x)     do { if (verbose) fprintf x ; } while (0)
+#  define Tracevv(x)    do { if (verbose > 1) fprintf x ; } while (0)
+#  define Tracec(c, x)  do { if (verbose && (c)) fprintf x ; } while (0)
+#  define Tracecv(c, x) do { if (verbose > 1 && (c)) fprintf x ; } while (0)
+#else
+#  define Assert(cond, msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c, x)
+#  define Tracecv(c, x)
+#endif
+
+static long bytes_out;
+static void flush_window(void);
+
+static __init void error(char *x)
+{
+    printk("%s\n", x);
+    BUG();
+}
+
+static __init int fill_inbuf(void)
+{
+        error("ran out of input data");
+        return 0;
+}
+
+
+#include "../../common/inflate.c"
+
+static __init void flush_window(void)
+{
+    /*
+     * The window is equal to the output buffer therefore only need to
+     * compute the crc.
+     */
+    unsigned long c = crc;
+    unsigned n;
+    unsigned char *in, ch;
+
+    in = window;
+    for ( n = 0; n < outcnt; n++ )
+    {
+        ch = *in++;
+        c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+    }
+    crc = c;
+
+    bytes_out += (unsigned long)outcnt;
+    outcnt = 0;
+}
+
+static __init int gzip_length(char *image, unsigned long image_len)
+{
+    return *(uint32_t *)&image[image_len - 4];
+}
+
+static  __init int perform_gunzip(char *output, char **_image_start, unsigned long *image_len)
+{
+    char *image = *_image_start;
+    int rc;
+    unsigned char magic0 = (unsigned char)image[0];
+    unsigned char magic1 = (unsigned char)image[1];
+
+    if ( magic0 != 0x1f || ( (magic1 != 0x8b) && (magic1 != 0x9e) ) )
+        return 0;
+
+    window = (unsigned char *)output;
+
+    free_mem_ptr = (unsigned long)alloc_xenheap_pages(HEAPORDER);
+    free_mem_end_ptr = free_mem_ptr + (PAGE_SIZE << HEAPORDER);
+
+    inbuf = (unsigned char *)image;
+    insize = *image_len;
+    inptr = 0;
+
+    makecrc();
+
+    if ( gunzip() < 0 )
+    {
+        rc = -EINVAL;
+    }
+    else
+    {
+        *_image_start = (char *)window;
+        *image_len = gzip_length(image, *image_len);
+        rc = 0;
+    }
+
+    free_xenheap_pages((void *)free_mem_ptr, HEAPORDER);
+
+    return rc;
+}
+
+struct setup_header {
+        uint8_t         _pad0[0x1f1];           /* skip uninteresting stuff */
+        uint8_t         setup_sects;
+        uint16_t        root_flags;
+        uint32_t        syssize;
+        uint16_t        ram_size;
+        uint16_t        vid_mode;
+        uint16_t        root_dev;
+        uint16_t        boot_flag;
+        uint16_t        jump;
+        uint32_t        header;
+#define HDR_MAGIC               "HdrS"
+#define HDR_MAGIC_SZ    4
+        uint16_t        version;
+#define VERSION(h,l)    (((h)<<8) | (l))
+        uint32_t        realmode_swtch;
+        uint16_t        start_sys;
+        uint16_t        kernel_version;
+        uint8_t         type_of_loader;
+        uint8_t         loadflags;
+        uint16_t        setup_move_size;
+        uint32_t        code32_start;
+        uint32_t        ramdisk_image;
+        uint32_t        ramdisk_size;
+        uint32_t        bootsect_kludge;
+        uint16_t        heap_end_ptr;
+        uint16_t        _pad1;
+        uint32_t        cmd_line_ptr;
+        uint32_t        initrd_addr_max;
+        uint32_t        kernel_alignment;
+        uint8_t         relocatable_kernel;
+        uint8_t         _pad2[3];
+        uint32_t        cmdline_size;
+        uint32_t        hardware_subarch;
+        uint64_t        hardware_subarch_data;
+        uint32_t        payload_offset;
+        uint32_t        payload_length;
+    } __attribute__((packed));
+
+static __init int bzimage_check(struct setup_header *hdr, unsigned long len)
+{
+    if ( len < sizeof(struct setup_header) )
+        return 0;
+
+    if ( memcmp(&hdr->header, HDR_MAGIC, HDR_MAGIC_SZ) != 0 )
+        return 0;
+
+    if ( hdr->version < VERSION(2,8) ) {
+        printk("Cannot load bzImage v%d.%02d at least v2.08 is required\n",
+           hdr->version >> 8, hdr->version & 0xff);
+        return -EINVAL;
+    }
+    return 1;
+}
+
+int __init bzimage_headroom(char *image_start, unsigned long image_length)
+{
+    struct setup_header *hdr = (struct setup_header *)image_start;
+    char *img;
+    int err, headroom;
+
+    err = bzimage_check(hdr, image_length);
+    if (err < 1)
+        return err;
+
+    img = image_start + (hdr->setup_sects+1) * 512;
+    img += hdr->payload_offset;
+
+    headroom = gzip_length(img, hdr->payload_length);
+    headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */
+    headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */
+    headroom = (headroom + 4095) & ~4095;
+
+    return headroom;
+}
+
+int __init bzimage_parse(char *image_base, char **image_start, unsigned long *image_len)
+{
+    struct setup_header *hdr = (struct setup_header *)(*image_start);
+    int err = bzimage_check(hdr, *image_len);
+
+    if (err < 1)
+        return err;
+
+    BUG_ON(!(image_base < *image_start));
+
+    *image_start += (hdr->setup_sects+1) * 512;
+    *image_start += hdr->payload_offset;
+    *image_len = hdr->payload_length;
+
+    if ( (err = perform_gunzip(image_base, image_start, image_len)) < 0 )
+        return err;
+
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/cpu/common.c Wed Jan 28 13:06:45 2009 +0900
@@ -29,6 +29,14 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
  */
 u64 host_pat = 0x050100070406;
 
+static unsigned int __cpuinitdata cleared_caps[NCAPINTS];
+
+void __init setup_clear_cpu_cap(unsigned int cap)
+{
+       __clear_bit(cap, boot_cpu_data.x86_capability);
+       __set_bit(cap, cleared_caps);
+}
+
 static void default_init(struct cpuinfo_x86 * c)
 {
        /* Not much we can do here... */
@@ -235,6 +243,7 @@ static void __init early_cpu_detect(void
                if (c->x86 >= 0x6)
                        c->x86_model += ((tfms >> 16) & 0xF) << 4;
                c->x86_mask = tfms & 15;
+               cap0 &= ~cleared_caps[0];
                if (cap0 & (1<<19))
                        c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
                c->x86_capability[0] = cap0; /* Added for Xen bootstrap */
@@ -329,6 +338,7 @@ void __cpuinit identify_cpu(struct cpuin
        c->x86_vendor_id[0] = '\0'; /* Unset */
        c->x86_model_id[0] = '\0';  /* Unset */
        c->x86_max_cores = 1;
+       c->x86_num_siblings = 1;
        c->x86_clflush_size = 0;
        memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
@@ -395,6 +405,9 @@ void __cpuinit identify_cpu(struct cpuin
        if (disable_pse)
                clear_bit(X86_FEATURE_PSE, c->x86_capability);
 
+       for (i = 0 ; i < NCAPINTS ; ++i)
+               c->x86_capability[i] &= ~cleared_caps[i];
+
        /* If the model name is still unset, do table lookup. */
        if ( !c->x86_model_id[0] ) {
                char *p;
@@ -468,27 +481,27 @@ void __cpuinit detect_ht(struct cpuinfo_
        if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
                return;
 
-       smp_num_siblings = (ebx & 0xff0000) >> 16;
-
-       if (smp_num_siblings == 1) {
+       c->x86_num_siblings = (ebx & 0xff0000) >> 16;
+
+       if (c->x86_num_siblings == 1) {
                printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
-       } else if (smp_num_siblings > 1 ) {
-
-               if (smp_num_siblings > NR_CPUS) {
-                       printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
-                       smp_num_siblings = 1;
+       } else if (c->x86_num_siblings > 1 ) {
+
+               if (c->x86_num_siblings > NR_CPUS) {
+                       printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", c->x86_num_siblings);
+                       c->x86_num_siblings = 1;
                        return;
                }
 
-               index_msb = get_count_order(smp_num_siblings);
+               index_msb = get_count_order(c->x86_num_siblings);
                phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
 
                printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
                       phys_proc_id[cpu]);
 
-               smp_num_siblings = smp_num_siblings / c->x86_max_cores;
-
-               index_msb = get_count_order(smp_num_siblings) ;
+               c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores;
+
+               index_msb = get_count_order(c->x86_num_siblings) ;
 
                core_bits = get_count_order(c->x86_max_cores);
 
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Wed Jan 28 13:06:45 2009 +0900
@@ -14,7 +14,6 @@ DEFINE_PER_CPU(cpu_banks_t, mce_banks_ow
 
 static int nr_intel_ext_msrs = 0;
 static int cmci_support = 0;
-extern int firstbank;
 
 #ifdef CONFIG_X86_MCE_THERMAL
 static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
@@ -121,7 +120,7 @@ static inline void intel_get_extended_ms
     if (nr_intel_ext_msrs == 0)
         return;
 
-    /*this function will called when CAP(9).MCG_EXT_P = 1*/
+    /* this function will called when CAP(9).MCG_EXT_P = 1 */
     memset(mc_ext, 0, sizeof(struct mcinfo_extended));
     mc_ext->common.type = MC_TYPE_EXTENDED;
     mc_ext->common.size = sizeof(mc_ext);
@@ -157,7 +156,7 @@ static inline void intel_get_extended_ms
  * 3. called in polling handler
  * It will generate a new mc_info item if found CE/UC errors. DOM0 is the 
  * consumer.
-*/
+ */
 static struct mc_info *machine_check_poll(int calltype)
 {
     struct mc_info *mi = NULL;
@@ -174,9 +173,9 @@ static struct mc_info *machine_check_pol
     memset(&mcg, 0, sizeof(mcg));
     mcg.common.type = MC_TYPE_GLOBAL;
     mcg.common.size = sizeof(mcg);
-    /*If called from cpu-reset check, don't need to fill them.
-     *If called from cmci context, we'll try to fill domid by memory addr
-    */
+    /* If called from cpu-reset check, don't need to fill them.
+     * If called from cmci context, we'll try to fill domid by memory addr
+     */
     mcg.mc_domid = -1;
     mcg.mc_vcpuid = -1;
     if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET)
@@ -186,12 +185,13 @@ static struct mc_info *machine_check_pol
     mcg.mc_socketid = phys_proc_id[cpu];
     mcg.mc_coreid = cpu_core_id[cpu];
     mcg.mc_apicid = cpu_physical_id(cpu);
-    mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1)); 
+    mcg.mc_core_threadid =
+        mcg.mc_apicid & ( 1 << (cpu_data[cpu].x86_num_siblings - 1)); 
     rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
 
     for ( i = 0; i < nr_mce_banks; i++ ) {
         struct mcinfo_bank mcb;
-        /*For CMCI, only owners checks the owned MSRs*/
+        /* For CMCI, only owners checks the owned MSRs */
         if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) &&
              (calltype & MC_FLAG_CMCI) )
             continue;
@@ -240,7 +240,7 @@ static struct mc_info *machine_check_pol
         x86_mcinfo_add(mi, &mcb);
         nr_unit++;
         add_taint(TAINT_MACHINE_CHECK);
-        /*Clear state for this bank */
+        /* Clear state for this bank */
         wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0);
         printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%"PRIx64"]\n", 
                 i, cpu, status);
@@ -249,12 +249,12 @@ static struct mc_info *machine_check_pol
                 mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid);
  
     }
-    /*if pcc = 1, uc must be 1*/
+    /* if pcc = 1, uc must be 1 */
     if (pcc)
         mcg.mc_flags |= MC_FLAG_UNCORRECTABLE;
     else if (uc)
         mcg.mc_flags |= MC_FLAG_RECOVERABLE;
-    else /*correctable*/
+    else /* correctable */
         mcg.mc_flags |= MC_FLAG_CORRECTABLE;
 
     if (nr_unit && nr_intel_ext_msrs && 
@@ -264,7 +264,7 @@ static struct mc_info *machine_check_pol
     }
     if (nr_unit) 
         x86_mcinfo_add(mi, &mcg);
-    /*Clear global state*/
+    /* Clear global state */
     return mi;
 }
 
@@ -541,8 +541,7 @@ static void mce_init(void)
      * This also clears all registers*/
 
     mi = machine_check_poll(MC_FLAG_RESET);
-    /*in the boot up stage, not expect inject to DOM0, but go print out
-    */
+    /* in the boot up stage, don't inject to DOM0, but print out */
     if (mi)
         x86_mcinfo_dump(mi);
 
@@ -553,22 +552,22 @@ static void mce_init(void)
 
     for (i = firstbank; i < nr_mce_banks; i++)
     {
-        /*Some banks are shared across cores, use MCi_CTRL to judge whether
-         * this bank has been initialized by other cores already.*/
+        /* Some banks are shared across cores, use MCi_CTRL to judge whether
+         * this bank has been initialized by other cores already. */
         rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h);
-        if (!l & !h)
+        if (!(l | h))
         {
-            /*if ctl is 0, this bank is never initialized*/
+            /* if ctl is 0, this bank is never initialized */
             printk(KERN_DEBUG "mce_init: init bank%d\n", i);
             wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff);
             wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0);
-       }
-    }
-    if (firstbank) /*if cmci enabled, firstbank = 0*/
+        }
+    }
+    if (firstbank) /* if cmci enabled, firstbank = 0 */
         wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
 }
 
-/*p4/p6 faimily has similar MCA initialization process*/
+/* p4/p6 family have similar MCA initialization process */
 void intel_mcheck_init(struct cpuinfo_x86 *c)
 {
     mce_cap_init(c);
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/domain.c     Wed Jan 28 13:06:45 2009 +0900
@@ -143,7 +143,7 @@ void dump_pageframe_info(struct domain *
     {
         list_for_each_entry ( page, &d->page_list, list )
         {
-            printk("    DomPage %p: caf=%08x, taf=%" PRtype_info "\n",
+            printk("    DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
                    _p(page_to_mfn(page)),
                    page->count_info, page->u.inuse.type_info);
         }
@@ -156,7 +156,7 @@ void dump_pageframe_info(struct domain *
 
     list_for_each_entry ( page, &d->xenpage_list, list )
     {
-        printk("    XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
+        printk("    XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
                _p(page_to_mfn(page)),
                page->count_info, page->u.inuse.type_info);
     }
@@ -405,8 +405,17 @@ int arch_domain_create(struct domain *d,
         if ( d->arch.ioport_caps == NULL )
             goto fail;
 
+#ifdef __i386__
         if ( (d->shared_info = alloc_xenheap_page()) == NULL )
             goto fail;
+#else
+        pg = alloc_domheap_page(
+            NULL, MEMF_node(domain_to_node(d)) | MEMF_bits(32));
+        if ( pg == NULL )
+            goto fail;
+        pg->count_info |= PGC_xen_heap;
+        d->shared_info = page_to_virt(pg);
+#endif
 
         clear_page(d->shared_info);
         share_xen_page_with_guest(
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/domain_build.c       Wed Jan 28 13:06:45 2009 +0900
@@ -19,6 +19,7 @@
 #include <xen/iocap.h>
 #include <xen/bitops.h>
 #include <xen/compat.h>
+#include <xen/libelf.h>
 #include <asm/regs.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -30,7 +31,9 @@
 #include <asm/e820.h>
 
 #include <public/version.h>
-#include <public/libelf.h>
+
+int __init bzimage_parse(
+    char *output, char **image_start, unsigned long *image_len);
 
 extern unsigned long initial_images_nrpages(void);
 extern void discard_initial_images(void);
@@ -196,7 +199,8 @@ static void __init process_dom0_ioports_
 
 int __init construct_dom0(
     struct domain *d,
-    unsigned long _image_start, unsigned long image_len, 
+    unsigned long _image_base,
+    unsigned long _image_start, unsigned long image_len,
     unsigned long _initrd_start, unsigned long initrd_len,
     char *cmdline)
 {
@@ -213,9 +217,11 @@ int __init construct_dom0(
     struct vcpu *v = d->vcpu[0];
     unsigned long long value;
 #if defined(__i386__)
+    char *image_base   = (char *)_image_base;   /* use lowmem mappings */
     char *image_start  = (char *)_image_start;  /* use lowmem mappings */
     char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
 #elif defined(__x86_64__)
+    char *image_base   = __va(_image_base);
     char *image_start  = __va(_image_start);
     char *initrd_start = __va(_initrd_start);
 #endif
@@ -262,6 +268,9 @@ int __init construct_dom0(
 
     nr_pages = compute_dom0_nr_pages();
 
+    if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
+        return rc;
+
     if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
         return rc;
 #ifdef VERBOSE
@@ -341,6 +350,12 @@ int __init construct_dom0(
 #endif
     }
 
+    if ( (parms.p2m_base != UNSET_ADDR) && elf_32bit(&elf) )
+    {
+        printk(XENLOG_WARNING "P2M table base ignored\n");
+        parms.p2m_base = UNSET_ADDR;
+    }
+
     domain_set_alloc_bitsize(d);
 
     /*
@@ -359,6 +374,8 @@ int __init construct_dom0(
     vphysmap_end     = vphysmap_start + (nr_pages * (!is_pv_32on64_domain(d) ?
                                                      sizeof(unsigned long) :
                                                      sizeof(unsigned int)));
+    if ( parms.p2m_base != UNSET_ADDR )
+        vphysmap_end = vphysmap_start;
     vstartinfo_start = round_pgup(vphysmap_end);
     vstartinfo_end   = (vstartinfo_start +
                         sizeof(struct start_info) +
@@ -400,6 +417,11 @@ int __init construct_dom0(
     /* Ensure that our low-memory 1:1 mapping covers the allocation. */
     page = alloc_domheap_pages(d, order, MEMF_bits(30));
 #else
+    if ( parms.p2m_base != UNSET_ADDR )
+    {
+        vphysmap_start = parms.p2m_base;
+        vphysmap_end   = vphysmap_start + nr_pages * sizeof(unsigned long);
+    }
     page = alloc_domheap_pages(d, order, 0);
 #endif
     if ( page == NULL )
@@ -429,14 +451,6 @@ int __init construct_dom0(
            _p(vstack_start), _p(vstack_end),
            _p(v_start), _p(v_end));
     printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry));
-
-    if ( ((v_end - v_start)>>PAGE_SHIFT) > nr_pages )
-    {
-        printk("Initial guest OS requires too much space\n"
-               "(%luMB is greater than %luMB limit)\n",
-               (v_end-v_start)>>20, nr_pages>>(20-PAGE_SHIFT));
-        return -ENOMEM;
-    }
 
     mpt_alloc = (vpt_start - v_start) +
         (unsigned long)pfn_to_paddr(alloc_spfn);
@@ -748,8 +762,109 @@ int __init construct_dom0(
     snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%d%s",
              elf_64bit(&elf) ? 64 : 32, parms.pae ? "p" : "");
 
+    count = d->tot_pages;
+#ifdef __x86_64__
+    /* Set up the phys->machine table if not part of the initial mapping. */
+    if ( parms.p2m_base != UNSET_ADDR )
+    {
+        unsigned long va = vphysmap_start;
+
+        if ( v_start <= vphysmap_end && vphysmap_start <= v_end )
+            panic("DOM0 P->M table overlaps initial mapping");
+
+        while ( va < vphysmap_end )
+        {
+            if ( d->tot_pages + ((round_pgup(vphysmap_end) - va)
+                                 >> PAGE_SHIFT) + 3 > nr_pages )
+                panic("Dom0 allocation too small for initial P->M table.\n");
+
+            l4tab = l4start + l4_table_offset(va);
+            if ( !l4e_get_intpte(*l4tab) )
+            {
+                page = alloc_domheap_page(d, 0);
+                if ( !page )
+                    break;
+                /* No mapping, PGC_allocated + page-table page. */
+                page->count_info = PGC_allocated | 2;
+                page->u.inuse.type_info =
+                    PGT_l3_page_table | PGT_validated | 1;
+                clear_page(page_to_virt(page));
+                *l4tab = l4e_from_page(page, L4_PROT);
+            }
+            l3tab = page_to_virt(l4e_get_page(*l4tab));
+            l3tab += l3_table_offset(va);
+            if ( !l3e_get_intpte(*l3tab) )
+            {
+                if ( cpu_has_page1gb &&
+                     !(va & ((1UL << L3_PAGETABLE_SHIFT) - 1)) &&
+                     vphysmap_end >= va + (1UL << L3_PAGETABLE_SHIFT) &&
+                     (page = alloc_domheap_pages(d,
+                                                 L3_PAGETABLE_SHIFT -
+                                                     PAGE_SHIFT,
+                                                 0)) != NULL )
+                {
+                    *l3tab = l3e_from_page(page,
+                                           L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
+                    va += 1UL << L3_PAGETABLE_SHIFT;
+                    continue;
+                }
+                if ( (page = alloc_domheap_page(d, 0)) == NULL )
+                    break;
+                else
+                {
+                    /* No mapping, PGC_allocated + page-table page. */
+                    page->count_info = PGC_allocated | 2;
+                    page->u.inuse.type_info =
+                        PGT_l2_page_table | PGT_validated | 1;
+                    clear_page(page_to_virt(page));
+                    *l3tab = l3e_from_page(page, L3_PROT);
+                }
+            }
+            l2tab = page_to_virt(l3e_get_page(*l3tab));
+            l2tab += l2_table_offset(va);
+            if ( !l2e_get_intpte(*l2tab) )
+            {
+                if ( !(va & ((1UL << L2_PAGETABLE_SHIFT) - 1)) &&
+                     vphysmap_end >= va + (1UL << L2_PAGETABLE_SHIFT) &&
+                     (page = alloc_domheap_pages(d,
+                                                 L2_PAGETABLE_SHIFT -
+                                                     PAGE_SHIFT,
+                                                 0)) != NULL )
+                {
+                    *l2tab = l2e_from_page(page,
+                                           L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
+                    va += 1UL << L2_PAGETABLE_SHIFT;
+                    continue;
+                }
+                if ( (page = alloc_domheap_page(d, 0)) == NULL )
+                    break;
+                else
+                {
+                    /* No mapping, PGC_allocated + page-table page. */
+                    page->count_info = PGC_allocated | 2;
+                    page->u.inuse.type_info =
+                        PGT_l1_page_table | PGT_validated | 1;
+                    clear_page(page_to_virt(page));
+                    *l2tab = l2e_from_page(page, L2_PROT);
+                }
+            }
+            l1tab = page_to_virt(l2e_get_page(*l2tab));
+            l1tab += l1_table_offset(va);
+            BUG_ON(l1e_get_intpte(*l1tab));
+            page = alloc_domheap_page(d, 0);
+            if ( !page )
+                break;
+            *l1tab = l1e_from_page(page, L1_PROT|_PAGE_DIRTY);
+            va += PAGE_SIZE;
+            va &= PAGE_MASK;
+        }
+        if ( !page )
+            panic("Not enough RAM for DOM0 P->M table.\n");
+    }
+#endif
+
     /* Write the phys->machine and machine->phys table entries. */
-    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
+    for ( pfn = 0; pfn < count; pfn++ )
     {
         mfn = pfn + alloc_spfn;
 #ifndef NDEBUG
@@ -763,6 +878,26 @@ int __init construct_dom0(
             ((unsigned int *)vphysmap_start)[pfn] = mfn;
         set_gpfn_from_mfn(mfn, pfn);
     }
+    si->first_p2m_pfn = pfn;
+    si->nr_p2m_frames = d->tot_pages - count;
+    list_for_each_entry ( page, &d->page_list, list )
+    {
+        mfn = page_to_mfn(page);
+        if ( get_gpfn_from_mfn(mfn) >= count )
+        {
+            BUG_ON(is_pv_32bit_domain(d));
+            if ( !page->u.inuse.type_info &&
+                 !get_page_and_type(page, d, PGT_writable_page) )
+                BUG();
+            ((unsigned long *)vphysmap_start)[pfn] = mfn;
+            set_gpfn_from_mfn(mfn, pfn);
+            ++pfn;
+#ifndef NDEBUG
+            ++alloc_epfn;
+#endif
+        }
+    }
+    BUG_ON(pfn != d->tot_pages);
     while ( pfn < nr_pages )
     {
         if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL )
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/hvm/hvm.c    Wed Jan 28 13:06:45 2009 +0900
@@ -20,6 +20,7 @@
  */
 
 #include <xen/config.h>
+#include <xen/ctype.h>
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <xen/trace.h>
@@ -272,6 +273,10 @@ static int hvm_print_line(
     char c = *val;
 
     BUG_ON(bytes != 1);
+
+    /* Accept only printable characters, newline, and horizontal tab. */
+    if ( !isprint(c) && (c != '\n') && (c != '\t') )
+        return X86EMUL_OKAY;
 
     spin_lock(&hd->pbuf_lock);
     hd->pbuf[hd->pbuf_idx++] = c;
@@ -1503,7 +1508,15 @@ static enum hvm_copy_result __hvm_copy(
 
         if ( flags & HVMCOPY_to_guest )
         {
-            if ( p2mt != p2m_ram_ro )
+            if ( p2mt == p2m_ram_ro )
+            {
+                static unsigned long lastpage;
+                if ( xchg(&lastpage, gfn) != gfn )
+                    gdprintk(XENLOG_DEBUG, "guest attempted write to read-only"
+                             " memory page. gfn=%#lx, mfn=%#lx\n",
+                             gfn, mfn);
+            }
+            else
             {
                 memcpy(p, buf, count);
                 paging_mark_dirty(curr->domain, mfn);
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/mtrr.c
--- a/xen/arch/x86/hvm/mtrr.c   Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/hvm/mtrr.c   Wed Jan 28 13:06:45 2009 +0900
@@ -702,12 +702,15 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save
                           1, HVMSR_PER_VCPU);
 
 uint8_t epte_get_entry_emt(
-    struct domain *d, unsigned long gfn, unsigned long mfn)
+    struct domain *d, unsigned long gfn, 
+    unsigned long mfn, uint8_t *igmt, int direct_mmio)
 {
     uint8_t gmtrr_mtype, hmtrr_mtype;
     uint32_t type;
     struct vcpu *v = current;
 
+    *igmt = 0;
+
     if ( (current->domain != d) && ((v = d->vcpu[0]) == NULL) )
         return MTRR_TYPE_WRBACK;
 
@@ -722,6 +725,21 @@ uint8_t epte_get_entry_emt(
 
     if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) )
         return type;
+
+    if ( !iommu_enabled )
+    {
+        *igmt = 1;
+        return MTRR_TYPE_WRBACK;
+    }
+
+    if ( direct_mmio )
+        return MTRR_TYPE_UNCACHABLE;
+
+    if ( iommu_snoop )
+    {
+        *igmt = 1;
+        return MTRR_TYPE_WRBACK;
+    }
 
     gmtrr_mtype = get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT));
     hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn << PAGE_SHIFT));
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/vmsi.c
--- a/xen/arch/x86/hvm/vmsi.c   Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/hvm/vmsi.c   Wed Jan 28 13:06:45 2009 +0900
@@ -134,7 +134,7 @@ int vmsi_deliver(struct domain *d, int p
                 "vector=%x trig_mode=%x\n",
                 dest, dest_mode, delivery_mode, vector, trig_mode);
 
-    if ( !test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags) )
+    if ( !( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI ) )
     {
         gdprintk(XENLOG_WARNING, "pirq %x not msi \n", pirq);
         return 0;
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Wed Jan 28 13:06:45 2009 +0900
@@ -167,14 +167,15 @@ static void vmx_init_vmcs_config(void)
 #endif
 
     min = VM_EXIT_ACK_INTR_ON_EXIT;
-    opt = 0;
+    opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT;
 #ifdef __x86_64__
     min |= VM_EXIT_IA32E_MODE;
 #endif
     _vmx_vmexit_control = adjust_vmx_controls(
         min, opt, MSR_IA32_VMX_EXIT_CTLS);
 
-    min = opt = 0;
+    min = 0;
+    opt = VM_ENTRY_LOAD_GUEST_PAT;
     _vmx_vmentry_control = adjust_vmx_controls(
         min, opt, MSR_IA32_VMX_ENTRY_CTLS);
 
@@ -519,8 +520,6 @@ static int construct_vmcs(struct vcpu *v
 
     /* VMCS controls. */
     __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
-    __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
-    __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
 
     v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
     v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
@@ -534,12 +533,18 @@ static int construct_vmcs(struct vcpu *v
     else
     {
         v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+        vmx_vmexit_control &= ~(VM_EXIT_SAVE_GUEST_PAT |
+                                VM_EXIT_LOAD_HOST_PAT);
+        vmx_vmentry_control &= ~VM_ENTRY_LOAD_GUEST_PAT;
     }
 
     /* Do not enable Monitor Trap Flag unless start single step debug */
     v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
 
     __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+    __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
+    __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
+
     if ( cpu_has_vmx_secondary_exec_control )
         __vmwrite(SECONDARY_VM_EXEC_CONTROL,
                   v->arch.hvm_vmx.secondary_exec_control);
@@ -561,6 +566,8 @@ static int construct_vmcs(struct vcpu *v
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
+        if ( cpu_has_vmx_pat && paging_mode_hap(d) )
+            vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT);
     }
 
     /* I/O access bitmap. */
@@ -690,6 +697,21 @@ static int construct_vmcs(struct vcpu *v
         v->arch.hvm_vmx.vpid =
             v->domain->arch.hvm_domain.vmx.vpid_base + v->vcpu_id;
         __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid);
+    }
+
+    if ( cpu_has_vmx_pat && paging_mode_hap(d) )
+    {
+        u64 host_pat, guest_pat;
+
+        rdmsrl(MSR_IA32_CR_PAT, host_pat);
+        guest_pat = 0x7040600070406ULL;
+
+        __vmwrite(HOST_PAT, host_pat);
+        __vmwrite(GUEST_PAT, guest_pat);
+#ifdef __i386__
+        __vmwrite(HOST_PAT_HIGH, host_pat >> 32);
+        __vmwrite(GUEST_PAT_HIGH, guest_pat >> 32);
+#endif
     }
 
     vmx_vmcs_exit(v);
@@ -989,6 +1011,8 @@ void vmcs_dump_vcpu(struct vcpu *v)
     vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
     vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
     vmx_dump_sel("TR", GUEST_TR_SELECTOR);
+    printk("Guest PAT = 0x%08x%08x\n",
+           (uint32_t)vmr(GUEST_PAT_HIGH), (uint32_t)vmr(GUEST_PAT));
     x  = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32;
     x |= (uint32_t)vmr(TSC_OFFSET);
     printk("TSC Offset = %016llx\n", x);
@@ -1027,6 +1051,8 @@ void vmcs_dump_vcpu(struct vcpu *v)
            (unsigned long long)vmr(HOST_SYSENTER_ESP),
            (int)vmr(HOST_SYSENTER_CS),
            (unsigned long long)vmr(HOST_SYSENTER_EIP));
+    printk("Host PAT = 0x%08x%08x\n",
+           (uint32_t)vmr(HOST_PAT_HIGH), (uint32_t)vmr(HOST_PAT));
 
     printk("*** Control State ***\n");
     printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/io_apic.c    Wed Jan 28 13:06:45 2009 +0900
@@ -84,7 +84,9 @@ int disable_timer_pin_1 __initdata;
 
 static struct irq_pin_list {
     int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+} irq_2_pin[PIN_MAP_SIZE] = {
+    [0 ... PIN_MAP_SIZE-1].pin = -1
+};
 static int irq_2_pin_free_entry = NR_IRQS;
 
 int vector_irq[NR_VECTORS] __read_mostly = {
@@ -1017,11 +1019,6 @@ static void __init enable_IO_APIC(void)
     int i8259_apic, i8259_pin;
     int i, apic;
     unsigned long flags;
-
-    for (i = 0; i < PIN_MAP_SIZE; i++) {
-        irq_2_pin[i].pin = -1;
-        irq_2_pin[i].next = 0;
-    }
 
     /* Initialise dynamic irq_2_pin free list. */
     for (i = NR_IRQS; i < PIN_MAP_SIZE; i++)
@@ -1557,11 +1554,14 @@ static unsigned int startup_msi_vector(u
 
 static void ack_msi_vector(unsigned int vector)
 {
-    ack_APIC_irq();
+    if ( msi_maskable_irq(irq_desc[vector].msi_desc) )
+        ack_APIC_irq(); /* ACKTYPE_NONE */
 }
 
 static void end_msi_vector(unsigned int vector)
 {
+    if ( !msi_maskable_irq(irq_desc[vector].msi_desc) )
+        ack_APIC_irq(); /* ACKTYPE_EOI */
 }
 
 static void shutdown_msi_vector(unsigned int vector)
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/irq.c        Wed Jan 28 13:06:45 2009 +0900
@@ -491,7 +491,7 @@ int pirq_guest_unmask(struct domain *d)
 }
 
 extern int ioapic_ack_new;
-int pirq_acktype(struct domain *d, int irq)
+static int pirq_acktype(struct domain *d, int irq)
 {
     irq_desc_t  *desc;
     unsigned int vector;
@@ -705,6 +705,10 @@ static irq_guest_action_t *__pirq_guest_
             spin_lock_irq(&desc->lock);
         }
         break;
+    case ACKTYPE_NONE:
+        stop_timer(&irq_guest_eoi_timer[vector]);
+        _irq_guest_eoi(desc);
+        break;
     }
 
     /*
@@ -853,10 +857,6 @@ int map_domain_pirq(
     ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(spin_is_locked(&d->event_lock));
 
-    /* XXX Until pcidev and msi locking is fixed. */
-    if ( type == MAP_PIRQ_TYPE_MSI )
-        return -EINVAL;
-
     if ( !IS_PRIV(current->domain) )
         return -EPERM;
 
@@ -867,8 +867,8 @@ int map_domain_pirq(
         return -EINVAL;
     }
 
-    old_vector = d->arch.pirq_vector[pirq];
-    old_pirq = d->arch.vector_pirq[vector];
+    old_vector = domain_irq_to_vector(d, pirq);
+    old_pirq = domain_vector_to_irq(d, vector);
 
     if ( (old_vector && (old_vector != vector) ) ||
          (old_pirq && (old_pirq != pirq)) )
@@ -891,6 +891,10 @@ int map_domain_pirq(
     if ( type == MAP_PIRQ_TYPE_MSI )
     {
         struct msi_info *msi = (struct msi_info *)data;
+
+        ret = -ENODEV;
+        if ( !cpu_has_apic )
+            goto done;
 
         pdev = pci_get_pdev(msi->bus, msi->devfn);
         ret = pci_enable_msi(msi, &msi_desc);
@@ -937,7 +941,7 @@ int unmap_domain_pirq(struct domain *d, 
     ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(spin_is_locked(&d->event_lock));
 
-    vector = d->arch.pirq_vector[pirq];
+    vector = domain_irq_to_vector(d, pirq);
     if ( vector <= 0 )
     {
         dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
@@ -958,7 +962,7 @@ int unmap_domain_pirq(struct domain *d, 
 
     spin_lock_irqsave(&desc->lock, flags);
 
-    BUG_ON(vector != d->arch.pirq_vector[pirq]);
+    BUG_ON(vector != domain_irq_to_vector(d, pirq));
 
     if ( msi_desc )
         teardown_msi_vector(vector);
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/machine_kexec.c
--- a/xen/arch/x86/machine_kexec.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/machine_kexec.c      Wed Jan 28 13:06:45 2009 +0900
@@ -150,6 +150,9 @@ void arch_crash_save_vmcoreinfo(void)
        VMCOREINFO_SYMBOL(dom_xen);
        VMCOREINFO_SYMBOL(dom_io);
 
+#ifdef CONFIG_X86_32
+    VMCOREINFO_SYMBOL(xenheap_phys_end);
+#endif
 #ifdef CONFIG_X86_PAE
        VMCOREINFO_SYMBOL_ALIAS(pgd_l3, idle_pg_table);
 #endif
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c  Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/microcode.c  Wed Jan 28 13:06:45 2009 +0900
@@ -49,31 +49,22 @@ struct microcode_info {
     char buffer[1];
 };
 
-static void microcode_fini_cpu(int cpu)
+static void __microcode_fini_cpu(int cpu)
 {
     struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
+    xfree(uci->mc.mc_valid);
+    memset(uci, 0, sizeof(*uci));
+}
+
+static void microcode_fini_cpu(int cpu)
+{
     spin_lock(&microcode_mutex);
-    xfree(uci->mc.valid_mc);
-    uci->mc.valid_mc = NULL;
-    uci->valid = 0;
+    __microcode_fini_cpu(cpu);
     spin_unlock(&microcode_mutex);
 }
 
-static int collect_cpu_info(int cpu)
-{
-    int err = 0;
-    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-    memset(uci, 0, sizeof(*uci));
-    err = microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
-    if ( !err )
-        uci->valid = 1;
-
-    return err;
-}
-
-static int microcode_resume_cpu(int cpu)
+int microcode_resume_cpu(int cpu)
 {
     int err = 0;
     struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -81,7 +72,7 @@ static int microcode_resume_cpu(int cpu)
 
     gdprintk(XENLOG_INFO, "microcode: CPU%d resumed\n", cpu);
 
-    if ( !uci->mc.valid_mc )
+    if ( !uci->mc.mc_valid )
         return -EIO;
 
     /*
@@ -95,16 +86,15 @@ static int microcode_resume_cpu(int cpu)
         return err;
     }
 
-    if ( memcmp(&nsig, &uci->cpu_sig, sizeof(nsig)) )
+    if ( microcode_ops->microcode_resume_match(cpu, &nsig) )
+    {
+        return microcode_ops->apply_microcode(cpu);
+    }
+    else
     {
         microcode_fini_cpu(cpu);
-        /* Should we look for a new ucode here? */
         return -EIO;
     }
-
-    err = microcode_ops->apply_microcode(cpu);
-
-    return err;
 }
 
 static int microcode_update_cpu(const void *buf, size_t size)
@@ -115,20 +105,11 @@ static int microcode_update_cpu(const vo
 
     spin_lock(&microcode_mutex);
 
-    /*
-     * Check if the system resume is in progress (uci->valid != NULL),
-     * otherwise just request a firmware:
-     */
-    if ( uci->valid )
-    {
-        err = microcode_resume_cpu(cpu);
-    }
+    err = microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
+    if ( likely(!err) )
+        err = microcode_ops->cpu_request_microcode(cpu, buf, size);
     else
-    {
-        err = collect_cpu_info(cpu);
-        if ( !err && uci->valid )
-            err = microcode_ops->cpu_request_microcode(cpu, buf, size);
-    }
+        __microcode_fini_cpu(cpu);
 
     spin_unlock(&microcode_mutex);
 
@@ -153,7 +134,6 @@ static long do_microcode_update(void *_i
     error = info->error;
     xfree(info);
     return error;
-
 }
 
 int microcode_update(XEN_GUEST_HANDLE(const_void) buf, unsigned long len)
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/microcode_amd.c
--- a/xen/arch/x86/microcode_amd.c      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/microcode_amd.c      Wed Jan 28 13:06:45 2009 +0900
@@ -38,21 +38,16 @@
 #define MC_HEADER_SIZE          (sizeof(struct microcode_header_amd))
 #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
 #define DWSIZE                  (sizeof(uint32_t))
-/* For now we support a fixed ucode total size only */
-#define get_totalsize(mc) \
-        ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
-         + MC_HEADER_SIZE)
 
 /* serialize access to the physical write */
 static DEFINE_SPINLOCK(microcode_update_lock);
 
 struct equiv_cpu_entry *equiv_cpu_table;
 
-static long install_equiv_cpu_table(const void *, uint32_t, long);
-
 static int collect_cpu_info(int cpu, struct cpu_signature *csig)
 {
     struct cpuinfo_x86 *c = &cpu_data[cpu];
+    uint32_t dummy;
 
     memset(csig, 0, sizeof(*csig));
 
@@ -60,13 +55,10 @@ static int collect_cpu_info(int cpu, str
     {
         printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
                cpu);
-        return -1;
-    }
-
-    asm volatile (
-        "movl %1, %%ecx; rdmsr"
-        : "=a" (csig->rev)
-        : "i" (MSR_AMD_PATCHLEVEL) : "ecx" );
+        return -EINVAL;
+    }
+
+    rdmsr(MSR_AMD_PATCHLEVEL, csig->rev, dummy);
 
     printk(KERN_INFO "microcode: collect_cpu_info: patch_id=0x%x\n",
            csig->rev);
@@ -74,29 +66,17 @@ static int collect_cpu_info(int cpu, str
     return 0;
 }
 
-static int get_matching_microcode(void *mc, int cpu)
+static int microcode_fits(void *mc, int cpu)
 {
     struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
     struct microcode_header_amd *mc_header = mc;
-    unsigned long total_size = get_totalsize(mc_header);
-    void *new_mc;
     unsigned int current_cpu_id;
-    unsigned int equiv_cpu_id = 0x00;
+    unsigned int equiv_cpu_id = 0x0;
     unsigned int i;
 
     /* We should bind the task to the CPU */
     BUG_ON(cpu != raw_smp_processor_id());
 
-    /* This is a tricky part. We might be called from a write operation
-     * to the device file instead of the usual process of firmware
-     * loading. This routine needs to be able to distinguish both
-     * cases. This is done by checking if there already is a equivalent
-     * CPU table installed. If not, we're written through
-     * /dev/cpu/microcode.
-     * Since we ignore all checks. The error case in which going through
-     * firmware loading and that table is not loaded has already been
-     * checked earlier.
-     */
     if ( equiv_cpu_table == NULL )
     {
         printk(KERN_INFO "microcode: CPU%d microcode update with "
@@ -111,7 +91,7 @@ static int get_matching_microcode(void *
     {
         if ( current_cpu_id == equiv_cpu_table[i].installed_cpu )
         {
-            equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
+            equiv_cpu_id = equiv_cpu_table[i].equiv_cpu & 0xffff;
             break;
         }
     }
@@ -119,171 +99,136 @@ static int get_matching_microcode(void *
     if ( !equiv_cpu_id )
     {
         printk(KERN_ERR "microcode: CPU%d cpu_id "
-               "not found in equivalent cpu table \n", cpu);
-        return 0;
-    }
-
-    if ( (mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff) )
-    {
-        printk(KERN_INFO
-               "microcode: CPU%d patch does not match "
-               "(patch is %x, cpu extended is %x) \n",
-               cpu, mc_header->processor_rev_id[0],
-               (equiv_cpu_id & 0xff));
-        return 0;
-    }
-
-    if ( (mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff) )
+               "not found in equivalent cpu table\n", cpu);
+        return -EINVAL;
+    }
+
+    if ( (mc_header->processor_rev_id) != equiv_cpu_id )
     {
         printk(KERN_INFO "microcode: CPU%d patch does not match "
                "(patch is %x, cpu base id is %x) \n",
-               cpu, mc_header->processor_rev_id[1],
-               ((equiv_cpu_id >> 16) & 0xff));
-        return 0;
+               cpu, mc_header->processor_rev_id, equiv_cpu_id);
+        return -EINVAL;
     }
 
     if ( mc_header->patch_id <= uci->cpu_sig.rev )
-        return 0;
+        return -EINVAL;
 
     printk(KERN_INFO "microcode: CPU%d found a matching microcode "
            "update with version 0x%x (current=0x%x)\n",
            cpu, mc_header->patch_id, uci->cpu_sig.rev);
 
- out:
-    new_mc = xmalloc_bytes(UCODE_MAX_SIZE);
-    if ( new_mc == NULL )
-    {
-        printk(KERN_ERR "microcode: error, can't allocate memory\n");
-        return -ENOMEM;
-    }
-    memset(new_mc, 0, UCODE_MAX_SIZE);
-
-    /* free previous update file */
-    xfree(uci->mc.mc_amd);
-
-    memcpy(new_mc, mc, total_size);
-
-    uci->mc.mc_amd = new_mc;
-    return 1;
+out:
+    return 0;
 }
 
 static int apply_microcode(int cpu)
 {
     unsigned long flags;
-    uint32_t eax, edx, rev;
-    int cpu_num = raw_smp_processor_id();
-    struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-    uint64_t addr;
+    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+    uint32_t rev, dummy;
+    struct microcode_amd *mc_amd = uci->mc.mc_amd;
 
     /* We should bind the task to the CPU */
-    BUG_ON(cpu_num != cpu);
-
-    if ( uci->mc.mc_amd == NULL )
+    BUG_ON(raw_smp_processor_id() != cpu);
+
+    if ( mc_amd == NULL )
         return -EINVAL;
 
     spin_lock_irqsave(&microcode_update_lock, flags);
 
-    addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code;
-    edx = (uint32_t)(addr >> 32);
-    eax = (uint32_t)addr;
-
-    asm volatile (
-        "movl %0, %%ecx; wrmsr" :
-        : "i" (MSR_AMD_PATCHLOADER), "a" (eax), "d" (edx) : "ecx" );
+    wrmsrl(MSR_AMD_PATCHLOADER, (unsigned long)&mc_amd->hdr.data_code);
 
     /* get patch id after patching */
-    asm volatile (
-        "movl %1, %%ecx; rdmsr"
-        : "=a" (rev)
-        : "i" (MSR_AMD_PATCHLEVEL) : "ecx");
+    rdmsr(MSR_AMD_PATCHLEVEL, rev, dummy);
 
     spin_unlock_irqrestore(&microcode_update_lock, flags);
 
     /* check current patch id and patch's id for match */
-    if ( rev != uci->mc.mc_amd->hdr.patch_id )
+    if ( rev != mc_amd->hdr.patch_id )
     {
         printk(KERN_ERR "microcode: CPU%d update from revision "
-               "0x%x to 0x%x failed\n", cpu_num,
-               uci->mc.mc_amd->hdr.patch_id, rev);
+               "0x%x to 0x%x failed\n", cpu,
+               mc_amd->hdr.patch_id, rev);
         return -EIO;
     }
 
     printk("microcode: CPU%d updated from revision "
            "0x%x to 0x%x \n",
-           cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id);
+           cpu, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
 
     uci->cpu_sig.rev = rev;
 
     return 0;
 }
 
-static long get_next_ucode_from_buffer_amd(void **mc, const void *buf,
-                                           unsigned long size, long offset)
+static int get_next_ucode_from_buffer_amd(void *mc, const void *buf,
+                                         size_t size, unsigned long *offset)
 {
     struct microcode_header_amd *mc_header;
-    unsigned long total_size;
-    const uint8_t *buf_pos = buf;
+    size_t total_size;
+    const uint8_t *bufp = buf;
+    unsigned long off;
+
+    off = *offset;
 
     /* No more data */
-    if ( offset >= size )
-        return 0;
-
-    if ( buf_pos[offset] != UCODE_UCODE_TYPE )
+    if ( off >= size )
+        return 1;
+
+    if ( bufp[off] != UCODE_UCODE_TYPE )
     {
         printk(KERN_ERR "microcode: error! "
                "Wrong microcode payload type field\n");
         return -EINVAL;
     }
 
-    mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]);
-
-    total_size = (unsigned long) (buf_pos[offset+4] +
-                                  (buf_pos[offset+5] << 8));
+    mc_header = (struct microcode_header_amd *)(&bufp[off+8]);
+
+    total_size = (unsigned long) (bufp[off+4] + (bufp[off+5] << 8));
 
     printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n",
-           size, total_size, offset);
-
-    if ( (offset + total_size) > size )
+           (unsigned long)size, total_size, off);
+
+    if ( (off + total_size) > size )
     {
         printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
         return -EINVAL;
     }
 
-    *mc = xmalloc_bytes(UCODE_MAX_SIZE);
-    if ( *mc == NULL )
-    {
-        printk(KERN_ERR "microcode: error! "
-               "Can not allocate memory for microcode patch\n");
-        return -ENOMEM;
-    }
-
-    memset(*mc, 0, UCODE_MAX_SIZE);
-    memcpy(*mc, (const void *)(buf + offset + 8), total_size);
-
-    return offset + total_size + 8;
-}
-
-static long install_equiv_cpu_table(const void *buf,
-                                    uint32_t size, long offset)
+    memset(mc, 0, UCODE_MAX_SIZE);
+    memcpy(mc, (const void *)(&bufp[off + 8]), total_size);
+
+    *offset = off + total_size + 8;
+
+    return 0;
+}
+
+static int install_equiv_cpu_table(const void *buf, uint32_t size,
+                                   unsigned long *offset)
 {
     const uint32_t *buf_pos = buf;
+    unsigned long off;
+
+    off = *offset;
+    *offset = 0;
 
     /* No more data */
-    if ( offset >= size )
-        return 0;
+    if ( off >= size )
+        return -EINVAL;
 
     if ( buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE )
     {
         printk(KERN_ERR "microcode: error! "
-               "Wrong microcode equivalnet cpu table type field\n");
-        return 0;
+               "Wrong microcode equivalent cpu table type field\n");
+        return -EINVAL;
     }
 
     if ( size == 0 )
     {
         printk(KERN_ERR "microcode: error! "
                "Wrong microcode equivalnet cpu table length\n");
-        return 0;
+        return -EINVAL;
     }
 
     equiv_cpu_table = xmalloc_bytes(size);
@@ -291,20 +236,24 @@ static long install_equiv_cpu_table(cons
     {
         printk(KERN_ERR "microcode: error, can't allocate "
                "memory for equiv CPU table\n");
-        return 0;
+        return -ENOMEM;
     }
 
     memset(equiv_cpu_table, 0, size);
     memcpy(equiv_cpu_table, (const void *)&buf_pos[3], size);
 
-    return size + 12; /* add header length */
+    *offset = size + 12;       /* add header length */
+
+    return 0;
 }
 
 static int cpu_request_microcode(int cpu, const void *buf, size_t size)
 {
     const uint32_t *buf_pos;
-    long offset = 0;
+    unsigned long offset = 0;
     int error = 0;
+    int ret;
+    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
     void *mc;
 
     /* We should bind the task to the CPU */
@@ -319,41 +268,63 @@ static int cpu_request_microcode(int cpu
         return -EINVAL;
     }
 
-    offset = install_equiv_cpu_table(buf, (uint32_t)(buf_pos[2]), offset);
-    if ( !offset )
+    error = install_equiv_cpu_table(buf, (uint32_t)(buf_pos[2]), &offset);
+    if ( error )
     {
         printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
         return -EINVAL;
     }
 
-    while ( (offset =
-             get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0 )
-    {
-        error = get_matching_microcode(mc, cpu);
-        if ( error < 0 )
+    mc = xmalloc_bytes(UCODE_MAX_SIZE);
+    if ( mc == NULL )
+    {
+        printk(KERN_ERR "microcode: error! "
+               "Can not allocate memory for microcode patch\n");
+        error = -ENOMEM;
+        goto out;
+    }
+
+    /* implicitely validates uci->mc.mc_valid */
+    uci->mc.mc_amd = mc;
+
+    /*
+     * It's possible the data file has multiple matching ucode,
+     * lets keep searching till the latest version
+     */
+    while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) == 0)
+    {
+        error = microcode_fits(mc, cpu);
+        if (error != 0)
+            continue;
+
+        error = apply_microcode(cpu);
+        if (error == 0)
             break;
-        /*
-         * It's possible the data file has multiple matching ucode,
-         * lets keep searching till the latest version
-         */
-        if ( error == 1 )
-            error = apply_microcode(cpu);
+    }
+
+    /* On success keep the microcode patch for
+     * re-apply on resume.
+     */
+    if (error) {
         xfree(mc);
-    }
-    if ( offset > 0 )
-    {
-        xfree(mc);
-        xfree(equiv_cpu_table);
-        equiv_cpu_table = NULL;
-    }
-    if ( offset < 0 )
-        error = offset;
+        mc = NULL;
+    }
+    uci->mc.mc_amd = mc;
+
+out:
+    xfree(equiv_cpu_table);
+    equiv_cpu_table = NULL;
 
     return error;
 }
 
+static int microcode_resume_match(int cpu, struct cpu_signature *nsig)
+{
+    return 0;
+}
+
 static struct microcode_ops microcode_amd_ops = {
-    .get_matching_microcode           = get_matching_microcode,
+    .microcode_resume_match           = microcode_resume_match,
     .cpu_request_microcode            = cpu_request_microcode,
     .collect_cpu_info                 = collect_cpu_info,
     .apply_microcode                  = apply_microcode,
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/microcode_intel.c
--- a/xen/arch/x86/microcode_intel.c    Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/microcode_intel.c    Wed Jan 28 13:06:45 2009 +0900
@@ -64,6 +64,8 @@ static int collect_cpu_info(int cpu_num,
     struct cpuinfo_x86 *c = &cpu_data[cpu_num];
     unsigned int val[2];
 
+    BUG_ON(cpu_num != smp_processor_id());
+
     memset(csig, 0, sizeof(*csig));
 
     if ( (c->x86_vendor != X86_VENDOR_INTEL) || (c->x86 < 6) ||
@@ -323,6 +325,7 @@ static int cpu_request_microcode(int cpu
     long offset = 0;
     int error = 0;
     void *mc;
+    unsigned int matching_count = 0;
 
     /* We should bind the task to the CPU */
     BUG_ON(cpu != raw_smp_processor_id());
@@ -341,7 +344,7 @@ static int cpu_request_microcode(int cpu
          */
         if ( error == 1 )
         {
-            apply_microcode(cpu);
+            matching_count++;
             error = 0;
         }
         xfree(mc);
@@ -351,11 +354,22 @@ static int cpu_request_microcode(int cpu
     if ( offset < 0 )
         error = offset;
 
+    if ( !error && matching_count )
+        apply_microcode(cpu);
+
     return error;
 }
 
+static int microcode_resume_match(int cpu, struct cpu_signature *nsig)
+{
+    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+    return (sigmatch(nsig->sig, uci->cpu_sig.sig, nsig->pf, uci->cpu_sig.pf) &&
+            (uci->cpu_sig.rev > nsig->rev));
+}
+
 static struct microcode_ops microcode_intel_ops = {
-    .get_matching_microcode           = get_matching_microcode,
+    .microcode_resume_match           = microcode_resume_match,
     .cpu_request_microcode            = cpu_request_microcode,
     .collect_cpu_info                 = collect_cpu_info,
     .apply_microcode                  = apply_microcode,
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/mm.c Wed Jan 28 13:06:45 2009 +0900
@@ -205,11 +205,6 @@ void __init init_frametable(void)
     }
 
     memset(frame_table, 0, nr_pages << PAGE_SHIFT);
-
-#if defined(__x86_64__)
-    for ( i = 0; i < max_page; i ++ )
-        spin_lock_init(&frame_table[i].lock);
-#endif
 }
 
 void __init arch_init_memory(void)
@@ -290,15 +285,16 @@ void __init arch_init_memory(void)
     subarch_init_memory();
 }
 
-int memory_is_conventional_ram(paddr_t p)
-{
+int page_is_conventional_ram(unsigned long mfn)
+{
+    uint64_t maddr = pfn_to_paddr(mfn);
     int i;
 
     for ( i = 0; i < e820.nr_map; i++ )
     {
         if ( (e820.map[i].type == E820_RAM) &&
-             (e820.map[i].addr <= p) &&
-             (e820.map[i].size > p) )
+             (e820.map[i].addr <= maddr) &&
+             ((e820.map[i].addr + e820.map[i].size) >= (maddr + PAGE_SIZE)) )
             return 1;
     }
 
@@ -329,7 +325,7 @@ void share_xen_page_with_guest(
 
     page_set_owner(page, d);
     wmb(); /* install valid domain ptr before updating refcnt. */
-    ASSERT(page->count_info == 0);
+    ASSERT((page->count_info & ~PGC_xen_heap) == 0);
 
     /* Only add to the allocation list if the domain isn't dying. */
     if ( !d->is_dying )
@@ -738,8 +734,8 @@ get_page_from_l1e(
     else if ( pte_flags_to_cacheattr(l1f) !=
               ((page->count_info >> PGC_cacheattr_base) & 7) )
     {
-        uint32_t x, nx, y = page->count_info;
-        uint32_t cacheattr = pte_flags_to_cacheattr(l1f);
+        unsigned long x, nx, y = page->count_info;
+        unsigned long cacheattr = pte_flags_to_cacheattr(l1f);
 
         if ( is_xen_heap_page(page) )
         {
@@ -1013,7 +1009,8 @@ static int put_page_from_l2e(l2_pgentry_
     {
         unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
         int writeable = l2e_get_flags(l2e) & _PAGE_RW;
-        ASSERT(opt_allow_hugepage && !(mfn & (L1_PAGETABLE_ENTRIES-1)));
+
+        ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
         do {
             put_data_page(mfn_to_page(m), writeable);
         } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
@@ -1031,14 +1028,28 @@ static int put_page_from_l3e(l3_pgentry_
 static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
                              int partial, int preemptible)
 {
-    if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && 
-         (l3e_get_pfn(l3e) != pfn) )
-    {
-        if ( unlikely(partial > 0) )
-            return __put_page_type(l3e_get_page(l3e), preemptible);
-        return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
-    }
-    return 1;
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
+        return 1;
+
+#ifdef __x86_64__
+    if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) )
+    {
+        unsigned long mfn = l3e_get_pfn(l3e);
+        int writeable = l3e_get_flags(l3e) & _PAGE_RW;
+
+        ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)));
+        do {
+            put_data_page(mfn_to_page(mfn), writeable);
+        } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) );
+
+        return 0;
+    }
+#endif
+
+    if ( unlikely(partial > 0) )
+        return __put_page_type(l3e_get_page(l3e), preemptible);
+
+    return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
 }
 
 #if CONFIG_PAGING_LEVELS >= 4
@@ -1523,24 +1534,31 @@ static int free_l4_table(struct page_inf
 #define free_l4_table(page, preemptible) (-EINVAL)
 #endif
 
-static void page_lock(struct page_info *page)
-{
-#if defined(__i386__)
-    while ( unlikely(test_and_set_bit(_PGC_locked, &page->count_info)) )
-        while ( test_bit(_PGC_locked, &page->count_info) )
+static int page_lock(struct page_info *page)
+{
+    unsigned long x, nx;
+
+    do {
+        while ( (x = page->u.inuse.type_info) & PGT_locked )
             cpu_relax();
-#else
-    spin_lock(&page->lock);
-#endif
+        nx = x + (1 | PGT_locked);
+        if ( !(x & PGT_validated) ||
+             !(x & PGT_count_mask) ||
+             !(nx & PGT_count_mask) )
+            return 0;
+    } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
+
+    return 1;
 }
 
 static void page_unlock(struct page_info *page)
 {
-#if defined(__i386__)
-    clear_bit(_PGC_locked, &page->count_info);
-#else
-    spin_unlock(&page->lock);
-#endif
+    unsigned long x, nx, y = page->u.inuse.type_info;
+
+    do {
+        x = y;
+        nx = x - (1 | PGT_locked);
+    } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
 }
 
 /* How to write an entry to the guest pagetables.
@@ -1603,19 +1621,15 @@ static int mod_l1_entry(l1_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     unsigned long mfn;
-    struct page_info *l1pg = mfn_to_page(gl1mfn);
     p2m_type_t p2mt;
     int rc = 1;
 
-    page_lock(l1pg);
-
     if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
-        return page_unlock(l1pg), 0;
+        return 0;
 
     if ( unlikely(paging_mode_refcounts(d)) )
     {
         rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
-        page_unlock(l1pg);
         return rc;
     }
 
@@ -1624,13 +1638,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
         /* Translate foreign guest addresses. */
         mfn = mfn_x(gfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e), &p2mt));
         if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) )
-            return page_unlock(l1pg), 0;
+            return 0;
         ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
         nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
 
         if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
         {
-            page_unlock(l1pg);
             MEM_LOG("Bad L1 flags %x",
                     l1e_get_flags(nl1e) & l1_disallow_mask(d));
             return 0;
@@ -1642,12 +1655,11 @@ static int mod_l1_entry(l1_pgentry_t *pl
             adjust_guest_l1e(nl1e, d);
             rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
                               preserve_ad);
-            page_unlock(l1pg);
             return rc;
         }
 
         if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
-            return page_unlock(l1pg), 0;
+            return 0;
         
         adjust_guest_l1e(nl1e, d);
         if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
@@ -1660,11 +1672,9 @@ static int mod_l1_entry(l1_pgentry_t *pl
     else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
                                      preserve_ad)) )
     {
-        page_unlock(l1pg);
         return 0;
     }
 
-    page_unlock(l1pg);
     put_page_from_l1e(ol1e, d);
     return rc;
 }
@@ -1674,13 +1684,13 @@ static int mod_l2_entry(l2_pgentry_t *pl
 static int mod_l2_entry(l2_pgentry_t *pl2e, 
                         l2_pgentry_t nl2e, 
                         unsigned long pfn,
-                        unsigned long type,
                         int preserve_ad)
 {
     l2_pgentry_t ol2e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     struct page_info *l2pg = mfn_to_page(pfn);
+    unsigned long type = l2pg->u.inuse.type_info;
     int rc = 1;
 
     if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
@@ -1689,16 +1699,13 @@ static int mod_l2_entry(l2_pgentry_t *pl
         return 0;
     }
 
-    page_lock(l2pg);
-
     if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
-        return page_unlock(l2pg), 0;
+        return 0;
 
     if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
     {
         if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
         {
-            page_unlock(l2pg);
             MEM_LOG("Bad L2 flags %x",
                     l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
             return 0;
@@ -1709,12 +1716,11 @@ static int mod_l2_entry(l2_pgentry_t *pl
         {
             adjust_guest_l2e(nl2e, d);
             rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
-            page_unlock(l2pg);
             return rc;
         }
 
         if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
-            return page_unlock(l2pg), 0;
+            return 0;
 
         adjust_guest_l2e(nl2e, d);
         if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
@@ -1727,11 +1733,9 @@ static int mod_l2_entry(l2_pgentry_t *pl
     else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
                                      preserve_ad)) )
     {
-        page_unlock(l2pg);
         return 0;
     }
 
-    page_unlock(l2pg);
     put_page_from_l2e(ol2e, pfn);
     return rc;
 }
@@ -1746,7 +1750,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
     l3_pgentry_t ol3e;
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
-    struct page_info *l3pg = mfn_to_page(pfn);
     int rc = 0;
 
     if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
@@ -1762,16 +1765,13 @@ static int mod_l3_entry(l3_pgentry_t *pl
     if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
         return -EINVAL;
 
-    page_lock(l3pg);
-
     if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
-        return page_unlock(l3pg), -EFAULT;
+        return -EFAULT;
 
     if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
     {
         if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
         {
-            page_unlock(l3pg);
             MEM_LOG("Bad L3 flags %x",
                     l3e_get_flags(nl3e) & l3_disallow_mask(d));
             return -EINVAL;
@@ -1782,13 +1782,12 @@ static int mod_l3_entry(l3_pgentry_t *pl
         {
             adjust_guest_l3e(nl3e, d);
             rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
-            page_unlock(l3pg);
             return rc ? 0 : -EFAULT;
         }
 
         rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
         if ( unlikely(rc < 0) )
-            return page_unlock(l3pg), rc;
+            return rc;
         rc = 0;
 
         adjust_guest_l3e(nl3e, d);
@@ -1802,7 +1801,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
     else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
                                      preserve_ad)) )
     {
-        page_unlock(l3pg);
         return -EFAULT;
     }
 
@@ -1814,7 +1812,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
         pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
     }
 
-    page_unlock(l3pg);
     put_page_from_l3e(ol3e, pfn, 0, 0);
     return rc;
 }
@@ -1831,7 +1828,6 @@ static int mod_l4_entry(l4_pgentry_t *pl
     struct vcpu *curr = current;
     struct domain *d = curr->domain;
     l4_pgentry_t ol4e;
-    struct page_info *l4pg = mfn_to_page(pfn);
     int rc = 0;
 
     if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
@@ -1840,16 +1836,13 @@ static int mod_l4_entry(l4_pgentry_t *pl
         return -EINVAL;
     }
 
-    page_lock(l4pg);
-
     if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
-        return page_unlock(l4pg), -EFAULT;
+        return -EFAULT;
 
     if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
     {
         if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
         {
-            page_unlock(l4pg);
             MEM_LOG("Bad L4 flags %x",
                     l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
             return -EINVAL;
@@ -1860,13 +1853,12 @@ static int mod_l4_entry(l4_pgentry_t *pl
         {
             adjust_guest_l4e(nl4e, d);
             rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
-            page_unlock(l4pg);
             return rc ? 0 : -EFAULT;
         }
 
         rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
         if ( unlikely(rc < 0) )
-            return page_unlock(l4pg), rc;
+            return rc;
         rc = 0;
 
         adjust_guest_l4e(nl4e, d);
@@ -1880,11 +1872,9 @@ static int mod_l4_entry(l4_pgentry_t *pl
     else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
                                      preserve_ad)) )
     {
-        page_unlock(l4pg);
         return -EFAULT;
     }
 
-    page_unlock(l4pg);
     put_page_from_l4e(ol4e, pfn, 0, 0);
     return rc;
 }
@@ -1893,7 +1883,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
 
 void put_page(struct page_info *page)
 {
-    u32 nx, x, y = page->count_info;
+    unsigned long nx, x, y = page->count_info;
 
     do {
         x  = y;
@@ -1911,36 +1901,30 @@ void put_page(struct page_info *page)
 
 int get_page(struct page_info *page, struct domain *domain)
 {
-    u32 x, nx, y = page->count_info;
-    u32 d, nd = page->u.inuse._domain;
-    u32 _domain = pickle_domptr(domain);
+    unsigned long x, y = page->count_info;
 
     do {
-        x  = y;
-        nx = x + 1;
-        d  = nd;
+        x = y;
         if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
              /* Keep one spare reference to be acquired by get_page_light(). */
-             unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */
-             unlikely(d != _domain) )                /* Wrong owner? */
-        {
-            if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
-                gdprintk(XENLOG_INFO,
-                         "Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
-                         PRtype_info "\n",
-                         page_to_mfn(page), domain, unpickle_domptr(d),
-                         x, page->u.inuse.type_info);
-            return 0;
-        }
-        asm volatile (
-            LOCK_PREFIX "cmpxchg8b %2"
-            : "=d" (nd), "=a" (y),
-            "=m" (*(volatile u64 *)(&page->count_info))
-            : "0" (d), "1" (x), "c" (d), "b" (nx) );
-    }
-    while ( unlikely(nd != d) || unlikely(y != x) );
-
-    return 1;
+             unlikely(((x + 2) & PGC_count_mask) <= 1) ) /* Overflow? */
+            goto fail;
+    }
+    while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x );
+
+    if ( likely(page_get_owner(page) == domain) )
+        return 1;
+
+    put_page(page);
+
+ fail:
+    if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
+        gdprintk(XENLOG_INFO,
+                 "Error pfn %lx: rd=%p, od=%p, caf=%08lx, taf=%"
+                 PRtype_info "\n",
+                 page_to_mfn(page), domain, page_get_owner(page),
+                 y, page->u.inuse.type_info);
+    return 0;
 }
 
 /*
@@ -1953,7 +1937,7 @@ int get_page(struct page_info *page, str
  */
 static void get_page_light(struct page_info *page)
 {
-    u32 x, nx, y = page->count_info;
+    unsigned long x, nx, y = page->count_info;
 
     do {
         x  = y;
@@ -1994,7 +1978,7 @@ static int alloc_page_type(struct page_i
         rc = alloc_segdesc_page(page);
         break;
     default:
-        printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", 
+        printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%lx\n", 
                type, page->u.inuse.type_info,
                page->count_info);
         rc = -EINVAL;
@@ -2018,7 +2002,7 @@ static int alloc_page_type(struct page_i
     {
         ASSERT(rc < 0);
         MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
-                PRtype_info ": caf=%08x taf=%" PRtype_info,
+                PRtype_info ": caf=%08lx taf=%" PRtype_info,
                 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
                 type, page->count_info, page->u.inuse.type_info);
         page->u.inuse.type_info = 0;
@@ -2949,7 +2933,6 @@ int do_mmu_update(
     unsigned int cmd, done = 0;
     struct vcpu *v = current;
     struct domain *d = v->domain;
-    unsigned long type_info;
     struct domain_mmap_cache mapcache;
 
     if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
@@ -3021,24 +3004,9 @@ int do_mmu_update(
                           (unsigned long)(req.ptr & ~PAGE_MASK));
             page = mfn_to_page(mfn);
 
-            switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
+            if ( page_lock(page) )
             {
-            case PGT_l1_page_table:
-            case PGT_l2_page_table:
-            case PGT_l3_page_table:
-            case PGT_l4_page_table:
-            {
-                if ( paging_mode_refcounts(d) )
-                {
-                    MEM_LOG("mmu update on auto-refcounted domain!");
-                    break;
-                }
-
-                if ( unlikely(!get_page_type(
-                    page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
-                    goto not_a_pt;
-
-                switch ( type_info & PGT_type_mask )
+                switch ( page->u.inuse.type_info & PGT_type_mask )
                 {
                 case PGT_l1_page_table:
                 {
@@ -3050,7 +3018,7 @@ int do_mmu_update(
                 case PGT_l2_page_table:
                 {
                     l2_pgentry_t l2e = l2e_from_intpte(req.val);
-                    okay = mod_l2_entry(va, l2e, mfn, type_info,
+                    okay = mod_l2_entry(va, l2e, mfn,
                                         cmd == MMU_PT_UPDATE_PRESERVE_AD);
                 }
                 break;
@@ -3072,31 +3040,23 @@ int do_mmu_update(
                 }
                 break;
 #endif
+                case PGT_writable_page:
+                    perfc_incr(writable_mmu_updates);
+                    okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
+                    break;
                 }
-
-                put_page_type(page);
+                page_unlock(page);
                 if ( rc == -EINTR )
                     rc = -EAGAIN;
             }
-            break;
-
-            default:
-            not_a_pt:
+            else if ( get_page_type(page, PGT_writable_page) )
             {
-                if ( unlikely(!get_page_type(page, PGT_writable_page)) )
-                    break;
-
                 perfc_incr(writable_mmu_updates);
-
                 okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
-
                 put_page_type(page);
             }
-            break;
-            }
 
             unmap_domain_page_with_cache(va, &mapcache);
-
             put_page(page);
             break;
 
@@ -3175,7 +3135,6 @@ static int create_grant_pte_mapping(
     void *va;
     unsigned long gmfn, mfn;
     struct page_info *page;
-    u32 type;
     l1_pgentry_t ol1e;
     struct domain *d = v->domain;
 
@@ -3196,21 +3155,23 @@ static int create_grant_pte_mapping(
     va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK));
     page = mfn_to_page(mfn);
 
-    type = page->u.inuse.type_info & PGT_type_mask;
-    if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
-    {
-        MEM_LOG("Grant map attempted to update a non-L1 page");
+    if ( !page_lock(page) )
+    {
         rc = GNTST_general_error;
         goto failed;
     }
 
-    page_lock(page);
+    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
 
     ol1e = *(l1_pgentry_t *)va;
     if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
     {
         page_unlock(page);
-        put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     } 
@@ -3220,8 +3181,6 @@ static int create_grant_pte_mapping(
     if ( !paging_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
 
-    put_page_type(page);
- 
  failed:
     unmap_domain_page(va);
     put_page(page);
@@ -3236,7 +3195,6 @@ static int destroy_grant_pte_mapping(
     void *va;
     unsigned long gmfn, mfn;
     struct page_info *page;
-    u32 type;
     l1_pgentry_t ol1e;
 
     gmfn = addr >> PAGE_SHIFT;
@@ -3252,15 +3210,18 @@ static int destroy_grant_pte_mapping(
     va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK));
     page = mfn_to_page(mfn);
 
-    type = page->u.inuse.type_info & PGT_type_mask;
-    if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
-    {
-        MEM_LOG("Grant map attempted to update a non-L1 page");
+    if ( !page_lock(page) )
+    {
         rc = GNTST_general_error;
         goto failed;
     }
 
-    page_lock(page);
+    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(page);
+        rc = GNTST_general_error;
+        goto failed;
+    }
 
     ol1e = *(l1_pgentry_t *)va;
     
@@ -3270,7 +3231,6 @@ static int destroy_grant_pte_mapping(
         page_unlock(page);
         MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
                 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
-        put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     }
@@ -3284,13 +3244,11 @@ static int destroy_grant_pte_mapping(
     {
         page_unlock(page);
         MEM_LOG("Cannot delete PTE entry at %p", va);
-        put_page_type(page);
         rc = GNTST_general_error;
         goto failed;
     }
 
     page_unlock(page);
-    put_page_type(page);
 
  failed:
     unmap_domain_page(va);
@@ -3318,21 +3276,40 @@ static int create_grant_va_mapping(
         MEM_LOG("Could not find L1 PTE for address %lx", va);
         return GNTST_general_error;
     }
+
+    if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+    {
+        guest_unmap_l1e(v, pl1e);
+        return GNTST_general_error;
+    }
+
     l1pg = mfn_to_page(gl1mfn);
-    page_lock(l1pg);
+    if ( !page_lock(l1pg) )
+    {
+        put_page(l1pg);
+        guest_unmap_l1e(v, pl1e);
+        return GNTST_general_error;
+    }
+
+    if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(l1pg);
+        put_page(l1pg);
+        guest_unmap_l1e(v, pl1e);
+        return GNTST_general_error;
+    }
+
     ol1e = *pl1e;
     okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
+
     page_unlock(l1pg);
+    put_page(l1pg);
     guest_unmap_l1e(v, pl1e);
-    pl1e = NULL;
-
-    if ( !okay )
-            return GNTST_general_error;
-
-    if ( !paging_mode_refcounts(d) )
+
+    if ( okay && !paging_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
 
-    return GNTST_okay;
+    return okay ? GNTST_okay : GNTST_general_error;
 }
 
 static int replace_grant_va_mapping(
@@ -3350,31 +3327,48 @@ static int replace_grant_va_mapping(
         return GNTST_general_error;
     }
 
+    if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+    {
+        rc = GNTST_general_error;
+        goto out;
+    }
+
     l1pg = mfn_to_page(gl1mfn);
-    page_lock(l1pg);
+    if ( !page_lock(l1pg) )
+    {
+        rc = GNTST_general_error;
+        put_page(l1pg);
+        goto out;
+    }
+
+    if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        rc = GNTST_general_error;
+        goto unlock_and_out;
+    }
+
     ol1e = *pl1e;
 
     /* Check that the virtual address supplied is actually mapped to frame. */
     if ( unlikely(l1e_get_pfn(ol1e) != frame) )
     {
-        page_unlock(l1pg);
         MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
                 l1e_get_pfn(ol1e), addr, frame);
         rc = GNTST_general_error;
-        goto out;
+        goto unlock_and_out;
     }
 
     /* Delete pagetable entry. */
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
     {
-        page_unlock(l1pg);
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         rc = GNTST_general_error;
-        goto out;
-    }
-
+        goto unlock_and_out;
+    }
+
+ unlock_and_out:
     page_unlock(l1pg);
-
+    put_page(l1pg);
  out:
     guest_unmap_l1e(v, pl1e);
     return rc;
@@ -3436,20 +3430,42 @@ int replace_grant_host_mapping(
         return GNTST_general_error;
     }
 
+    if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+    {
+        guest_unmap_l1e(curr, pl1e);
+        return GNTST_general_error;
+    }
+
     l1pg = mfn_to_page(gl1mfn);
-    page_lock(l1pg);
+    if ( !page_lock(l1pg) )
+    {
+        put_page(l1pg);
+        guest_unmap_l1e(curr, pl1e);
+        return GNTST_general_error;
+    }
+
+    if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(l1pg);
+        put_page(l1pg);
+        guest_unmap_l1e(curr, pl1e);
+        return GNTST_general_error;
+    }
+
     ol1e = *pl1e;
 
     if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(),
                                 gl1mfn, curr, 0)) )
     {
         page_unlock(l1pg);
+        put_page(l1pg);
         MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
         guest_unmap_l1e(curr, pl1e);
         return GNTST_general_error;
     }
 
     page_unlock(l1pg);
+    put_page(l1pg);
     guest_unmap_l1e(curr, pl1e);
 
     rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
@@ -3462,49 +3478,47 @@ int steal_page(
 int steal_page(
     struct domain *d, struct page_info *page, unsigned int memflags)
 {
-    u32 _d, _nd, x, y;
+    unsigned long x, y;
 
     spin_lock(&d->page_alloc_lock);
 
+    if ( is_xen_heap_page(page) || (page_get_owner(page) != d) )
+        goto fail;
+
     /*
-     * The tricky bit: atomically release ownership while there is just one 
-     * benign reference to the page (PGC_allocated). If that reference 
-     * disappears then the deallocation routine will safely spin.
+     * We require there is just one reference (PGC_allocated). We temporarily
+     * drop this reference now so that we can safely swizzle the owner.
      */
-    _d  = pickle_domptr(d);
-    _nd = page->u.inuse._domain;
-    y   = page->count_info;
+    y = page->count_info;
     do {
         x = y;
-        if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
-                      (1 | PGC_allocated)) || unlikely(_nd != _d) )
-        { 
-            MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
-                    " caf=%08x, taf=%" PRtype_info "\n", 
-                    (void *) page_to_mfn(page),
-                    d, d->domain_id, unpickle_domptr(_nd), x, 
-                    page->u.inuse.type_info);
-            spin_unlock(&d->page_alloc_lock);
-            return -1;
-        }
-        asm volatile (
-            LOCK_PREFIX "cmpxchg8b %2"
-            : "=d" (_nd), "=a" (y),
-            "=m" (*(volatile u64 *)(&page->count_info))
-            : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
-    } while (unlikely(_nd != _d) || unlikely(y != x));
-
-    /*
-     * Unlink from 'd'. At least one reference remains (now anonymous), so 
-     * noone else is spinning to try to delete this page from 'd'.
-     */
+        if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
+            goto fail;
+        y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask);
+    } while ( y != x );
+
+    /* Swizzle the owner then reinstate the PGC_allocated reference. */
+    page_set_owner(page, NULL);
+    y = page->count_info;
+    do {
+        x = y;
+        BUG_ON((x & (PGC_count_mask|PGC_allocated)) != PGC_allocated);
+    } while ( (y = cmpxchg(&page->count_info, x, x | 1)) != x );
+
+    /* Unlink from original owner. */
     if ( !(memflags & MEMF_no_refcount) )
         d->tot_pages--;
     list_del(&page->list);
 
     spin_unlock(&d->page_alloc_lock);
-
     return 0;
+
+ fail:
+    spin_unlock(&d->page_alloc_lock);
+    MEM_LOG("Bad page %p: ed=%p(%u), sd=%p, caf=%08lx, taf=%" PRtype_info,
+            (void *)page_to_mfn(page), d, d->domain_id,
+            page_get_owner(page), page->count_info, page->u.inuse.type_info);
+    return -1;
 }
 
 int do_update_va_mapping(unsigned long va, u64 val64,
@@ -3513,28 +3527,45 @@ int do_update_va_mapping(unsigned long v
     l1_pgentry_t   val = l1e_from_intpte(val64);
     struct vcpu   *v   = current;
     struct domain *d   = v->domain;
+    struct page_info *gl1pg;
     l1_pgentry_t  *pl1e;
     unsigned long  vmask, bmap_ptr, gl1mfn;
     cpumask_t      pmask;
-    int            rc  = 0;
+    int            rc;
 
     perfc_incr(calls_to_update_va);
-
-    if ( unlikely(!access_ok(va, 1) && !paging_mode_external(d)) )
-        return -EINVAL;
 
     rc = xsm_update_va_mapping(d, FOREIGNDOM, val);
     if ( rc )
         return rc;
 
+    rc = -EINVAL;
     pl1e = guest_map_l1e(v, va, &gl1mfn);
-
-    if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
-        rc = -EINVAL;
-
+    if ( unlikely(!pl1e || !get_page_from_pagenr(gl1mfn, d)) )
+        goto out;
+
+    gl1pg = mfn_to_page(gl1mfn);
+    if ( !page_lock(gl1pg) )
+    {
+        put_page(gl1pg);
+        goto out;
+    }
+
+    if ( (gl1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(gl1pg);
+        put_page(gl1pg);
+        goto out;
+    }
+
+    rc = mod_l1_entry(pl1e, val, gl1mfn, 0) ? 0 : -EINVAL;
+
+    page_unlock(gl1pg);
+    put_page(gl1pg);
+
+ out:
     if ( pl1e )
         guest_unmap_l1e(v, pl1e);
-    pl1e = NULL;
 
     process_deferred_ops();
 
@@ -3793,14 +3824,13 @@ long arch_memory_op(int op, XEN_GUEST_HA
 
             spin_unlock(&d->grant_table->lock);
             break;
-        case XENMAPSPACE_mfn:
-        {
-            if ( get_page_from_pagenr(xatp.idx, d) ) {
-                mfn = xatp.idx;
-                page = mfn_to_page(mfn);
-            }
+        case XENMAPSPACE_gmfn:
+            xatp.idx = gmfn_to_mfn(d, xatp.idx);
+            if ( !get_page_from_pagenr(xatp.idx, d) )
+                break;
+            mfn = xatp.idx;
+            page = mfn_to_page(mfn);
             break;
-        }
         default:
             break;
         }
@@ -3839,39 +3869,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
 
         if ( page )
             put_page(page);
-
-        rcu_unlock_domain(d);
-
-        break;
-    }
-
-    case XENMEM_remove_from_physmap:
-    {
-        struct xen_remove_from_physmap xrfp;
-        unsigned long mfn;
-        struct domain *d;
-
-        if ( copy_from_guest(&xrfp, arg, 1) )
-            return -EFAULT;
-
-        rc = rcu_lock_target_domain_by_id(xrfp.domid, &d);
-        if ( rc != 0 )
-            return rc;
-
-        if ( xsm_remove_from_physmap(current->domain, d) )
-        {
-            rcu_unlock_domain(d);
-            return -EPERM;
-        }
-
-        domain_lock(d);
-
-        mfn = gmfn_to_mfn(d, xrfp.gpfn);
-
-        if ( mfn_valid(mfn) )
-            guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0);
-
-        domain_unlock(d);
 
         rcu_unlock_domain(d);
 
@@ -4245,15 +4242,25 @@ int ptwr_do_page_fault(struct vcpu *v, u
 
     /* Attempt to read the PTE that maps the VA being accessed. */
     guest_get_eff_l1e(v, addr, &pte);
-    page = l1e_get_page(pte);
 
     /* We are looking only for read-only mappings of p.t. pages. */
     if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
-         !mfn_valid(l1e_get_pfn(pte)) ||
-         ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
-         ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
-         (page_get_owner(page) != d) )
+         !get_page_from_pagenr(l1e_get_pfn(pte), d) )
         goto bail;
+
+    page = l1e_get_page(pte);
+    if ( !page_lock(page) )
+    {
+        put_page(page);
+        goto bail;
+    }
+
+    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(page);
+        put_page(page);
+        goto bail;
+    }
 
     ptwr_ctxt.ctxt.regs = regs;
     ptwr_ctxt.ctxt.force_writeback = 0;
@@ -4262,9 +4269,11 @@ int ptwr_do_page_fault(struct vcpu *v, u
     ptwr_ctxt.cr2 = addr;
     ptwr_ctxt.pte = pte;
 
-    page_lock(page);
     rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
+
     page_unlock(page);
+    put_page(page);
+
     if ( rc == X86EMUL_UNHANDLEABLE )
         goto bail;
 
@@ -4741,12 +4750,18 @@ void memguard_init(void)
 void memguard_init(void)
 {
     unsigned long start = max_t(unsigned long, xen_phys_start, 1UL << 20);
+#ifdef __i386__
     map_pages_to_xen(
         (unsigned long)__va(start),
         start >> PAGE_SHIFT,
         (xenheap_phys_end - start) >> PAGE_SHIFT,
         __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
-#ifdef __x86_64__
+#else
+    map_pages_to_xen(
+        (unsigned long)__va(start),
+        start >> PAGE_SHIFT,
+        (__pa(&_end) + PAGE_SIZE - 1 - start) >> PAGE_SHIFT,
+        __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
     BUG_ON(start != xen_phys_start);
     map_pages_to_xen(
         XEN_VIRT_START,
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/Makefile
--- a/xen/arch/x86/mm/Makefile  Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/mm/Makefile  Wed Jan 28 13:06:45 2009 +0900
@@ -7,5 +7,5 @@ obj-y += guest_walk_3.o
 obj-y += guest_walk_3.o
 obj-$(x86_64) += guest_walk_4.o
 
-guest_walk_%.o: guest_walk.c $(HDRS) Makefile
+guest_walk_%.o: guest_walk.c Makefile
        $(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/hap/Makefile
--- a/xen/arch/x86/mm/hap/Makefile      Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/mm/hap/Makefile      Wed Jan 28 13:06:45 2009 +0900
@@ -7,5 +7,5 @@ guest_levels  = $(subst level,,$(filter 
 guest_levels  = $(subst level,,$(filter %level,$(subst ., ,$(subst _, ,$(1)))))
 guest_walk_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1))
 
-guest_walk_%level.o: guest_walk.c $(HDRS) Makefile
+guest_walk_%level.o: guest_walk.c Makefile
        $(CC) $(CFLAGS) $(call guest_walk_defns,$(@F)) -c $< -o $@
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/mm/hap/hap.c Wed Jan 28 13:06:45 2009 +0900
@@ -166,7 +166,7 @@ void hap_free_p2m_page(struct domain *d,
     ASSERT(page_get_owner(pg) == d);
     /* Should have just the one ref we gave it in alloc_p2m_page() */
     if ( (pg->count_info & PGC_count_mask) != 1 )
-        HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+        HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
                   pg->count_info, pg->u.inuse.type_info);
     pg->count_info = 0;
     /* Free should not decrement domain's total allocation, since
diff -r 4fd4dcf2f891 -r 79f259a26a11 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Wed Jan 28 12:22:58 2009 +0900
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Wed Jan 28 13:06:45 2009 +0900
@@ -66,6 +66,7 @@ static int ept_set_middle_entry(struct d
     list_add_tail(&pg->list, &d->arch.p2m->pages);
 
     ept_entry->emt = 0;
+    ept_entry->igmt = 0;
     ept_entry->sp_avail = 0;
     ept_entry->avail1 = 0;
     ept_entry->mfn = page_to_mfn(pg);
@@ -114,9 +115,13 @@ static int ept_next_level(struct domain 
     }
 }
 
+/*
+ * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself,
+ * by observing whether any gfn->mfn translations are modified.
+ */
 static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
-              unsigned int order, p2m_type_t p2mt)
+_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
+              unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table)
 {
     ept_entry_t *table = NULL;
     unsigned long gfn_remainder = gfn, offset = 0;
@@ -124,6 +129,8 @@ ept_set_entry(struct domain *d, unsigned
     u32 index;
     int i, rv = 0, ret = 0;
     int walk_level = order / EPT_TABLE_ORDER;
+    int direct_mmio = (p2mt == p2m_mmio_direct);
+    uint8_t igmt = 0;
 
     /* we only support 4k and 2m pages now */
 
@@ -157,7 +164,9 @@ ept_set_entry(struct domain *d, unsigned
     {
         if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
         {
-            ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
+            ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn),
+                                &igmt, direct_mmio);
+            ept_entry->igmt = igmt;
             ept_entry->sp_avail = walk_level ? 1 : 0;
 
             if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -208,7 +217,10 @@ ept_set_entry(struct domain *d, unsigned
         {
             split_ept_entry = split_table + i;
             split_ept_entry->emt = epte_get_entry_emt(d,
-                                        gfn-offset+i, split_mfn+i);
+                                        gfn-offset+i, split_mfn+i, 
+                                        &igmt, direct_mmio);
+            split_ept_entry->igmt = igmt;
+
             split_ept_entry->sp_avail =  0;
 
             split_ept_entry->mfn = split_mfn+i;
@@ -223,7 +235,10 @@ ept_set_entry(struct domain *d, unsigned
 
         /* Set the destinated 4k page as normal */
         split_ept_entry = split_table + offset;
-        split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
+        split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn), 
+                                                &igmt, direct_mmio);
+        split_ept_entry->igmt = igmt;
+
         split_ept_entry->mfn = mfn_x(mfn);
         split_ept_entry->avail1 = p2mt;
         ept_p2m_type_to_flags(split_ept_entry, p2mt);
@@ -246,7 +261,8 @@ out:
 
     /* Now the p2m table is not shared with vt-d page table */
 
-    if ( iommu_enabled && is_hvm_domain(d) )
+    if ( iommu_enabled && is_hvm_domain(d)  
+             && need_modify_vtd_table )
     {
         if ( p2mt == p2m_ram_rw )
         {
@@ -271,6 +287,17 @@ out:
     }
 
     return rv;
+}
+
+static int
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+              unsigned int order, p2m_type_t p2mt)
+{
+    /* ept_set_entry() is called from set_entry().
+     * We should always create the VT-d page table according
+     * to changes in the gfn to mfn translations.
+     */
+    return _ept_set_entry(d, gfn, mfn, order, p2mt, 1); 
 }
 
 /* Read ept p2m entries */
@@ -395,18 +422,30 @@ void ept_change_entry_emt_with_range(str
                  * Set emt for super page.
                  */
                 order = EPT_TABLE_ORDER;
-                ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+                /* vmx_set_uc_mode() doesn't touch the gfn to mfn
+                 * translations; it only modifies the emt field of the EPT
+                 * entries, so we need not modify the current VT-d page tables.
+                 */
+                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
                 gfn += 0x1FF;
             }
             else
             {
-                /* change emt for partial entries of the 2m area */
-                ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+                /* 1) Change emt for partial entries of the 2m area.
+                 * 2) vmx_set_uc_mode() doesn't touch the gfn to mfn
+                 * translations; it only modifies the emt field of the EPT
+                 * entries, so we need not modify the current VT-d page tables.
+                 */
+                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0);
                 gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
             }
         }
-        else /* gfn assigned with 4k */
-            ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+        else /* 1)gfn assigned with 4k
+              * 2)vmx_set_uc_mode() doesn't touch the gfn to mfn
+              * translations; it only modifies the emt field of the EPT entries,
+              * so we need not modify the current VT-d page tables.
+             */
+            _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
     }
 }
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>