[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 04 Apr 2007 13:30:19 -0700
Delivery-date: Wed, 04 Apr 2007 13:30:32 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1175627091 21600
# Node ID f378c424e0ced4cbc584e5c6125d065f1cc05d0c
# Parent  fc9e2f7920c95229caaf5ad8fc44965dd891f600
# Parent  7e431ea834a877b1f0c90bdb1e6f1346da4e81cc
merge with xen-unstable.hg
---
 README                                           |   22 
 docs/src/user.tex                                |    4 
 linux-2.6-xen-sparse/arch/ia64/Kconfig           |    9 
 linux-2.6-xen-sparse/drivers/xen/Kconfig         |   16 
 linux-2.6-xen-sparse/drivers/xen/Makefile        |    7 
 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c |   38 
 linux-2.6-xen-sparse/drivers/xen/core/Makefile   |    3 
 linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile |    1 
 linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c |  973 +++++++++++++++++++++++
 linux-2.6-xen-sparse/drivers/xen/util.c          |   22 
 linux-2.6-xen-sparse/include/linux/mm.h          |    4 
 linux-2.6-xen-sparse/include/xen/driver_util.h   |    3 
 linux-2.6-xen-sparse/include/xen/public/gntdev.h |  105 ++
 linux-2.6-xen-sparse/mm/memory.c                 |    9 
 tools/blktap/drivers/qcow2raw.c                  |    9 
 tools/examples/xmexample.hvm                     |    4 
 tools/ioemu/target-i386-dm/helper2.c             |   33 
 tools/ioemu/vl.c                                 |    3 
 tools/ioemu/vl.h                                 |    6 
 tools/ioemu/xenstore.c                           |   69 +
 tools/libxc/ia64/xc_ia64_linux_restore.c         |   51 -
 tools/libxc/xc_core.c                            |    4 
 tools/libxc/xc_core_x86.c                        |   12 
 tools/libxc/xc_hvm_restore.c                     |   14 
 tools/libxc/xc_hvm_save.c                        |    7 
 tools/libxc/xc_linux.c                           |  156 +++
 tools/libxc/xc_linux_restore.c                   |   85 +-
 tools/libxc/xc_linux_save.c                      |   66 -
 tools/libxc/xc_resume.c                          |    4 
 tools/libxc/xenctrl.h                            |   59 +
 tools/libxc/xenguest.h                           |    9 
 tools/libxc/xg_private.h                         |    9 
 tools/python/xen/lowlevel/scf/scf.c              |    2 
 tools/python/xen/xend/XendCheckpoint.py          |   13 
 tools/python/xen/xend/XendConfig.py              |    3 
 tools/python/xen/xend/XendDomainInfo.py          |    8 
 tools/python/xen/xend/balloon.py                 |   18 
 tools/python/xen/xend/image.py                   |    3 
 tools/python/xen/xend/osdep.py                   |   50 +
 tools/python/xen/xend/server/SrvServer.py        |    4 
 tools/python/xen/xend/server/relocate.py         |    8 
 tools/python/xen/xm/create.py                    |    6 
 tools/python/xen/xm/main.py                      |    8 
 tools/python/xen/xm/xenapi_create.py             |    1 
 tools/xcutils/xc_restore.c                       |   33 
 tools/xenstat/xentop/xentop.c                    |    2 
 xen/arch/x86/hvm/hvm.c                           |    9 
 xen/arch/x86/hvm/intercept.c                     |   38 
 xen/arch/x86/hvm/io.c                            |   11 
 xen/arch/x86/hvm/platform.c                      |   20 
 xen/arch/x86/hvm/rtc.c                           |    8 
 xen/arch/x86/hvm/svm/vmcb.c                      |   28 
 xen/arch/x86/hvm/vmx/vmcs.c                      |    2 
 xen/arch/x86/hvm/vmx/vmx.c                       |   13 
 xen/arch/x86/mm.c                                |    3 
 xen/arch/x86/mm/hap/hap.c                        |   68 -
 xen/arch/x86/mm/shadow/multi.c                   |    4 
 xen/arch/x86/setup.c                             |    4 
 xen/arch/x86/time.c                              |    2 
 xen/arch/x86/traps.c                             |   17 
 xen/arch/x86/x86_32/traps.c                      |    7 
 xen/arch/x86/x86_64/traps.c                      |   10 
 xen/common/domain.c                              |  121 ++
 xen/common/domctl.c                              |    5 
 xen/common/page_alloc.c                          |   12 
 xen/common/symbols.c                             |   12 
 xen/drivers/char/console.c                       |    8 
 xen/include/asm-x86/domain.h                     |    1 
 xen/include/asm-x86/hvm/io.h                     |    2 
 xen/include/asm-x86/hvm/support.h                |    1 
 xen/include/asm-x86/hvm/vmx/vmcs.h               |    1 
 xen/include/asm-x86/processor.h                  |    8 
 xen/include/asm-x86/time.h                       |    5 
 xen/include/public/hvm/ioreq.h                   |    1 
 xen/include/xen/sched.h                          |   12 
 75 files changed, 2055 insertions(+), 353 deletions(-)

diff -r fc9e2f7920c9 -r f378c424e0ce README
--- a/README    Fri Mar 30 17:18:42 2007 -0600
+++ b/README    Tue Apr 03 13:04:51 2007 -0600
@@ -177,3 +177,25 @@ 5. To rebuild a kernel with a modified c
    an initial ram disk, just like a native system e.g.
     # depmod 2.6.16-xen
     # mkinitrd -v -f --with=aacraid --with=sd_mod --with=scsi_mod initrd-2.6.16-xen.img 2.6.16-xen
+
+
+Python Runtime Libraries
+========================
+
+Xend (the Xen daemon) has the following runtime dependencies:
+
+    * Python 2.3 or later.
+      In many distros, the XML-aspects to the standard library
+      (xml.dom.minidom etc) are broken out into a separate python-xml package.
+      This is also required.
+
+          URL:    http://www.python.org/
+          Debian: python, python-xml
+
+    * For optional SSL support, pyOpenSSL:
+          URL:    http://pyopenssl.sourceforge.net/
+          Debian: python-pyopenssl
+
+    * For optional PAM support, PyPAM:
+          URL:    http://www.pangalactic.org/PyPAM/
+          Debian: python-pam
diff -r fc9e2f7920c9 -r f378c424e0ce docs/src/user.tex
--- a/docs/src/user.tex Fri Mar 30 17:18:42 2007 -0600
+++ b/docs/src/user.tex Tue Apr 03 13:04:51 2007 -0600
@@ -3250,6 +3250,10 @@ editing \path{grub.conf}.
 \item [ dma\_emergency\_pool=xxx ] Specify lower bound on size of DMA
   pool below which ordinary allocations will fail rather than fall
   back to allocating from the DMA pool.
+\item [ hap ] Instruct Xen to detect hardware-assisted paging support, such
+  as AMD-V's nested paging or Intel\textregistered VT's extended paging. If 
+  available, Xen will use hardware-assisted paging instead of shadow paging 
+  for guest memory management.
 \end{description}
 
 In addition, the following options may be specified on the Xen command
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/arch/ia64/Kconfig
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig    Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig    Tue Apr 03 13:04:51 2007 -0600
@@ -576,15 +576,6 @@ source "crypto/Kconfig"
 # override default values of drivers/xen/Kconfig
 #
 if XEN
-config XEN_UTIL
-       default n
-
-config XEN_BALLOON
-       default y
-
-config XEN_REBOOT
-       default y
-
 config XEN_SMPBOOT
        default n
 endif
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/Kconfig
--- a/linux-2.6-xen-sparse/drivers/xen/Kconfig  Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig  Tue Apr 03 13:04:51 2007 -0600
@@ -253,22 +253,6 @@ config NO_IDLE_HZ
        bool
        default y
 
-config XEN_UTIL
-       bool
-       default y
-
-config XEN_BALLOON
-       bool
-       default y
-
-config XEN_DEVMEM
-       bool
-       default y
-
-config XEN_REBOOT
-       bool
-       default y
-
 config XEN_SMPBOOT
        bool
        default y
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Tue Apr 03 13:04:51 2007 -0600
@@ -3,10 +3,11 @@ obj-y += evtchn/
 obj-y  += evtchn/
 obj-y  += privcmd/
 obj-y  += xenbus/
+obj-y  += gntdev/
+obj-y  += balloon/
+obj-y  += char/
 
-obj-$(CONFIG_XEN_UTIL)                 += util.o
-obj-$(CONFIG_XEN_BALLOON)              += balloon/
-obj-$(CONFIG_XEN_DEVMEM)               += char/
+obj-y  += util.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
 obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Tue Apr 03 13:04:51 2007 -0600
@@ -44,6 +44,7 @@
 #include <asm/hypervisor.h>
 #include "common.h"
 #include <xen/balloon.h>
+#include <xen/driver_util.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
@@ -55,30 +56,6 @@
 
 #define MAX_TAP_DEV 256     /*the maximum number of tapdisk ring devices    */
 #define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */
-
-
-struct class *xen_class;
-EXPORT_SYMBOL_GPL(xen_class);
-
-/*
- * Setup the xen class.  This should probably go in another file, but
- * since blktap is the only user of it so far, it gets to keep it.
- */
-int setup_xen_class(void)
-{
-       int ret;
-
-       if (xen_class)
-               return 0;
-
-       xen_class = class_create(THIS_MODULE, "xen");
-       if ((ret = IS_ERR(xen_class))) {
-               xen_class = NULL;
-               return ret;
-       }
-
-       return 0;
-}
 
 /*
  * The maximum number of requests that can be outstanding at any time
@@ -347,6 +324,7 @@ static const struct file_operations blkt
 
 static tap_blkif_t *get_next_free_dev(void)
 {
+       struct class *class;
        tap_blkif_t *info;
        int minor;
 
@@ -409,9 +387,10 @@ found:
                wmb();
                tapfds[minor] = info;
 
-               class_device_create(xen_class, NULL,
-                                   MKDEV(blktap_major, minor), NULL,
-                                   "blktap%d", minor);
+               if ((class = get_xen_class()) != NULL)
+                       class_device_create(class, NULL,
+                                           MKDEV(blktap_major, minor), NULL,
+                                           "blktap%d", minor);
        }
 
 out:
@@ -1487,6 +1466,7 @@ static int __init blkif_init(void)
 static int __init blkif_init(void)
 {
        int i, ret;
+       struct class *class;
 
        if (!is_running_on_xen())
                return -ENODEV;
@@ -1522,7 +1502,7 @@ static int __init blkif_init(void)
        DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
 
        /* Make sure the xen class exists */
-       if (!setup_xen_class()) {
+       if ((class = get_xen_class()) != NULL) {
                /*
                 * This will allow udev to create the blktap ctrl device.
                 * We only want to create blktap0 first.  We don't want
@@ -1530,7 +1510,7 @@ static int __init blkif_init(void)
                 * We only create the device when a request of a new device is
                 * made.
                 */
-               class_device_create(xen_class, NULL,
+               class_device_create(class, NULL,
                                    MKDEV(blktap_major, 0), NULL,
                                    "blktap0");
        } else {
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Tue Apr 03 13:04:51 2007 -0600
@@ -2,12 +2,11 @@
 # Makefile for the linux kernel.
 #
 
-obj-y := evtchn.o gnttab.o features.o
+obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o
 
 obj-$(CONFIG_PROC_FS)          += xen_proc.o
 obj-$(CONFIG_SYSFS)            += hypervisor_sysfs.o
 obj-$(CONFIG_HOTPLUG_CPU)      += cpu_hotplug.o
 obj-$(CONFIG_XEN_SYSFS)                += xen_sysfs.o
-obj-$(CONFIG_XEN_REBOOT)       += reboot.o machine_reboot.o
 obj-$(CONFIG_XEN_SMPBOOT)      += smpboot.o
 obj-$(CONFIG_KEXEC)            += machine_kexec.o
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile  Tue Apr 03 13:04:51 2007 -0600
@@ -0,0 +1,1 @@
+obj-y  := gntdev.o
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c  Tue Apr 03 13:04:51 2007 -0600
@@ -0,0 +1,973 @@
+/******************************************************************************
+ * gntdev.c
+ * 
+ * Device for accessing (in user-space) pages that have been granted by other
+ * domains.
+ *
+ * Copyright (c) 2006-2007, D G Murray.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <asm/atomic.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <xen/gnttab.h>
+#include <asm/hypervisor.h>
+#include <xen/balloon.h>
+#include <xen/evtchn.h>
+#include <xen/driver_util.h>
+
+#include <linux/types.h>
+#include <xen/public/gntdev.h>
+
+
+#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@xxxxxxxxxxxx>"
+#define DRIVER_DESC   "User-space granted page access driver"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+
+#define MAX_GRANTS 128
+
+/* A slot can be in one of three states:
+ *
+ * 0. GNTDEV_SLOT_INVALID:
+ *    This slot is not associated with a grant reference, and is therefore free
+ *    to be overwritten by a new grant reference.
+ *
+ * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
+ *    This slot is associated with a grant reference (via the 
+ *    IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
+ *
+ * 2. GNTDEV_SLOT_MAPPED:
+ *    This slot is associated with a grant reference, and has been mmap()-ed.
+ */
+typedef enum gntdev_slot_state {
+       GNTDEV_SLOT_INVALID = 0,
+       GNTDEV_SLOT_NOT_YET_MAPPED,
+       GNTDEV_SLOT_MAPPED
+} gntdev_slot_state_t;
+
+#define GNTDEV_INVALID_HANDLE    -1
+#define GNTDEV_FREE_LIST_INVALID -1
+/* Each opened instance of gntdev is associated with a list of grants,
+ * represented by an array of elements of the following type,
+ * gntdev_grant_info_t.
+ */
+typedef struct gntdev_grant_info {
+       gntdev_slot_state_t state;
+       union {
+               uint32_t free_list_index;
+               struct {
+                       domid_t domid;
+                       grant_ref_t ref;
+                       grant_handle_t kernel_handle;
+                       grant_handle_t user_handle;
+                       uint64_t dev_bus_addr;
+               } valid;
+       } u;
+} gntdev_grant_info_t;
+
+/* Private data structure, which is stored in the file pointer for files
+ * associated with this device.
+ */
+typedef struct gntdev_file_private_data {
+  
+       /* Array of grant information. */
+       gntdev_grant_info_t grants[MAX_GRANTS];
+
+       /* Read/write semaphore used to protect the grants array. */
+       struct rw_semaphore grants_sem;
+
+       /* An array of indices of free slots in the grants array.
+        * N.B. An entry in this list may temporarily have the value
+        * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
+        * from the list by the contiguous allocator, but the list has not yet
+        * been compressed. However, this is not visible across invocations of
+        * the device.
+        */
+       int32_t free_list[MAX_GRANTS];
+       
+       /* The number of free slots in the grants array. */
+       uint32_t free_list_size;
+
+       /* Read/write semaphore used to protect the free list. */
+       struct rw_semaphore free_list_sem;
+       
+       /* Index of the next slot after the most recent contiguous allocation, 
+        * for use in a next-fit allocator.
+        */
+       uint32_t next_fit_index;
+
+       /* Used to map grants into the kernel, before mapping them into user
+        * space.
+        */
+       struct page **foreign_pages;
+
+} gntdev_file_private_data_t;
+
+/* Module lifecycle operations. */
+static int __init gntdev_init(void);
+static void __exit gntdev_exit(void);
+
+module_init(gntdev_init);
+module_exit(gntdev_exit);
+
+/* File operations. */
+static int gntdev_open(struct inode *inode, struct file *flip);
+static int gntdev_release(struct inode *inode, struct file *flip);
+static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
+static int gntdev_ioctl (struct inode *inode, struct file *flip,
+                        unsigned int cmd, unsigned long arg);
+
+static struct file_operations gntdev_fops = {
+       .owner = THIS_MODULE,
+       .open = gntdev_open,
+       .release = gntdev_release,
+       .mmap = gntdev_mmap,
+       .ioctl = gntdev_ioctl
+};
+
+/* VM operations. */
+static void gntdev_vma_close(struct vm_area_struct *vma);
+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
+                             pte_t *ptep, int is_fullmm);
+
+static struct vm_operations_struct gntdev_vmops = {
+       .close = gntdev_vma_close,
+       .zap_pte = gntdev_clear_pte
+};
+
+/* Global variables. */
+
+/* The driver major number, for use when unregistering the driver. */
+static int gntdev_major;
+
+#define GNTDEV_NAME "gntdev"
+
+/* Memory mapping functions
+ * ------------------------
+ *
+ * Every granted page is mapped into both kernel and user space, and the two
+ * following functions return the respective virtual addresses of these pages.
+ *
+ * When shadow paging is disabled, the granted page is mapped directly into
+ * user space; when it is enabled, it is mapped into the kernel and remapped
+ * into user space using vm_insert_page() (see gntdev_mmap(), below).
+ */
+
+/* Returns the virtual address (in user space) of the @page_index'th page
+ * in the given VM area.
+ */
+static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
+                                           int page_index)
+{
+       return (unsigned long) vma->vm_start + (page_index << PAGE_SHIFT);
+}
+
+/* Returns the virtual address (in kernel space) of the @slot_index'th page
+ * mapped by the gntdev instance that owns the given private data struct.
+ */
+static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
+                                             int slot_index)
+{
+       unsigned long pfn;
+       void *kaddr;
+       pfn = page_to_pfn(priv->foreign_pages[slot_index]);
+       kaddr = pfn_to_kaddr(pfn);
+       return (unsigned long) kaddr;
+}
+
+/* Helper functions. */
+
+/* Adds information about a grant reference to the list of grants in the file's
+ * private data structure. Returns non-zero on failure. On success, sets the
+ * value of *offset to the offset that should be mmap()-ed in order to map the
+ * grant reference.
+ */
+static int add_grant_reference(struct file *flip,
+                              struct ioctl_gntdev_grant_ref *op,
+                              uint64_t *offset)
+{
+       gntdev_file_private_data_t *private_data 
+               = (gntdev_file_private_data_t *) flip->private_data;
+
+       uint32_t slot_index;
+
+       if (unlikely(private_data->free_list_size == 0)) {
+               return -ENOMEM;
+       }
+
+       slot_index = private_data->free_list[--private_data->free_list_size];
+
+       /* Copy the grant information into file's private data. */
+       private_data->grants[slot_index].state = GNTDEV_SLOT_NOT_YET_MAPPED;
+       private_data->grants[slot_index].u.valid.domid = op->domid;
+       private_data->grants[slot_index].u.valid.ref = op->ref;
+
+       /* The offset is calculated as the index of the chosen entry in the
+        * file's private data's array of grant information. This is then
+        * shifted to give an offset into the virtual "file address space".
+        */
+       *offset = slot_index << PAGE_SHIFT;
+
+       return 0;
+}
+
+/* Adds the @count grant references to the contiguous range in the slot array
+ * beginning at @first_slot. It is assumed that @first_slot was returned by a
+ * previous invocation of find_contiguous_free_range(), during the same
+ * invocation of the driver.
+ */
+static int add_grant_references(struct file *flip,
+                               int count,
+                               struct ioctl_gntdev_grant_ref *ops,
+                               uint32_t first_slot)
+{
+       gntdev_file_private_data_t *private_data 
+               = (gntdev_file_private_data_t *) flip->private_data;
+       int i;
+       
+       for (i = 0; i < count; ++i) {
+
+               /* First, mark the slot's entry in the free list as invalid. */
+               int free_list_index = 
+                       private_data->grants[first_slot+i].u.free_list_index;
+               private_data->free_list[free_list_index] = 
+                       GNTDEV_FREE_LIST_INVALID;
+
+               /* Now, update the slot. */
+               private_data->grants[first_slot+i].state = 
+                       GNTDEV_SLOT_NOT_YET_MAPPED;
+               private_data->grants[first_slot+i].u.valid.domid =
+                       ops[i].domid;
+               private_data->grants[first_slot+i].u.valid.ref = ops[i].ref;
+       }
+
+       return 0;       
+}
+
+/* Scans through the free list for @flip, removing entries that are marked as
+ * GNTDEV_SLOT_INVALID. This will reduce the recorded size of the free list to
+ * the number of valid entries.
+ */
+static void compress_free_list(struct file *flip) 
+{
+       gntdev_file_private_data_t *private_data 
+               = (gntdev_file_private_data_t *) flip->private_data;
+       int i, j = 0, old_size;
+       
+       old_size = private_data->free_list_size;
+       for (i = 0; i < old_size; ++i) {
+               if (private_data->free_list[i] != GNTDEV_FREE_LIST_INVALID) {
+                       private_data->free_list[j] = 
+                               private_data->free_list[i];
+                       ++j;
+               } else {
+                       --private_data->free_list_size;
+               }
+       }
+}
+
+/* Searches the grant array in the private data of @flip for a range of
+ * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state.
+ *
+ * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
+ */
+static int find_contiguous_free_range(struct file *flip,
+                                     uint32_t num_slots) 
+{
+       gntdev_file_private_data_t *private_data 
+               = (gntdev_file_private_data_t *) flip->private_data;
+       
+       int i;
+       int start_index = private_data->next_fit_index;
+       int range_start = 0, range_length;
+
+       if (private_data->free_list_size < num_slots) {
+               return -ENOMEM;
+       }
+
+       /* First search from the start_index to the end of the array. */
+       range_length = 0;
+       for (i = start_index; i < MAX_GRANTS; ++i) {
+               if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
+                       if (range_length == 0) {
+                               range_start = i;
+                       }
+                       ++range_length;
+                       if (range_length == num_slots) {
+                               return range_start;
+                       }
+               }
+       }
+       
+       /* Now search from the start of the array to the start_index. */
+       range_length = 0;
+       for (i = 0; i < start_index; ++i) {
+               if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
+                       if (range_length == 0) {
+                               range_start = i;
+                       }
+                       ++range_length;
+                       if (range_length == num_slots) {
+                               return range_start;
+                       }
+               }
+       }
+       
+       return -ENOMEM;
+}
+
+/* Interface functions. */
+
+/* Initialises the driver. Called when the module is loaded. */
+static int __init gntdev_init(void)
+{
+       struct class *class;
+       struct class_device *device;
+
+       if (!is_running_on_xen()) {
+               printk(KERN_ERR "You must be running Xen to use gntdev\n");
+               return -ENODEV;
+       }
+
+       gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
+       if (gntdev_major < 0)
+       {
+               printk(KERN_ERR "Could not register gntdev device\n");
+               return -ENOMEM;
+       }
+
+       /* Note that if the sysfs code fails, we will still initialise the
+        * device, and output the major number so that the device can be
+        * created manually using mknod.
+        */
+       if ((class = get_xen_class()) == NULL) {
+               printk(KERN_ERR "Error setting up xen_class\n");
+               printk(KERN_ERR "gntdev created with major number = %d\n", 
+                      gntdev_major);
+               return 0;
+       }
+
+       device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
+                                    NULL, GNTDEV_NAME);
+       if (IS_ERR(device)) {
+               printk(KERN_ERR "Error creating gntdev device in xen_class\n");
+               printk(KERN_ERR "gntdev created with major number = %d\n",
+                      gntdev_major);
+               return 0;
+       }
+
+       return 0;
+}
+
+/* Cleans up and unregisters the driver. Called when the driver is unloaded.
+ */
+static void __exit gntdev_exit(void)
+{
+       struct class *class;
+       if ((class = get_xen_class()) != NULL)
+               class_device_destroy(class, MKDEV(gntdev_major, 0));
+       unregister_chrdev(gntdev_major, GNTDEV_NAME);
+}
+
+/* Called when the device is opened. */
+static int gntdev_open(struct inode *inode, struct file *flip)
+{
+       gntdev_file_private_data_t *private_data;
+       int i;
+
+       try_module_get(THIS_MODULE);
+
+       /* Allocate space for the per-instance private data. */
+       private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
+       if (!private_data)
+               goto nomem_out;
+
+       /* Allocate space for the kernel-mapping of granted pages. */
+       private_data->foreign_pages = 
+               alloc_empty_pages_and_pagevec(MAX_GRANTS);
+       if (!private_data->foreign_pages)
+               goto nomem_out2;
+
+       /* Initialise the free-list, which contains all slots at first.
+        */
+       for (i = 0; i < MAX_GRANTS; ++i) {
+               private_data->free_list[MAX_GRANTS - i - 1] = i;
+               private_data->grants[i].state = GNTDEV_SLOT_INVALID;
+               private_data->grants[i].u.free_list_index = MAX_GRANTS - i - 1;
+       }
+       private_data->free_list_size = MAX_GRANTS;
+       private_data->next_fit_index = 0;
+
+       init_rwsem(&private_data->grants_sem);
+       init_rwsem(&private_data->free_list_sem);
+
+       flip->private_data = private_data;
+
+       return 0;
+
+nomem_out2:
+       kfree(private_data);
+nomem_out:
+       return -ENOMEM;
+}
+
+/* Called when the device is closed.
+ */
+static int gntdev_release(struct inode *inode, struct file *flip)
+{
+       if (flip->private_data) {
+               gntdev_file_private_data_t *private_data = 
+                       (gntdev_file_private_data_t *) flip->private_data;
+               if (private_data->foreign_pages) {
+                       free_empty_pages_and_pagevec
+                               (private_data->foreign_pages, MAX_GRANTS);
+               }
+               kfree(private_data);
+       }
+       module_put(THIS_MODULE);
+       return 0;
+}
+
+/* Called when an attempt is made to mmap() the device. The private data from
+ * @flip contains the list of grant references that can be mapped. The vm_pgoff
+ * field of @vma contains the index into that list that refers to the grant
+ * reference that will be mapped. Only mappings that are a multiple of
+ * PAGE_SIZE are handled.
+ */
+static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma) 
+{
+       struct gnttab_map_grant_ref op;
+       unsigned long slot_index = vma->vm_pgoff;
+       unsigned long kernel_vaddr, user_vaddr;
+       uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+       uint64_t ptep;
+       int ret;
+       int flags;
+       int i;
+       struct page *page;
+       gntdev_file_private_data_t *private_data = flip->private_data;
+
+       if (unlikely(!private_data)) {
+               printk(KERN_ERR "File's private data is NULL.\n");
+               return -EINVAL;
+       }
+
+       if (unlikely((size <= 0) || (size + slot_index) > MAX_GRANTS)) {
+               printk(KERN_ERR "Invalid number of pages or offset"
+                      "(num_pages = %d, first_slot = %ld).\n",
+                      size, slot_index);
+               return -ENXIO;
+       }
+
+       if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
+               printk(KERN_ERR "Writable mappings must be shared.\n");
+               return -EINVAL;
+       }
+
+       /* Slots must be in the NOT_YET_MAPPED state. */
+       down_write(&private_data->grants_sem);
+       for (i = 0; i < size; ++i) {
+               if (private_data->grants[slot_index + i].state != 
+                   GNTDEV_SLOT_NOT_YET_MAPPED) {
+                       printk(KERN_ERR "Slot (index = %ld) is in the wrong "
+                              "state (%d).\n", slot_index + i, 
+                              private_data->grants[slot_index + i].state);
+                       up_write(&private_data->grants_sem);
+                       return -EINVAL;
+               }
+       }
+
+       /* Install the hook for unmapping. */
+       vma->vm_ops = &gntdev_vmops;
+    
+       /* The VM area contains pages from another VM. */
+       vma->vm_flags |= VM_FOREIGN;
+       vma->vm_private_data = kzalloc(size * sizeof(struct page_struct *), 
+                                      GFP_KERNEL);
+       if (vma->vm_private_data == NULL) {
+               printk(KERN_ERR "Couldn't allocate mapping structure for VM "
+                      "area.\n");
+               return -ENOMEM;
+       }
+
+       /* This flag prevents Bad PTE errors when the memory is unmapped. */
+       vma->vm_flags |= VM_RESERVED;
+
+       /* This flag prevents this VM area being copied on a fork(). A better
+        * behaviour might be to explicitly carry out the appropriate mappings
+        * on fork(), but I don't know if there's a hook for this.
+        */
+       vma->vm_flags |= VM_DONTCOPY;
+
+#ifdef CONFIG_X86
+       /* This flag ensures that the page tables are not unpinned before the
+        * VM area is unmapped. Therefore Xen still recognises the PTE as
+        * belonging to an L1 pagetable, and the grant unmap operation will
+        * succeed, even if the process does not exit cleanly.
+        */
+       vma->vm_mm->context.has_foreign_mappings = 1;
+#endif
+
+       for (i = 0; i < size; ++i) {
+
+               flags = GNTMAP_host_map;
+               if (!(vma->vm_flags & VM_WRITE))
+                       flags |= GNTMAP_readonly;
+
+               kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
+               user_vaddr = get_user_vaddr(vma, i);
+               page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT);
+
+               gnttab_set_map_op(&op, kernel_vaddr, flags,   
+                                 private_data->grants[slot_index+i]
+                                 .u.valid.ref, 
+                                 private_data->grants[slot_index+i]
+                                 .u.valid.domid);
+
+               /* Carry out the mapping of the grant reference. */
+               ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 
+                                               &op, 1);
+               BUG_ON(ret);
+               if (op.status) {
+                       printk(KERN_ERR "Error mapping the grant reference "
+                              "into the kernel (%d). domid = %d; ref = %d\n",
+                              op.status,
+                              private_data->grants[slot_index+i]
+                              .u.valid.domid,
+                              private_data->grants[slot_index+i]
+                              .u.valid.ref);
+                       goto undo_map_out;
+               }
+
+               /* Store a reference to the page that will be mapped into user
+                * space.
+                */
+               ((struct page **) vma->vm_private_data)[i] = page;
+
+               /* Mark mapped page as reserved. */
+               SetPageReserved(page);
+
+               /* Record the grant handle, for use in the unmap operation. */
+               private_data->grants[slot_index+i].u.valid.kernel_handle = 
+                       op.handle;
+               private_data->grants[slot_index+i].u.valid.dev_bus_addr = 
+                       op.dev_bus_addr;
+               
+               private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED;
+               private_data->grants[slot_index+i].u.valid.user_handle =
+                       GNTDEV_INVALID_HANDLE;
+
+               /* Now perform the mapping to user space. */
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+
+                       /* NOT USING SHADOW PAGE TABLES. */
+                       /* In this case, we map the grant(s) straight into user
+                        * space.
+                        */
+
+                       /* Get the machine address of the PTE for the user 
+                        *  page.
+                        */
+                       if ((ret = create_lookup_pte_addr(vma->vm_mm, 
+                                                         vma->vm_start 
+                                                         + (i << PAGE_SHIFT), 
+                                                         &ptep)))
+                       {
+                               printk(KERN_ERR "Error obtaining PTE pointer "
+                                      "(%d).\n", ret);
+                               goto undo_map_out;
+                       }
+                       
+                       /* Configure the map operation. */
+               
+                       /* The reference is to be used by host CPUs. */
+                       flags = GNTMAP_host_map;
+                       
+                       /* Specifies a user space mapping. */
+                       flags |= GNTMAP_application_map;
+                       
+                       /* The map request contains the machine address of the
+                        * PTE to update.
+                        */
+                       flags |= GNTMAP_contains_pte;
+                       
+                       if (!(vma->vm_flags & VM_WRITE))
+                               flags |= GNTMAP_readonly;
+
+                       gnttab_set_map_op(&op, ptep, flags, 
+                                         private_data->grants[slot_index+i]
+                                         .u.valid.ref, 
+                                         private_data->grants[slot_index+i]
+                                         .u.valid.domid);
+
+                       /* Carry out the mapping of the grant reference. */
+                       ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+                                                       &op, 1);
+                       BUG_ON(ret);
+                       if (op.status) {
+                               printk(KERN_ERR "Error mapping the grant "
+                                      "reference into user space (%d). domid "
+                                      "= %d; ref = %d\n", op.status,
+                                      private_data->grants[slot_index+i].u
+                                      .valid.domid,
+                                      private_data->grants[slot_index+i].u
+                                      .valid.ref);
+                               goto undo_map_out;
+                       }
+                       
+                       /* Record the grant handle, for use in the unmap 
+                        * operation. 
+                        */
+                       private_data->grants[slot_index+i].u.
+                               valid.user_handle = op.handle;
+
+                       /* Update p2m structure with the new mapping. */
+                       set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
+                                           FOREIGN_FRAME(private_data->
+                                                         grants[slot_index+i]
+                                                         .u.valid.dev_bus_addr
+                                                         >> PAGE_SHIFT));
+               } else {
+                       /* USING SHADOW PAGE TABLES. */
+                       /* In this case, we simply insert the page into the VM
+                        * area. */
+                       ret = vm_insert_page(vma, user_vaddr, page);
+               }
+
+       }
+
+       up_write(&private_data->grants_sem);
+       return 0;
+
+undo_map_out:
+       /* If we have a mapping failure, the unmapping will be taken care of
+        * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
+        * All we need to do here is free the vma_private_data.
+        */
+       kfree(vma->vm_private_data);
+
+       /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
+        * to NULL on failure. However, we need this in gntdev_clear_pte() to
+        * unmap the grants. Therefore, we smuggle a reference to the file's
+        * private data in the VM area's private data pointer.
+        */
+       vma->vm_private_data = private_data;
+       
+       up_write(&private_data->grants_sem);
+
+       return -ENOMEM;
+}
+
+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
+                             pte_t *ptep, int is_fullmm)
+{
+       int slot_index, ret;
+       pte_t copy;
+       struct gnttab_unmap_grant_ref op;
+       gntdev_file_private_data_t *private_data;
+
+       /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
+        * to NULL on failure. However, we need this in gntdev_clear_pte() to
+        * unmap the grants. Therefore, we smuggle a reference to the file's
+        * private data in the VM area's private data pointer.
+        */
+       if (vma->vm_file) {
+               private_data = (gntdev_file_private_data_t *)
+                       vma->vm_file->private_data;
+       } else if (vma->vm_private_data) {
+               private_data = (gntdev_file_private_data_t *)
+                       vma->vm_private_data;
+       } else {
+               private_data = NULL; /* gcc warning */
+               BUG();
+       }
+
+       /* Copy the existing value of the PTE for returning. */
+       copy = *ptep;
+
+       /* Calculate the grant relating to this PTE. */
+       slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
+
+       /* Only unmap grants if the slot has been mapped. This could be being
+        * called from a failing mmap().
+        */
+       if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) {
+
+               /* First, we clear the user space mapping, if it has been made.
+                */
+               if (private_data->grants[slot_index].u.valid.user_handle !=
+                   GNTDEV_INVALID_HANDLE && 
+                   !xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* NOT USING SHADOW PAGE TABLES. */
+                       gnttab_set_unmap_op(&op, virt_to_machine(ptep), 
+                                           GNTMAP_contains_pte,
+                                           private_data->grants[slot_index]
+                                           .u.valid.user_handle);
+                       ret = HYPERVISOR_grant_table_op(
+                               GNTTABOP_unmap_grant_ref, &op, 1);
+                       BUG_ON(ret);
+                       if (op.status)
+                               printk("User unmap grant status = %d\n", 
+                                      op.status);
+               } else {
+                       /* USING SHADOW PAGE TABLES. */
+                       pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+               }
+
+               /* Finally, we unmap the grant from kernel space. */
+               gnttab_set_unmap_op(&op, 
+                                   get_kernel_vaddr(private_data, slot_index),
+                                   GNTMAP_host_map, 
+                                   private_data->grants[slot_index].u.valid
+                                   .kernel_handle);
+               ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 
+                                               &op, 1);
+               BUG_ON(ret);
+               if (op.status)
+                       printk("Kernel unmap grant status = %d\n", op.status);
+
+
+               /* Return slot to the not-yet-mapped state, so that it may be
+                * mapped again, or removed by a subsequent ioctl.
+                */
+               private_data->grants[slot_index].state = 
+                       GNTDEV_SLOT_NOT_YET_MAPPED;
+
+               /* Invalidate the physical to machine mapping for this page. */
+               set_phys_to_machine(__pa(get_kernel_vaddr(private_data, 
+                                                         slot_index)) 
+                                   >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+
+       } else {
+               pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+       }
+
+       return copy;
+}
+
+/* "Destructor" for a VM area.
+ */
+static void gntdev_vma_close(struct vm_area_struct *vma) {
+       if (vma->vm_private_data) {
+               kfree(vma->vm_private_data);
+       }
+}
+
+/* Called when an ioctl is made on the device.
+ */
+static int gntdev_ioctl(struct inode *inode, struct file *flip,
+                       unsigned int cmd, unsigned long arg)
+{
+       int rc = 0;
+       gntdev_file_private_data_t *private_data = 
+               (gntdev_file_private_data_t *) flip->private_data;
+
+       switch (cmd) {
+       case IOCTL_GNTDEV_MAP_GRANT_REF:
+       {
+               struct ioctl_gntdev_map_grant_ref op;
+               down_write(&private_data->grants_sem);
+               down_write(&private_data->free_list_sem);
+
+               if ((rc = copy_from_user(&op, (void __user *) arg, 
+                                        sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto map_out;
+               }
+               if (unlikely(op.count <= 0)) {
+                       rc = -EINVAL;
+                       goto map_out;
+               }
+
+               if (op.count == 1) {
+                       if ((rc = add_grant_reference(flip, &op.refs[0],
+                                                     &op.index)) < 0) {
+                               printk(KERN_ERR "Adding grant reference "
+                                      "failed (%d).\n", rc);
+                               goto map_out;
+                       }
+               } else {
+                       struct ioctl_gntdev_grant_ref *refs, *u;
+                       refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
+                       if (!refs) {
+                               rc = -ENOMEM;
+                               goto map_out;
+                       }
+                       u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
+                       if ((rc = copy_from_user(refs,
+                                                (void __user *)u,
+                                                sizeof(*refs) * op.count))) {
+                               printk(KERN_ERR "Copying refs from user failed"
+                                      " (%d).\n", rc);
+                               rc = -EINVAL;
+                               goto map_out;
+                       }
+                       if ((rc = find_contiguous_free_range(flip, op.count))
+                           < 0) {
+                               printk(KERN_ERR "Finding contiguous range "
+                                      "failed (%d).\n", rc);
+                               kfree(refs);
+                               goto map_out;
+                       }
+                       op.index = rc << PAGE_SHIFT;
+                       if ((rc = add_grant_references(flip, op.count,
+                                                      refs, rc))) {
+                               printk(KERN_ERR "Adding grant references "
+                                      "failed (%d).\n", rc);
+                               kfree(refs);
+                               goto map_out;
+                       }
+                       compress_free_list(flip);
+                       kfree(refs);
+               }
+               if ((rc = copy_to_user((void __user *) arg, 
+                                      &op, 
+                                      sizeof(op)))) {
+                       printk(KERN_ERR "Copying result back to user failed "
+                              "(%d)\n", rc);
+                       rc = -EFAULT;
+                       goto map_out;
+               }
+       map_out:
+               up_write(&private_data->grants_sem);
+               up_write(&private_data->free_list_sem);
+               return rc;
+       }
+       case IOCTL_GNTDEV_UNMAP_GRANT_REF:
+       {
+               struct ioctl_gntdev_unmap_grant_ref op;
+               int i, start_index;
+
+               down_write(&private_data->grants_sem);
+               down_write(&private_data->free_list_sem);
+
+               if ((rc = copy_from_user(&op, 
+                                        (void __user *) arg, 
+                                        sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto unmap_out;
+               }
+
+               start_index = op.index >> PAGE_SHIFT;
+
+               /* First, check that all pages are in the NOT_YET_MAPPED
+                * state.
+                */
+               for (i = 0; i < op.count; ++i) {
+                       if (unlikely
+                           (private_data->grants[start_index + i].state
+                            != GNTDEV_SLOT_NOT_YET_MAPPED)) {
+                               if (private_data->grants[start_index + i].state
+                                   == GNTDEV_SLOT_INVALID) {
+                                       printk(KERN_ERR
+                                              "Tried to remove an invalid "
+                                              "grant at offset 0x%x.",
+                                              (start_index + i) 
+                                              << PAGE_SHIFT);
+                                       rc = -EINVAL;
+                               } else {
+                                       printk(KERN_ERR
+                                              "Tried to remove a grant which "
+                                              "is currently mmap()-ed at "
+                                              "offset 0x%x.",
+                                              (start_index + i) 
+                                              << PAGE_SHIFT);
+                                       rc = -EBUSY;
+                               }
+                               goto unmap_out;
+                       }
+               }
+
+               /* Unmap pages and add them to the free list.
+                */
+               for (i = 0; i < op.count; ++i) {
+                       private_data->grants[start_index+i].state = 
+                               GNTDEV_SLOT_INVALID;
+                       private_data->grants[start_index+i].u.free_list_index =
+                               private_data->free_list_size;
+                       private_data->free_list[private_data->free_list_size] =
+                               start_index + i;
+                       ++private_data->free_list_size;
+               }
+               compress_free_list(flip);
+
+       unmap_out:
+               up_write(&private_data->grants_sem);
+               up_write(&private_data->free_list_sem);
+               return rc;
+       }
+       case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
+       {
+               struct ioctl_gntdev_get_offset_for_vaddr op;
+               struct vm_area_struct *vma;
+               unsigned long vaddr;
+
+               if ((rc = copy_from_user(&op, 
+                                        (void __user *) arg, 
+                                        sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto get_offset_out;
+               }
+               vaddr = (unsigned long)op.vaddr;
+
+               down_read(&current->mm->mmap_sem);              
+               vma = find_vma(current->mm, vaddr);
+               if (vma == NULL) {
+                       rc = -EFAULT;
+                       goto get_offset_unlock_out;
+               }
+               if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) {
+                       printk(KERN_ERR "The vaddr specified does not belong "
+                              "to a gntdev instance: %#lx\n", vaddr);
+                       rc = -EFAULT;
+                       goto get_offset_unlock_out;
+               }
+               if (vma->vm_start != vaddr) {
+                       printk(KERN_ERR "The vaddr specified in an "
+                              "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
+                              "the start of the VM area. vma->vm_start = "
+                              "%#lx; vaddr = %#lx\n",
+                              vma->vm_start, vaddr);
+                       rc = -EFAULT;
+                       goto get_offset_unlock_out;
+               }
+               op.offset = vma->vm_pgoff << PAGE_SHIFT;
+               op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+               up_read(&current->mm->mmap_sem);
+               if ((rc = copy_to_user((void __user *) arg, 
+                                      &op, 
+                                      sizeof(op)))) {
+                       rc = -EFAULT;
+                       goto get_offset_out;
+               }
+               goto get_offset_out;
+       get_offset_unlock_out:
+               up_read(&current->mm->mmap_sem);
+       get_offset_out:
+               return rc;
+       }
+       default:
+               return -ENOIOCTLCMD;
+       }
+
+       return 0;
+}
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/drivers/xen/util.c
--- a/linux-2.6-xen-sparse/drivers/xen/util.c   Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/util.c   Tue Apr 03 13:04:51 2007 -0600
@@ -4,6 +4,26 @@
 #include <linux/vmalloc.h>
 #include <asm/uaccess.h>
 #include <xen/driver_util.h>
+
+struct class *get_xen_class(void)
+{
+       static struct class *xen_class;
+
+       if (xen_class)
+               return xen_class;
+
+       xen_class = class_create(THIS_MODULE, "xen");
+       if (IS_ERR(xen_class)) {
+               printk("Failed to create xen sysfs class.\n");
+               xen_class = NULL;
+       }
+
+       return xen_class;
+}
+EXPORT_SYMBOL_GPL(get_xen_class);
+
+/* Todo: merge ia64 ('auto-translate physmap') versions of these functions. */
+#ifndef __ia64__
 
 static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
 {
@@ -46,3 +66,5 @@ void free_vm_area(struct vm_struct *area
        kfree(area);
 }
 EXPORT_SYMBOL_GPL(free_vm_area);
+
+#endif /* !__ia64__ */
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/include/linux/mm.h
--- a/linux-2.6-xen-sparse/include/linux/mm.h   Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/include/linux/mm.h   Tue Apr 03 13:04:51 2007 -0600
@@ -205,6 +205,10 @@ struct vm_operations_struct {
        /* notification that a previously read-only page is about to become
         * writable, if an error is returned it will cause a SIGBUS */
        int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
+       /* Area-specific function for clearing the PTE at @ptep. Returns the
+        * original value of @ptep. */
+       pte_t (*zap_pte)(struct vm_area_struct *vma, 
+                        unsigned long addr, pte_t *ptep, int is_fullmm);
 #ifdef CONFIG_NUMA
        int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
        struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/include/xen/driver_util.h
--- a/linux-2.6-xen-sparse/include/xen/driver_util.h    Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/include/xen/driver_util.h    Tue Apr 03 13:04:51 2007 -0600
@@ -3,9 +3,12 @@
 #define __ASM_XEN_DRIVER_UTIL_H__
 
 #include <linux/vmalloc.h>
+#include <linux/device.h>
 
 /* Allocate/destroy a 'vmalloc' VM area. */
 extern struct vm_struct *alloc_vm_area(unsigned long size);
 extern void free_vm_area(struct vm_struct *area);
 
+extern struct class *get_xen_class(void);
+
 #endif /* __ASM_XEN_DRIVER_UTIL_H__ */
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/include/xen/public/gntdev.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/xen/public/gntdev.h  Tue Apr 03 13:04:51 2007 -0600
@@ -0,0 +1,105 @@
+/******************************************************************************
+ * gntdev.h
+ * 
+ * Interface to /dev/xen/gntdev.
+ * 
+ * Copyright (c) 2007, D G Murray
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTDEV_H__
+#define __LINUX_PUBLIC_GNTDEV_H__
+
+struct ioctl_gntdev_grant_ref {
+       /* The domain ID of the grant to be mapped. */
+       uint32_t domid;
+       /* The grant reference of the grant to be mapped. */
+       uint32_t ref;
+};
+
+/*
+ * Inserts the grant references into the mapping table of an instance
+ * of gntdev. N.B. This does not perform the mapping, which is deferred
+ * until mmap() is called with @index as the offset.
+ */
+#define IOCTL_GNTDEV_MAP_GRANT_REF \
+_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
+struct ioctl_gntdev_map_grant_ref {
+       /* IN parameters */
+       /* The number of grants to be mapped. */
+       uint32_t count;
+       uint32_t pad;
+       /* OUT parameters */
+       /* The offset to be used on a subsequent call to mmap(). */
+       uint64_t index;
+       /* Variable IN parameter. */
+       /* Array of grant references, of size @count. */
+       struct ioctl_gntdev_grant_ref refs[1];
+};
+
+/*
+ * Removes the grant references from the mapping table of an instance
+ * of gntdev. N.B. munmap() must be called on the relevant virtual address(es)
+ * before this ioctl is called, or an error will result.
+ */
+#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
+_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))       
+struct ioctl_gntdev_unmap_grant_ref {
+       /* IN parameters */
+       /* The offset that was returned by the corresponding map operation. */
+       uint64_t index;
+       /* The number of pages to be unmapped. */
+       uint32_t count;
+       uint32_t pad;
+};
+
+/*
+ * Returns the offset in the driver's address space that corresponds
+ * to @vaddr. This can be used to perform a munmap(), followed by an
+ * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by
+ * the caller. The number of pages that were allocated at the same time as
+ * @vaddr is returned in @count.
+ *
+ * N.B. Where more than one page has been mapped into a contiguous range, the
+ *      supplied @vaddr must correspond to the start of the range; otherwise
+ *      an error will result. It is only possible to munmap() the entire
+ *      contiguously-allocated range at once, and not any subrange thereof.
+ */
+#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
+_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr))
+struct ioctl_gntdev_get_offset_for_vaddr {
+       /* IN parameters */
+       /* The virtual address of the first mapped page in a range. */
+       uint64_t vaddr;
+       /* OUT parameters */
+       /* The offset that was used in the initial mmap() operation. */
+       uint64_t offset;
+       /* The number of pages mapped in the VM area that begins at @vaddr. */
+       uint32_t count;
+       uint32_t pad;
+};
+
+#endif /* __LINUX_PUBLIC_GNTDEV_H__ */
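
To illustrate the protocol this header defines, here is a hedged user-space sketch (not part of the patch): one grant reference is inserted with the MAP ioctl, mapped with mmap() at the returned index, and then released in the reverse order. The header path and device name follow the libxc code later in this changeset; error handling is abbreviated and map_one_grant() is a hypothetical helper.

#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/sys/gntdev.h>   /* install path used by xc_linux.c below */

static int map_one_grant(uint32_t domid, uint32_t ref)
{
    struct ioctl_gntdev_map_grant_ref map = { .count = 1 };
    struct ioctl_gntdev_unmap_grant_ref unmap;
    long page = sysconf(_SC_PAGESIZE);
    void *addr;
    int fd = open("/dev/xen/gntdev", O_RDWR);

    if (fd < 0)
        return -1;

    map.refs[0].domid = domid;
    map.refs[0].ref   = ref;
    if (ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) == 0) {
        /* @index from the ioctl is the mmap() offset for this mapping. */
        addr = mmap(NULL, page, PROT_READ, MAP_SHARED, fd, map.index);
        if (addr != MAP_FAILED) {
            /* ... use the shared page ... */
            munmap(addr, page);          /* munmap() first ...            */
        }
        unmap.index = map.index;         /* ... then free the slot in the */
        unmap.count = 1;                 /* driver's mapping table.       */
        ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap);
    }

    close(fd);
    return 0;
}
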
diff -r fc9e2f7920c9 -r f378c424e0ce linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c  Fri Mar 30 17:18:42 2007 -0600
+++ b/linux-2.6-xen-sparse/mm/memory.c  Tue Apr 03 13:04:51 2007 -0600
@@ -659,8 +659,12 @@ static unsigned long zap_pte_range(struc
                                     page->index > details->last_index))
                                        continue;
                        }
-                       ptent = ptep_get_and_clear_full(mm, addr, pte,
-                                                       tlb->fullmm);
+                       if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte))
+                               ptent = vma->vm_ops->zap_pte(vma, addr, pte,
+                                                            tlb->fullmm);
+                       else
+                               ptent = ptep_get_and_clear_full(mm, addr, pte,
+                                                               tlb->fullmm);
                        tlb_remove_tlb_entry(tlb, pte, addr);
                        if (unlikely(!page))
                                continue;
@@ -755,6 +759,7 @@ static unsigned long unmap_page_range(st
                details = NULL;
 
        BUG_ON(addr >= end);
+
        tlb_start_vma(tlb, vma);
        pgd = pgd_offset(vma->vm_mm, addr);
        do {
diff -r fc9e2f7920c9 -r f378c424e0ce tools/blktap/drivers/qcow2raw.c
--- a/tools/blktap/drivers/qcow2raw.c   Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/blktap/drivers/qcow2raw.c   Tue Apr 03 13:04:51 2007 -0600
@@ -51,7 +51,6 @@
 #define BLOCK_PROCESSSZ 4096
 
 static int maxfds, *qcowio_fd, *aio_fd, running = 1, complete = 0; 
-static int read_complete = 0, write_complete = 0;
 static int returned_read_events = 0, returned_write_events = 0;
 static int submit_events = 0;
 static uint32_t read_idx = 0, write_idx = 0;
@@ -109,8 +108,6 @@ static int send_write_responses(struct d
        written += BLOCK_PROCESSSZ;
        returned_write_events++;
        write_idx = idx;
-       if (complete && (returned_write_events == submit_events)) 
-               write_complete = 1;
 
        debug_output(written, dd->td_state->size << 9);
        free(private);
@@ -126,8 +123,6 @@ static int send_read_responses(struct di
        
        returned_read_events++;
        read_idx = idx;
-       if (complete && (returned_read_events == submit_events)) 
-               read_complete = 1;
        
        ret = ddaio.drv->td_queue_write(&ddaio, idx, BLOCK_PROCESSSZ>>9, private, 
                                        send_write_responses, idx, private);
@@ -136,7 +131,7 @@ static int send_read_responses(struct di
                return 0;
        }
 
-       if ( (complete && returned_read_events == submit_events) || 
+       if ( (returned_read_events == submit_events) || 
             (returned_read_events % 10 == 0) ) {
                ddaio.drv->td_submit(&ddaio);
        }
@@ -299,6 +294,7 @@ int main(int argc, char *argv[])
                        }
                
                        /*Attempt to read 4k sized blocks*/
+                       submit_events++;
                        ret = ddqcow.drv->td_queue_read(&ddqcow, i>>9,
                                                        BLOCK_PROCESSSZ>>9, buf, 
                                                        send_read_responses, i>>9, buf);
@@ -309,7 +305,6 @@ int main(int argc, char *argv[])
                                exit(-1);
                        } else {
                                i += BLOCK_PROCESSSZ;
-                               submit_events++;
                        }
 
                        if (i >= ddqcow.td_state->size<<9) {
diff -r fc9e2f7920c9 -r f378c424e0ce tools/examples/xmexample.hvm
--- a/tools/examples/xmexample.hvm      Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/examples/xmexample.hvm      Tue Apr 03 13:04:51 2007 -0600
@@ -180,6 +180,10 @@ serial='pty'
 
 
 #-----------------------------------------------------------------------------
+#    set the real time clock offset in seconds [default=0 i.e. same as dom0]
+#rtc_timeoffset=3600
+
+#-----------------------------------------------------------------------------
 #    start in full screen
 #full-screen=1   
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/target-i386-dm/helper2.c      Tue Apr 03 13:04:51 2007 -0600
@@ -73,6 +73,8 @@ int vcpus = 1;
 int vcpus = 1;
 
 int xc_handle;
+
+long time_offset = 0;
 
 shared_iopage_t *shared_page = NULL;
 
@@ -439,6 +441,34 @@ void cpu_ioreq_xor(CPUState *env, ioreq_
     req->data = tmp1;
 }
 
+void timeoffset_get()
+{
+    char *p;
+
+    p = xenstore_vm_read(domid, "rtc/timeoffset", NULL);
+    if (!p)
+       return;
+
+    if (sscanf(p, "%ld", &time_offset) == 1)
+       fprintf(logfile, "Time offset set %ld\n", time_offset);
+    else
+       time_offset = 0;
+
+    xc_domain_set_time_offset(xc_handle, domid, time_offset);
+
+    free(p);
+}
+
+void cpu_ioreq_timeoffset(CPUState *env, ioreq_t *req)
+{
+    char b[64];
+
+    time_offset += (ulong)req->data;
+
+    sprintf(b, "%ld", time_offset);
+    xenstore_vm_write(domid, "rtc/timeoffset", b);
+}
+
 void cpu_ioreq_xchg(CPUState *env, ioreq_t *req)
 {
     unsigned long tmp1;
@@ -478,6 +508,9 @@ void __handle_ioreq(CPUState *env, ioreq
     case IOREQ_TYPE_XCHG:
         cpu_ioreq_xchg(env, req);
         break;
+    case IOREQ_TYPE_TIMEOFFSET:
+       cpu_ioreq_timeoffset(env, req);
+       break;
     default:
         hw_error("Invalid ioreq type 0x%x\n", req->type);
     }
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/vl.c  Tue Apr 03 13:04:51 2007 -0600
@@ -6670,6 +6670,9 @@ int main(int argc, char **argv)
     }
     free(page_array);
 #endif
+
+    timeoffset_get();
+
 #else  /* !CONFIG_DM */
 
     phys_ram_base = qemu_vmalloc(phys_ram_size);
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/vl.h
--- a/tools/ioemu/vl.h  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/vl.h  Tue Apr 03 13:04:51 2007 -0600
@@ -1276,6 +1276,12 @@ int xenstore_unsubscribe_from_hotplug_st
                                              const char *inst,
                                              const char *token);
 
+int xenstore_vm_write(int domid, char *key, char *val);
+char *xenstore_vm_read(int domid, char *key, int *len);
+
+/* helper2.c */
+extern long time_offset;
+void timeoffset_get(void);
 
 /* xen_platform.c */
 void pci_xen_platform_init(PCIBus *bus);
diff -r fc9e2f7920c9 -r f378c424e0ce tools/ioemu/xenstore.c
--- a/tools/ioemu/xenstore.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/ioemu/xenstore.c    Tue Apr 03 13:04:51 2007 -0600
@@ -567,3 +567,72 @@ int xenstore_unsubscribe_from_hotplug_st
 
     return rc;
 }
+
+char *xenstore_vm_read(int domid, char *key, int *len)
+{
+    char *buf = NULL, *path = NULL, *value = NULL;
+
+    if (xsh == NULL)
+       goto out;
+
+    path = xs_get_domain_path(xsh, domid);
+    if (path == NULL) {
+       fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
+       goto out;
+    }
+
+    pasprintf(&buf, "%s/vm", path);
+    free(path);
+    path = xs_read(xsh, XBT_NULL, buf, NULL);
+    if (path == NULL) {
+       fprintf(logfile, "xs_read(%s): read error\n", buf);
+       goto out;
+    }
+
+    pasprintf(&buf, "%s/%s", path, key);
+    value = xs_read(xsh, XBT_NULL, buf, len);
+    if (value == NULL) {
+       fprintf(logfile, "xs_read(%s): read error\n", buf);
+       goto out;
+    }
+
+ out:
+    free(path);
+    free(buf);
+    return value;
+}
+
+int xenstore_vm_write(int domid, char *key, char *value)
+{
+    char *buf = NULL, *path = NULL;
+    int rc = -1;
+
+    if (xsh == NULL)
+       goto out;
+
+    path = xs_get_domain_path(xsh, domid);
+    if (path == NULL) {
+       fprintf(logfile, "xs_get_domain_path(%d): error\n", domid);
+       goto out;
+    }
+
+    pasprintf(&buf, "%s/vm", path);
+    free(path);
+    path = xs_read(xsh, XBT_NULL, buf, NULL);
+    if (path == NULL) {
+       fprintf(logfile, "xs_read(%s): read error\n", buf);
+       goto out;
+    }
+
+    pasprintf(&buf, "%s/%s", path, key);
+    rc = xs_write(xsh, XBT_NULL, buf, value, strlen(value)) ? 0 : -1;
+    if (rc) {
+       fprintf(logfile, "xs_write(%s, %s): write error\n", buf, key);
+       goto out;
+    }
+
+ out:
+    free(path);
+    free(buf);
+    return rc;
+}
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/ia64/xc_ia64_linux_restore.c
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c  Tue Apr 03 13:04:51 2007 -0600
@@ -14,8 +14,14 @@
 
 #define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
 
-/* total number of pages used by the current guest */
-static unsigned long max_pfn;
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
+
+/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
+static unsigned long nr_pfns;
+
+/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */
+static unsigned long max_nr_pfns;
 
 static ssize_t
 read_exact(int fd, void *buf, size_t count)
@@ -57,9 +63,9 @@ read_page(int xc_handle, int io_fd, uint
 
 int
 xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
-                 unsigned long nr_pfns, unsigned int store_evtchn,
-                 unsigned long *store_mfn, unsigned int console_evtchn,
-                 unsigned long *console_mfn)
+                 unsigned long p2msize, unsigned long maxnrpfns,
+                 unsigned int store_evtchn, unsigned long *store_mfn,
+                 unsigned int console_evtchn, unsigned long *console_mfn)
 {
     DECLARE_DOMCTL;
     int rc = 1, i;
@@ -79,10 +85,13 @@ xc_linux_restore(int xc_handle, int io_f
     /* A temporary mapping of the guest's start_info page. */
     start_info_t *start_info;
 
-    max_pfn = nr_pfns;
-
-    DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn);
-
+    p2m_size = p2msize;
+    max_nr_pfns = maxnrpfns;
+
+    /* For info only */
+    nr_pfns = 0;
+
+    DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
 
     if (!read_exact(io_fd, &ver, sizeof(unsigned long))) {
        ERROR("Error when reading version");
@@ -99,29 +108,29 @@ xc_linux_restore(int xc_handle, int io_f
         return 1;
     }
 
-    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) {
         errno = ENOMEM;
         goto out;
     }
 
     /* Get pages.  */
-    page_array = malloc(max_pfn * sizeof(unsigned long));
+    page_array = malloc(p2m_size * sizeof(unsigned long));
     if (page_array == NULL) {
         ERROR("Could not allocate memory");
         goto out;
     }
 
-    for ( i = 0; i < max_pfn; i++ )
+    for ( i = 0; i < p2m_size; i++ )
         page_array[i] = i;
 
-    if ( xc_domain_memory_populate_physmap(xc_handle, dom, max_pfn,
+    if ( xc_domain_memory_populate_physmap(xc_handle, dom, p2m_size,
                                            0, 0, page_array) )
     {
         ERROR("Failed to allocate memory for %ld KB to dom %d.\n",
-              PFN_TO_KB(max_pfn), dom);
-        goto out;
-    }
-    DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(max_pfn));
+              PFN_TO_KB(p2m_size), dom);
+        goto out;
+    }
+    DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(p2m_size));
 
     if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) {
         ERROR("read: domain setup");
@@ -131,9 +140,9 @@ xc_linux_restore(int xc_handle, int io_f
     /* Build firmware (will be overwritten).  */
     domctl.domain = (domid_t)dom;
     domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query;
-    domctl.u.arch_setup.bp = ((nr_pfns - 3) << PAGE_SHIFT)
+    domctl.u.arch_setup.bp = ((p2m_size - 3) << PAGE_SHIFT)
                            + sizeof (start_info_t);
-    domctl.u.arch_setup.maxmem = (nr_pfns - 3) << PAGE_SHIFT;
+    domctl.u.arch_setup.maxmem = (p2m_size - 3) << PAGE_SHIFT;
     
     domctl.cmd = XEN_DOMCTL_arch_setup;
     if (xc_domctl(xc_handle, &domctl))
@@ -157,8 +166,6 @@ xc_linux_restore(int xc_handle, int io_f
         }
        if (gmfn == INVALID_MFN)
                break;
-
-       //DPRINTF("xc_linux_restore: page %lu/%lu at %lx\n", gmfn, max_pfn, pfn);
 
        if (read_page(xc_handle, io_fd, dom, gmfn) < 0)
                goto out;
@@ -281,7 +288,7 @@ xc_linux_restore(int xc_handle, int io_f
     /* Uncanonicalise the suspend-record frame number and poke resume rec. */
     start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                       PROT_READ | PROT_WRITE, gmfn);
-    start_info->nr_pages = max_pfn;
+    start_info->nr_pages = p2m_size;
     start_info->shared_info = shared_info_frame << PAGE_SHIFT;
     start_info->flags = 0;
     *store_mfn = start_info->store_mfn;
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_core.c     Tue Apr 03 13:04:51 2007 -0600
@@ -312,7 +312,7 @@ xc_domain_dumpcore_via_callback(int xc_h
 
     int auto_translated_physmap;
     xen_pfn_t *p2m = NULL;
-    unsigned long max_pfn = 0;
+    unsigned long p2m_size = 0;
     struct xen_dumpcore_p2m *p2m_array = NULL;
 
     uint64_t *pfn_array = NULL;
@@ -396,7 +396,7 @@ xc_domain_dumpcore_via_callback(int xc_h
         }
 
         sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo,
-                                   &p2m, &max_pfn);
+                                   &p2m, &p2m_size);
         if ( sts != 0 )
             goto out;
     }
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_core_x86.c
--- a/tools/libxc/xc_core_x86.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_core_x86.c Tue Apr 03 13:04:51 2007 -0600
@@ -38,7 +38,7 @@ xc_core_arch_memory_map_get(int xc_handl
                             xc_core_memory_map_t **mapp,
                             unsigned int *nr_entries)
 {
-    unsigned long max_pfn = max_gpfn(xc_handle, info->domid);
+    unsigned long p2m_size = max_gpfn(xc_handle, info->domid);
     xc_core_memory_map_t *map;
 
     map = malloc(sizeof(*map));
@@ -49,7 +49,7 @@ xc_core_arch_memory_map_get(int xc_handl
     }
 
     map->addr = 0;
-    map->size = max_pfn << PAGE_SHIFT;
+    map->size = p2m_size << PAGE_SHIFT;
 
     *mapp = map;
     *nr_entries = 1;
@@ -65,13 +65,13 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
     xen_pfn_t *live_p2m_frame_list_list = NULL;
     xen_pfn_t *live_p2m_frame_list = NULL;
     uint32_t dom = info->domid;
-    unsigned long max_pfn = max_gpfn(xc_handle, info->domid);
+    unsigned long p2m_size = max_gpfn(xc_handle, info->domid);
     int ret = -1;
     int err;
 
-    if ( max_pfn < info->nr_pages  )
+    if ( p2m_size < info->nr_pages  )
     {
-        ERROR("max_pfn < nr_pages -1 (%lx < %lx", max_pfn, info->nr_pages - 1);
+        ERROR("p2m_size < nr_pages -1 (%lx < %lx", p2m_size, info->nr_pages - 1);
         goto out;
     }
 
@@ -106,7 +106,7 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
         goto out;
     }
 
-    *pfnp = max_pfn;
+    *pfnp = p2m_size;
 
     ret = 0;
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_hvm_restore.c
--- a/tools/libxc/xc_hvm_restore.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_hvm_restore.c      Tue Apr 03 13:04:51 2007 -0600
@@ -95,7 +95,7 @@ int xc_hvm_restore(int xc_handle, int io
     unsigned long pfn_array_size = max_pfn + 1;
 
     /* Number of pages of memory the guest has.  *Not* the same as max_pfn. */
-    unsigned long nr_pages = max_pfn + 1;
+    unsigned long nr_pages = max_pfn;
     /* MMIO hole doesn't contain RAM */
     if ( nr_pages >= HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT ) 
         nr_pages -= HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; 
@@ -270,7 +270,6 @@ int xc_hvm_restore(int xc_handle, int io
 
     }/*while 1*/
     
-/*    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);*/
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
 
@@ -279,13 +278,22 @@ int xc_hvm_restore(int xc_handle, int io
     else
         shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
 
+    /* Ensure we clear these pages */
+    if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
+         xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
+         xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) ) {
+        rc = -1;
+        goto out;
+    }
+
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
 
     /* caculate the store_mfn , wrong val cause hang when introduceDomain */
     *store_mfn = (v_end >> PAGE_SHIFT) - 2;
-    DPRINTF("hvm restore:calculate new store_mfn=0x%lx,v_end=0x%llx..\n", *store_mfn, v_end);
+    DPRINTF("hvm restore: calculate new store_mfn=0x%lx, v_end=0x%llx.\n", 
+            *store_mfn, v_end);
 
     if (!read_exact(io_fd, &nr_vcpus, sizeof(uint32_t))) {
         ERROR("error read nr vcpu !\n");
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_hvm_save.c
--- a/tools/libxc/xc_hvm_save.c Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_hvm_save.c Tue Apr 03 13:04:51 2007 -0600
@@ -332,10 +332,10 @@ int xc_hvm_save(int xc_handle, int io_fd
 
     unsigned long total_sent    = 0;
 
-    DPRINTF("xc_hvm_save:dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, live=%d, debug=%d.\n",
-            dom, max_iters, max_factor, flags,
+    DPRINTF("xc_hvm_save: dom=%d, max_iters=%d, max_factor=%d, flags=0x%x, "
+            "live=%d, debug=%d.\n", dom, max_iters, max_factor, flags,
             live, debug);
-
+    
     /* If no explicit control parameters given, use defaults */
     if(!max_iters)
         max_iters = DEF_MAX_ITERS;
@@ -382,7 +382,6 @@ int xc_hvm_save(int xc_handle, int io_fd
         ERROR("HVM: Could not read magic PFN parameters");
         goto out;
     }
-
     DPRINTF("saved hvm domain info:max_memkb=0x%lx, max_mfn=0x%lx, "
             "nr_pages=0x%lx\n", info.max_memkb, max_mfn, info.nr_pages); 
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_linux.c    Tue Apr 03 13:04:51 2007 -0600
@@ -2,6 +2,9 @@
  *
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * xc_gnttab functions:
+ * Copyright (c) 2007, D G Murray <Derek.Murray@xxxxxxxxxxxx>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -13,6 +16,7 @@
 
 #include <xen/memory.h>
 #include <xen/sys/evtchn.h>
+#include <xen/sys/gntdev.h>
 #include <unistd.h>
 #include <fcntl.h>
 
@@ -361,6 +365,158 @@ void discard_file_cache(int fd, int flus
 
  out:
     errno = saved_errno;
+}
+
+#define GNTTAB_DEV_NAME "/dev/xen/gntdev"
+
+int xc_gnttab_open(void)
+{
+    struct stat st;
+    int fd;
+    int devnum;
+    
+    devnum = xc_find_device_number("gntdev");
+    
+    /* Make sure any existing device file links to correct device. */
+    if ( (lstat(GNTTAB_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
+         (st.st_rdev != devnum) )
+        (void)unlink(GNTTAB_DEV_NAME);
+    
+reopen:
+    if ( (fd = open(GNTTAB_DEV_NAME, O_RDWR)) == -1 )
+    {
+        if ( (errno == ENOENT) &&
+             ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
+             (mknod(GNTTAB_DEV_NAME, S_IFCHR|0600, devnum) == 0) )
+            goto reopen;
+        
+        PERROR("Could not open grant table interface");
+        return -1;
+    }
+    
+    return fd;
+}
+
+int xc_gnttab_close(int xcg_handle)
+{
+    return close(xcg_handle);
+}
+
+void *xc_gnttab_map_grant_ref(int xcg_handle,
+                              uint32_t domid,
+                              uint32_t ref,
+                              int prot)
+{
+    struct ioctl_gntdev_map_grant_ref map;
+    void *addr;
+    
+    map.count = 1;
+    map.refs[0].domid = domid;
+    map.refs[0].ref   = ref;
+
+    if ( ioctl(xcg_handle, IOCTL_GNTDEV_MAP_GRANT_REF, &map) )
+        return NULL;
+    
+    addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, xcg_handle, map.index);
+    if ( addr == MAP_FAILED )
+    {
+        int saved_errno = errno;
+        struct ioctl_gntdev_unmap_grant_ref unmap_grant;
+        /* Unmap the driver slots used to store the grant information. */
+        unmap_grant.index = map.index;
+        unmap_grant.count = 1;
+        ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
+        errno = saved_errno;
+        return NULL;
+    }
+    
+    return addr;
+}
+
+void *xc_gnttab_map_grant_refs(int xcg_handle,
+                               uint32_t count,
+                               uint32_t *domids,
+                               uint32_t *refs,
+                               int prot)
+{
+    struct ioctl_gntdev_map_grant_ref *map;
+    void *addr = NULL;
+    int i;
+    
+    map = malloc(sizeof(*map) +
+                 (count-1) * sizeof(struct ioctl_gntdev_grant_ref));
+    if ( map == NULL )
+        return NULL;
+
+    for ( i = 0; i < count; i++ )
+    {
+        map->refs[i].domid = domids[i];
+        map->refs[i].ref   = refs[i];
+    }
+
+    map->count = count;
+    
+    if ( ioctl(xcg_handle, IOCTL_GNTDEV_MAP_GRANT_REF, map) )
+        goto out;
+
+    addr = mmap(NULL, PAGE_SIZE * count, prot, MAP_SHARED, xcg_handle,
+                map->index);
+    if ( addr == MAP_FAILED )
+    {
+        int saved_errno = errno;
+        struct ioctl_gntdev_unmap_grant_ref unmap_grant;
+        /* Unmap the driver slots used to store the grant information. */
+        unmap_grant.index = map->index;
+        unmap_grant.count = count;
+        ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
+        errno = saved_errno;
+        addr = NULL;
+    }
+
+ out:
+    free(map);
+    return addr;
+}
+
+int xc_gnttab_munmap(int xcg_handle,
+                     void *start_address,
+                     uint32_t count)
+{
+    struct ioctl_gntdev_get_offset_for_vaddr get_offset;
+    struct ioctl_gntdev_unmap_grant_ref unmap_grant;
+    int rc;
+
+    if ( start_address == NULL )
+    {
+        errno = EINVAL;
+        return -1;
+    }
+
+    /* First, it is necessary to get the offset which was initially used to
+     * mmap() the pages.
+     */
+    get_offset.vaddr = (unsigned long)start_address;
+    if ( (rc = ioctl(xcg_handle, IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR, 
+                     &get_offset)) )
+        return rc;
+
+    if ( get_offset.count != count )
+    {
+        errno = EINVAL;
+        return -1;
+    }
+
+    /* Next, unmap the memory. */
+    if ( (rc = munmap(start_address, count * getpagesize())) )
+        return rc;
+    
+    /* Finally, unmap the driver slots used to store the grant information. */
+    unmap_grant.index = get_offset.offset;
+    unmap_grant.count = count;
+    if ( (rc = ioctl(xcg_handle, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant)) )
+        return rc;
+
+    return 0;
 }
 
 /*
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_linux_restore.c    Tue Apr 03 13:04:51 2007 -0600
@@ -22,8 +22,14 @@ static unsigned long hvirt_start;
 /* #levels of page tables used by the current guest */
 static unsigned int pt_levels;
 
-/* total number of pages used by the current guest */
-static unsigned long max_pfn;
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
+
+/* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
+static unsigned long nr_pfns;
+
+/* largest possible value of nr_pfns (i.e. domain's maximum memory size) */
+static unsigned long max_nr_pfns;
 
 /* Live mapping of the table mapping each PFN to its current MFN. */
 static xen_pfn_t *live_p2m = NULL;
@@ -33,7 +39,6 @@ static xen_pfn_t *p2m = NULL;
 
 /* A table of P2M mappings in the current region */
 static xen_pfn_t *p2m_batch = NULL;
-
 
 static ssize_t
 read_exact(int fd, void *buf, size_t count)
@@ -85,11 +90,11 @@ static int uncanonicalize_pagetable(int 
         
         pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
         
-        if(pfn >= max_pfn) {
+        if(pfn >= p2m_size) {
             /* This "page table page" is probably not one; bail. */
             ERROR("Frame number in type %lu page table is out of range: "
-                  "i=%d pfn=0x%lx max_pfn=%lu",
-                  type >> 28, i, pfn, max_pfn);
+                  "i=%d pfn=0x%lx p2m_size=%lu",
+                  type >> 28, i, pfn, p2m_size);
             return 0;
         }
         
@@ -138,8 +143,9 @@ static int uncanonicalize_pagetable(int 
     return 1;
 }
 
-int xc_linux_restore(int xc_handle, int io_fd,
-                     uint32_t dom, unsigned long nr_pfns,
+
+int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
+                     unsigned long p2msize, unsigned long maxnrpfns,
                      unsigned int store_evtchn, unsigned long *store_mfn,
                      unsigned int console_evtchn, unsigned long *console_mfn)
 {
@@ -191,9 +197,13 @@ int xc_linux_restore(int xc_handle, int 
     unsigned int max_vcpu_id = 0;
     int new_ctxt_format = 0;
 
-    max_pfn = nr_pfns;
-
-    DPRINTF("xc_linux_restore start: max_pfn = %lx\n", max_pfn);
+    p2m_size    = p2msize;
+    max_nr_pfns = maxnrpfns;
+
+    /* For info only */
+    nr_pfns = 0;
+
+    DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size);
 
     /*
      * XXX For now, 32bit dom0's can only save/restore 32bit domUs
@@ -294,8 +304,8 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     /* We want zeroed memory so use calloc rather than malloc. */
-    p2m        = calloc(max_pfn, sizeof(xen_pfn_t));
-    pfn_type   = calloc(max_pfn, sizeof(unsigned long));
+    p2m        = calloc(p2m_size, sizeof(xen_pfn_t));
+    pfn_type   = calloc(p2m_size, sizeof(unsigned long));
     region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
     p2m_batch  = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t));
 
@@ -325,13 +335,13 @@ int xc_linux_restore(int xc_handle, int 
     }
     shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
 
-    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_nr_pfns)) != 0) {
         errno = ENOMEM;
         goto out;
     }
 
     /* Mark all PFNs as invalid; we allocate on demand */
-    for ( pfn = 0; pfn < max_pfn; pfn++ )
+    for ( pfn = 0; pfn < p2m_size; pfn++ )
         p2m[pfn] = INVALID_P2M_ENTRY;
 
     if(!(mmu = xc_init_mmu_updates(xc_handle, dom))) {
@@ -352,7 +362,7 @@ int xc_linux_restore(int xc_handle, int 
 
         int j, nr_mfns = 0; 
 
-        this_pc = (n * 100) / max_pfn;
+        this_pc = (n * 100) / p2m_size;
         if ( (this_pc - prev_pc) >= 5 )
         {
             PPRINTF("\b\b\b\b%3d%%", this_pc);
@@ -436,6 +446,7 @@ int xc_linux_restore(int xc_handle, int 
                 if (p2m[pfn] == INVALID_P2M_ENTRY) {
                     /* We just allocated a new mfn above; update p2m */
                     p2m[pfn] = p2m_batch[nr_mfns++]; 
+                    nr_pfns++; 
                 }
 
                 /* setup region_mfn[] for batch map */
@@ -465,7 +476,7 @@ int xc_linux_restore(int xc_handle, int 
                 /* a bogus/unmapped page: skip it */
                 continue;
 
-            if ( pfn > max_pfn )
+            if ( pfn > p2m_size )
             {
                 ERROR("pfn out of range");
                 goto out;
@@ -518,7 +529,7 @@ int xc_linux_restore(int xc_handle, int 
             else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
             {
                 ERROR("Bogus page type %lx page table is out of range: "
-                    "i=%d max_pfn=%lu", pagetype, i, max_pfn);
+                    "i=%d p2m_size=%lu", pagetype, i, p2m_size);
                 goto out;
 
             }
@@ -598,7 +609,7 @@ int xc_linux_restore(int xc_handle, int 
         int j, k;
         
         /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
-        for ( i = 0; i < max_pfn; i++ )
+        for ( i = 0; i < p2m_size; i++ )
         {
             if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
                   XEN_DOMCTL_PFINFO_L3TAB) &&
@@ -646,7 +657,7 @@ int xc_linux_restore(int xc_handle, int 
         /* Second pass: find all L1TABs and uncanonicalize them */
         j = 0;
 
-        for ( i = 0; i < max_pfn; i++ )
+        for ( i = 0; i < p2m_size; i++ )
         {
             if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
                   XEN_DOMCTL_PFINFO_L1TAB) )
@@ -655,7 +666,7 @@ int xc_linux_restore(int xc_handle, int 
                 j++;
             }
 
-            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
+            if(i == (p2m_size-1) || j == MAX_BATCH_SIZE) {
 
                 if (!(region_base = xc_map_foreign_batch(
                           xc_handle, dom, PROT_READ | PROT_WRITE,
@@ -689,7 +700,7 @@ int xc_linux_restore(int xc_handle, int 
      * will barf when doing the type-checking.
      */
     nr_pins = 0;
-    for ( i = 0; i < max_pfn; i++ )
+    for ( i = 0; i < p2m_size; i++ )
     {
         if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
             continue;
@@ -736,7 +747,7 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     DPRINTF("\b\b\b\b100%%\n");
-    DPRINTF("Memory reloaded.\n");
+    DPRINTF("Memory reloaded (%ld pages of max %ld)\n", nr_pfns, max_nr_pfns);
 
     /* Get the list of PFNs that are not in the psuedo-phys map */
     {
@@ -808,7 +819,7 @@ int xc_linux_restore(int xc_handle, int 
              * resume record.
              */
             pfn = ctxt.user_regs.edx;
-            if ((pfn >= max_pfn) ||
+            if ((pfn >= p2m_size) ||
                 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
                 ERROR("Suspend record frame number is bad");
                 goto out;
@@ -816,7 +827,7 @@ int xc_linux_restore(int xc_handle, int 
             ctxt.user_regs.edx = mfn = p2m[pfn];
             start_info = xc_map_foreign_range(
                 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
-            start_info->nr_pages = max_pfn;
+            start_info->nr_pages = p2m_size;
             start_info->shared_info = shared_info_frame << PAGE_SHIFT;
             start_info->flags = 0;
             *store_mfn = start_info->store_mfn = p2m[start_info->store_mfn];
@@ -835,7 +846,7 @@ int xc_linux_restore(int xc_handle, int 
 
         for (j = 0; (512*j) < ctxt.gdt_ents; j++) {
             pfn = ctxt.gdt_frames[j];
-            if ((pfn >= max_pfn) ||
+            if ((pfn >= p2m_size) ||
                 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
                 ERROR("GDT frame number is bad");
                 goto out;
@@ -846,16 +857,16 @@ int xc_linux_restore(int xc_handle, int 
         /* Uncanonicalise the page table base pointer. */
         pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]);
 
-        if (pfn >= max_pfn) {
-            ERROR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
-                  pfn, max_pfn, pfn_type[pfn]);
+        if (pfn >= p2m_size) {
+            ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
+                  pfn, p2m_size, pfn_type[pfn]);
             goto out;
         }
 
         if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
              ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
             ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
-                  pfn, max_pfn, pfn_type[pfn],
+                  pfn, p2m_size, pfn_type[pfn],
                   (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
             goto out;
         }
@@ -867,16 +878,16 @@ int xc_linux_restore(int xc_handle, int 
         {
             pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
 
-            if (pfn >= max_pfn) {
-                ERROR("User PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
-                      pfn, max_pfn, pfn_type[pfn]);
+            if (pfn >= p2m_size) {
+                ERROR("User PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
+                      pfn, p2m_size, pfn_type[pfn]);
                 goto out;
             }
 
             if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
                  ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
                 ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
-                      pfn, max_pfn, pfn_type[pfn],
+                      pfn, p2m_size, pfn_type[pfn],
                       (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
                 goto out;
             }
@@ -915,7 +926,7 @@ int xc_linux_restore(int xc_handle, int 
     /* Uncanonicalise the pfn-to-mfn table frame-number list. */
     for (i = 0; i < P2M_FL_ENTRIES; i++) {
         pfn = p2m_frame_list[i];
-        if ((pfn >= max_pfn) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
+        if ((pfn >= p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB)) {
             ERROR("PFN-to-MFN frame number is bad");
             goto out;
         }
@@ -930,8 +941,8 @@ int xc_linux_restore(int xc_handle, int 
         goto out;
     }
 
-    memcpy(live_p2m, p2m, P2M_SIZE);
-    munmap(live_p2m, P2M_SIZE);
+    memcpy(live_p2m, p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
+    munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
 
     DPRINTF("Domain ready to be built.\n");
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_linux_save.c       Tue Apr 03 13:04:51 2007 -0600
@@ -25,7 +25,7 @@
 **
 */
 #define DEF_MAX_ITERS   29   /* limit us to 30 times round loop   */
-#define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns   */
+#define DEF_MAX_FACTOR   3   /* never send more than 3x p2m_size  */
 
 
 /* max mfn of the whole machine */
@@ -37,8 +37,8 @@ static unsigned long hvirt_start;
 /* #levels of page tables used by the current guest */
 static unsigned int pt_levels;
 
-/* total number of pages used by the current guest */
-static unsigned long max_pfn;
+/* number of pfns this guest has (i.e. number of entries in the P2M) */
+static unsigned long p2m_size;
 
 /* Live mapping of the table mapping each PFN to its current MFN. */
 static xen_pfn_t *live_p2m = NULL;
@@ -57,7 +57,7 @@ static unsigned long m2p_mfn0;
  */
 #define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)          \
 (((_mfn) < (max_mfn)) &&                        \
- ((mfn_to_pfn(_mfn) < (max_pfn)) &&               \
+ ((mfn_to_pfn(_mfn) < (p2m_size)) &&               \
   (live_p2m[mfn_to_pfn(_mfn)] == (_mfn))))
 
 
@@ -79,7 +79,7 @@ static unsigned long m2p_mfn0;
 */
 
 #define BITS_PER_LONG (sizeof(unsigned long) * 8)
-#define BITMAP_SIZE   ((max_pfn + BITS_PER_LONG - 1) / 8)
+#define BITMAP_SIZE   ((p2m_size + BITS_PER_LONG - 1) / 8)
 
 #define BITMAP_ENTRY(_nr,_bmap) \
    ((volatile unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
@@ -343,7 +343,7 @@ static int print_stats(int xc_handle, ui
 }
 
 
-static int analysis_phase(int xc_handle, uint32_t domid, int max_pfn,
+static int analysis_phase(int xc_handle, uint32_t domid, int p2m_size,
                           unsigned long *arr, int runs)
 {
     long long start, now;
@@ -356,7 +356,7 @@ static int analysis_phase(int xc_handle,
         int i;
 
         xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
-                          arr, max_pfn, NULL, 0, NULL);
+                          arr, p2m_size, NULL, 0, NULL);
         DPRINTF("#Flush\n");
         for ( i = 0; i < 40; i++ ) {
             usleep(50000);
@@ -682,7 +682,7 @@ int xc_linux_save(int xc_handle, int io_
     /* base of the region in which domain memory is mapped */
     unsigned char *region_base = NULL;
 
-    /* power of 2 order of max_pfn */
+    /* power of 2 order of p2m_size */
     int order_nr;
 
     /* bitmap of pages:
@@ -730,7 +730,7 @@ int xc_linux_save(int xc_handle, int io_
         goto out;
     }
 
-    max_pfn = live_shinfo->arch.max_pfn;
+    p2m_size = live_shinfo->arch.max_pfn;
 
     live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
                                                    live_shinfo);
@@ -777,7 +777,7 @@ int xc_linux_save(int xc_handle, int io_
     memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE);
 
     /* Canonicalise the pfn-to-mfn table frame-number list. */
-    for (i = 0; i < max_pfn; i += fpp) {
+    for (i = 0; i < p2m_size; i += fpp) {
         if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) {
             ERROR("Frame# in pfn-to-mfn frame list is not in pseudophys");
             ERROR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp,
@@ -813,12 +813,12 @@ int xc_linux_save(int xc_handle, int io_
     }
 
     /* pretend we sent all the pages last iteration */
-    sent_last_iter = max_pfn;
-
-
-    /* calculate the power of 2 order of max_pfn, e.g.
+    sent_last_iter = p2m_size;
+
+
+    /* calculate the power of 2 order of p2m_size, e.g.
        15->4 16->4 17->5 */
-    for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++)
+    for (i = p2m_size-1, order_nr = 0; i ; i >>= 1, order_nr++)
         continue;
 
     /* Setup to_send / to_fix and to_skip bitmaps */
@@ -844,7 +844,7 @@ int xc_linux_save(int xc_handle, int io_
         return 1;
     }
 
-    analysis_phase(xc_handle, dom, max_pfn, to_skip, 0);
+    analysis_phase(xc_handle, dom, p2m_size, to_skip, 0);
 
     /* We want zeroed memory so use calloc rather than malloc. */
     pfn_type   = calloc(MAX_BATCH_SIZE, sizeof(*pfn_type));
@@ -867,7 +867,7 @@ int xc_linux_save(int xc_handle, int io_
     {
         int err=0;
         unsigned long mfn;
-        for (i = 0; i < max_pfn; i++) {
+        for (i = 0; i < p2m_size; i++) {
 
             mfn = live_p2m[i];
             if((mfn != INVALID_P2M_ENTRY) && (mfn_to_pfn(mfn) != i)) {
@@ -882,8 +882,8 @@ int xc_linux_save(int xc_handle, int io_
 
     /* Start writing out the saved-domain record. */
 
-    if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
-        ERROR("write: max_pfn");
+    if (!write_exact(io_fd, &p2m_size, sizeof(unsigned long))) {
+        ERROR("write: p2m_size");
         goto out;
     }
 
@@ -929,9 +929,9 @@ int xc_linux_save(int xc_handle, int io_
 
         DPRINTF("Saving memory pages: iter %d   0%%", iter);
 
-        while( N < max_pfn ){
-
-            unsigned int this_pc = (N * 100) / max_pfn;
+        while( N < p2m_size ){
+
+            unsigned int this_pc = (N * 100) / p2m_size;
 
             if ((this_pc - prev_pc) >= 5) {
                 DPRINTF("\b\b\b\b%3d%%", this_pc);
@@ -942,7 +942,7 @@ int xc_linux_save(int xc_handle, int io_
                but this is fast enough for the moment. */
             if (!last_iter && xc_shadow_control(
                     xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK,
-                    to_skip, max_pfn, NULL, 0, NULL) != max_pfn) {
+                    to_skip, p2m_size, NULL, 0, NULL) != p2m_size) {
                 ERROR("Error peeking shadow bitmap");
                 goto out;
             }
@@ -950,9 +950,9 @@ int xc_linux_save(int xc_handle, int io_
 
             /* load pfn_type[] with the mfn of all the pages we're doing in
                this batch. */
-            for (batch = 0; batch < MAX_BATCH_SIZE && N < max_pfn ; N++) {
-
-                int n = permute(N, max_pfn, order_nr);
+            for (batch = 0; batch < MAX_BATCH_SIZE && N < p2m_size ; N++) {
+
+                int n = permute(N, p2m_size, order_nr);
 
                 if (debug) {
                     DPRINTF("%d pfn= %08lx mfn= %08lx %d  [mfn]= %08lx\n",
@@ -1123,7 +1123,7 @@ int xc_linux_save(int xc_handle, int io_
             print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
 
             DPRINTF("Total pages sent= %ld (%.2fx)\n",
-                    total_sent, ((float)total_sent)/max_pfn );
+                    total_sent, ((float)total_sent)/p2m_size );
             DPRINTF("(of which %ld were fixups)\n", needed_to_fix  );
         }
 
@@ -1150,7 +1150,7 @@ int xc_linux_save(int xc_handle, int io_
             if (((sent_this_iter > sent_last_iter) && RATE_IS_MAX()) ||
                 (iter >= max_iters) ||
                 (sent_this_iter+skip_this_iter < 50) ||
-                (total_sent > max_pfn*max_factor)) {
+                (total_sent > p2m_size*max_factor)) {
                 DPRINTF("Start last iteration\n");
                 last_iter = 1;
 
@@ -1168,7 +1168,7 @@ int xc_linux_save(int xc_handle, int io_
 
             if (xc_shadow_control(xc_handle, dom, 
                                   XEN_DOMCTL_SHADOW_OP_CLEAN, to_send, 
-                                  max_pfn, NULL, 0, &stats) != max_pfn) {
+                                  p2m_size, NULL, 0, &stats) != p2m_size) {
                 ERROR("Error flushing shadow PT");
                 goto out;
             }
@@ -1220,7 +1220,7 @@ int xc_linux_save(int xc_handle, int io_
         unsigned int i,j;
         unsigned long pfntab[1024];
 
-        for (i = 0, j = 0; i < max_pfn; i++) {
+        for (i = 0, j = 0; i < p2m_size; i++) {
             if (!is_mapped(live_p2m[i]))
                 j++;
         }
@@ -1230,13 +1230,13 @@ int xc_linux_save(int xc_handle, int io_
             goto out;
         }
 
-        for (i = 0, j = 0; i < max_pfn; ) {
+        for (i = 0, j = 0; i < p2m_size; ) {
 
             if (!is_mapped(live_p2m[i]))
                 pfntab[j++] = i;
 
             i++;
-            if (j == 1024 || i == max_pfn) {
+            if (j == 1024 || i == p2m_size) {
                 if(!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) {
                     ERROR("Error when writing to state file (6b) (errno %d)",
                           errno);
@@ -1333,7 +1333,7 @@ int xc_linux_save(int xc_handle, int io_
         munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
 
     if (live_p2m)
-        munmap(live_p2m, P2M_SIZE);
+        munmap(live_p2m, ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT));
 
     if (live_m2p)
         munmap(live_m2p, M2P_SIZE(max_mfn));
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xc_resume.c
--- a/tools/libxc/xc_resume.c   Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xc_resume.c   Tue Apr 03 13:04:51 2007 -0600
@@ -46,7 +46,7 @@ static int xc_domain_resume_any(int xc_h
     xc_dominfo_t info;
     int i, rc = -1;
 #if defined(__i386__) || defined(__x86_64__)
-    unsigned long mfn, max_pfn = 0;
+    unsigned long mfn, p2m_size = 0;
     vcpu_guest_context_t ctxt;
     start_info_t *start_info;
     shared_info_t *shinfo = NULL;
@@ -74,7 +74,7 @@ static int xc_domain_resume_any(int xc_h
         goto out;
     }
 
-    max_pfn = shinfo->arch.max_pfn;
+    p2m_size = shinfo->arch.max_pfn;
 
     p2m_frame_list_list =
         xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_READ,
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xenctrl.h     Tue Apr 03 13:04:51 2007 -0600
@@ -4,6 +4,9 @@
  * A library for low-level access to the Xen control interfaces.
  *
  * Copyright (c) 2003-2004, K A Fraser.
+ *
+ * xc_gnttab functions:
+ * Copyright (c) 2007, D G Murray <Derek.Murray@xxxxxxxxxxxx>
  */
 
 #ifndef XENCTRL_H
@@ -740,6 +743,62 @@ evtchn_port_t xc_evtchn_pending(int xce_
  */
 int xc_evtchn_unmask(int xce_handle, evtchn_port_t port);
 
+/**************************
+ * GRANT TABLE OPERATIONS *
+ **************************/
+
+/*
+ * Return a handle to the grant table driver, or -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_gnttab_open(void);
+
+/*
+ * Close a handle previously allocated with xc_gnttab_open().
+ */
+int xc_gnttab_close(int xcg_handle);
+
+/*
+ * Memory maps a grant reference from one domain to a local address range.
+ * Mappings should be unmapped with xc_gnttab_munmap.  Returns NULL on failure.
+ *
+ * @parm xcg_handle a handle on an open grant table interface
+ * @parm domid the domain to map memory from
+ * @parm ref the grant reference ID to map
+ * @parm prot same flag as in mmap()
+ */
+void *xc_gnttab_map_grant_ref(int xcg_handle,
+                              uint32_t domid,
+                              uint32_t ref,
+                              int prot);
+
+/**
+ * Memory maps one or more grant references from one or more domains to a
+ * contiguous local address range. Mappings should be unmapped with
+ * xc_gnttab_munmap.  Returns NULL on failure.
+ *
+ * @parm xcg_handle a handle on an open grant table interface
+ * @parm count the number of grant references to be mapped
+ * @parm domids an array of @count domain IDs by which the corresponding @refs
+ *              were granted
+ * @parm refs an array of @count grant references to be mapped
+ * @parm prot same flag as in mmap()
+ */
+void *xc_gnttab_map_grant_refs(int xcg_handle,
+                               uint32_t count,
+                               uint32_t *domids,
+                               uint32_t *refs,
+                               int prot);
+
+/*
+ * Unmaps the @count pages starting at @start_address, which were mapped by a
+ * call to xc_gnttab_map_grant_ref or xc_gnttab_map_grant_refs. Returns zero
+ * on success, otherwise sets errno and returns non-zero.
+ */
+int xc_gnttab_munmap(int xcg_handle,
+                     void *start_address,
+                     uint32_t count);
+
 int xc_hvm_set_pci_intx_level(
     int xc_handle, domid_t dom,
     uint8_t domain, uint8_t bus, uint8_t device, uint8_t intx,
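
A short usage sketch for the grant-table calls declared above (not from the patch): open the interface, map a single page granted by another domain read-only, then unmap and close. dump_granted_page() and its arguments are hypothetical.

#include <stdio.h>
#include <stdint.h>
#include <sys/mman.h>
#include <xenctrl.h>

static int dump_granted_page(uint32_t domid, uint32_t ref)
{
    int xcg = xc_gnttab_open();
    void *page;

    if (xcg < 0)
        return -1;

    page = xc_gnttab_map_grant_ref(xcg, domid, ref, PROT_READ);
    if (page != NULL) {
        printf("first byte of grant %u from dom %u: %02x\n",
               ref, domid, *(uint8_t *)page);
        xc_gnttab_munmap(xcg, page, 1);   /* count is in pages, not bytes */
    }

    return xc_gnttab_close(xcg);
}
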
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xenguest.h    Tue Apr 03 13:04:51 2007 -0600
@@ -43,15 +43,16 @@ int xc_hvm_save(int xc_handle, int io_fd
  * @parm xc_handle a handle to an open hypervisor interface
  * @parm fd the file descriptor to restore a domain from
  * @parm dom the id of the domain
- * @parm nr_pfns the number of pages
+ * @parm p2m_size number of pages the guest has (i.e. number of entries in the P2M)
+ * @parm max_nr_pfns the domain's maximum real memory allocation, in pages
  * @parm store_evtchn the store event channel for this domain to use
  * @parm store_mfn returned with the mfn of the store page
  * @return 0 on success, -1 on failure
  */
 int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
-                     unsigned long nr_pfns, unsigned int store_evtchn,
-                     unsigned long *store_mfn, unsigned int console_evtchn,
-                     unsigned long *console_mfn);
+                     unsigned long p2m_size, unsigned long max_nr_pfns,
+                     unsigned int store_evtchn, unsigned long *store_mfn,
+                     unsigned int console_evtchn, unsigned long *console_mfn);
 
 /**
  * This function will restore a saved hvm domain running unmodified guest.
diff -r fc9e2f7920c9 -r f378c424e0ce tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/libxc/xg_private.h  Tue Apr 03 13:04:51 2007 -0600
@@ -148,17 +148,16 @@ typedef l4_pgentry_64_t l4_pgentry_t;
 
 #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
 
-/* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
-#define P2M_SIZE        ROUNDUP((max_pfn * sizeof(xen_pfn_t)), PAGE_SHIFT)
-
 /* Number of xen_pfn_t in a page */
 #define fpp             (PAGE_SIZE/sizeof(xen_pfn_t))
 
+/* XXX SMH: following 3 skanky macros rely on variable p2m_size being set */
+
 /* Number of entries in the pfn_to_mfn_frame_list_list */
-#define P2M_FLL_ENTRIES (((max_pfn)+(fpp*fpp)-1)/(fpp*fpp))
+#define P2M_FLL_ENTRIES (((p2m_size)+(fpp*fpp)-1)/(fpp*fpp))
 
 /* Number of entries in the pfn_to_mfn_frame_list */
-#define P2M_FL_ENTRIES  (((max_pfn)+fpp-1)/fpp)
+#define P2M_FL_ENTRIES  (((p2m_size)+fpp-1)/fpp)
 
 /* Size in bytes of the pfn_to_mfn_frame_list     */
 #define P2M_FL_SIZE     ((P2M_FL_ENTRIES)*sizeof(unsigned long))
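
Since the old P2M_SIZE macro depended on the removed max_pfn global, the byte size of the live P2M mapping is now open-coded at the call sites in xc_linux_{save,restore}.c. A small hedged sketch of that arithmetic; the p2m_bytes() helper is hypothetical:

/* Bytes needed to map p2m_size P2M entries, rounded up to whole pages,
 * exactly as passed to memcpy()/munmap() in xc_linux_{save,restore}.c. */
static inline unsigned long p2m_bytes(unsigned long p2m_size)
{
    return ROUNDUP(p2m_size * sizeof(xen_pfn_t), PAGE_SHIFT);
}
/* e.g. 65537 entries x 8-byte xen_pfn_t = 524296 bytes,
 *      rounded up to 129 x 4096 = 528384 bytes. */
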
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/lowlevel/scf/scf.c
--- a/tools/python/xen/lowlevel/scf/scf.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/lowlevel/scf/scf.c       Tue Apr 03 13:04:51 2007 -0600
@@ -26,7 +26,7 @@
 #include <libscf.h>
 #include <stdio.h>
 
-#define        XEND_FMRI "svc:/system/xen/xend:default"
+#define        XEND_FMRI "svc:/system/xctl/xend:default"
 #define        XEND_PG "config"
 
 static PyObject *scf_exc;
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/XendCheckpoint.py   Tue Apr 03 13:04:51 2007 -0600
@@ -187,6 +187,7 @@ def restore(xd, fd, dominfo = None, paus
     assert console_port
 
     nr_pfns = (dominfo.getMemoryTarget() + 3) / 4 
+    max_nr_pfns = (dominfo.getMemoryMaximum() + 3) / 4 
 
     # if hvm, pass mem size to calculate the store_mfn
     image_cfg = dominfo.info.get('image', {})
@@ -203,17 +204,17 @@ def restore(xd, fd, dominfo = None, paus
     try:
         l = read_exact(fd, sizeof_unsigned_long,
                        "not a valid guest state file: pfn count read")
-        max_pfn = unpack("L", l)[0]    # native sizeof long
-
-        if max_pfn > 16*1024*1024:     # XXX 
+        p2m_size = unpack("L", l)[0]    # native sizeof long
+
+        if p2m_size > 16*1024*1024:     # XXX 
             raise XendError(
                 "not a valid guest state file: pfn count out of range")
 
         shadow = dominfo.info['shadow_memory']
         log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, "
-                  "nr_pfns=0x%x.", dominfo.info['shadow_memory'],
+                  "p2m_size=0x%x.", dominfo.info['shadow_memory'],
                   dominfo.info['memory_static_max'],
-                  dominfo.info['memory_static_min'], nr_pfns)
+                  dominfo.info['memory_static_min'], p2m_size)
 
         balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024)
 
@@ -221,7 +222,7 @@ def restore(xd, fd, dominfo = None, paus
         dominfo.info['shadow_memory'] = shadow_cur
 
         cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
-                        fd, dominfo.getDomid(), max_pfn,
+                        fd, dominfo.getDomid(), p2m_size, max_nr_pfns, 
                         store_port, console_port, int(is_hvm), pae, apic])
         log.debug("[xc_restore]: %s", string.join(cmd))
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/XendConfig.py       Tue Apr 03 13:04:51 2007 -0600
@@ -118,7 +118,7 @@ LEGACY_CFG_TO_XENAPI_CFG = reverse_dict(
 # Platform configuration keys.
 XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 'boot', 'device_model', 'display', 
                         'fda', 'fdb', 'keymap', 'isa', 'localtime',
-                        'nographic', 'pae', 'serial', 'sdl',
+                        'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
                         'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
                         'vncconsole', 'vncdisplay', 'vnclisten',
                         'vncpasswd', 'vncunused', 'xauthority']
@@ -203,6 +203,7 @@ LEGACY_CFG_TYPES = {
     'on_xend_stop':  str,
     'on_xend_start': str,
     'online_vcpus':  int,
+    'rtc/timeoffset': str,
 }
 
 # Values that should be stored in xenstore's /vm/<uuid> that is used
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Tue Apr 03 13:04:51 2007 -0600
@@ -859,7 +859,8 @@ class XendDomainInfo:
         # Check whether values in the configuration have
         # changed in Xenstore.
         
-        cfg_vm = ['name', 'on_poweroff', 'on_reboot', 'on_crash']
+        cfg_vm = ['name', 'on_poweroff', 'on_reboot', 'on_crash',
+                  'rtc/timeoffset']
         
         vm_details = self._readVMDetails([(k,XendConfig.LEGACY_CFG_TYPES[k])
                                            for k in cfg_vm])
@@ -888,6 +889,11 @@ class XendDomainInfo:
             self.info.update_with_image_sxp(sxp.from_string(image_sxp))
             changed = True
 
+        # Check if the rtc offset has changed
+        if vm_details.get("rtc/timeoffset", 0) != self.info["platform"].get("rtc_timeoffset", 0):
+            self.info["platform"]["rtc_timeoffset"] = vm_details.get("rtc/timeoffset", 0)
+            changed = True
+ 
         if changed:
             # Update the domain section of the store, as this contains some
             # parameters derived from the VM configuration.
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/balloon.py  Tue Apr 03 13:04:51 2007 -0600
@@ -25,9 +25,7 @@ import XendOptions
 import XendOptions
 from XendLogging import log
 from XendError import VmError
-
-
-PROC_XEN_BALLOON = '/proc/xen/balloon'
+import osdep
 
 RETRY_LIMIT = 20
 RETRY_LIMIT_INCR = 5
@@ -51,19 +49,7 @@ def _get_proc_balloon(label):
     """Returns the value for the named label.  Returns None if the label was
        not found or the value was non-numeric."""
 
-    f = file(PROC_XEN_BALLOON, 'r')
-    try:
-        for line in f:
-            keyvalue = line.split(':')
-            if keyvalue[0] == label:
-                values = keyvalue[1].split()
-                if values[0].isdigit():
-                    return int(values[0])
-                else:
-                    return None
-        return None
-    finally:
-        f.close()
+    return osdep.lookup_balloon_stat(label)
 
 def get_dom0_current_alloc():
     """Returns the current memory allocation (in KiB) of dom0."""
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/image.py    Tue Apr 03 13:04:51 2007 -0600
@@ -256,9 +256,12 @@ class HVMImageHandler(ImageHandler):
         self.xauthority = vmConfig['platform'].get('xauthority')
         self.vncconsole = vmConfig['platform'].get('vncconsole')
 
+        rtc_timeoffset = vmConfig['platform'].get('rtc_timeoffset')
+
         self.vm.storeVm(("image/dmargs", " ".join(self.dmargs)),
                         ("image/device-model", self.device_model),
                         ("image/display", self.display))
+        self.vm.storeVm(("rtc/timeoffset", rtc_timeoffset))
 
         self.pid = None
 
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/osdep.py
--- a/tools/python/xen/xend/osdep.py    Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/osdep.py    Tue Apr 03 13:04:51 2007 -0600
@@ -41,6 +41,55 @@ _vif_script = {
     "SunOS": "vif-vnic"
 }
 
+def _linux_balloon_stat(label):
+    """Returns the value for the named label, or None if an error occurs."""
+
+    PROC_XEN_BALLOON = '/proc/xen/balloon'
+    f = file(PROC_XEN_BALLOON, 'r')
+    try:
+        for line in f:
+            keyvalue = line.split(':')
+            if keyvalue[0] == label:
+                values = keyvalue[1].split()
+                if values[0].isdigit():
+                    return int(values[0])
+                else:
+                    return None
+        return None
+    finally:
+        f.close()
+
+def _solaris_balloon_stat(label):
+    """Returns the value for the named label, or None if an error occurs."""
+
+    import fcntl
+    import array
+    DEV_XEN_BALLOON = '/dev/xen/balloon'
+    BLN_IOCTL_CURRENT = 0x4201
+    BLN_IOCTL_TARGET = 0x4202
+    BLN_IOCTL_LOW = 0x4203
+    BLN_IOCTL_HIGH = 0x4204
+    BLN_IOCTL_LIMIT = 0x4205
+    label_to_ioctl = { 'Current allocation'    : BLN_IOCTL_CURRENT,
+                       'Requested target'      : BLN_IOCTL_TARGET,
+                       'Low-mem balloon'       : BLN_IOCTL_LOW,
+                       'High-mem balloon'      : BLN_IOCTL_HIGH,
+                       'Xen hard limit'        : BLN_IOCTL_LIMIT }
+
+    f = file(DEV_XEN_BALLOON, 'r')
+    try:
+        values = array.array('L', [0])
+        if fcntl.ioctl(f.fileno(), label_to_ioctl[label], values, 1) == 0:
+            return values[0]
+        else:
+            return None
+    finally:
+        f.close()
+
+_balloon_stat = {
+    "SunOS": _solaris_balloon_stat
+}
+
 def _get(var, default=None):
     return var.get(os.uname()[0], default)
 
@@ -49,3 +98,4 @@ pygrub_path = _get(_pygrub_path, "/usr/b
 pygrub_path = _get(_pygrub_path, "/usr/bin/pygrub")
 netback_type = _get(_netback_type, "netfront")
 vif_script = _get(_vif_script, "vif-bridge")
+lookup_balloon_stat = _get(_balloon_stat, _linux_balloon_stat)
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/server/SrvServer.py Tue Apr 03 13:04:51 2007 -0600
@@ -212,8 +212,8 @@ def _loadConfig(servers, root, reload):
                     if server_cfg[1] in [XendAPI.AUTH_PAM, XendAPI.AUTH_NONE]:
                         auth_method = server_cfg[1]
 
-                if len(server_cfg) > 2:
-                    hosts_allowed = server_cfg[2] or None
+                if len(server_cfg) > 2 and len(server_cfg[2]):
+                    hosts_allowed = map(re.compile, server_cfg[2].split(' '))
 
                 if len(server_cfg) > 4:
                     # SSL key and cert file
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xend/server/relocate.py
--- a/tools/python/xen/xend/server/relocate.py  Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xend/server/relocate.py  Tue Apr 03 13:04:51 2007 -0600
@@ -106,8 +106,12 @@ class RelocationProtocol(protocol.Protoc
     def op_receive(self, name, _):
         if self.transport:
             self.send_reply(["ready", name])
-            XendDomain.instance().domain_restore_fd(
-                self.transport.sock.fileno())
+            try:
+                XendDomain.instance().domain_restore_fd(
+                    self.transport.sock.fileno())
+            except:
+                self.send_error()
+                self.close()
         else:
             log.error(name + ": no transport")
             raise XendError(name + ": no transport")
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xm/create.py     Tue Apr 03 13:04:51 2007 -0600
@@ -185,6 +185,10 @@ gopts.var('cpus', val='CPUS',
 gopts.var('cpus', val='CPUS',
           fn=set_value, default=None,
           use="CPUS to run the domain on.")
+
+gopts.var('rtc_timeoffset', val='RTC_TIMEOFFSET',
+          fn=set_value, default="0",
+          use="Set RTC offset.")
 
 gopts.var('pae', val='PAE',
           fn=set_int, default=1,
@@ -717,7 +721,7 @@ def configure_hvm(config_image, vals):
     args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb',
              'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
              'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
-             'sdl', 'display', 'xauthority',
+             'sdl', 'display', 'xauthority', 'rtc_timeoffset',
              'acpi', 'apic', 'usb', 'usbdevice', 'keymap' ]
     for a in args:
         if a in vals.__dict__ and vals.__dict__[a] is not None:
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xm/main.py       Tue Apr 03 13:04:51 2007 -0600
@@ -929,10 +929,10 @@ def xm_label_list(doms):
             if security.active_policy not in ['INACTIVE', 'NULL', 'DEFAULT']:
                 if not d['seclabel']:
                     d['seclabel'] = 'ERROR'
-                elif security.active_policy in ['DEFAULT']:
-                    d['seclabel'] = 'DEFAULT'
-                else:
-                    d['seclabel'] = 'INACTIVE'
+            elif security.active_policy in ['DEFAULT']:
+                d['seclabel'] = 'DEFAULT'
+            else:
+                d['seclabel'] = 'INACTIVE'
 
             output.append((format % d, d['seclabel']))
         
diff -r fc9e2f7920c9 -r f378c424e0ce tools/python/xen/xm/xenapi_create.py
--- a/tools/python/xen/xm/xenapi_create.py      Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/python/xen/xm/xenapi_create.py      Tue Apr 03 13:04:51 2007 -0600
@@ -20,7 +20,6 @@
 
 from xen.xm.main import server, get_default_SR
 from xml.dom.minidom import parse, getDOMImplementation
-from xml.dom.ext import PrettyPrint
 from xml.parsers.xmlproc import xmlproc, xmlval, xmldtd
 from xen.xend import sxp
 from xen.xend.XendAPIConstants import XEN_API_ON_NORMAL_EXIT, \
diff -r fc9e2f7920c9 -r f378c424e0ce tools/xcutils/xc_restore.c
--- a/tools/xcutils/xc_restore.c        Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/xcutils/xc_restore.c        Tue Apr 03 13:04:51 2007 -0600
@@ -18,15 +18,14 @@ int
 int
 main(int argc, char **argv)
 {
-    unsigned int xc_fd, io_fd, domid, max_pfn, store_evtchn, console_evtchn;
+    unsigned int xc_fd, io_fd, domid, store_evtchn, console_evtchn;
     unsigned int hvm, pae, apic;
     int ret;
-    unsigned long store_mfn, console_mfn;
+    unsigned long p2m_size, max_nr_pfns, store_mfn, console_mfn;
 
-    if (argc != 9)
-       errx(1,
-            "usage: %s iofd domid max_pfn store_evtchn console_evtchn hvm pae 
apic",
-            argv[0]);
+    if (argc != 10)
+        errx(1, "usage: %s iofd domid p2m_size max_nr_pfns store_evtchn "
+             "console_evtchn hvm pae apic", argv[0]);
 
     xc_fd = xc_interface_open();
     if (xc_fd < 0)
@@ -34,19 +33,21 @@ main(int argc, char **argv)
 
     io_fd = atoi(argv[1]);
     domid = atoi(argv[2]);
-    max_pfn = atoi(argv[3]);
-    store_evtchn = atoi(argv[4]);
-    console_evtchn = atoi(argv[5]);
-    hvm  = atoi(argv[6]);
-    pae  = atoi(argv[7]);
-    apic = atoi(argv[8]);
+    p2m_size = atoi(argv[3]);
+    max_nr_pfns = atoi(argv[4]);
+    store_evtchn = atoi(argv[5]);
+    console_evtchn = atoi(argv[6]);
+    hvm  = atoi(argv[7]);
+    pae  = atoi(argv[8]);
+    apic = atoi(argv[9]);
 
     if (hvm) {
-        ret = xc_hvm_restore(xc_fd, io_fd, domid, max_pfn, store_evtchn,
+        ret = xc_hvm_restore(xc_fd, io_fd, domid, max_nr_pfns, store_evtchn,
                 &store_mfn, pae, apic);
-    } else 
-        ret = xc_linux_restore(xc_fd, io_fd, domid, max_pfn, store_evtchn,
-                &store_mfn, console_evtchn, &console_mfn);
+    } else
+        ret = xc_linux_restore(xc_fd, io_fd, domid, p2m_size,
+                               max_nr_pfns, store_evtchn, &store_mfn,
+                               console_evtchn, &console_mfn);
 
     if (ret == 0) {
        printf("store-mfn %li\n", store_mfn);
diff -r fc9e2f7920c9 -r f378c424e0ce tools/xenstat/xentop/xentop.c
--- a/tools/xenstat/xentop/xentop.c     Fri Mar 30 17:18:42 2007 -0600
+++ b/tools/xenstat/xentop/xentop.c     Tue Apr 03 13:04:51 2007 -0600
@@ -984,6 +984,8 @@ static void top(void)
 
        if(!batch)
        do_bottom_line();
+
+       free(domains);
 }
 
 int main(int argc, char **argv)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Tue Apr 03 13:04:51 2007 -0600
@@ -59,9 +59,6 @@ struct hvm_function_table hvm_funcs __re
 /* I/O permission bitmap is globally shared by all HVM guests. */
 char __attribute__ ((__section__ (".bss.page_aligned")))
     hvm_io_bitmap[3*PAGE_SIZE];
-/* MSR permission bitmap is globally shared by all HVM guests. */
-char __attribute__ ((__section__ (".bss.page_aligned")))
-    hvm_msr_bitmap[PAGE_SIZE];
 
 void hvm_enable(struct hvm_function_table *fns)
 {
@@ -74,9 +71,6 @@ void hvm_enable(struct hvm_function_tabl
      */
     memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
     clear_bit(0x80, hvm_io_bitmap);
-
-    /* All MSR accesses are intercepted by default. */
-    memset(hvm_msr_bitmap, ~0, sizeof(hvm_msr_bitmap));
 
     hvm_funcs   = *fns;
     hvm_enabled = 1;
@@ -378,6 +372,9 @@ void hvm_send_assist_req(struct vcpu *v)
 void hvm_send_assist_req(struct vcpu *v)
 {
     ioreq_t *p;
+
+    if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
+        return; /* implicitly bins the i/o operation */
 
     p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq;
     if ( unlikely(p->state != STATE_IOREQ_NONE) )
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/intercept.c      Tue Apr 03 13:04:51 2007 -0600
@@ -155,28 +155,13 @@ static inline void hvm_mmio_access(struc
     }
 }
 
-int hvm_buffered_io_intercept(ioreq_t *p)
+int hvm_buffered_io_send(ioreq_t *p)
 {
     struct vcpu *v = current;
     spinlock_t  *buffered_io_lock;
     buffered_iopage_t *buffered_iopage =
         (buffered_iopage_t *)(v->domain->arch.hvm_domain.buffered_io_va);
     unsigned long tmp_write_pointer = 0;
-    int i;
-
-    /* ignore READ ioreq_t! */
-    if ( p->dir == IOREQ_READ )
-        return 0;
-
-    for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
-        if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
-             p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
-                                     hvm_buffered_io_ranges[i]->length )
-            break;
-    }
-
-    if ( i == HVM_BUFFERED_IO_RANGE_NR )
-        return 0;
 
     buffered_io_lock = &v->domain->arch.hvm_domain.buffered_io_lock;
     spin_lock(buffered_io_lock);
@@ -205,6 +190,27 @@ int hvm_buffered_io_intercept(ioreq_t *p
     return 1;
 }
 
+int hvm_buffered_io_intercept(ioreq_t *p)
+{
+    int i;
+
+    /* ignore READ ioreq_t! */
+    if ( p->dir == IOREQ_READ )
+        return 0;
+
+    for ( i = 0; i < HVM_BUFFERED_IO_RANGE_NR; i++ ) {
+        if ( p->addr >= hvm_buffered_io_ranges[i]->start_addr &&
+             p->addr + p->size - 1 < hvm_buffered_io_ranges[i]->start_addr +
+                                     hvm_buffered_io_ranges[i]->length )
+            break;
+    }
+
+    if ( i == HVM_BUFFERED_IO_RANGE_NR )
+        return 0;
+
+    return hvm_buffered_io_send(p);
+}
+
 int hvm_mmio_intercept(ioreq_t *p)
 {
     struct vcpu *v = current;
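
The two hunks above split the queueing step out of hvm_buffered_io_intercept() into a new hvm_buffered_io_send(), so internal callers such as send_timeoffset_req() can enqueue a request without going through the MMIO range check. A minimal userspace sketch of that wrapper/worker split, with simplified stand-in types and ranges (not the real ioreq_t or HVM ranges):

    #include <stdio.h>

    struct ioreq { unsigned long addr; unsigned int size; int is_write; };

    struct io_range { unsigned long start, length; };
    static const struct io_range buffered_ranges[] = {
        { 0xa0000, 0x20000 },            /* e.g. a legacy VGA framebuffer window */
    };
    #define NR_RANGES (sizeof(buffered_ranges) / sizeof(buffered_ranges[0]))

    static int buffered_io_send(const struct ioreq *p)
    {
        /* The real code appends p to a shared ring under a lock; just log it. */
        printf("queued write: addr=%#lx size=%u\n", p->addr, p->size);
        return 1;
    }

    static int buffered_io_intercept(const struct ioreq *p)
    {
        unsigned int i;

        if (!p->is_write)                /* reads are never buffered */
            return 0;

        for (i = 0; i < NR_RANGES; i++)
            if (p->addr >= buffered_ranges[i].start &&
                p->addr + p->size - 1 <
                    buffered_ranges[i].start + buffered_ranges[i].length)
                return buffered_io_send(p);

        return 0;                        /* not a buffered range */
    }

    int main(void)
    {
        struct ioreq vga   = { 0xa0004, 4, 1 };
        struct ioreq other = { 0x1000, 4, 1 };
        printf("vga handled: %d, other handled: %d\n",
               buffered_io_intercept(&vga), buffered_io_intercept(&other));
        return 0;
    }
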
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/io.c     Tue Apr 03 13:04:51 2007 -0600
@@ -771,10 +771,11 @@ void hvm_io_assist(struct vcpu *v)
     struct cpu_user_regs *regs;
     struct hvm_io_op *io_opp;
     unsigned long gmfn;
+    struct domain *d = v->domain;
 
     io_opp = &v->arch.hvm_vcpu.io_op;
     regs   = &io_opp->io_context;
-    vio    = get_vio(v->domain, v->vcpu_id);
+    vio    = get_vio(d, v->vcpu_id);
 
     p = &vio->vp_ioreq;
     if ( p->state != STATE_IORESP_READY )
@@ -797,11 +798,13 @@ void hvm_io_assist(struct vcpu *v)
     memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
 
     /* Has memory been dirtied? */
-    if ( p->dir == IOREQ_READ && p->data_is_ptr )
+    if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
     {
         gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
-        mark_dirty(v->domain, gmfn);
-    }
+        mark_dirty(d, gmfn);
+    }
+
+    vcpu_end_shutdown_deferral(v);
 }
 
 /*
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/platform.c       Tue Apr 03 13:04:51 2007 -0600
@@ -921,6 +921,26 @@ static void send_mmio_req(unsigned char 
     hvm_send_assist_req(v);
 }
 
+void send_timeoffset_req(unsigned long timeoff)
+{
+    ioreq_t p[1];
+
+    if ( timeoff == 0 )
+        return;
+
+    memset(p, 0, sizeof(*p));
+
+    p->type = IOREQ_TYPE_TIMEOFFSET;
+    p->size = 4;
+    p->dir = IOREQ_WRITE;
+    p->data = timeoff;
+
+    p->state = STATE_IOREQ_READY;
+
+    if ( !hvm_buffered_io_send(p) )
+        printk("Unsuccessful timeoffset update\n");
+}
+
 static void mmio_operands(int type, unsigned long gpa,
                           struct hvm_io_op *mmio_op,
                           unsigned char op_size)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/rtc.c
--- a/xen/arch/x86/hvm/rtc.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/rtc.c    Tue Apr 03 13:04:51 2007 -0600
@@ -157,6 +157,10 @@ static void rtc_set_time(RTCState *s)
 static void rtc_set_time(RTCState *s)
 {
     struct tm *tm = &s->current_tm;
+    unsigned long before, after; /* XXX s_time_t */
+      
+    before = mktime(tm->tm_year, tm->tm_mon, tm->tm_mday,
+                   tm->tm_hour, tm->tm_min, tm->tm_sec);
     
     tm->tm_sec = from_bcd(s, s->hw.cmos_data[RTC_SECONDS]);
     tm->tm_min = from_bcd(s, s->hw.cmos_data[RTC_MINUTES]);
@@ -168,6 +172,10 @@ static void rtc_set_time(RTCState *s)
     tm->tm_mday = from_bcd(s, s->hw.cmos_data[RTC_DAY_OF_MONTH]);
     tm->tm_mon = from_bcd(s, s->hw.cmos_data[RTC_MONTH]) - 1;
     tm->tm_year = from_bcd(s, s->hw.cmos_data[RTC_YEAR]) + 100;
+
+    after = mktime(tm->tm_year, tm->tm_mon, tm->tm_mday,
+                   tm->tm_hour, tm->tm_min, tm->tm_sec);
+    send_timeoffset_req(after - before);
 }
 
 static void rtc_copy_date(RTCState *s)
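
The rtc.c change above derives the guest's requested RTC offset by taking mktime() of the clock before and after the CMOS registers are latched, then forwarding the difference with send_timeoffset_req(). A small hosted-C sketch of the same before/after arithmetic, using timegm() (a glibc/BSD extension) in place of Xen's internal mktime():

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        /* Wall-clock time before the guest reprograms the RTC. */
        struct tm tm = { .tm_year = 107, .tm_mon = 3, .tm_mday = 3,
                         .tm_hour = 13, .tm_min = 4, .tm_sec = 51 };
        time_t before = timegm(&tm);

        tm.tm_hour += 2;                 /* guest sets the RTC two hours ahead */
        time_t after = timegm(&tm);

        long timeoff = (long)(after - before);
        printf("timeoffset to record: %ld seconds\n", timeoff);   /* 7200 */
        return 0;
    }
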
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Tue Apr 03 13:04:51 2007 -0600
@@ -79,6 +79,30 @@ struct host_save_area *alloc_host_save_a
     return hsa;
 }
 
+static void disable_intercept_for_msr(char *msr_bitmap, u32 msr)
+{
+    /*
+     * See AMD64 Programmers Manual, Vol 2, Section 15.10 (MSR-Bitmap Address).
+     */
+    if ( msr <= 0x1fff )
+    {
+        __clear_bit(msr*2, msr_bitmap + 0x000); 
+        __clear_bit(msr*2+1, msr_bitmap + 0x000); 
+    }
+    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
+    {
+        msr &= 0x1fff;
+        __clear_bit(msr*2, msr_bitmap + 0x800);
+        __clear_bit(msr*2+1, msr_bitmap + 0x800);
+    } 
+    else if ( (msr >= 0xc0010000) && (msr <= 0xc0011fff) )
+    {
+        msr &= 0x1fff;
+        __clear_bit(msr*2, msr_bitmap + 0x1000);
+        __clear_bit(msr*2+1, msr_bitmap + 0x1000);
+    }
+}
+
 static int construct_vmcb(struct vcpu *v)
 {
     struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
@@ -114,6 +138,10 @@ static int construct_vmcb(struct vcpu *v
     if ( arch_svm->msrpm == NULL )
         return -ENOMEM;
     memset(arch_svm->msrpm, 0xff, MSRPM_SIZE);
+
+    disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_FS_BASE);
+    disable_intercept_for_msr((char *)arch_svm->msrpm, MSR_GS_BASE);
+
     vmcb->msrpm_base_pa = (u64)virt_to_maddr(arch_svm->msrpm);
     vmcb->iopm_base_pa  = (u64)virt_to_maddr(hvm_io_bitmap);
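
disable_intercept_for_msr() above clears the read and write intercept bits for one MSR in the SVM MSR-permission bitmap, where each 2KB region of the map covers a different MSR range and every MSR occupies two consecutive bits. A standalone sketch of the same bit arithmetic, with a simplified clear_bit() helper standing in for Xen's __clear_bit():

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define MSRPM_SIZE (8 * 1024)        /* 2 bits per MSR, three 2KB regions used */

    static void clear_bit(unsigned int nr, uint8_t *base)
    {
        base[nr / 8] &= (uint8_t)~(1u << (nr % 8));
    }

    static void allow_msr(uint8_t *msrpm, uint32_t msr)
    {
        unsigned int region;

        if (msr <= 0x1fff) {
            region = 0x000;                      /* MSRs 0x0 - 0x1fff */
        } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
            region = 0x800;                      /* MSRs 0xc0000000 - 0xc0001fff */
            msr &= 0x1fff;
        } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
            region = 0x1000;                     /* MSRs 0xc0010000 - 0xc0011fff */
            msr &= 0x1fff;
        } else {
            return;                              /* not covered by the bitmap */
        }

        clear_bit(msr * 2,     msrpm + region);  /* read-intercept bit */
        clear_bit(msr * 2 + 1, msrpm + region);  /* write-intercept bit */
    }

    int main(void)
    {
        uint8_t msrpm[MSRPM_SIZE];

        memset(msrpm, 0xff, sizeof(msrpm));      /* intercept everything by default */
        allow_msr(msrpm, 0xc0000100);            /* MSR_FS_BASE */

        /* FS_BASE maps to bits 0x200/0x201 of the 0x800 region, i.e. byte 0x840. */
        printf("msrpm[0x840] = 0x%02x\n", msrpm[0x840]);   /* prints 0xfc */
        return 0;
    }
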
 
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Tue Apr 03 13:04:51 2007 -0600
@@ -289,7 +289,7 @@ static void construct_vmcs(struct vcpu *
     v->arch.hvm_vcpu.u.vmx.exec_control = vmx_cpu_based_exec_control;
 
     if ( cpu_has_vmx_msr_bitmap )
-        __vmwrite(MSR_BITMAP, virt_to_maddr(hvm_msr_bitmap));
+        __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
 
     /* I/O access bitmap. */
     __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Tue Apr 03 13:04:51 2007 -0600
@@ -51,6 +51,8 @@
 #include <public/hvm/save.h>
 #include <asm/hvm/trace.h>
 
+char *vmx_msr_bitmap;
+
 static void vmx_ctxt_switch_from(struct vcpu *v);
 static void vmx_ctxt_switch_to(struct vcpu *v);
 
@@ -1005,14 +1007,14 @@ static void disable_intercept_for_msr(u3
      */
     if ( msr <= 0x1fff )
     {
-        __clear_bit(msr, hvm_msr_bitmap + 0x000); /* read-low */
-        __clear_bit(msr, hvm_msr_bitmap + 0x800); /* write-low */
+        __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
+        __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
     }
     else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
     {
         msr &= 0x1fff;
-        __clear_bit(msr, hvm_msr_bitmap + 0x400); /* read-high */
-        __clear_bit(msr, hvm_msr_bitmap + 0xc00); /* write-high */
+        __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
+        __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
     }
 }
 
@@ -1105,6 +1107,9 @@ int start_vmx(void)
     if ( cpu_has_vmx_msr_bitmap )
     {
         printk("VMX: MSR intercept bitmap enabled\n");
+        vmx_msr_bitmap = alloc_xenheap_page();
+        BUG_ON(vmx_msr_bitmap == NULL);
+        memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
         disable_intercept_for_msr(MSR_FS_BASE);
         disable_intercept_for_msr(MSR_GS_BASE);
     }
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/mm.c Tue Apr 03 13:04:51 2007 -0600
@@ -806,7 +806,8 @@ void put_page_from_l1e(l1_pgentry_t l1e,
      * (Note that the undestroyable active grants are not a security hole in
      * Xen. All active grants can safely be cleaned up when the domain dies.)
      */
-    if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) && !d->is_shutdown && !d->is_dying )
+    if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
+         !d->is_shutting_down && !d->is_dying )
     {
         MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
                 l1e_get_intpte(l1e));
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/mm/hap/hap.c Tue Apr 03 13:04:51 2007 -0600
@@ -52,7 +52,7 @@
 /************************************************/
 /*             HAP SUPPORT FUNCTIONS            */
 /************************************************/
-mfn_t hap_alloc(struct domain *d, unsigned long backpointer)
+mfn_t hap_alloc(struct domain *d)
 {
     struct page_info *sp = NULL;
     void *p;
@@ -82,43 +82,43 @@ void hap_free(struct domain *d, mfn_t sm
     list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
 }
 
-static int hap_alloc_p2m_pages(struct domain *d)
-{
-    struct page_info *pg;
-
-    ASSERT(hap_locked_by_me(d));
-
-    pg = mfn_to_page(hap_alloc(d, 0));
-    d->arch.paging.hap.p2m_pages += 1;
-    d->arch.paging.hap.total_pages -= 1;
-    
-    page_set_owner(pg, d);
-    pg->count_info = 1;
-    list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist);
-
-    return 1;
-}
-
 struct page_info * hap_alloc_p2m_page(struct domain *d)
 {
-    struct list_head *entry;
     struct page_info *pg;
     mfn_t mfn;
     void *p;
 
     hap_lock(d);
-    
-    if ( list_empty(&d->arch.paging.hap.p2m_freelist) && 
-         !hap_alloc_p2m_pages(d) ) {
-        hap_unlock(d);
-        return NULL;
-    }
-    entry = d->arch.paging.hap.p2m_freelist.next;
-    list_del(entry);
-    
+
+#if CONFIG_PAGING_LEVELS == 3
+    /* Under PAE mode, the top-level P2M table must be allocated below the
+     * 4GB boundary because h_cr3 is only 32 bits wide. We use
+     * alloc_domheap_pages() to enforce this requirement. This page is
+     * de-allocated in hap_free_p2m_page(), like other P2M pages.
+     */
+    if ( d->arch.paging.hap.p2m_pages == 0 ) 
+    {
+       pg = alloc_domheap_pages(NULL, 0, MEMF_bits(32));
+       d->arch.paging.hap.p2m_pages += 1;
+    }
+    else
+#endif
+    {
+       pg = mfn_to_page(hap_alloc(d));
+       
+       d->arch.paging.hap.p2m_pages += 1;
+       d->arch.paging.hap.total_pages -= 1;
+    }  
+
+    if ( pg == NULL ) {
+       hap_unlock(d);
+       return NULL;
+    }   
+
     hap_unlock(d);
 
-    pg = list_entry(entry, struct page_info, list);
+    page_set_owner(pg, d);
+    pg->count_info = 1;
     mfn = page_to_mfn(pg);
     p = hap_map_domain_page(mfn);
     clear_page(p);
@@ -141,6 +141,7 @@ void hap_free_p2m_page(struct domain *d,
     page_set_owner(pg, NULL); 
     free_domheap_pages(pg, 0);
     d->arch.paging.hap.p2m_pages--;
+    ASSERT( d->arch.paging.hap.p2m_pages >= 0 );
 }
 
 /* Return the size of the pool, rounded up to the nearest MB */
@@ -320,7 +321,7 @@ mfn_t hap_make_monitor_table(struct vcpu
 #if CONFIG_PAGING_LEVELS == 4
     {
         mfn_t m4mfn;
-        m4mfn = hap_alloc(d, 0);
+        m4mfn = hap_alloc(d);
         hap_install_xen_entries_in_l4(v, m4mfn, m4mfn);
         return m4mfn;
     }
@@ -331,12 +332,12 @@ mfn_t hap_make_monitor_table(struct vcpu
         l2_pgentry_t *l2e;
         int i;
 
-        m3mfn = hap_alloc(d, 0);
+        m3mfn = hap_alloc(d);
 
         /* Install a monitor l2 table in slot 3 of the l3 table.
          * This is used for all Xen entries, including linear maps
          */
-        m2mfn = hap_alloc(d, 0);
+        m2mfn = hap_alloc(d);
         l3e = hap_map_domain_page(m3mfn);
         l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
         hap_install_xen_entries_in_l2h(v, m2mfn);
@@ -357,7 +358,7 @@ mfn_t hap_make_monitor_table(struct vcpu
     {
         mfn_t m2mfn;
         
-        m2mfn = hap_alloc(d, 0);
+        m2mfn = hap_alloc(d);
         hap_install_xen_entries_in_l2(v, m2mfn, m2mfn);
     
         return m2mfn;
@@ -390,7 +391,6 @@ void hap_domain_init(struct domain *d)
 {
     hap_lock_init(d);
     INIT_LIST_HEAD(&d->arch.paging.hap.freelists);
-    INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist);
 }
 
 /* return 0 for success, -errno for failure */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c    Tue Apr 03 13:04:51 2007 -0600
@@ -2823,8 +2823,8 @@ static int sh_page_fault(struct vcpu *v,
          * are OK, this can only have been caused by a failed
          * shadow_set_l*e(), which will have crashed the guest.
          * Get out of the fault handler immediately. */
-        ASSERT(d->is_shutdown);
-        unmap_walk(v, &gw); 
+        ASSERT(d->is_shutting_down);
+        unmap_walk(v, &gw);
         shadow_unlock(d);
         return 0;
     }
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/setup.c      Tue Apr 03 13:04:51 2007 -0600
@@ -591,8 +591,6 @@ void __init __start_xen(multiboot_info_t
 
     numa_initmem_init(0, max_page);
 
-    end_boot_allocator();
-
     /* Initialise the Xen heap, skipping RAM holes. */
     nr_pages = 0;
     for ( i = 0; i < e820.nr_map; i++ )
@@ -617,6 +615,8 @@ void __init __start_xen(multiboot_info_t
     printk("Xen heap: %luMB (%lukB)\n", 
            nr_pages >> (20 - PAGE_SHIFT),
            nr_pages << (PAGE_SHIFT - 10));
+
+    end_boot_allocator();
 
     early_boot = 0;
 
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/time.c       Tue Apr 03 13:04:51 2007 -0600
@@ -573,7 +573,7 @@ static void init_platform_timer(void)
  * machines were long is 32-bit! (However, as time_t is signed, we
  * will already get problems at other places on 2038-01-19 03:14:08)
  */
-static inline unsigned long
+unsigned long
 mktime (unsigned int year, unsigned int mon,
         unsigned int day, unsigned int hour,
         unsigned int min, unsigned int sec)
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/traps.c      Tue Apr 03 13:04:51 2007 -0600
@@ -285,23 +285,32 @@ void show_xen_trace()
     show_trace(&regs);
 }
 
-void show_stack_overflow(unsigned long esp)
+void show_stack_overflow(unsigned int cpu, unsigned long esp)
 {
 #ifdef MEMORY_GUARD
-    unsigned long esp_top;
+    unsigned long esp_top, esp_bottom;
     unsigned long *stack, addr;
 
-    esp_top = (esp | (STACK_SIZE - 1)) - DEBUG_STACK_SIZE;
+    esp_bottom = (esp | (STACK_SIZE - 1)) + 1;
+    esp_top    = esp_bottom - DEBUG_STACK_SIZE;
+
+    printk("Valid stack range: %p-%p, sp=%p, tss.esp0=%p\n",
+           (void *)esp_top, (void *)esp_bottom, (void *)esp,
+           (void *)init_tss[cpu].esp0);
 
     /* Trigger overflow trace if %esp is within 512 bytes of the guard page. */
     if ( ((unsigned long)(esp - esp_top) > 512) &&
          ((unsigned long)(esp_top - esp) > 512) )
+    {
+        printk("No stack overflow detected. Skipping stack trace.\n");
         return;
+    }
 
     if ( esp < esp_top )
         esp = esp_top;
 
-    printk("Xen stack overflow:\n   ");
+    printk("Xen stack overflow (dumping trace %p-%p):\n   ",
+           (void *)esp, (void *)esp_bottom);
 
     stack = (unsigned long *)esp;
     while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
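
show_stack_overflow() now derives the valid stack range from the faulting stack pointer: esp_bottom is the next STACK_SIZE boundary above %esp, esp_top sits DEBUG_STACK_SIZE below it, and a trace is only dumped when %esp lies within 512 bytes of the guard page. A small sketch of that arithmetic with assumed stack-size constants (the real values come from Xen's headers):

    #include <stdio.h>

    /* Assumed sizes for illustration only; Xen defines these in its headers. */
    #define STACK_SIZE        (8UL * 1024)
    #define DEBUG_STACK_SIZE  (4UL * 1024)

    int main(void)
    {
        unsigned long esp        = 0x71f0UL;     /* hypothetical stack pointer */
        unsigned long esp_bottom = (esp | (STACK_SIZE - 1)) + 1;
        unsigned long esp_top    = esp_bottom - DEBUG_STACK_SIZE;

        printf("Valid stack range: %#lx-%#lx, sp=%#lx\n", esp_top, esp_bottom, esp);

        /* Dump a trace only if %esp is within 512 bytes of the guard page. */
        if ((esp - esp_top) > 512 && (esp_top - esp) > 512)
            printf("No stack overflow detected. Skipping stack trace.\n");
        else
            printf("Xen stack overflow (dumping trace %#lx-%#lx)\n",
                   esp < esp_top ? esp_top : esp, esp_bottom);
        return 0;
    }
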
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c       Tue Apr 03 13:04:51 2007 -0600
@@ -139,7 +139,7 @@ void show_page_walk(unsigned long addr)
     unmap_domain_page(l1t);
 }
 
-#define DOUBLEFAULT_STACK_SIZE 1024
+#define DOUBLEFAULT_STACK_SIZE 2048
 static struct tss_struct doublefault_tss;
 static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
 
@@ -167,7 +167,7 @@ asmlinkage void do_double_fault(void)
            tss->esi, tss->edi, tss->ebp, tss->esp);
     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
            tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
-    show_stack_overflow(tss->esp);
+    show_stack_overflow(cpu, tss->esp);
 
     panic("DOUBLE FAULT -- system shutdown\n");
 }
@@ -268,8 +268,7 @@ void __init percpu_traps_init(void)
     tss->ds     = __HYPERVISOR_DS;
     tss->es     = __HYPERVISOR_DS;
     tss->ss     = __HYPERVISOR_DS;
-    tss->esp    = (unsigned long)
-        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+    tss->esp    = (unsigned long)&doublefault_stack[DOUBLEFAULT_STACK_SIZE];
     tss->__cr3  = __pa(idle_pg_table);
     tss->cs     = __HYPERVISOR_CS;
     tss->eip    = (unsigned long)do_double_fault;
diff -r fc9e2f7920c9 -r f378c424e0ce xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c       Tue Apr 03 13:04:51 2007 -0600
@@ -171,7 +171,7 @@ asmlinkage void do_double_fault(struct c
     printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
            regs->r12, regs->r13, regs->r14);
     printk("r15: %016lx\n", regs->r15);
-    show_stack_overflow(regs->rsp);
+    show_stack_overflow(cpu, regs->rsp);
 
     panic("DOUBLE FAULT -- system shutdown\n");
 }
@@ -270,18 +270,18 @@ void __init percpu_traps_init(void)
     stack_bottom = (char *)get_stack_bottom();
     stack        = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
 
-    /* Double-fault handler has its own per-CPU 1kB stack. */
-    init_tss[cpu].ist[0] = (unsigned long)&stack[1024];
+    /* Double-fault handler has its own per-CPU 2kB stack. */
+    init_tss[cpu].ist[0] = (unsigned long)&stack[2048];
 
     /* NMI handler has its own per-CPU 1kB stack. */
-    init_tss[cpu].ist[1] = (unsigned long)&stack[2048];
+    init_tss[cpu].ist[1] = (unsigned long)&stack[3072];
 
     /*
      * Trampoline for SYSCALL entry from long mode.
      */
 
     /* Skip the NMI and DF stacks. */
-    stack = &stack[2048];
+    stack = &stack[3072];
     wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
 
     /* movq %rsp, saversp(%rip) */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/domain.c
--- a/xen/common/domain.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/domain.c       Tue Apr 03 13:04:51 2007 -0600
@@ -59,6 +59,7 @@ struct domain *alloc_domain(domid_t domi
     atomic_set(&d->refcnt, 1);
     spin_lock_init(&d->big_lock);
     spin_lock_init(&d->page_alloc_lock);
+    spin_lock_init(&d->shutdown_lock);
     INIT_LIST_HEAD(&d->page_list);
     INIT_LIST_HEAD(&d->xenpage_list);
 
@@ -83,6 +84,45 @@ void free_domain(struct domain *d)
     xfree(d);
 }
 
+static void __domain_finalise_shutdown(struct domain *d)
+{
+    struct vcpu *v;
+
+    BUG_ON(!spin_is_locked(&d->shutdown_lock));
+
+    if ( d->is_shut_down )
+        return;
+
+    for_each_vcpu ( d, v )
+        if ( !v->paused_for_shutdown )
+            return;
+
+    d->is_shut_down = 1;
+
+    for_each_vcpu ( d, v )
+        vcpu_sleep_nosync(v);
+
+    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+}
+
+static void vcpu_check_shutdown(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+
+    spin_lock(&d->shutdown_lock);
+
+    if ( d->is_shutting_down )
+    {
+        if ( !v->paused_for_shutdown )
+            atomic_inc(&v->pause_count);
+        v->paused_for_shutdown = 1;
+        v->defer_shutdown = 0;
+        __domain_finalise_shutdown(d);
+    }
+
+    spin_unlock(&d->shutdown_lock);
+}
+
 struct vcpu *alloc_vcpu(
     struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
 {
@@ -121,6 +161,9 @@ struct vcpu *alloc_vcpu(
     d->vcpu[vcpu_id] = v;
     if ( vcpu_id != 0 )
         d->vcpu[v->vcpu_id-1]->next_in_list = v;
+
+    /* Must be called after making new vcpu visible to for_each_vcpu(). */
+    vcpu_check_shutdown(v);
 
     return v;
 }
@@ -286,7 +329,7 @@ void domain_kill(struct domain *d)
 
 void __domain_crash(struct domain *d)
 {
-    if ( d->is_shutdown )
+    if ( d->is_shutting_down )
     {
         /* Print nothing: the domain is already shutting down. */
     }
@@ -335,16 +378,73 @@ void domain_shutdown(struct domain *d, u
     if ( d->domain_id == 0 )
         dom0_shutdown(reason);
 
-    atomic_inc(&d->pause_count);
-    if ( !xchg(&d->is_shutdown, 1) )
-        d->shutdown_code = reason;
-    else
-        domain_unpause(d);
+    spin_lock(&d->shutdown_lock);
+
+    if ( d->is_shutting_down )
+    {
+        spin_unlock(&d->shutdown_lock);
+        return;
+    }
+
+    d->is_shutting_down = 1;
+    d->shutdown_code = reason;
+
+    smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */
 
     for_each_vcpu ( d, v )
-        vcpu_sleep_nosync(v);
-
-    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+    {
+        if ( v->defer_shutdown )
+            continue;
+        atomic_inc(&v->pause_count);
+        v->paused_for_shutdown = 1;
+    }
+
+    __domain_finalise_shutdown(d);
+
+    spin_unlock(&d->shutdown_lock);
+}
+
+void domain_resume(struct domain *d)
+{
+    struct vcpu *v;
+
+    /*
+     * Some code paths assume that shutdown status does not get reset under
+     * their feet (e.g., some assertions make this assumption).
+     */
+    domain_pause(d);
+
+    spin_lock(&d->shutdown_lock);
+
+    d->is_shutting_down = d->is_shut_down = 0;
+
+    for_each_vcpu ( d, v )
+    {
+        if ( v->paused_for_shutdown )
+            vcpu_unpause(v);
+        v->paused_for_shutdown = 0;
+    }
+
+    spin_unlock(&d->shutdown_lock);
+
+    domain_unpause(d);
+}
+
+int vcpu_start_shutdown_deferral(struct vcpu *v)
+{
+    v->defer_shutdown = 1;
+    smp_mb(); /* set deferral status /then/ check for shutdown */
+    if ( unlikely(v->domain->is_shutting_down) )
+        vcpu_check_shutdown(v);
+    return v->defer_shutdown;
+}
+
+void vcpu_end_shutdown_deferral(struct vcpu *v)
+{
+    v->defer_shutdown = 0;
+    smp_mb(); /* clear deferral status /then/ check for shutdown */
+    if ( unlikely(v->domain->is_shutting_down) )
+        vcpu_check_shutdown(v);
 }
 
 void domain_pause_for_debugger(void)
@@ -425,7 +525,6 @@ void vcpu_pause_nosync(struct vcpu *v)
 
 void vcpu_unpause(struct vcpu *v)
 {
-    ASSERT(v != current);
     if ( atomic_dec_and_test(&v->pause_count) )
         vcpu_wake(v);
 }
@@ -445,8 +544,6 @@ void domain_unpause(struct domain *d)
 void domain_unpause(struct domain *d)
 {
     struct vcpu *v;
-
-    ASSERT(d != current->domain);
 
     if ( atomic_dec_and_test(&d->pause_count) )
         for_each_vcpu( d, v )
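
The domain.c changes replace the single is_shutdown flag with an is_shutting_down/is_shut_down pair plus a per-vcpu deferral handshake: an I/O path sets defer_shutdown, a barrier orders that store against the re-check of is_shutting_down, and domain_shutdown() only pauses vcpus whose deferral flag is clear. A minimal single-process sketch of that flag protocol, using C11 atomics in place of smp_mb() (the names mirror the patch, but this is not hypervisor code):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_bool is_shutting_down;     /* domain-wide: shutdown requested  */
    static atomic_bool defer_shutdown;       /* per-vcpu: asks shutdown to wait  */
    static atomic_bool paused_for_shutdown;  /* per-vcpu: already paused         */

    static bool vcpu_start_shutdown_deferral(void)
    {
        atomic_store(&defer_shutdown, true);
        atomic_thread_fence(memory_order_seq_cst);   /* set deferral, then check */

        if (atomic_load(&is_shutting_down)) {        /* raced with domain_shutdown() */
            atomic_store(&paused_for_shutdown, true);
            atomic_store(&defer_shutdown, false);    /* deferral refused */
        }
        return atomic_load(&defer_shutdown);
    }

    int main(void)
    {
        /* No shutdown pending: the deferral is granted and the I/O can proceed. */
        printf("deferral granted: %d\n", vcpu_start_shutdown_deferral());

        /* Shutdown already in progress: the deferral is refused. */
        atomic_store(&is_shutting_down, true);
        printf("deferral granted: %d\n", vcpu_start_shutdown_deferral());
        return 0;
    }
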
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/domctl.c
--- a/xen/common/domctl.c       Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/domctl.c       Tue Apr 03 13:04:51 2007 -0600
@@ -115,7 +115,7 @@ void getdomaininfo(struct domain *d, str
 
     info->flags = flags |
         (d->is_dying                ? XEN_DOMINF_dying    : 0) |
-        (d->is_shutdown             ? XEN_DOMINF_shutdown : 0) |
+        (d->is_shut_down            ? XEN_DOMINF_shutdown : 0) |
         (d->is_paused_by_controller ? XEN_DOMINF_paused   : 0) |
         d->shutdown_code << XEN_DOMINF_shutdownshift;
 
@@ -287,8 +287,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         if ( d == NULL )
             break;
 
-        if ( xchg(&d->is_shutdown, 0) )
-            domain_unpause(d);
+        domain_resume(d);
         rcu_unlock_domain(d);
         ret = 0;
     }
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/page_alloc.c   Tue Apr 03 13:04:51 2007 -0600
@@ -512,6 +512,14 @@ void init_heap_pages(
 
     ASSERT(zone < NR_ZONES);
 
+    if ( unlikely(avail[0] == NULL) )
+    {
+        /* Start-of-day memory node 0 initialisation. */
+        init_heap_block(&_heap0);
+        _heap[0] = &_heap0;
+        avail[0] = avail0;
+    }
+
     if ( likely(page_to_mfn(pg) != 0) )
         nid_prev = phys_to_nid(page_to_maddr(pg-1));
     else
@@ -569,10 +577,6 @@ void end_boot_allocator(void)
 {
     unsigned long i;
     int curr_free, next_free;
-
-    init_heap_block(&_heap0);
-    _heap[0] = &_heap0;
-    avail[0] = avail0;
 
     /* Pages that are free now go to the domain sub-allocator. */
     if ( (curr_free = next_free = !allocated_in_map(first_valid_mfn)) )
diff -r fc9e2f7920c9 -r f378c424e0ce xen/common/symbols.c
--- a/xen/common/symbols.c      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/common/symbols.c      Tue Apr 03 13:04:51 2007 -0600
@@ -16,6 +16,7 @@
 #include <xen/init.h>
 #include <xen/lib.h>
 #include <xen/string.h>
+#include <xen/spinlock.h>
 
 extern unsigned long symbols_addresses[];
 extern unsigned long symbols_num_syms;
@@ -140,12 +141,15 @@ void __print_symbol(const char *fmt, uns
 void __print_symbol(const char *fmt, unsigned long address)
 {
     const char *name;
-    unsigned long offset, size;
-    char namebuf[KSYM_NAME_LEN+1];
+    unsigned long offset, size, flags;
 
+    static DEFINE_SPINLOCK(lock);
+    static char namebuf[KSYM_NAME_LEN+1];
 #define BUFFER_SIZE sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + \
                        2*(BITS_PER_LONG*3/10) + 1
-    char buffer[BUFFER_SIZE];
+    static char buffer[BUFFER_SIZE];
+
+    spin_lock_irqsave(&lock, flags);
 
     name = symbols_lookup(address, &size, &offset, namebuf);
 
@@ -155,4 +159,6 @@ void __print_symbol(const char *fmt, uns
         snprintf(buffer, BUFFER_SIZE, "%s+%#lx/%#lx", name, offset, size);
 
     printk(fmt, buffer);
+
+    spin_unlock_irqrestore(&lock, flags);
 }
diff -r fc9e2f7920c9 -r f378c424e0ce xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/drivers/char/console.c        Tue Apr 03 13:04:51 2007 -0600
@@ -858,19 +858,20 @@ void panic(const char *fmt, ...)
 void panic(const char *fmt, ...)
 {
     va_list args;
-    char buf[128];
     unsigned long flags;
     static DEFINE_SPINLOCK(lock);
+    static char buf[128];
     
     debugtrace_dump();
+
+    /* Protects buf[] and ensures the multi-line message prints atomically. */
+    spin_lock_irqsave(&lock, flags);
 
     va_start(args, fmt);
     (void)vsnprintf(buf, sizeof(buf), fmt, args);
     va_end(args);
 
-    /* Spit out multiline message in one go. */
     console_start_sync();
-    spin_lock_irqsave(&lock, flags);
     printk("\n****************************************\n");
     printk("Panic on CPU %d:\n", smp_processor_id());
     printk(buf);
@@ -879,6 +880,7 @@ void panic(const char *fmt, ...)
         printk("Manual reset required ('noreboot' specified)\n");
     else
         printk("Reboot in five seconds...\n");
+
     spin_unlock_irqrestore(&lock, flags);
 
     debugger_trap_immediate();
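
Both the symbols.c and console.c hunks move large formatting buffers off the small Xen stack into static storage and serialise access with a lock, so concurrent callers cannot clobber the buffer or interleave the multi-line output. A userspace sketch of the same pattern, with pthread_mutex_t standing in for spin_lock_irqsave():

    #include <pthread.h>
    #include <stdarg.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void report(const char *fmt, ...)
    {
        static char buf[128];        /* shared: must only be touched under lock */
        va_list args;

        pthread_mutex_lock(&lock);
        va_start(args, fmt);
        vsnprintf(buf, sizeof(buf), fmt, args);
        va_end(args);
        puts(buf);                   /* whole message emitted before unlocking */
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        report("Panic on CPU %d: %s", 0, "example message");
        return 0;
    }
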
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/domain.h      Tue Apr 03 13:04:51 2007 -0600
@@ -115,7 +115,6 @@ struct hap_domain {
     const char       *locker_function;
     
     struct list_head  freelists;
-    struct list_head  p2m_freelist;
     unsigned int      total_pages;  /* number of pages allocated */
     unsigned int      free_pages;   /* number of pages on freelists */
     unsigned int      p2m_pages;    /* number of pages allocates to p2m */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/hvm/io.h
--- a/xen/include/asm-x86/hvm/io.h      Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/hvm/io.h      Tue Apr 03 13:04:51 2007 -0600
@@ -127,6 +127,7 @@ static inline int hvm_portio_intercept(i
 }
 
 extern int hvm_mmio_intercept(ioreq_t *p);
+extern int hvm_buffered_io_send(ioreq_t *p);
 extern int hvm_buffered_io_intercept(ioreq_t *p);
 
 static inline int register_portio_handler(
@@ -145,6 +146,7 @@ static inline int irq_masked(unsigned lo
 
 extern void send_pio_req(unsigned long port, unsigned long count, int size,
                          paddr_t value, int dir, int df, int value_is_ptr);
+void send_timeoffset_req(unsigned long timeoff);
 extern void handle_mmio(unsigned long gpa);
 extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
 extern void hvm_io_assist(struct vcpu *v);
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/hvm/support.h Tue Apr 03 13:04:51 2007 -0600
@@ -215,7 +215,6 @@ int hvm_load(struct domain *d, hvm_domai
 /* End of save/restore */
 
 extern char hvm_io_bitmap[];
-extern char hvm_msr_bitmap[];
 extern int hvm_enabled;
 
 void hvm_enable(struct hvm_function_table *);
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h        Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h        Tue Apr 03 13:04:51 2007 -0600
@@ -121,6 +121,7 @@ extern u32 vmx_vmentry_control;
 
 #define cpu_has_vmx_msr_bitmap \
     (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
+extern char *vmx_msr_bitmap;
 
 /* VMCS Encordings */
 enum vmcs_field {
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/processor.h   Tue Apr 03 13:04:51 2007 -0600
@@ -413,9 +413,9 @@ struct tss_struct {
 struct tss_struct {
     unsigned short     back_link,__blh;
 #ifdef __x86_64__
-    u64 rsp0;
-    u64 rsp1;
-    u64 rsp2;
+    union { u64 rsp0, esp0; };
+    union { u64 rsp1, esp1; };
+    union { u64 rsp2, esp2; };
     u64 reserved1;
     u64 ist[7];
     u64 reserved2;
@@ -553,7 +553,7 @@ extern always_inline void prefetchw(cons
 
 void show_stack(struct cpu_user_regs *regs);
 void show_xen_trace(void);
-void show_stack_overflow(unsigned long esp);
+void show_stack_overflow(unsigned int cpu, unsigned long esp);
 void show_registers(struct cpu_user_regs *regs);
 void show_execution_state(struct cpu_user_regs *regs);
 void show_page_walk(unsigned long addr);
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h        Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/asm-x86/time.h        Tue Apr 03 13:04:51 2007 -0600
@@ -16,4 +16,9 @@ static inline cycles_t get_cycles(void)
     return c;
 }
 
+unsigned long
+mktime (unsigned int year, unsigned int mon,
+        unsigned int day, unsigned int hour,
+        unsigned int min, unsigned int sec);
+
 #endif /* __X86_TIME_H__ */
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/public/hvm/ioreq.h
--- a/xen/include/public/hvm/ioreq.h    Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/public/hvm/ioreq.h    Tue Apr 03 13:04:51 2007 -0600
@@ -39,6 +39,7 @@
 #define IOREQ_TYPE_XOR          4
 #define IOREQ_TYPE_XCHG         5
 #define IOREQ_TYPE_ADD          6
+#define IOREQ_TYPE_TIMEOFFSET   7
 
 /*
  * VMExit dispatcher should cooperate with instruction decoder to
diff -r fc9e2f7920c9 -r f378c424e0ce xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Fri Mar 30 17:18:42 2007 -0600
+++ b/xen/include/xen/sched.h   Tue Apr 03 13:04:51 2007 -0600
@@ -114,6 +114,10 @@ struct vcpu
     bool_t           nmi_pending;
     /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
     bool_t           nmi_masked;
+    /* Require shutdown to be deferred for some asynchronous operation? */
+    bool_t           defer_shutdown;
+    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
+    bool_t           paused_for_shutdown;
 
     unsigned long    pause_flags;
     atomic_t         pause_count;
@@ -193,7 +197,9 @@ struct domain
     bool_t           is_paused_by_controller;
 
     /* Guest has shut down (inc. reason code)? */
-    bool_t           is_shutdown;
+    spinlock_t       shutdown_lock;
+    bool_t           is_shutting_down; /* in process of shutting down? */
+    bool_t           is_shut_down;     /* fully shut down? */
     int              shutdown_code;
 
     atomic_t         pause_count;
@@ -331,7 +337,11 @@ void domain_destroy(struct domain *d);
 void domain_destroy(struct domain *d);
 void domain_kill(struct domain *d);
 void domain_shutdown(struct domain *d, u8 reason);
+void domain_resume(struct domain *d);
 void domain_pause_for_debugger(void);
+
+int vcpu_start_shutdown_deferral(struct vcpu *v);
+void vcpu_end_shutdown_deferral(struct vcpu *v);
 
 /*
  * Mark specified domain as crashed. This function always returns, even if the

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog