WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-ppc-devel

[XenPPC] [xenppc-unstable] merge with xen-unstable.hg

To: xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
Subject: [XenPPC] [xenppc-unstable] merge with xen-unstable.hg
From: Xen patchbot-xenppc-unstable <patchbot-xenppc-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 02 Jun 2006 17:56:27 +0000
Delivery-date: Fri, 02 Jun 2006 10:58:17 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-ppc-devel-request@lists.xensource.com?subject=help>
List-id: Xen PPC development <xen-ppc-devel.lists.xensource.com>
List-post: <mailto:xen-ppc-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ppc-devel>, <mailto:xen-ppc-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-ppc-devel>, <mailto:xen-ppc-devel-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-ppc-devel-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID c073ebdbde8c0f5c9437706b46c4a34f35033c0c
# Parent  9d52a66c74996a66adf5ee71a0d7f91bb880f7fb
# Parent  954f4dea9da6336aaa35d0706aed55fde7909644
merge with xen-unstable.hg
---
 linux-2.6-xen-sparse/drivers/xen/net_driver_util.c          |   58 
 linux-2.6-xen-sparse/include/asm-x86_64/e820.h              |   63 
 linux-2.6-xen-sparse/include/xen/net_driver_util.h          |   48 
 tools/xenstore/xenstored_proc.h                             |   27 
 .hgignore                                                   |    2 
 extras/mini-os/Makefile                                     |    9 
 extras/mini-os/lib/printf.c                                 |    4 
 extras/mini-os/lib/string.c                                 |    4 
 linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile         |    1 
 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c             |    2 
 linux-2.6-xen-sparse/drivers/xen/Makefile                   |    1 
 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c          |    8 
 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c          |    2 
 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c           |    2 
 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c        |    4 
 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c             |    2 
 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c            |    2 
 linux-2.6-xen-sparse/drivers/xen/console/console.c          |   28 
 linux-2.6-xen-sparse/drivers/xen/core/Makefile              |   11 
 linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c         |  185 +
 linux-2.6-xen-sparse/drivers/xen/core/evtchn.c              |   31 
 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c              |    3 
 linux-2.6-xen-sparse/drivers/xen/core/reboot.c              |    9 
 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c             |  215 --
 linux-2.6-xen-sparse/drivers/xen/netback/netback.c          |    4 
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c           |   31 
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c        |   56 
 linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c           |    2 
 linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c          |    4 
 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c          |   23 
 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c           |    6 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c     |    8 
 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c      |    4 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h |   63 
 linux-2.6-xen-sparse/include/xen/cpu_hotplug.h              |   42 
 linux-2.6-xen-sparse/include/xen/xenbus.h                   |    8 
 patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch           |   13 
 tools/libxc/Makefile                                        |    1 
 tools/libxc/xc_csched.c                                     |   50 
 tools/libxc/xc_linux_build.c                                |   13 
 tools/libxc/xc_linux_restore.c                              |  122 -
 tools/libxc/xc_private.c                                    |   22 
 tools/libxc/xc_ptrace.c                                     |   77 
 tools/libxc/xc_ptrace.h                                     |    3 
 tools/libxc/xc_ptrace_core.c                                |    7 
 tools/libxc/xc_tbuf.c                                       |   56 
 tools/libxc/xenctrl.h                                       |   11 
 tools/libxc/xg_private.h                                    |   10 
 tools/python/xen/lowlevel/xc/xc.c                           |   61 
 tools/python/xen/lowlevel/xs/xs.c                           |   11 
 tools/python/xen/xend/XendDomain.py                         |   22 
 tools/python/xen/xend/XendDomainInfo.py                     |   14 
 tools/python/xen/xend/balloon.py                            |   11 
 tools/python/xen/xend/server/SrvDomain.py                   |   14 
 tools/python/xen/xend/xenstore/xstransact.py                |   28 
 tools/python/xen/xm/main.py                                 |   45 
 tools/tests/test_x86_emulator.c                             |   67 
 tools/xenstore/Makefile                                     |    8 
 tools/xenstore/xenstored_core.c                             |    7 
 tools/xenstore/xenstored_core.h                             |    8 
 tools/xenstore/xenstored_domain.c                           |   37 
 tools/xenstore/xenstored_linux.c                            |   69 
 xen/arch/x86/domain_build.c                                 |    5 
 xen/arch/x86/hvm/hvm.c                                      |   16 
 xen/arch/x86/hvm/i8254.c                                    |  405 +--
 xen/arch/x86/hvm/intercept.c                                |   82 
 xen/arch/x86/hvm/svm/intr.c                                 |   47 
 xen/arch/x86/hvm/svm/svm.c                                  |   44 
 xen/arch/x86/hvm/svm/vmcb.c                                 |   14 
 xen/arch/x86/hvm/vmx/io.c                                   |   62 
 xen/arch/x86/hvm/vmx/vmx.c                                  |   37 
 xen/arch/x86/mm.c                                           |  129 +
 xen/arch/x86/traps.c                                        |    4 
 xen/arch/x86/x86_emulate.c                                  |   81 
 xen/common/Makefile                                         |    1 
 xen/common/grant_table.c                                    |   15 
 xen/common/kernel.c                                         |    5 
 xen/common/sched_credit.c                                   | 1233 ++++++++++++
 xen/common/schedule.c                                       |    5 
 xen/common/trace.c                                          |    6 
 xen/include/asm-x86/domain.h                                |   12 
 xen/include/asm-x86/hvm/domain.h                            |    6 
 xen/include/asm-x86/hvm/svm/intr.h                          |    1 
 xen/include/asm-x86/hvm/svm/svm.h                           |    1 
 xen/include/asm-x86/hvm/vcpu.h                              |    3 
 xen/include/asm-x86/hvm/vmx/vmx.h                           |    1 
 xen/include/asm-x86/hvm/vpit.h                              |   67 
 xen/include/asm-x86/string.h                                |  162 -
 xen/include/asm-x86/x86_emulate.h                           |   66 
 xen/include/public/io/xenbus.h                              |   59 
 xen/include/public/sched_ctl.h                              |    5 
 xen/include/xen/sched-if.h                                  |    2 
 xen/include/xen/softirq.h                                   |   13 
 93 files changed, 2802 insertions(+), 1546 deletions(-)

diff -r 9d52a66c7499 -r c073ebdbde8c .hgignore
--- a/.hgignore Thu May 25 15:59:18 2006 -0600
+++ b/.hgignore Fri May 26 13:41:49 2006 -0600
@@ -14,7 +14,7 @@
 .*\.orig$
 .*\.rej$
 .*/a\.out$
-.*/cscope\.*$
+.*/cscope\..*$
 ^[^/]*\.bz2$
 ^TAGS$
 ^dist/.*$
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/Makefile   Fri May 26 13:41:49 2006 -0600
@@ -13,6 +13,7 @@ override CPPFLAGS := -Iinclude $(CPPFLAG
 override CPPFLAGS := -Iinclude $(CPPFLAGS)
 ASFLAGS = -D__ASSEMBLY__
 
+LDLIBS =  -L. -lminios
 LDFLAGS := -N -T minios-$(TARGET_ARCH).lds
 
 ifeq ($(TARGET_ARCH),x86_32)
@@ -55,11 +56,11 @@ links:
 links:
        [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
 
-libminios.a: $(OBJS) $(HEAD)
-       ar r libminios.a $(HEAD) $(OBJS)
+libminios.a: links $(OBJS) $(HEAD)
+       $(AR) r libminios.a $(HEAD) $(OBJS)
 
-$(TARGET): links libminios.a $(HEAD)
-       $(LD) $(LDFLAGS) $(HEAD) -L. -lminios -o $@.elf
+$(TARGET): libminios.a $(HEAD)
+       $(LD) $(LDFLAGS) $(HEAD) $(LDLIBS) -o $@.elf
        gzip -f -9 -c $@.elf >$@.gz
 
 .PHONY: clean
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/lib/printf.c
--- a/extras/mini-os/lib/printf.c       Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/lib/printf.c       Fri May 26 13:41:49 2006 -0600
@@ -53,6 +53,8 @@
  *
  * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
  */
+
+#if !defined HAVE_LIBC
 
 #include <os.h>
 #include <types.h>
@@ -789,4 +791,4 @@ int sscanf(const char * buf, const char 
        return i;
 }
 
-
+#endif
diff -r 9d52a66c7499 -r c073ebdbde8c extras/mini-os/lib/string.c
--- a/extras/mini-os/lib/string.c       Thu May 25 15:59:18 2006 -0600
+++ b/extras/mini-os/lib/string.c       Fri May 26 13:41:49 2006 -0600
@@ -17,6 +17,8 @@
  * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
  ****************************************************************************
  */
+
+#if !defined HAVE_LIBC
 
 #include <os.h>
 #include <types.h>
@@ -153,3 +155,5 @@ char * strstr(const char * s1,const char
         }
         return NULL;
 }
+
+#endif
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile
--- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile       Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile       Fri May 26 
13:41:49 2006 -0600
@@ -2,7 +2,6 @@ ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
 ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y)
 obj-y   += util.o
 endif
-obj-$(CONFIG_XEN_IA64_DOM0_VP) += net_driver_util.o
 
 obj-y  += core/
 #obj-y += char/
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c   Fri May 26 13:41:49 
2006 -0600
@@ -329,7 +329,7 @@ out:
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        struct tpm_private *tp = dev->data;
        DPRINTK("\n");
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Fri May 26 13:41:49 2006 -0600
@@ -1,5 +1,4 @@
 
-obj-y  += net_driver_util.o
 obj-y  += util.o
 
 obj-y  += core/
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Fri May 26 
13:41:49 2006 -0600
@@ -67,7 +67,7 @@ static DECLARE_MUTEX(balloon_mutex);
  * Also protects non-atomic updates of current_pages and driver_pages, and
  * balloon lists.
  */
-spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(balloon_lock);
 
 /* We aim for 'current allocation' == 'target allocation'. */
 static unsigned long current_pages;
@@ -360,6 +360,12 @@ static void balloon_process(void *unused
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 static void set_new_target(unsigned long target)
 {
+       unsigned long min_target;
+
+       /* Do not allow target to reduce below 2% of maximum memory size. */
+       min_target = max_pfn / 50;
+       target = max(target, min_target);
+
        /* No need for lock. Not read-modify-write updates. */
        hard_limit   = ~0UL;
        target_pages = target;
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Fri May 26 
13:41:49 2006 -0600
@@ -82,7 +82,7 @@ typedef struct {
 
 static pending_req_t *pending_reqs;
 static struct list_head pending_free;
-static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pending_free_lock);
 static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
 
 #define BLKBACK_INVALID_HANDLE (~0)
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri May 26 13:41:49 
2006 -0600
@@ -247,7 +247,7 @@ static void backend_changed(struct xenbu
  * Callback received when the frontend's state changes.
  */
 static void frontend_changed(struct xenbus_device *dev,
-                            XenbusState frontend_state)
+                            enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
        int err;
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Fri May 26 
13:41:49 2006 -0600
@@ -247,7 +247,7 @@ fail:
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        struct blkfront_info *info = dev->data;
        struct block_device *bd;
@@ -434,7 +434,7 @@ int blkif_release(struct inode *inode, s
                   have ignored this request initially, as the device was
                   still mounted. */
                struct xenbus_device * dev = info->xbdev;
-               XenbusState state = xenbus_read_driver_state(dev->otherend);
+               enum xenbus_state state = 
xenbus_read_driver_state(dev->otherend);
 
                if (state == XenbusStateClosing)
                        blkfront_closing(dev);
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Fri May 26 13:41:49 
2006 -0600
@@ -93,7 +93,7 @@ static struct block_device_operations xl
        .ioctl  = blkif_ioctl,
 };
 
-spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(blkif_io_lock);
 
 static struct xlbd_major_info *
 xlbd_alloc_major_info(int major, int minor, int index)
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Fri May 26 13:41:49 
2006 -0600
@@ -138,7 +138,7 @@ typedef struct {
  */
 static pending_req_t pending_reqs[MAX_PENDING_REQS];
 static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(pend_prod_lock);
 /* NB. We use a different index type to differentiate from shared blk rings. */
 typedef unsigned int PEND_RING_IDX;
 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c        Fri May 26 
13:41:49 2006 -0600
@@ -117,14 +117,17 @@ static int __init xencons_bufsz_setup(ch
 {
        unsigned int goal;
        goal = simple_strtoul(str, NULL, 0);
-       while (wbuf_size < goal)
-               wbuf_size <<= 1;
+       if (goal) {
+               goal = roundup_pow_of_two(goal);
+               if (wbuf_size < goal)
+                       wbuf_size = goal;
+       }
        return 1;
 }
 __setup("xencons_bufsz=", xencons_bufsz_setup);
 
 /* This lock protects accesses to the common transmit buffer. */
-static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(xencons_lock);
 
 /* Common transmit-kick routine. */
 static void __xencons_tx_flush(void);
@@ -133,8 +136,7 @@ static struct tty_driver *xencons_driver
 
 /******************** Kernel console driver ********************************/
 
-static void kcons_write(
-       struct console *c, const char *s, unsigned int count)
+static void kcons_write(struct console *c, const char *s, unsigned int count)
 {
        int           i = 0;
        unsigned long flags;
@@ -155,14 +157,14 @@ static void kcons_write(
        spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
-static void kcons_write_dom0(
-       struct console *c, const char *s, unsigned int count)
-{
-       int rc;
-
-       while ((count > 0) &&
-              ((rc = HYPERVISOR_console_io(
-                       CONSOLEIO_write, count, (char *)s)) > 0)) {
+static void kcons_write_dom0(struct console *c, const char *s, unsigned int 
count)
+{
+
+       while (count > 0) {
+               int rc;
+               rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
+               if (rc <= 0)
+                       break;
                count -= rc;
                s += rc;
        }
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile    Fri May 26 13:41:49 
2006 -0600
@@ -4,8 +4,9 @@
 
 obj-y   := evtchn.o reboot.o gnttab.o features.o
 
-obj-$(CONFIG_PROC_FS) += xen_proc.o
-obj-$(CONFIG_NET)     += skbuff.o
-obj-$(CONFIG_SMP)     += smpboot.o
-obj-$(CONFIG_SYSFS)   += hypervisor_sysfs.o
-obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
+obj-$(CONFIG_PROC_FS)     += xen_proc.o
+obj-$(CONFIG_NET)         += skbuff.o
+obj-$(CONFIG_SMP)         += smpboot.o
+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
+obj-$(CONFIG_SYSFS)       += hypervisor_sysfs.o
+obj-$(CONFIG_XEN_SYSFS)   += xen_sysfs.o
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c    Fri May 26 13:41:49 
2006 -0600
@@ -51,10 +51,10 @@
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
  */
-static spinlock_t irq_mapping_update_lock;
+static DEFINE_SPINLOCK(irq_mapping_update_lock);
 
 /* IRQ <-> event-channel mappings. */
-static int evtchn_to_irq[NR_EVENT_CHANNELS];
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {[0 ...  NR_EVENT_CHANNELS-1] = 
-1};
 
 /* Packed IRQ information: binding type, sub-type index, and event channel. */
 static u32 irq_info[NR_IRQS];
@@ -91,13 +91,13 @@ static inline unsigned int type_from_irq
 }
 
 /* IRQ <-> VIRQ mapping. */
-DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]);
+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
 
 /* IRQ <-> IPI mapping. */
 #ifndef NR_IPIS
 #define NR_IPIS 1
 #endif
-DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
 
 /* Reference counts for bindings to IRQs. */
 static int irq_bindcount[NR_IRQS];
@@ -751,7 +751,9 @@ void irq_resume(void)
                BUG_ON(irq_info[pirq_to_irq(pirq)] != IRQ_UNBOUND);
 
        /* Secondary CPUs must have no VIRQ or IPI bindings. */
-       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+       for_each_possible_cpu(cpu) {
+               if (cpu == 0)
+                       continue;
                for (virq = 0; virq < NR_VIRQS; virq++)
                        BUG_ON(per_cpu(virq_to_irq, cpu)[virq] != -1);
                for (ipi = 0; ipi < NR_IPIS; ipi++)
@@ -813,25 +815,12 @@ void __init xen_init_IRQ(void)
 void __init xen_init_IRQ(void)
 {
        int i;
-       int cpu;
-
-       spin_lock_init(&irq_mapping_update_lock);
 
        init_evtchn_cpu_bindings();
 
-       /* No VIRQ or IPI bindings. */
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
-               for (i = 0; i < NR_VIRQS; i++)
-                       per_cpu(virq_to_irq, cpu)[i] = -1;
-               for (i = 0; i < NR_IPIS; i++)
-                       per_cpu(ipi_to_irq, cpu)[i] = -1;
-       }
-
-       /* No event-channel -> IRQ mappings. */
-       for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-               evtchn_to_irq[i] = -1;
-               mask_evtchn(i); /* No event channels are 'live' right now. */
-       }
+       /* No event channels are 'live' right now. */
+       for (i = 0; i < NR_EVENT_CHANNELS; i++)
+               mask_evtchn(i);
 
        /* No IRQ -> event-channel mappings. */
        for (i = 0; i < NR_IRQS; i++)
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Fri May 26 13:41:49 
2006 -0600
@@ -38,7 +38,6 @@
 #include <linux/vmalloc.h>
 #include <asm/pgtable.h>
 #include <xen/interface/xen.h>
-#include <asm/fixmap.h>
 #include <asm/uaccess.h>
 #include <xen/gnttab.h>
 #include <asm/synch_bitops.h>
@@ -81,7 +80,7 @@ static grant_ref_t gnttab_list[NR_GRANT_
 static grant_ref_t gnttab_list[NR_GRANT_ENTRIES];
 static int gnttab_free_count;
 static grant_ref_t gnttab_free_head;
-static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(gnttab_list_lock);
 
 static grant_entry_t *shared = NULL;
 
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c    Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c    Fri May 26 13:41:49 
2006 -0600
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <xen/gnttab.h>
 #include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
 
 #if defined(__i386__) || defined(__x86_64__)
 /*
@@ -80,14 +81,6 @@ static int shutting_down = SHUTDOWN_INVA
 static int shutting_down = SHUTDOWN_INVALID;
 static void __shutdown_handler(void *unused);
 static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
-
-#ifdef CONFIG_SMP
-int  smp_suspend(void);
-void smp_resume(void);
-#else
-#define smp_suspend()  (0)
-#define smp_resume()   ((void)0)
-#endif
 
 /* Ensure we run on the idle task page tables so that we will
    switch page tables before running user space. This is needed
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/smpboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c   Fri May 26 13:41:49 
2006 -0600
@@ -23,6 +23,7 @@
 #include <asm/pgalloc.h>
 #include <xen/evtchn.h>
 #include <xen/interface/vcpu.h>
+#include <xen/cpu_hotplug.h>
 #include <xen/xenbus.h>
 
 #ifdef CONFIG_SMP_ALTERNATIVES
@@ -78,15 +79,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
 #elif !defined(CONFIG_X86_IO_APIC)
 unsigned int maxcpus = NR_CPUS;
 #endif
-
-/*
- * Set of CPUs that remote admin software will allow us to bring online.
- * Notified to us via xenbus.
- */
-static cpumask_t xenbus_allowed_cpumask;
-
-/* Set of CPUs that local admin will allow us to bring online. */
-static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
 
 void __init prefill_possible_map(void)
 {
@@ -167,17 +159,17 @@ static void cpu_bringup(void)
        cpu_idle();
 }
 
-static void vcpu_prepare(int vcpu)
+void cpu_initialize_context(unsigned int cpu)
 {
        vcpu_guest_context_t ctxt;
-       struct task_struct *idle = idle_task(vcpu);
+       struct task_struct *idle = idle_task(cpu);
 #ifdef __x86_64__
-       struct desc_ptr *gdt_descr = &cpu_gdt_descr[vcpu];
+       struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
 #else
-       struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, vcpu);
-#endif
-
-       if (vcpu == 0)
+       struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+#endif
+
+       if (cpu == 0)
                return;
 
        memset(&ctxt, 0, sizeof(ctxt));
@@ -226,10 +218,10 @@ static void vcpu_prepare(int vcpu)
 
        ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
 
-       ctxt.gs_base_kernel = (unsigned long)(cpu_pda(vcpu));
-#endif
-
-       BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt));
+       ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+#endif
+
+       BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -304,10 +296,10 @@ void __init smp_prepare_cpus(unsigned in
                cpu_set(cpu, cpu_present_map);
 #endif
 
-               vcpu_prepare(cpu);
-       }
-
-       xenbus_allowed_cpumask = cpu_present_map;
+               cpu_initialize_context(cpu);
+       }
+
+       init_xenbus_allowed_cpumask();
 
        /* Currently, Xen gives no dynamic NUMA/HT info. */
        for (cpu = 1; cpu < NR_CPUS; cpu++) {
@@ -332,15 +324,6 @@ void __devinit smp_prepare_boot_cpu(void
        cpu_online_map   = cpumask_of_cpu(0);
 }
 
-static int local_cpu_hotplug_request(void)
-{
-       /*
-        * We assume a CPU hotplug request comes from local admin if it is made
-        * via a userspace process (i.e., one with a real mm_struct).
-        */
-       return (current->mm != NULL);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
@@ -355,141 +338,6 @@ static int __init initialize_cpu_present
 }
 core_initcall(initialize_cpu_present_map);
 
-static void vcpu_hotplug(unsigned int cpu)
-{
-       int err;
-       char dir[32], state[32];
-
-       if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
-               return;
-
-       sprintf(dir, "cpu/%d", cpu);
-       err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
-       if (err != 1) {
-               printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
-               return;
-       }
-
-       if (strcmp(state, "online") == 0) {
-               cpu_set(cpu, xenbus_allowed_cpumask);
-               (void)cpu_up(cpu);
-       } else if (strcmp(state, "offline") == 0) {
-               cpu_clear(cpu, xenbus_allowed_cpumask);
-               (void)cpu_down(cpu);
-       } else {
-               printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
-                      state, cpu);
-       }
-}
-
-static void handle_vcpu_hotplug_event(
-       struct xenbus_watch *watch, const char **vec, unsigned int len)
-{
-       int cpu;
-       char *cpustr;
-       const char *node = vec[XS_WATCH_PATH];
-
-       if ((cpustr = strstr(node, "cpu/")) != NULL) {
-               sscanf(cpustr, "cpu/%d", &cpu);
-               vcpu_hotplug(cpu);
-       }
-}
-
-static int smpboot_cpu_notify(struct notifier_block *notifier,
-                             unsigned long action, void *hcpu)
-{
-       int cpu = (long)hcpu;
-
-       /*
-        * We do this in a callback notifier rather than __cpu_disable()
-        * because local_cpu_hotplug_request() does not work in the latter
-        * as it's always executed from within a stopmachine kthread.
-        */
-       if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
-               cpu_clear(cpu, local_allowed_cpumask);
-
-       return NOTIFY_OK;
-}
-
-static int setup_cpu_watcher(struct notifier_block *notifier,
-                             unsigned long event, void *data)
-{
-       int i;
-
-       static struct xenbus_watch cpu_watch = {
-               .node = "cpu",
-               .callback = handle_vcpu_hotplug_event,
-               .flags = XBWF_new_thread };
-       (void)register_xenbus_watch(&cpu_watch);
-
-       if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
-               for_each_cpu(i)
-                       vcpu_hotplug(i);
-               printk(KERN_INFO "Brought up %ld CPUs\n",
-                      (long)num_online_cpus());
-       }
-
-       return NOTIFY_DONE;
-}
-
-static int __init setup_vcpu_hotplug_event(void)
-{
-       static struct notifier_block hotplug_cpu = {
-               .notifier_call = smpboot_cpu_notify };
-       static struct notifier_block xsn_cpu = {
-               .notifier_call = setup_cpu_watcher };
-
-       register_cpu_notifier(&hotplug_cpu);
-       register_xenstore_notifier(&xsn_cpu);
-
-       return 0;
-}
-
-arch_initcall(setup_vcpu_hotplug_event);
-
-int smp_suspend(void)
-{
-       int i, err;
-
-       lock_cpu_hotplug();
-
-       /*
-        * Take all other CPUs offline. We hold the hotplug mutex to
-        * avoid other processes bringing up CPUs under our feet.
-        */
-       while (num_online_cpus() > 1) {
-               unlock_cpu_hotplug();
-               for_each_online_cpu(i) {
-                       if (i == 0)
-                               continue;
-                       err = cpu_down(i);
-                       if (err) {
-                               printk(KERN_CRIT "Failed to take all CPUs "
-                                      "down: %d.\n", err);
-                               for_each_cpu(i)
-                                       vcpu_hotplug(i);
-                               return err;
-                       }
-               }
-               lock_cpu_hotplug();
-       }
-
-       return 0;
-}
-
-void smp_resume(void)
-{
-       int i;
-
-       for_each_cpu(i)
-               vcpu_prepare(i);
-
-       unlock_cpu_hotplug();
-
-       for_each_cpu(i)
-               vcpu_hotplug(i);
-}
-
 static void
 remove_siblinginfo(int cpu)
 {
@@ -536,20 +384,6 @@ void __cpu_die(unsigned int cpu)
 
 #else /* !CONFIG_HOTPLUG_CPU */
 
-int smp_suspend(void)
-{
-       if (num_online_cpus() > 1) {
-               printk(KERN_WARNING "Can't suspend SMP guests "
-                      "without CONFIG_HOTPLUG_CPU\n");
-               return -EOPNOTSUPP;
-       }
-       return 0;
-}
-
-void smp_resume(void)
-{
-}
-
 int __cpu_disable(void)
 {
        return -ENOSYS;
@@ -566,17 +400,9 @@ int __devinit __cpu_up(unsigned int cpu)
 {
        int rc;
 
-       if (local_cpu_hotplug_request()) {
-               cpu_set(cpu, local_allowed_cpumask);
-               if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
-                       printk("%s: attempt to bring up CPU %u disallowed by "
-                              "remote admin.\n", __FUNCTION__, cpu);
-                       return -EBUSY;
-               }
-       } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
-                  !cpu_isset(cpu, xenbus_allowed_cpumask)) {
-               return -EBUSY;
-       }
+       rc = cpu_up_is_allowed(cpu);
+       if (rc)
+               return rc;
 
 #ifdef CONFIG_SMP_ALTERNATIVES
        if (num_online_cpus() == 1)
@@ -591,8 +417,7 @@ int __devinit __cpu_up(unsigned int cpu)
        cpu_set(cpu, cpu_online_map);
 
        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
-       if (rc != 0)
-               BUG();
+       BUG_ON(rc);
 
        return 0;
 }
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Fri May 26 
13:41:49 2006 -0600
@@ -99,7 +99,7 @@ static spinlock_t net_schedule_list_lock
 #define MAX_MFN_ALLOC 64
 static unsigned long mfn_list[MAX_MFN_ALLOC];
 static unsigned int alloc_index = 0;
-static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mfn_lock);
 
 static unsigned long alloc_mfn(void)
 {
@@ -691,7 +691,7 @@ static void net_tx_action(unsigned long 
 
 static void netif_idx_release(u16 pending_idx)
 {
-       static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
+       static DEFINE_SPINLOCK(_lock);
        unsigned long flags;
 
        spin_lock_irqsave(&_lock, flags);
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri May 26 13:41:49 
2006 -0600
@@ -17,13 +17,10 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
-
 #include <stdarg.h>
 #include <linux/module.h>
 #include <xen/xenbus.h>
-#include <xen/net_driver_util.h>
 #include "common.h"
-
 
 #if 0
 #undef DPRINTK
@@ -31,22 +28,19 @@
     printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
 #endif
 
-
 struct backend_info
 {
        struct xenbus_device *dev;
        netif_t *netif;
        struct xenbus_watch backend_watch;
-       XenbusState frontend_state;
+       enum xenbus_state frontend_state;
 };
-
 
 static int connect_rings(struct backend_info *);
 static void connect(struct backend_info *);
 static void maybe_connect(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
                            unsigned int);
-
 
 static int netback_remove(struct xenbus_device *dev)
 {
@@ -191,7 +185,7 @@ static void backend_changed(struct xenbu
  * Callback received when the frontend's state changes.
  */
 static void frontend_changed(struct xenbus_device *dev,
-                            XenbusState frontend_state)
+                            enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
 
@@ -273,6 +267,27 @@ static void xen_net_read_rate(struct xen
        kfree(ratestr);
 }
 
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+       char *s, *e, *macstr;
+       int i;
+
+       macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+       if (IS_ERR(macstr))
+               return PTR_ERR(macstr);
+
+       for (i = 0; i < ETH_ALEN; i++) {
+               mac[i] = simple_strtoul(s, &e, 16);
+               if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+                       kfree(macstr);
+                       return -ENOENT;
+               }
+               s = e+1;
+       }
+
+       kfree(macstr);
+       return 0;
+}
 
 static void connect(struct backend_info *be)
 {
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Fri May 26 
13:41:49 2006 -0600
@@ -60,7 +60,6 @@
 #include <asm/uaccess.h>
 #include <xen/interface/grant_table.h>
 #include <xen/gnttab.h>
-#include <xen/net_driver_util.h>
 
 #define GRANT_INVALID_REF      0
 
@@ -88,12 +87,6 @@ struct netfront_info {
 
        unsigned int handle;
        unsigned int evtchn, irq;
-
-       /* What is the status of our connection to the remote backend? */
-#define BEST_CLOSED       0
-#define BEST_DISCONNECTED 1
-#define BEST_CONNECTED    2
-       unsigned int backend_state;
 
        /* Receive-ring batched refills. */
 #define RX_MIN_TARGET 8
@@ -143,14 +136,6 @@ static inline unsigned short get_id_from
        list[0] = list[id];
        return id;
 }
-
-#ifdef DEBUG
-static const char *be_state_name[] = {
-       [BEST_CLOSED]       = "closed",
-       [BEST_DISCONNECTED] = "disconnected",
-       [BEST_CONNECTED]    = "connected",
-};
-#endif
 
 #define DPRINTK(fmt, args...) pr_debug("netfront (%s:%d) " fmt, \
                                        __FUNCTION__, __LINE__, ##args)
@@ -247,6 +232,27 @@ static int netfront_resume(struct xenbus
        return talk_to_backend(dev, info);
 }
 
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+       char *s, *e, *macstr;
+       int i;
+
+       macstr = s = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
+       if (IS_ERR(macstr))
+               return PTR_ERR(macstr);
+
+       for (i = 0; i < ETH_ALEN; i++) {
+               mac[i] = simple_strtoul(s, &e, 16);
+               if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+                       kfree(macstr);
+                       return -ENOENT;
+               }
+               s = e+1;
+       }
+
+       kfree(macstr);
+       return 0;
+}
 
 /* Common code used when first setting up, and when resuming. */
 static int talk_to_backend(struct xenbus_device *dev,
@@ -342,7 +348,6 @@ static int setup_device(struct xenbus_de
        }
        memset(txs, 0, PAGE_SIZE);
        memset(rxs, 0, PAGE_SIZE);
-       info->backend_state = BEST_DISCONNECTED;
 
        SHARED_RING_INIT(txs);
        FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
@@ -384,7 +389,7 @@ static int setup_device(struct xenbus_de
  * Callback received when the backend's state changes.
  */
 static void backend_changed(struct xenbus_device *dev,
-                           XenbusState backend_state)
+                           enum xenbus_state backend_state)
 {
        DPRINTK("\n");
 
@@ -465,7 +470,7 @@ static void network_tx_buf_gc(struct net
        struct netfront_info *np = netdev_priv(dev);
        struct sk_buff *skb;
 
-       if (np->backend_state != BEST_CONNECTED)
+       if (unlikely(!netif_carrier_ok(dev)))
                return;
 
        do {
@@ -527,7 +532,7 @@ static void network_alloc_rx_buffers(str
        struct xen_memory_reservation reservation;
        grant_ref_t ref;
 
-       if (unlikely(np->backend_state != BEST_CONNECTED))
+       if (unlikely(!netif_carrier_ok(dev)))
                return;
 
        /*
@@ -662,7 +667,7 @@ static int network_start_xmit(struct sk_
 
        spin_lock_irq(&np->tx_lock);
 
-       if (np->backend_state != BEST_CONNECTED) {
+       if (unlikely(!netif_carrier_ok(dev))) {
                spin_unlock_irq(&np->tx_lock);
                goto drop;
        }
@@ -748,7 +753,7 @@ static int netif_poll(struct net_device 
 
        spin_lock(&np->rx_lock);
 
-       if (np->backend_state != BEST_CONNECTED) {
+       if (unlikely(!netif_carrier_ok(dev))) {
                spin_unlock(&np->rx_lock);
                return 0;
        }
@@ -1041,7 +1046,7 @@ static void network_connect(struct net_d
         * domain a kick because we've probably just requeued some
         * packets.
         */
-       np->backend_state = BEST_CONNECTED;
+       netif_carrier_on(dev);
        notify_remote_via_irq(np->irq);
        network_tx_buf_gc(dev);
 
@@ -1055,7 +1060,7 @@ static void show_device(struct netfront_
        if (np) {
                IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
                        np->handle,
-                       be_state_name[np->backend_state],
+                       netif_carrier_ok(np->netdev) ? "on" : "off",
                        netif_running(np->netdev) ? "open" : "closed",
                        np->evtchn,
                        np->tx,
@@ -1241,9 +1246,10 @@ static struct net_device * __devinit cre
        }
 
        np                = netdev_priv(netdev);
-       np->backend_state = BEST_CLOSED;
        np->handle        = handle;
        np->xbdev         = dev;
+
+       netif_carrier_off(netdev);
 
        spin_lock_init(&np->tx_lock);
        spin_lock_init(&np->rx_lock);
@@ -1392,7 +1398,7 @@ static void netif_disconnect_backend(str
        /* Stop old i/f to prevent errors whilst we rebuild the state. */
        spin_lock_irq(&info->tx_lock);
        spin_lock(&info->rx_lock);
-       info->backend_state = BEST_DISCONNECTED;
+       netif_carrier_off(info->netdev);
        spin_unlock(&info->rx_lock);
        spin_unlock_irq(&info->tx_lock);
 
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Fri May 26 13:41:49 
2006 -0600
@@ -166,7 +166,7 @@ static int pciback_attach(struct pciback
 }
 
 static void pciback_frontend_changed(struct xenbus_device *xdev,
-                                    XenbusState fe_state)
+                                    enum xenbus_state fe_state)
 {
        struct pciback_device *pdev = xdev->data;
 
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c        Fri May 26 
13:41:49 2006 -0600
@@ -196,7 +196,7 @@ static int pcifront_try_disconnect(struc
 static int pcifront_try_disconnect(struct pcifront_device *pdev)
 {
        int err = 0;
-       XenbusState prev_state;
+       enum xenbus_state prev_state;
 
        spin_lock(&pdev->dev_lock);
 
@@ -214,7 +214,7 @@ static int pcifront_try_disconnect(struc
 }
 
 static void pcifront_backend_changed(struct xenbus_device *xdev,
-                                    XenbusState be_state)
+                                    enum xenbus_state be_state)
 {
        struct pcifront_device *pdev = xdev->data;
 
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Fri May 26 
13:41:49 2006 -0600
@@ -159,10 +159,6 @@ static int privcmd_ioctl(struct inode *i
        break;
 
        case IOCTL_PRIVCMD_MMAPBATCH: {
-#ifndef __ia64__
-               mmu_update_t u;
-               uint64_t ptep;
-#endif
                privcmd_mmapbatch_t m;
                struct vm_area_struct *vma = NULL;
                unsigned long __user *p;
@@ -200,24 +196,12 @@ static int privcmd_ioctl(struct inode *i
                for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
                        if (get_user(mfn, p))
                                return -EFAULT;
-#ifdef __ia64__
+
                        ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
-                                                    mfn, 1 << PAGE_SHIFT,
+                                                    mfn, PAGE_SIZE,
                                                     vma->vm_page_prot, m.dom);
                        if (ret < 0)
-                           goto batch_err;
-#else
-
-                       ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
-                       if (ret)
-                               goto batch_err;
-
-                       u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
-                       u.ptr = ptep;
-
-                       if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0)
                                put_user(0xF0000000 | mfn, p);
-#endif
                }
 
                ret = 0;
@@ -283,6 +267,9 @@ static int __init privcmd_init(void)
        set_bit(__HYPERVISOR_mmuext_op,        hypercall_permission_map);
        set_bit(__HYPERVISOR_xen_version,      hypercall_permission_map);
        set_bit(__HYPERVISOR_sched_op,         hypercall_permission_map);
+       set_bit(__HYPERVISOR_sched_op_compat,  hypercall_permission_map);
+       set_bit(__HYPERVISOR_event_channel_op_compat,
+               hypercall_permission_map);
 
        privcmd_intf = create_xen_proc_entry("privcmd", 0400);
        if (privcmd_intf != NULL)
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Thu May 25 15:59:18 
2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Fri May 26 13:41:49 
2006 -0600
@@ -34,7 +34,7 @@ struct backend_info
 
        /* watch front end for changes */
        struct xenbus_watch backend_watch;
-       XenbusState frontend_state;
+       enum xenbus_state frontend_state;
 };
 
 static void maybe_connect(struct backend_info *be);
@@ -43,7 +43,7 @@ static void backend_changed(struct xenbu
 static void backend_changed(struct xenbus_watch *watch,
                             const char **vec, unsigned int len);
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state);
+                             enum xenbus_state frontend_state);
 
 static int tpmback_remove(struct xenbus_device *dev)
 {
@@ -129,7 +129,7 @@ static void backend_changed(struct xenbu
 
 
 static void frontend_changed(struct xenbus_device *dev,
-                             XenbusState frontend_state)
+                             enum xenbus_state frontend_state)
 {
        struct backend_info *be = dev->data;
        int err;
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c   Fri May 26 
13:41:49 2006 -0600
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 
 
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState state)
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
 {
        /* We check whether the state is currently set to the given value, and
           if not, then the state is set.  We don't want to unconditionally
@@ -269,9 +269,9 @@ int xenbus_free_evtchn(struct xenbus_dev
 }
 
 
-XenbusState xenbus_read_driver_state(const char *path)
-{
-       XenbusState result;
+enum xenbus_state xenbus_read_driver_state(const char *path)
+{
+       enum xenbus_state result;
        int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL);
        if (err)
                result = XenbusStateClosed;
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Thu May 25 
15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri May 26 
13:41:49 2006 -0600
@@ -284,7 +284,7 @@ static void otherend_changed(struct xenb
        struct xenbus_device *dev =
                container_of(watch, struct xenbus_device, otherend_watch);
        struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
-       XenbusState state;
+       enum xenbus_state state;
 
        /* Protect us against watches firing on old details when the otherend
           details change, say immediately after a resume. */
@@ -539,7 +539,7 @@ static int xenbus_probe_node(struct xen_
        size_t stringlen;
        char *tmpstring;
 
-       XenbusState state = xenbus_read_driver_state(nodename);
+       enum xenbus_state state = xenbus_read_driver_state(nodename);
 
        if (state != XenbusStateInitialising) {
                /* Device is not new, so ignore it.  This can happen if a
diff -r 9d52a66c7499 -r c073ebdbde8c linux-2.6-xen-sparse/include/xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h Thu May 25 15:59:18 2006 -0600
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Fri May 26 13:41:49 2006 -0600
@@ -75,7 +75,7 @@ struct xenbus_device {
        int otherend_id;
        struct xenbus_watch otherend_watch;
        struct device dev;
-       XenbusState state;
+       enum xenbus_state state;
        void *data;
 };
 
@@ -98,7 +98,7 @@ struct xenbus_driver {
        int (*probe)(struct xenbus_device *dev,
                     const struct xenbus_device_id *id);
        void (*otherend_changed)(struct xenbus_device *dev,
-                                XenbusState backend_state);
+                                enum xenbus_state backend_state);
        int (*remove)(struct xenbus_device *dev);
        int (*suspend)(struct xenbus_device *dev);
        int (*resume)(struct xenbus_device *dev);
@@ -207,7 +207,7 @@ int xenbus_watch_path2(struct xenbus_dev
  * Return 0 on success, or -errno on error.  On error, the device will switch
  * to XenbusStateClosing, and the error will be saved in the store.
  */
-int xenbus_switch_state(struct xenbus_device *dev, XenbusState new_state);
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state 
new_state);
 
 
 /**
@@ -273,7 +273,7 @@ int xenbus_free_evtchn(struct xenbus_dev
  * Return the state of the driver rooted at the given store path, or
  * XenbusStateClosed if no state can be read.
  */
-XenbusState xenbus_read_driver_state(const char *path);
+enum xenbus_state xenbus_read_driver_state(const char *path);
 
 
 /***
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/Makefile      Fri May 26 13:41:49 2006 -0600
@@ -20,6 +20,7 @@ SRCS       += xc_physdev.c
 SRCS       += xc_physdev.c
 SRCS       += xc_private.c
 SRCS       += xc_sedf.c
+SRCS       += xc_csched.c
 SRCS       += xc_tbuf.c
 
 ifeq ($(patsubst x86%,x86,$(XEN_TARGET_ARCH)),x86)
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_linux_build.c      Fri May 26 13:41:49 2006 -0600
@@ -268,21 +268,10 @@ static int setup_pg_tables_pae(int xc_ha
     l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
     l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
     uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab;
-    unsigned long ppt_alloc, count, nmfn;
+    unsigned long ppt_alloc, count;
 
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
-
-    if ( page_array[ppt_alloc] > 0xfffff )
-    {
-        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
-        if ( nmfn == 0 )
-        {
-            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
-            goto error_out;
-        }
-        page_array[ppt_alloc] = nmfn;
-    }
 
     alloc_pt(l3tab, vl3tab, pl3tab);
     vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_linux_restore.c    Fri May 26 13:41:49 2006 -0600
@@ -331,25 +331,17 @@ int xc_linux_restore(int xc_handle, int 
                 ** A page table page - need to 'uncanonicalize' it, i.e.
                 ** replace all the references to pfns with the corresponding
                 ** mfns for the new domain.
-                **
-                ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
-                ** so we may need to update the p2m after the main loop.
-                ** Hence we defer canonicalization of L1s until then.
                 */
-                if(pt_levels != 3 || pagetype != L1TAB) {
-
-                    if(!uncanonicalize_pagetable(pagetype, page)) {
-                        /*
-                        ** Failing to uncanonicalize a page table can be ok
-                        ** under live migration since the pages type may have
-                        ** changed by now (and we'll get an update later).
-                        */
-                        DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
-                                pagetype >> 28, pfn, mfn);
-                        nraces++;
-                        continue;
-                    }
-
+                if(!uncanonicalize_pagetable(pagetype, page)) {
+                    /*
+                    ** Failing to uncanonicalize a page table can be ok
+                    ** under live migration since the pages type may have
+                    ** changed by now (and we'll get an update later).
+                    */
+                    DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
+                            pagetype >> 28, pfn, mfn);
+                    nraces++;
+                    continue;
                 }
 
             } else if(pagetype != NOTAB) {
@@ -397,100 +389,6 @@ int xc_linux_restore(int xc_handle, int 
     }
 
     DPRINTF("Received all pages (%d races)\n", nraces);
-
-    if(pt_levels == 3) {
-
-        /*
-        ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
-        ** is a little awkward and involves (a) finding all such PGDs and
-        ** replacing them with 'lowmem' versions; (b) upating the p2m[]
-        ** with the new info; and (c) canonicalizing all the L1s using the
-        ** (potentially updated) p2m[].
-        **
-        ** This is relatively slow (and currently involves two passes through
-        ** the pfn_type[] array), but at least seems to be correct. May wish
-        ** to consider more complex approaches to optimize this later.
-        */
-
-        int j, k;
-
-        /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
-        for (i = 0; i < max_pfn; i++) {
-
-            if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
-
-                unsigned long new_mfn;
-                uint64_t l3ptes[4];
-                uint64_t *l3tab;
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                         PROT_READ, p2m[i]);
-
-                for(j = 0; j < 4; j++)
-                    l3ptes[j] = l3tab[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-                if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
-                    ERR("Couldn't get a page below 4GB :-(");
-                    goto out;
-                }
-
-                p2m[i] = new_mfn;
-                if (xc_add_mmu_update(xc_handle, mmu,
-                                      (((unsigned long long)new_mfn)
-                                       << PAGE_SHIFT) |
-                                      MMU_MACHPHYS_UPDATE, i)) {
-                    ERR("Couldn't m2p on PAE root pgdir");
-                    goto out;
-                }
-
-                l3tab = (uint64_t *)
-                    xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                         PROT_READ | PROT_WRITE, p2m[i]);
-
-                for(j = 0; j < 4; j++)
-                    l3tab[j] = l3ptes[j];
-
-                munmap(l3tab, PAGE_SIZE);
-
-            }
-        }
-
-        /* Second pass: find all L1TABs and uncanonicalize them */
-        j = 0;
-
-        for(i = 0; i < max_pfn; i++) {
-
-            if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
-                region_mfn[j] = p2m[i];
-                j++;
-            }
-
-            if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
-
-                if (!(region_base = xc_map_foreign_batch(
-                          xc_handle, dom, PROT_READ | PROT_WRITE,
-                          region_mfn, j))) {
-                    ERR("map batch failed");
-                    goto out;
-                }
-
-                for(k = 0; k < j; k++) {
-                    if(!uncanonicalize_pagetable(L1TAB,
-                                                 region_base + k*PAGE_SIZE)) {
-                        ERR("failed uncanonicalize pt!");
-                        goto out;
-                    }
-                }
-
-                munmap(region_base, j*PAGE_SIZE);
-                j = 0;
-            }
-        }
-
-    }
 
 
     if (xc_finish_mmu_updates(xc_handle, mmu)) {
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_private.c  Fri May 26 13:41:49 2006 -0600
@@ -430,28 +430,6 @@ int xc_version(int xc_handle, int cmd, v
     return rc;
 }
 
-unsigned long xc_make_page_below_4G(
-    int xc_handle, uint32_t domid, unsigned long mfn)
-{
-    unsigned long new_mfn;
-
-    if ( xc_domain_memory_decrease_reservation(
-        xc_handle, domid, 1, 0, &mfn) != 0 )
-    {
-        fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
-        return 0;
-    }
-
-    if ( xc_domain_memory_increase_reservation(
-        xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
-    {
-        fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
-        return 0;
-    }
-
-    return new_mfn;
-}
-
 /*
  * Local variables:
  * mode: C
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace.c   Fri May 26 13:41:49 2006 -0600
@@ -185,7 +185,7 @@ map_domain_va_32(
     void *guest_va,
     int perm)
 {
-    unsigned long l1p, p, va = (unsigned long)guest_va;
+    unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va;
     uint32_t *l2, *l1;
     static void *v[MAX_VIRT_CPUS];
 
@@ -194,18 +194,20 @@ map_domain_va_32(
     if ( l2 == NULL )
         return NULL;
 
-    l1p = to_ma(cpu, l2[l2_table_offset(va)]);
+    l2e = l2[l2_table_offset_i386(va)];
     munmap(l2, PAGE_SIZE);
-    if ( !(l1p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
     l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l1p >> PAGE_SHIFT);
     if ( l1 == NULL )
         return NULL;
 
-    p = to_ma(cpu, l1[l1_table_offset(va)]);
+    l1e = l1[l1_table_offset_i386(va)];
     munmap(l1, PAGE_SIZE);
-    if ( !(p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l1e & _PAGE_PRESENT) )
+        return NULL;
+    p = to_ma(cpu, l1e);
     if ( v[cpu] != NULL )
         munmap(v[cpu], PAGE_SIZE);
     v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p 
>> PAGE_SHIFT);
@@ -223,7 +225,7 @@ map_domain_va_pae(
     void *guest_va,
     int perm)
 {
-    unsigned long l2p, l1p, p, va = (unsigned long)guest_va;
+    unsigned long l3e, l2e, l1e, l2p, l1p, p, va = (unsigned long)guest_va;
     uint64_t *l3, *l2, *l1;
     static void *v[MAX_VIRT_CPUS];
 
@@ -232,26 +234,29 @@ map_domain_va_pae(
     if ( l3 == NULL )
         return NULL;
 
-    l2p = to_ma(cpu, l3[l3_table_offset_pae(va)]);
+    l3e = l3[l3_table_offset_pae(va)];
     munmap(l3, PAGE_SIZE);
-    if ( !(l2p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l3e & _PAGE_PRESENT) )
+        return NULL;
+    l2p = to_ma(cpu, l3e);
     l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l2p >> PAGE_SHIFT);
     if ( l2 == NULL )
         return NULL;
 
-    l1p = to_ma(cpu, l2[l2_table_offset_pae(va)]);
+    l2e = l2[l2_table_offset_pae(va)];
     munmap(l2, PAGE_SIZE);
-    if ( !(l1p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
     l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, l1p 
>> PAGE_SHIFT);
     if ( l1 == NULL )
         return NULL;
 
-    p = to_ma(cpu, l1[l1_table_offset_pae(va)]);
+    l1e = l1[l1_table_offset_pae(va)];
     munmap(l1, PAGE_SIZE);
-    if ( !(p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l1e & _PAGE_PRESENT) )
+        return NULL;
+    p = to_ma(cpu, l1e);
     if ( v[cpu] != NULL )
         munmap(v[cpu], PAGE_SIZE);
     v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p 
>> PAGE_SHIFT);
@@ -269,9 +274,10 @@ map_domain_va_64(
     void *guest_va,
     int perm)
 {
-    unsigned long l3p, l2p, l1p, l1e, p, va = (unsigned long)guest_va;
+    unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned 
long)guest_va;
     uint64_t *l4, *l3, *l2, *l1;
     static void *v[MAX_VIRT_CPUS];
+
 
     if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
         return map_domain_va_32(xc_handle, cpu, guest_va, perm);
@@ -281,40 +287,41 @@ map_domain_va_64(
     if ( l4 == NULL )
         return NULL;
 
-    l3p = to_ma(cpu, l4[l4_table_offset(va)]);
+    l4e = l4[l4_table_offset(va)];
     munmap(l4, PAGE_SIZE);
-    if ( !(l3p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l4e & _PAGE_PRESENT) )
+        return NULL;
+    l3p = to_ma(cpu, l4e);
     l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l3p >> PAGE_SHIFT);
     if ( l3 == NULL )
         return NULL;
 
-    l2p = to_ma(cpu, l3[l3_table_offset(va)]);
+    l3e = l3[l3_table_offset(va)];
     munmap(l3, PAGE_SIZE);
-    if ( !(l2p & _PAGE_PRESENT) )
-        return NULL;
+    if ( !(l3e & _PAGE_PRESENT) )
+        return NULL;
+    l2p = to_ma(cpu, l3e);
     l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, 
l2p >> PAGE_SHIFT);
     if ( l2 == NULL )
         return NULL;
 
     l1 = NULL;
-    l1e = to_ma(cpu, l2[l2_table_offset(va)]);
-    if ( !(l1e & _PAGE_PRESENT) )
-    {
-        munmap(l2, PAGE_SIZE);
-        return NULL;
-    }
-    l1p = l1e >> PAGE_SHIFT;
-    if (l1e & 0x80)  { /* 2M pages */
+    l2e = l2[l2_table_offset(va)];
+    munmap(l2, PAGE_SIZE);
+    if ( !(l2e & _PAGE_PRESENT) )
+        return NULL;
+    l1p = to_ma(cpu, l2e);
+    if (l2e & 0x80)  { /* 2M pages */
         p = to_ma(cpu, (l1p + l1_table_offset(va)) << PAGE_SHIFT);
     } else { /* 4K pages */
-        l1p = to_ma(cpu, l1p);
         l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, 
l1p >> PAGE_SHIFT);
-        munmap(l2, PAGE_SIZE);
         if ( l1 == NULL )
             return NULL;
 
-        p = to_ma(cpu, l1[l1_table_offset(va)]);
+        l1e = l1[l1_table_offset(va)];
+        if ( !(l1e & _PAGE_PRESENT) )
+            return NULL;
+        p = to_ma(cpu, l1e);
     }
     if ( v[cpu] != NULL )
         munmap(v[cpu], PAGE_SIZE);
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace.h
--- a/tools/libxc/xc_ptrace.h   Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace.h   Fri May 26 13:41:49 2006 -0600
@@ -7,7 +7,6 @@
 #define X86_CR0_PE              0x00000001 /* Enable Protected Mode    (RW) */
 #define X86_CR0_PG              0x80000000 /* Paging                   (RW) */
 #define BSD_PAGE_MASK (PAGE_SIZE-1)
-#define PDRSHIFT        22
 #define PSL_T  0x00000100 /* trace enable bit */
 
 #ifdef __x86_64__
@@ -162,8 +161,6 @@ struct gdb_regs {
 #endif
 
 #define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define vtopdi(va) ((va) >> PDRSHIFT)
-#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
 #endif
 
 typedef void (*thr_ev_handler_t)(long);
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_ptrace_core.c      Fri May 26 13:41:49 2006 -0600
@@ -3,6 +3,7 @@
 #include <sys/ptrace.h>
 #include <sys/wait.h>
 #include "xc_private.h"
+#include "xg_private.h"
 #include "xc_ptrace.h"
 #include <time.h>
 
@@ -54,7 +55,7 @@ map_domain_va_core(unsigned long domfd, 
         }
         cr3_virt[cpu] = v;
     }
-    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
+    if ((pde = cr3_virt[cpu][l2_table_offset_i386(va)]) == 0) /* logical 
address */
         return NULL;
     if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -70,7 +71,7 @@ map_domain_va_core(unsigned long domfd, 
             return NULL;
         pde_virt[cpu] = v;
     }
-    if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
+    if ((page = pde_virt[cpu][l1_table_offset_i386(va)]) == 0) /* logical 
address */
         return NULL;
     if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
@@ -84,7 +85,7 @@ map_domain_va_core(unsigned long domfd, 
             map_mtop_offset(page_phys[cpu]));
         if (v == MAP_FAILED)
         {
-            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, 
vtopti(va));
+            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, 
l1_table_offset_i386(va));
             page_phys[cpu] = 0;
             return NULL;
         }
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_tbuf.c
--- a/tools/libxc/xc_tbuf.c     Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xc_tbuf.c     Fri May 26 13:41:49 2006 -0600
@@ -18,53 +18,57 @@
 
 static int tbuf_enable(int xc_handle, int enable)
 {
-  DECLARE_DOM0_OP;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  if (enable)
-    op.u.tbufcontrol.op  = DOM0_TBUF_ENABLE;
-  else
-    op.u.tbufcontrol.op  = DOM0_TBUF_DISABLE;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    if (enable)
+        op.u.tbufcontrol.op  = DOM0_TBUF_ENABLE;
+    else
+        op.u.tbufcontrol.op  = DOM0_TBUF_DISABLE;
 
-  return xc_dom0_op(xc_handle, &op);
+    return xc_dom0_op(xc_handle, &op);
 }
 
 int xc_tbuf_set_size(int xc_handle, unsigned long size)
 {
-  DECLARE_DOM0_OP;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  op.u.tbufcontrol.op  = DOM0_TBUF_SET_SIZE;
-  op.u.tbufcontrol.size = size;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op  = DOM0_TBUF_SET_SIZE;
+    op.u.tbufcontrol.size = size;
 
-  return xc_dom0_op(xc_handle, &op);
+    return xc_dom0_op(xc_handle, &op);
 }
 
 int xc_tbuf_get_size(int xc_handle, unsigned long *size)
 {
-  int rc;
-  DECLARE_DOM0_OP;
+    int rc;
+    DECLARE_DOM0_OP;
 
-  op.cmd = DOM0_TBUFCONTROL;
-  op.interface_version = DOM0_INTERFACE_VERSION;
-  op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
 
-  rc = xc_dom0_op(xc_handle, &op);
-  if (rc == 0)
-    *size = op.u.tbufcontrol.size;
-  return rc;
+    rc = xc_dom0_op(xc_handle, &op);
+    if (rc == 0)
+        *size = op.u.tbufcontrol.size;
+    return rc;
 }
 
 int xc_tbuf_enable(int xc_handle, size_t cnt, unsigned long *mfn,
-    unsigned long *size)
+                   unsigned long *size)
 {
     DECLARE_DOM0_OP;
     int rc;
 
-    if ( xc_tbuf_set_size(xc_handle, cnt) != 0 )
-        return -1;
+    /*
+     * Ignore errors (at least for now) as we get an error if size is already
+     * set (since trace buffers cannot be reallocated). If we really have no
+     * buffers at all then tbuf_enable() will fail, so this is safe.
+     */
+    (void)xc_tbuf_set_size(xc_handle, cnt);
 
     if ( tbuf_enable(xc_handle, 1) != 0 )
         return -1;
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xenctrl.h     Fri May 26 13:41:49 2006 -0600
@@ -354,6 +354,14 @@ int xc_sedf_domain_get(int xc_handle,
                        uint64_t *latency, uint16_t *extratime,
                        uint16_t *weight);
 
+int xc_csched_domain_set(int xc_handle,
+                         uint32_t domid,
+                         struct csched_domain *sdom);
+
+int xc_csched_domain_get(int xc_handle,
+                         uint32_t domid,
+                         struct csched_domain *sdom);
+
 typedef evtchn_status_t xc_evtchn_status_t;
 
 /*
@@ -444,9 +452,6 @@ int xc_domain_iomem_permission(int xc_ha
                                unsigned long first_mfn,
                                unsigned long nr_mfns,
                                uint8_t allow_access);
-
-unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid,
-                                    unsigned long mfn);
 
 typedef dom0_perfc_desc_t xc_perfc_desc_t;
 /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h  Thu May 25 15:59:18 2006 -0600
+++ b/tools/libxc/xg_private.h  Fri May 26 13:41:49 2006 -0600
@@ -48,6 +48,8 @@ unsigned long csum_page (void * page);
 #define L2_PAGETABLE_SHIFT_PAE   21
 #define L3_PAGETABLE_SHIFT_PAE   30
 
+#define L2_PAGETABLE_SHIFT_I386  22
+
 #if defined(__i386__)
 #define L1_PAGETABLE_SHIFT       12
 #define L2_PAGETABLE_SHIFT       22
@@ -61,6 +63,9 @@ unsigned long csum_page (void * page);
 #define L1_PAGETABLE_ENTRIES_PAE  512
 #define L2_PAGETABLE_ENTRIES_PAE  512
 #define L3_PAGETABLE_ENTRIES_PAE    4
+
+#define L1_PAGETABLE_ENTRIES_I386 1024
+#define L2_PAGETABLE_ENTRIES_I386 1024
 
 #if defined(__i386__)
 #define L1_PAGETABLE_ENTRIES   1024
@@ -95,6 +100,11 @@ typedef unsigned long l4_pgentry_t;
 #define l3_table_offset_pae(_a) \
   (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1))
 
+#define l1_table_offset_i386(_a) \
+  (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES_I386 - 1))
+#define l2_table_offset_i386(_a) \
+  (((_a) >> L2_PAGETABLE_SHIFT_I386) & (L2_PAGETABLE_ENTRIES_I386 - 1))
+
 #if defined(__i386__)
 #define l1_table_offset(_a) \
           (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/lowlevel/xc/xc.c Fri May 26 13:41:49 2006 -0600
@@ -716,6 +716,49 @@ static PyObject *pyxc_sedf_domain_get(Xc
                          "weight",    weight);
 }
 
+static PyObject *pyxc_csched_domain_set(XcObject *self,
+                                        PyObject *args,
+                                        PyObject *kwds)
+{
+    uint32_t domid;
+    uint16_t weight;
+    uint16_t cap;
+    static char *kwd_list[] = { "dom", "weight", "cap", NULL };
+    static char kwd_type[] = "I|HH";
+    struct csched_domain sdom;
+    
+    weight = 0;
+    cap = (uint16_t)~0U;
+    if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, 
+                                     &domid, &weight, &cap) )
+        return NULL;
+
+    sdom.weight = weight;
+    sdom.cap = cap;
+
+    if ( xc_csched_domain_set(self->xc_handle, domid, &sdom) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    Py_INCREF(zero);
+    return zero;
+}
+
+static PyObject *pyxc_csched_domain_get(XcObject *self, PyObject *args)
+{
+    uint32_t domid;
+    struct csched_domain sdom;
+    
+    if( !PyArg_ParseTuple(args, "I", &domid) )
+        return NULL;
+    
+    if ( xc_csched_domain_get(self->xc_handle, domid, &sdom) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    return Py_BuildValue("{s:H,s:H}",
+                         "weight",  sdom.weight,
+                         "cap",     sdom.cap);
+}
+
 static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args)
 {
     uint32_t dom;
@@ -1040,6 +1083,24 @@ static PyMethodDef pyxc_methods[] = {
       " slice     [long]: CPU reservation per period\n"
       " latency   [long]: domain's wakeup latency hint\n"
       " extratime [int]:  domain aware of extratime?\n"},
+    
+    { "csched_domain_set",
+      (PyCFunction)pyxc_csched_domain_set,
+      METH_KEYWORDS, "\n"
+      "Set the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler.\n"
+      " domid     [int]:   domain id to set\n"
+      " weight    [short]: domain's scheduling weight\n"
+      "Returns: [int] 0 on success; -1 on error.\n" },
+
+    { "csched_domain_get",
+      (PyCFunction)pyxc_csched_domain_get,
+      METH_VARARGS, "\n"
+      "Get the scheduling parameters for a domain when running with the\n"
+      "SMP credit scheduler.\n"
+      " domid     [int]:   domain id to get\n"
+      "Returns:   [dict]\n"
+      " weight    [short]: domain's scheduling weight\n"},
 
     { "evtchn_alloc_unbound", 
       (PyCFunction)pyxc_evtchn_alloc_unbound,
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/lowlevel/xs/xs.c Fri May 26 13:41:49 2006 -0600
@@ -272,11 +272,12 @@ static PyObject *xspy_get_permissions(Xs
 
     if (perms) {
         PyObject *val = PyList_New(perms_n);
-        for (i = 0; i < perms_n; i++, perms++) {
-            PyObject *p = Py_BuildValue("{s:i,s:i,s:i}",
-                                        "dom",  perms->id,
-                                        "read", perms->perms & XS_PERM_READ,
-                                        "write",perms->perms & XS_PERM_WRITE);
+        for (i = 0; i < perms_n; i++) {
+            PyObject *p =
+                Py_BuildValue("{s:i,s:i,s:i}",
+                              "dom",   perms[i].id,
+                              "read",  perms[i].perms & XS_PERM_READ,
+                              "write", perms[i].perms & XS_PERM_WRITE);
             PyList_SetItem(val, i, p);
         }
 
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/XendDomain.py       Fri May 26 13:41:49 2006 -0600
@@ -522,6 +522,28 @@ class XendDomain:
         except Exception, ex:
             raise XendError(str(ex))
 
+    def domain_csched_get(self, domid):
+        """Get credit scheduler parameters for a domain.
+        """
+        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        try:
+            return xc.csched_domain_get(dominfo.getDomid())
+        except Exception, ex:
+            raise XendError(str(ex))
+    
+    def domain_csched_set(self, domid, weight, cap):
+        """Set credit scheduler parameters for a domain.
+        """
+        dominfo = self.domain_lookup_by_name_or_id_nr(domid)
+        if not dominfo:
+            raise XendInvalidDomain(str(domid))
+        try:
+            return xc.csched_domain_set(dominfo.getDomid(), weight, cap)
+        except Exception, ex:
+            raise XendError(str(ex))
+
     def domain_maxmem_set(self, domid, mem):
         """Set the memory limit for a domain.
 
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Fri May 26 13:41:49 2006 -0600
@@ -701,6 +701,16 @@ class XendDomainInfo:
         log.debug("Storing VM details: %s", to_store)
 
         self.writeVm(to_store)
+        self.setVmPermissions()
+
+
+    def setVmPermissions(self):
+        """Allow the guest domain to read its UUID.  We don't allow it to
+        access any other entry, for security."""
+        xstransact.SetPermissions('%s/uuid' % self.vmpath,
+                                  { 'dom' : self.domid,
+                                    'read' : True,
+                                    'write' : False })
 
 
     def storeDomDetails(self):
@@ -1535,6 +1545,10 @@ class XendDomainInfo:
 
         self.configure_bootloader()
         config = self.sxpr()
+
+        if self.infoIsSet('cpus') and len(self.info['cpus']) != 0:
+            config.append(['cpus', reduce(lambda x, y: str(x) + "," + str(y),
+                                          self.info['cpus'])])
 
         if self.readVm(RESTART_IN_PROGRESS):
             log.error('Xend failed during restart of domain %d.  '
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/balloon.py  Fri May 26 13:41:49 2006 -0600
@@ -32,6 +32,7 @@ BALLOON_OUT_SLACK = 1 # MiB.  We need th
 BALLOON_OUT_SLACK = 1 # MiB.  We need this because the physinfo details are
                       # rounded.
 RETRY_LIMIT = 10
+RETRY_LIMIT_INCR = 5
 ##
 # The time to sleep between retries grows linearly, using this value (in
 # seconds).  When the system is lightly loaded, memory should be scrubbed and
@@ -118,7 +119,8 @@ def free(required):
         retries = 0
         sleep_time = SLEEP_TIME_GROWTH
         last_new_alloc = None
-        while retries < RETRY_LIMIT:
+        rlimit = RETRY_LIMIT
+        while retries < rlimit:
             free_mem = xc.physinfo()['free_memory']
 
             if free_mem >= need_mem:
@@ -127,7 +129,9 @@ def free(required):
                 return
 
             if retries == 0:
-                log.debug("Balloon: free %d; need %d.", free_mem, need_mem)
+                rlimit += ((need_mem - free_mem)/1024) * RETRY_LIMIT_INCR
+                log.debug("Balloon: free %d; need %d; retries: %d.", 
+                          free_mem, need_mem, rlimit)
 
             if dom0_min_mem > 0:
                 dom0_alloc = get_dom0_current_alloc()
@@ -143,8 +147,9 @@ def free(required):
                     # Continue to retry, waiting for ballooning.
 
             time.sleep(sleep_time)
+            if retries < 2 * RETRY_LIMIT:
+                sleep_time += SLEEP_TIME_GROWTH
             retries += 1
-            sleep_time += SLEEP_TIME_GROWTH
 
         # Not enough memory; diagnose the problem.
         if dom0_min_mem == 0:
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xend/server/SrvDomain.py Fri May 26 13:41:49 2006 -0600
@@ -129,6 +129,20 @@ class SrvDomain(SrvDir):
                     ['latency', 'int'],
                     ['extratime', 'int'],
                     ['weight', 'int']])
+        val = fn(req.args, {'dom': self.dom.domid})
+        return val
+    
+    def op_domain_csched_get(self, _, req):
+        fn = FormFn(self.xd.domain_csched_get,
+                    [['dom', 'int']])
+        val = fn(req.args, {'dom': self.dom.domid})
+        return val
+
+
+    def op_domain_csched_set(self, _, req):
+        fn = FormFn(self.xd.domain_csched_set,
+                    [['dom', 'int'],
+                     ['weight', 'int']])
         val = fn(req.args, {'dom': self.dom.domid})
         return val
 
diff -r 9d52a66c7499 -r c073ebdbde8c 
tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py      Thu May 25 15:59:18 
2006 -0600
+++ b/tools/python/xen/xend/xenstore/xstransact.py      Fri May 26 13:41:49 
2006 -0600
@@ -221,6 +221,34 @@ class xstransact:
                 xshandle().mkdir(self.transaction, self.prependPath(key))
 
 
+    def get_permissions(self, *args):
+        """If no arguments are given, return the permissions at this
+        transaction's path.  If one argument is given, treat that argument as
+        a subpath to this transaction's path, and return the permissions at
+        that path.  Otherwise, treat each argument as a subpath to this
+        transaction's path, and return a list composed of the permissions at
+        each of those instead.
+        """
+        if len(args) == 0:
+            return xshandle().get_permissions(self.transaction, self.path)
+        if len(args) == 1:
+            return self._get_permissions(args[0])
+        ret = []
+        for key in args:
+            ret.append(self._get_permissions(key))
+        return ret
+
+
+    def _get_permissions(self, key):
+        path = self.prependPath(key)
+        try:
+            return xshandle().get_permissions(self.transaction, path)
+        except RuntimeError, ex:
+            raise RuntimeError(ex.args[0],
+                               '%s, while getting permissions from %s' %
+                               (ex.args[1], path))
+
+
     def set_permissions(self, *args):
         if len(args) == 0:
             raise TypeError
diff -r 9d52a66c7499 -r c073ebdbde8c tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu May 25 15:59:18 2006 -0600
+++ b/tools/python/xen/xm/main.py       Fri May 26 13:41:49 2006 -0600
@@ -99,6 +99,7 @@ sched_sedf_help = "sched-sedf [DOM] [OPT
                                     specifies another way of setting a 
domain's\n\
                                     cpu period/slice."
 
+csched_help = "csched                           Set or get credit scheduler 
parameters"
 block_attach_help = """block-attach <DomId> <BackDev> <FrontDev> <Mode>
                 [BackDomId]         Create a new virtual block device"""
 block_detach_help = """block-detach  <DomId> <DevId>    Destroy a domain's 
virtual block device,
@@ -174,6 +175,7 @@ host_commands = [
     ]
 
 scheduler_commands = [
+    "csched",
     "sched-bvt",
     "sched-bvt-ctxallow",
     "sched-sedf",
@@ -735,6 +737,48 @@ def xm_sched_sedf(args):
         else:
             print_sedf(sedf_info)
 
+def xm_csched(args):
+    usage_msg = """Csched:     Set or get credit scheduler parameters
+ Usage:
+
+        csched -d domain [-w weight] [-c cap]
+    """
+    try:
+        opts, args = getopt.getopt(args[0:], "d:w:c:",
+            ["domain=", "weight=", "cap="])
+    except getopt.GetoptError:
+        # print help information and exit:
+        print usage_msg
+        sys.exit(1)
+
+    domain = None
+    weight = None
+    cap = None
+
+    for o, a in opts:
+        if o == "-d":
+            domain = a
+        elif o == "-w":
+            weight = int(a)
+        elif o == "-c":
+            cap = int(a);
+
+    if domain is None:
+        # place holder for system-wide scheduler parameters
+        print usage_msg
+        sys.exit(1)
+
+    if weight is None and cap is None:
+        print server.xend.domain.csched_get(domain)
+    else:
+        if weight is None:
+            weight = int(0)
+        if cap is None:
+            cap = int(~0)
+
+        err = server.xend.domain.csched_set(domain, weight, cap)
+        if err != 0:
+            print err
 
 def xm_info(args):
     arg_check(args, "info", 0)
@@ -1032,6 +1076,7 @@ commands = {
     "sched-bvt": xm_sched_bvt,
     "sched-bvt-ctxallow": xm_sched_bvt_ctxallow,
     "sched-sedf": xm_sched_sedf,
+    "csched": xm_csched,
     # block
     "block-attach": xm_block_attach,
     "block-detach": xm_block_detach,
diff -r 9d52a66c7499 -r c073ebdbde8c tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Thu May 25 15:59:18 2006 -0600
+++ b/tools/tests/test_x86_emulator.c   Fri May 26 13:41:49 2006 -0600
@@ -17,7 +17,8 @@ static int read_any(
 static int read_any(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -32,7 +33,8 @@ static int write_any(
 static int write_any(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -48,7 +50,8 @@ static int cmpxchg_any(
     unsigned long addr,
     unsigned long old,
     unsigned long new,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     switch ( bytes )
     {
@@ -65,34 +68,38 @@ static int cmpxchg8b_any(
     unsigned long old_lo,
     unsigned long old_hi,
     unsigned long new_lo,
-    unsigned long new_hi)
+    unsigned long new_hi,
+    struct x86_emulate_ctxt *ctxt)
 {
     ((unsigned long *)addr)[0] = new_lo;
     ((unsigned long *)addr)[1] = new_hi;
     return X86EMUL_CONTINUE;
 }
 
-static struct x86_mem_emulator emulops = {
+static struct x86_emulate_ops emulops = {
     read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any
 };
 
 int main(int argc, char **argv)
 {
+    struct x86_emulate_ctxt ctxt;
     struct cpu_user_regs regs;
     char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
     unsigned int res = 0x7FFFFFFF;
     u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 };
-    unsigned long cr2;
     int rc;
+
+    ctxt.regs = &regs;
+    ctxt.mode = X86EMUL_MODE_PROT32;
 
     printf("%-40s", "Testing addl %%ecx,(%%eax)...");
     instr[0] = 0x01; instr[1] = 0x08;
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x7FFFFFFF;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.eflags != 0xa94) ||
@@ -109,8 +116,8 @@ int main(int argc, char **argv)
 #else
     regs.ecx    = 0x12345678UL;
 #endif
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x92345677) || 
          (regs.ecx != 0x8000000FUL) ||
@@ -124,8 +131,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x92345677UL;
     regs.ecx    = 0xAA;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          (regs.eflags != 0x244) ||
@@ -140,8 +147,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0xAABBCC77UL;
     regs.ecx    = 0xFF;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x923456AA) || 
          ((regs.eflags&0x240) != 0x200) ||
@@ -156,8 +163,8 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x12345678) || 
          (regs.eflags != 0x200) ||
@@ -173,8 +180,8 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x923456AAUL;
     regs.ecx    = 0xDDEEFF00L;
-    cr2         = (unsigned long)&res;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = (unsigned long)&res;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0xDDEEFF00) || 
          (regs.eflags != 0x244) ||
@@ -192,8 +199,8 @@ int main(int argc, char **argv)
     regs.esi    = (unsigned long)&res + 0;
     regs.edi    = (unsigned long)&res + 2;
     regs.error_code = 0; /* read fault */
-    cr2         = regs.esi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = regs.esi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x44554455) ||
          (regs.eflags != 0x200) ||
@@ -210,8 +217,8 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)&res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);    
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (res != 0x2233445D) ||
          ((regs.eflags&0x201) != 0x201) ||
@@ -228,8 +235,8 @@ int main(int argc, char **argv)
     regs.ecx    = 0xCCCCFFFF;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -242,8 +249,8 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)cmpxchg8b_res;
-    cr2         = regs.edi;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    ctxt.cr2    = regs.edi;
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (cmpxchg8b_res[0] != 0x9999AAAA) ||
          (cmpxchg8b_res[1] != 0xCCCCFFFF) ||
@@ -258,9 +265,9 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x82;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
          (res != 0x82) ||
          (regs.ecx != 0xFFFFFF82) ||
@@ -273,9 +280,9 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    cr2         = (unsigned long)&res;
+    ctxt.cr2    = (unsigned long)&res;
     res         = 0x1234aa82;
-    rc = x86_emulate_memop(&regs, cr2, &emulops, X86EMUL_MODE_PROT32);
+    rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) ||
          (res != 0x1234aa82) ||
          (regs.ecx != 0xaa82) ||
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/Makefile   Fri May 26 13:41:49 2006 -0600
@@ -27,6 +27,12 @@ CLIENTS += xenstore-write
 CLIENTS += xenstore-write
 CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS))
 
+XENSTORED_OBJS = xenstored_core.o xenstored_watch.o xenstored_domain.o 
xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+
+XENSTORED_Linux = xenstored_linux.o
+
+XENSTORED_OBJS += $(XENSTORED_$(OS))
+
 .PHONY: all
 all: libxenstore.so xenstored $(CLIENTS) xs_tdb_dump xenstore-control 
xenstore-ls
 
@@ -36,7 +42,7 @@ test_interleaved_transactions: test_inte
 .PHONY: testcode
 testcode: xs_test xenstored_test xs_random
 
-xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o 
xenstored_transaction.o xs_lib.o talloc.o utils.o tdb.o hashtable.o
+xenstored: $(XENSTORED_OBJS)
        $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@
 
 $(CLIENTS): xenstore-%: xenstore_%.o libxenstore.so
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/xenstored_core.c   Fri May 26 13:41:49 2006 -0600
@@ -451,6 +451,11 @@ static struct node *read_node(struct con
 
 static bool write_node(struct connection *conn, const struct node *node)
 {
+       /*
+        * conn will be null when this is called from manual_node.
+        * tdb_context copes with this.
+        */
+
        TDB_DATA key, data;
        void *p;
 
@@ -478,7 +483,7 @@ static bool write_node(struct connection
 
        /* TDB should set errno, but doesn't even set ecode AFAICT. */
        if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
-               corrupt(conn, "Write of %s = %s failed", key, data);
+               corrupt(conn, "Write of %s failed", key.dptr);
                goto error;
        }
        return true;
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h   Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/xenstored_core.h   Fri May 26 13:41:49 2006 -0600
@@ -19,6 +19,8 @@
 
 #ifndef _XENSTORED_CORE_H
 #define _XENSTORED_CORE_H
+
+#include <xenctrl.h>
 
 #include <sys/types.h>
 #include <dirent.h>
@@ -163,6 +165,12 @@ void trace(const char *fmt, ...);
 
 extern int event_fd;
 
+/* Map the kernel's xenstore page. */
+void *xenbus_map(void);
+
+/* Return the event channel used by xenbus. */
+evtchn_port_t xenbus_evtchn(void);
+
 #endif /* _XENSTORED_CORE_H */
 
 /*
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu May 25 15:59:18 2006 -0600
+++ b/tools/xenstore/xenstored_domain.c Fri May 26 13:41:49 2006 -0600
@@ -33,12 +33,11 @@
 #include "talloc.h"
 #include "xenstored_core.h"
 #include "xenstored_domain.h"
-#include "xenstored_proc.h"
 #include "xenstored_watch.h"
 #include "xenstored_test.h"
 
 #include <xenctrl.h>
-#include <xen/linux/evtchn.h>
+#include <xen/sys/evtchn.h>
 
 static int *xc_handle;
 static evtchn_port_t virq_port;
@@ -476,44 +475,24 @@ void restore_existing_connections(void)
 
 static int dom0_init(void) 
 { 
-       int rc, fd;
-       evtchn_port_t port; 
-       char str[20]; 
-       struct domain *dom0; 
-
-       fd = open(XENSTORED_PROC_PORT, O_RDONLY); 
-       if (fd == -1)
+       evtchn_port_t port;
+       struct domain *dom0;
+
+       port = xenbus_evtchn();
+       if (port == -1)
                return -1;
 
-       rc = read(fd, str, sizeof(str)); 
-       if (rc == -1)
-               goto outfd;
-       str[rc] = '\0'; 
-       port = strtoul(str, NULL, 0); 
-
-       close(fd); 
-
        dom0 = new_domain(NULL, 0, port); 
 
-       fd = open(XENSTORED_PROC_KVA, O_RDWR);
-       if (fd == -1)
+       dom0->interface = xenbus_map();
+       if (dom0->interface == NULL)
                return -1;
 
-       dom0->interface = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
-                              MAP_SHARED, fd, 0);
-       if (dom0->interface == MAP_FAILED)
-               goto outfd;
-
-       close(fd);
-
        talloc_steal(dom0->conn, dom0); 
 
        evtchn_notify(dom0->port); 
 
        return 0; 
-outfd:
-       close(fd);
-       return -1;
 }
 
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/domain_build.c       Fri May 26 13:41:49 2006 -0600
@@ -367,7 +367,10 @@ int construct_dom0(struct domain *d,
     if ( (1UL << order) > nr_pages )
         panic("Domain 0 allocation is too small for kernel image.\n");
 
-    /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */
+    /*
+     * Allocate from DMA pool: on i386 this ensures that our low-memory 1:1
+     * mapping covers the allocation.
+     */
     if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
         panic("Not enough RAM for domain 0 allocation.\n");
     alloc_spfn = page_to_mfn(page);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Fri May 26 13:41:49 2006 -0600
@@ -185,8 +185,9 @@ void hvm_setup_platform(struct domain* d
 void hvm_setup_platform(struct domain* d)
 {
     struct hvm_domain *platform;
-
-    if ( !hvm_guest(current) || (current->vcpu_id != 0) )
+    struct vcpu *v=current;
+
+    if ( !hvm_guest(v) || (v->vcpu_id != 0) )
         return;
 
     if ( shadow_direct_map_init(d) == 0 )
@@ -208,7 +209,8 @@ void hvm_setup_platform(struct domain* d
         hvm_vioapic_init(d);
     }
 
-    pit_init(&platform->vpit, current);
+    init_timer(&platform->pl_time.periodic_tm.timer, pt_timer_fn, v, 
v->processor);
+    pit_init(v, cpu_khz);
 }
 
 void pic_irq_request(void *data, int level)
@@ -238,6 +240,14 @@ void hvm_pic_assist(struct vcpu *v)
         } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
         do_pic_irqs(pic, irqs);
     }
+}
+
+u64 hvm_get_guest_time(struct vcpu *v)
+{
+    u64    host_tsc;
+    
+    rdtscll(host_tsc);
+    return host_tsc + v->arch.hvm_vcpu.cache_tsc_offset;
 }
 
 int cpu_get_interrupt(struct vcpu *v, int *type)
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c  Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/i8254.c  Fri May 26 13:41:49 2006 -0600
@@ -22,11 +22,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-/* Edwin Zhai <edwin.zhai@xxxxxxxxx>
+/* Edwin Zhai <edwin.zhai@xxxxxxxxx>, Eddie Dong <eddie.dong@xxxxxxxxx>
  * Ported to xen:
- * use actimer for intr generation;
+ * Add a new layer of periodic time on top of PIT;
  * move speaker io access to hypervisor;
- * use new method for counter/intrs calculation
  */
 
 #include <xen/config.h>
@@ -42,184 +41,117 @@
 #include <asm/hvm/vpit.h>
 #include <asm/current.h>
 
-/*#define DEBUG_PIT*/
+/* Enable DEBUG_PIT may cause guest calibration inaccuracy */
+/* #define DEBUG_PIT */
 
 #define RW_STATE_LSB 1
 #define RW_STATE_MSB 2
 #define RW_STATE_WORD0 3
 #define RW_STATE_WORD1 4
 
-#ifndef NSEC_PER_SEC
-#define NSEC_PER_SEC (1000000000ULL)
-#endif
-
-#ifndef TIMER_SLOP 
-#define TIMER_SLOP (50*1000) /* ns */
-#endif
-
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time);
-
-s_time_t hvm_get_clock(void)
-{
-    /* TODO: add pause/unpause support */
-    return NOW();
+#define ticks_per_sec(v)      (v->domain->arch.hvm_domain.tsc_frequency)
+static int handle_pit_io(ioreq_t *p);
+static int handle_speaker_io(ioreq_t *p);
+
+/* compute with 96 bit intermediate result: (a*b)/c */
+uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+    union {
+        uint64_t ll;
+        struct {
+#ifdef WORDS_BIGENDIAN
+            uint32_t high, low;
+#else
+            uint32_t low, high;
+#endif            
+        } l;
+    } u, res;
+    uint64_t rl, rh;
+
+    u.ll = a;
+    rl = (uint64_t)u.l.low * (uint64_t)b;
+    rh = (uint64_t)u.l.high * (uint64_t)b;
+    rh += (rl >> 32);
+    res.l.high = rh / c;
+    res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+    return res.ll;
+}
+
+/*
+ * get processor time.
+ * unit: TSC
+ */
+int64_t hvm_get_clock(struct vcpu *v)
+{
+    uint64_t  gtsc;
+    gtsc = hvm_get_guest_time(v);
+    return gtsc;
 }
 
 static int pit_get_count(PITChannelState *s)
 {
-    u64 d;
-    u64 counter;
-
-    d = hvm_get_clock() - s->count_load_time;
+    uint64_t d;
+    int  counter;
+
+    d = muldiv64(hvm_get_clock(s->vcpu) - s->count_load_time, PIT_FREQ, 
ticks_per_sec(s->vcpu));
     switch(s->mode) {
     case 0:
     case 1:
     case 4:
     case 5:
-        counter = (s->period - d) & 0xffff;
+        counter = (s->count - d) & 0xffff;
         break;
     case 3:
         /* XXX: may be incorrect for odd counts */
-        counter = s->period - ((2 * d) % s->period);
+        counter = s->count - ((2 * d) % s->count);
         break;
     default:
-        /* mod 2 counter handle */
-        d = hvm_get_clock() - s->hvm_time->count_point;
-        d += s->hvm_time->count_advance;
-        counter = s->period - (d % s->period);
-        break;
-    }
-    /* change from ns to pit counter */
-    counter = DIV_ROUND( (counter * PIT_FREQ), NSEC_PER_SEC);
+        counter = s->count - (d % s->count);
+        break;
+    }
     return counter;
 }
 
 /* get pit output bit */
-static int pit_get_out1(PITChannelState *s, s64 current_time)
-{
-    u64 d;
+static int pit_get_out1(PITChannelState *s, int64_t current_time)
+{
+    uint64_t d;
     int out;
 
-    d = current_time - s->count_load_time;
+    d = muldiv64(current_time - s->count_load_time, PIT_FREQ, 
ticks_per_sec(s->vcpu));
     switch(s->mode) {
     default:
     case 0:
-        out = (d >= s->period);
+        out = (d >= s->count);
         break;
     case 1:
-        out = (d < s->period);
+        out = (d < s->count);
         break;
     case 2:
-        /* mod2 out is no meaning, since intr are generated in background */
-        if ((d % s->period) == 0 && d != 0)
+        if ((d % s->count) == 0 && d != 0)
             out = 1;
         else
             out = 0;
         break;
     case 3:
-        out = (d % s->period) < ((s->period + 1) >> 1);
+        out = (d % s->count) < ((s->count + 1) >> 1);
         break;
     case 4:
     case 5:
-        out = (d == s->period);
+        out = (d == s->count);
         break;
     }
     return out;
 }
 
-int pit_get_out(hvm_virpit *pit, int channel, s64 current_time)
+int pit_get_out(PITState *pit, int channel, int64_t current_time)
 {
     PITChannelState *s = &pit->channels[channel];
     return pit_get_out1(s, current_time);
 }
 
-static __inline__ s64 missed_ticks(PITChannelState *s, s64 current_time)
-{
-    struct hvm_time_info *hvm_time = s->hvm_time;
-    struct domain *d = (void *) s - 
-        offsetof(struct domain, arch.hvm_domain.vpit.channels[0]);
-
-    /* ticks from current time(expected time) to NOW */ 
-    int missed_ticks;
-    /* current_time is expected time for next intr, check if it's true
-     * (actimer has a TIMER_SLOP in advance)
-     */
-    s64 missed_time = hvm_get_clock() + TIMER_SLOP - current_time;
-
-    if (missed_time >= 0) {
-        missed_ticks = missed_time/(s_time_t)s->period + 1;
-        if (test_bit(_DOMF_debugging, &d->domain_flags)) {
-            hvm_time->pending_intr_nr++;
-        } else {
-            hvm_time->pending_intr_nr += missed_ticks;
-        }
-        s->next_transition_time = current_time + (missed_ticks ) * s->period;
-    }
-
-    return s->next_transition_time;
-}
-
-/* only rearm the actimer when return value > 0
- *  -2: init state
- *  -1: the mode has expired
- *   0: current VCPU is not running
- *  >0: the next fired time
- */
-s64 pit_get_next_transition_time(PITChannelState *s, 
-                                            s64 current_time)
-{
-    s64 d, next_time, base;
-    int period2;
-    struct hvm_time_info *hvm_time = s->hvm_time;
-
-    d = current_time - s->count_load_time;
-    switch(s->mode) {
-    default:
-    case 0:
-    case 1:
-        if (d < s->period)
-            next_time = s->period;
-        else
-            return -1;
-        break;
-    case 2:
-        next_time = missed_ticks(s, current_time);
-        if ( !test_bit(_VCPUF_running, &(hvm_time->vcpu->vcpu_flags)) )
-            return 0;
-        break;
-    case 3:
-        base = (d / s->period) * s->period;
-        period2 = ((s->period + 1) >> 1);
-        if ((d - base) < period2) 
-            next_time = base + period2;
-        else
-            next_time = base + s->period;
-        break;
-    case 4:
-    case 5:
-        if (d < s->period)
-            next_time = s->period;
-        else if (d == s->period)
-            next_time = s->period + 1;
-        else
-            return -1;
-        break;
-    case 0xff:
-        return -2;      /* for init state */ 
-        break;
-    }
-    /* XXX: better solution: use a clock at PIT_FREQ Hz */
-    if (next_time <= current_time){
-#ifdef DEBUG_PIT
-        printk("HVM_PIT:next_time <= current_time. next=0x%llx, 
current=0x%llx!\n",next_time, current_time);
-#endif
-        next_time = current_time + 1;
-    }
-    return next_time;
-}
-
 /* val must be 0 or 1 */
-void pit_set_gate(hvm_virpit *pit, int channel, int val)
+void pit_set_gate(PITState *pit, int channel, int val)
 {
     PITChannelState *s = &pit->channels[channel];
 
@@ -233,16 +165,16 @@ void pit_set_gate(hvm_virpit *pit, int c
     case 5:
         if (s->gate < val) {
             /* restart counting on rising edge */
-            s->count_load_time = hvm_get_clock();
-            pit_irq_timer_update(s, s->count_load_time);
+            s->count_load_time = hvm_get_clock(s->vcpu);
+//            pit_irq_timer_update(s, s->count_load_time);
         }
         break;
     case 2:
     case 3:
         if (s->gate < val) {
             /* restart counting on rising edge */
-            s->count_load_time = hvm_get_clock();
-            pit_irq_timer_update(s, s->count_load_time);
+            s->count_load_time = hvm_get_clock(s->vcpu);
+//            pit_irq_timer_update(s, s->count_load_time);
         }
         /* XXX: disable/enable counting */
         break;
@@ -250,7 +182,7 @@ void pit_set_gate(hvm_virpit *pit, int c
     s->gate = val;
 }
 
-int pit_get_gate(hvm_virpit *pit, int channel)
+int pit_get_gate(PITState *pit, int channel)
 {
     PITChannelState *s = &pit->channels[channel];
     return s->gate;
@@ -258,37 +190,37 @@ int pit_get_gate(hvm_virpit *pit, int ch
 
 static inline void pit_load_count(PITChannelState *s, int val)
 {
+    u32   period;
     if (val == 0)
         val = 0x10000;
-
-    s->count_load_time = hvm_get_clock();
+    s->count_load_time = hvm_get_clock(s->vcpu);
     s->count = val;
-    s->period = DIV_ROUND(((s->count) * NSEC_PER_SEC), PIT_FREQ);
+    period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ);
 
 #ifdef DEBUG_PIT
-    printk("HVM_PIT: pit-load-counter, count=0x%x,period=0x%u us,mode=%d, 
load_time=%lld\n",
+    printk("HVM_PIT: pit-load-counter(%p), count=0x%x, period=%uns mode=%d, 
load_time=%lld\n",
+            s,
             val,
-            s->period / 1000,
+            period,
             s->mode,
-            s->count_load_time);
+            (long long)s->count_load_time);
 #endif
 
-    if (s->mode == HVM_PIT_ACCEL_MODE) {
-        if (!s->hvm_time) {
-            printk("HVM_PIT:guest should only set mod 2 on channel 0!\n");
-            return;
-        }
-        s->hvm_time->period_cycles = (u64)s->period * cpu_khz / 1000000L;
-        s->hvm_time->first_injected = 0;
-
-        if (s->period < 900000) { /* < 0.9 ms */
-            printk("HVM_PIT: guest programmed too small an count: %x\n",
-                    s->count);
-            s->period = 1000000;
-        }
-    }
-        
-    pit_irq_timer_update(s, s->count_load_time);
+    switch (s->mode) {
+        case 2:
+            /* create periodic time */
+            s->pt = create_periodic_time (s->vcpu, period, 0, 0);
+            break;
+        case 1:
+            /* create one shot time */
+            s->pt = create_periodic_time (s->vcpu, period, 0, 1);
+#ifdef DEBUG_PIT
+            printk("HVM_PIT: create one shot time.\n");
+#endif
+            break;
+        default:
+            break;
+    }
 }
 
 /* if already latched, do not latch again */
@@ -300,9 +232,9 @@ static void pit_latch_count(PITChannelSt
     }
 }
 
-static void pit_ioport_write(void *opaque, u32 addr, u32 val)
-{
-    hvm_virpit *pit = opaque;
+static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PITState *pit = opaque;
     int channel, access;
     PITChannelState *s;
     val &= 0xff;
@@ -321,7 +253,7 @@ static void pit_ioport_write(void *opaqu
                     if (!(val & 0x10) && !s->status_latched) {
                         /* status latch */
                         /* XXX: add BCD and null count */
-                        s->status =  (pit_get_out1(s, hvm_get_clock()) << 7) |
+                        s->status =  (pit_get_out1(s, hvm_get_clock(s->vcpu)) 
<< 7) |
                             (s->rw_mode << 4) |
                             (s->mode << 1) |
                             s->bcd;
@@ -366,9 +298,9 @@ static void pit_ioport_write(void *opaqu
     }
 }
 
-static u32 pit_ioport_read(void *opaque, u32 addr)
-{
-    hvm_virpit *pit = opaque;
+static uint32_t pit_ioport_read(void *opaque, uint32_t addr)
+{
+    PITState *pit = opaque;
     int ret, count;
     PITChannelState *s;
     
@@ -419,84 +351,51 @@ static u32 pit_ioport_read(void *opaque,
     return ret;
 }
 
-static void pit_irq_timer_update(PITChannelState *s, s64 current_time)
-{
-    s64 expire_time;
-    int irq_level;
-    struct vcpu *v = current;
-    struct hvm_virpic *pic= &v->domain->arch.hvm_domain.vpic;
-
-    if (!s->hvm_time || s->mode == 0xff)
-        return;
-
-    expire_time = pit_get_next_transition_time(s, current_time);
-    /* not generate intr by direct pic_set_irq in mod 2
-     * XXX:mod 3 should be same as mod 2
-     */
-    if (s->mode != HVM_PIT_ACCEL_MODE) {
-        irq_level = pit_get_out1(s, current_time);
-        pic_set_irq(pic, s->irq, irq_level);
-        s->next_transition_time = expire_time;
-#ifdef DEBUG_PIT
-        printk("HVM_PIT:irq_level=%d next_delay=%l ns\n",
-                irq_level, 
-                (expire_time - current_time));
-#endif
-    }
-
-    if (expire_time > 0)
-        set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
-
-}
-
-static void pit_irq_timer(void *data)
-{
-    PITChannelState *s = data;
-
-    pit_irq_timer_update(s, s->next_transition_time);
-}
-
 static void pit_reset(void *opaque)
 {
-    hvm_virpit *pit = opaque;
+    PITState *pit = opaque;
     PITChannelState *s;
     int i;
 
     for(i = 0;i < 3; i++) {
         s = &pit->channels[i];
+        if ( s -> pt ) {
+            destroy_periodic_time (s->pt);
+            s->pt = NULL;
+        }
         s->mode = 0xff; /* the init mode */
         s->gate = (i != 2);
         pit_load_count(s, 0);
     }
 }
 
-/* hvm_io_assist light-weight version, specific to PIT DM */ 
-static void resume_pit_io(ioreq_t *p)
-{
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
-    unsigned long old_eax = regs->eax;
-    p->state = STATE_INVALID;
-
-    switch(p->size) {
-    case 1:
-        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
-        break;
-    case 2:
-        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
-        break;
-    case 4:
-        regs->eax = (p->u.data & 0xffffffff);
-        break;
-    default:
-        BUG();
-    }
+void pit_init(struct vcpu *v, unsigned long cpu_khz)
+{
+    PITState *pit = &v->domain->arch.hvm_domain.pl_time.vpit;
+    PITChannelState *s;
+
+    s = &pit->channels[0];
+    /* the timer 0 is connected to an IRQ */
+    s->vcpu = v;
+    s++; s->vcpu = v;
+    s++; s->vcpu = v;
+
+    register_portio_handler(PIT_BASE, 4, handle_pit_io);
+    /* register the speaker port */
+    register_portio_handler(0x61, 1, handle_speaker_io);
+    ticks_per_sec(v) = cpu_khz * (int64_t)1000; 
+#ifdef DEBUG_PIT
+    printk("HVM_PIT: guest frequency =%lld\n", (long long)ticks_per_sec(v));
+#endif
+    pit_reset(pit);
+    return;
 }
 
 /* the intercept action for PIT DM retval:0--not handled; 1--handled */  
-int handle_pit_io(ioreq_t *p)
+static int handle_pit_io(ioreq_t *p)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
 
     if (p->size != 1 ||
         p->pdata_valid ||
@@ -508,18 +407,18 @@ int handle_pit_io(ioreq_t *p)
     if (p->dir == 0) {/* write */
         pit_ioport_write(vpit, p->addr, p->u.data);
     } else if (p->dir == 1) { /* read */
-        p->u.data = pit_ioport_read(vpit, p->addr);
-        resume_pit_io(p);
-    }
-
-    /* always return 1, since PIT sit in HV now */
+        if ( (p->addr & 3) != 3 ) {
+            p->u.data = pit_ioport_read(vpit, p->addr);
+        } else {
+            printk("HVM_PIT: read A1:A0=3!\n");
+        }
+    }
     return 1;
 }
 
 static void speaker_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
-    hvm_virpit *pit = opaque;
-    val &= 0xff;
+    PITState *pit = opaque;
     pit->speaker_data_on = (val >> 1) & 1;
     pit_set_gate(pit, 2, val & 1);
 }
@@ -527,18 +426,18 @@ static uint32_t speaker_ioport_read(void
 static uint32_t speaker_ioport_read(void *opaque, uint32_t addr)
 {
     int out;
-    hvm_virpit *pit = opaque;
-    out = pit_get_out(pit, 2, hvm_get_clock());
+    PITState *pit = opaque;
+    out = pit_get_out(pit, 2, hvm_get_clock(pit->channels[2].vcpu));
     pit->dummy_refresh_clock ^= 1;
 
     return (pit->speaker_data_on << 1) | pit_get_gate(pit, 2) | (out << 5) |
       (pit->dummy_refresh_clock << 4);
 }
 
-int handle_speaker_io(ioreq_t *p)
+static int handle_speaker_io(ioreq_t *p)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct PITState *vpit = &(v->domain->arch.hvm_domain.pl_time.vpit);
 
     if (p->size != 1 ||
         p->pdata_valid ||
@@ -551,45 +450,7 @@ int handle_speaker_io(ioreq_t *p)
         speaker_ioport_write(vpit, p->addr, p->u.data);
     } else if (p->dir == 1) {/* read */
         p->u.data = speaker_ioport_read(vpit, p->addr);
-        resume_pit_io(p);
     }
 
     return 1;
 }
-
-/* pick up missed timer ticks at deactive time */
-void pickup_deactive_ticks(struct hvm_virpit *vpit)
-{
-    s64 next_time;
-    PITChannelState *s = &(vpit->channels[0]);
-    if ( !active_timer(&(vpit->time_info.pit_timer)) ) {
-        next_time = pit_get_next_transition_time(s, s->next_transition_time); 
-        if (next_time >= 0)
-            set_timer(&(s->hvm_time->pit_timer), s->next_transition_time);
-    }
-}
-
-void pit_init(struct hvm_virpit *pit, struct vcpu *v)
-{
-    PITChannelState *s;
-    struct hvm_time_info *hvm_time;
-
-    s = &pit->channels[0];
-    /* the timer 0 is connected to an IRQ */
-    s->irq = 0;
-    /* channel 0 need access the related time info for intr injection */
-    hvm_time = s->hvm_time = &pit->time_info;
-    hvm_time->vcpu = v;
-
-    init_timer(&(hvm_time->pit_timer), pit_irq_timer, s, v->processor);
-
-    register_portio_handler(PIT_BASE, 4, handle_pit_io);
-
-    /* register the speaker port */
-    register_portio_handler(0x61, 1, handle_speaker_io);
-
-    pit_reset(pit);
-
-    return;
-
-}
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/intercept.c      Fri May 26 13:41:49 2006 -0600
@@ -214,6 +214,88 @@ void hlt_timer_fn(void *data)
     evtchn_set_pending(v, iopacket_port(v));
 }
 
+static __inline__ void missed_ticks(struct periodic_time *pt)
+{
+    int missed_ticks;
+
+    missed_ticks = (NOW() - pt->scheduled)/(s_time_t) pt->period;
+    if ( missed_ticks++ >= 0 ) {
+        if ( missed_ticks > 1000 ) {
+            /* TODO: Adjust guest time togther */
+            pt->pending_intr_nr ++;
+        }
+        else {
+            pt->pending_intr_nr += missed_ticks;
+        }
+        pt->scheduled += missed_ticks * pt->period;
+    }
+}
+
+/* hook function for the platform periodic time */
+void pt_timer_fn(void *data)
+{
+    struct vcpu *v = data;
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    /* pick up missed timer tick */
+    missed_ticks(pt);
+    if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
+        set_timer(&pt->timer, pt->scheduled);
+    }
+}
+
+/* pick up missed timer ticks at deactive time */
+void pickup_deactive_ticks(struct periodic_time *pt)
+{
+    if ( !active_timer(&(pt->timer)) ) {
+        missed_ticks(pt);
+        set_timer(&pt->timer, pt->scheduled);
+    }
+}
+
+/*
+ * period: fire frequency in ns.
+ */
+struct periodic_time * create_periodic_time(
+        struct vcpu *v, 
+        u32 period, 
+        char irq,
+        char one_shot)
+{
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+    if ( pt->enabled ) {
+        if ( v->vcpu_id != 0 ) {
+            printk("HVM_PIT: start 2nd periodic time on non BSP!\n");
+        }
+        stop_timer (&pt->timer);
+        pt->enabled = 0;
+    }
+    pt->pending_intr_nr = 0;
+    pt->first_injected = 0;
+    if (period < 900000) { /* < 0.9 ms */
+        printk("HVM_PlatformTime: program too small period %u\n",period);
+        period = 900000;   /* force to 0.9ms */
+    }
+    pt->period = period;
+    pt->irq = irq;
+    pt->period_cycles = (u64)period * cpu_khz / 1000000L;
+    pt->one_shot = one_shot;
+    if ( one_shot ) {
+        printk("HVM_PL: No support for one shot platform time yet\n");
+    }
+    pt->scheduled = NOW() + period;
+    set_timer (&pt->timer,pt->scheduled);
+    pt->enabled = 1;
+    return pt;
+}
+
+void destroy_periodic_time(struct periodic_time *pt)
+{
+    if ( pt->enabled ) {
+        stop_timer(&pt->timer);
+        pt->enabled = 0;
+    }
+}
 
 /*
  * Local variables:
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c       Fri May 26 13:41:49 2006 -0600
@@ -44,45 +44,33 @@
  */
 #define BSP_CPU(v)    (!(v->vcpu_id))
 
-u64 svm_get_guest_time(struct vcpu *v)
-{
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
-    u64    host_tsc;
-    
-    rdtscll(host_tsc);
-    return host_tsc + time_info->cache_tsc_offset;
-}
-
 void svm_set_guest_time(struct vcpu *v, u64 gtime)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
     u64    host_tsc;
    
     rdtscll(host_tsc);
     
-    time_info->cache_tsc_offset = gtime - host_tsc;
-    v->arch.hvm_svm.vmcb->tsc_offset = time_info->cache_tsc_offset;
+    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+    v->arch.hvm_svm.vmcb->tsc_offset = v->arch.hvm_vcpu.cache_tsc_offset;
 }
 
 static inline void
 interrupt_post_injection(struct vcpu * v, int vector, int type)
 {
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct  periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
 
     if ( is_pit_irq(v, vector, type) ) {
-        if ( !time_info->first_injected ) {
-            time_info->pending_intr_nr = 0;
-            time_info->last_pit_gtime = svm_get_guest_time(v);
-            time_info->first_injected = 1;
+        if ( !pt->first_injected ) {
+            pt->pending_intr_nr = 0;
+            pt->last_plt_gtime = hvm_get_guest_time(v);
+            pt->scheduled = NOW() + pt->period;
+            set_timer(&pt->timer, pt->scheduled);
+            pt->first_injected = 1;
         } else {
-            time_info->pending_intr_nr--;
+            pt->pending_intr_nr--;
+            pt->last_plt_gtime += pt->period_cycles;
+            svm_set_guest_time(v, pt->last_plt_gtime);
         }
-        time_info->count_advance = 0;
-        time_info->count_point = NOW();
-
-        time_info->last_pit_gtime += time_info->period_cycles;
-        svm_set_guest_time(v, time_info->last_pit_gtime);
     }
 
     switch(type)
@@ -121,8 +109,7 @@ asmlinkage void svm_intr_assist(void)
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     struct hvm_domain *plat=&v->domain->arch.hvm_domain; 
-    struct hvm_virpit *vpit = &plat->vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &plat->pl_time.periodic_tm;
     struct hvm_virpic *pic= &plat->vpic;
     int intr_type = VLAPIC_DELIV_MODE_EXT;
     int intr_vector = -1;
@@ -174,9 +161,9 @@ asmlinkage void svm_intr_assist(void)
       if ( cpu_has_pending_irq(v) ) {
            intr_vector = cpu_get_interrupt(v, &intr_type);
       }
-      else  if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
-          pic_set_irq(pic, 0, 0);
-          pic_set_irq(pic, 0, 1);
+      else  if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+          pic_set_irq(pic, pt->irq, 0);
+          pic_set_irq(pic, pt->irq, 1);
           intr_vector = cpu_get_interrupt(v, &intr_type);
       }
     }
@@ -190,7 +177,7 @@ asmlinkage void svm_intr_assist(void)
             /* Re-injecting a PIT interruptt? */
             if (re_injecting && 
                 is_pit_irq(v, intr_vector, intr_type)) {
-                    ++time_info->pending_intr_nr;
+                    ++pt->pending_intr_nr;
             }
             /* let's inject this interrupt */
             TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Fri May 26 13:41:49 2006 -0600
@@ -51,13 +51,6 @@
 
 #define SVM_EXTRA_DEBUG
 
-#ifdef TRACE_BUFFER
-static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
-#else
-#define TRACE_VMEXIT(index,value) ((void)0)
-#endif
-
 /* Useful define */
 #define MAX_INST_SIZE  15
 
@@ -672,12 +665,11 @@ static void arch_svm_do_launch(struct vc
 
 static void svm_freeze_time(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     
-    if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) 
{
-        v->domain->arch.hvm_domain.guest_time = svm_get_guest_time(v);
-        time_info->count_advance += (NOW() - time_info->count_point);
-        stop_timer(&(time_info->pit_timer));
+    if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+        v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+        stop_timer(&(pt->timer));
     }
 }
 
@@ -754,7 +746,7 @@ static void svm_relinquish_guest_resourc
         }
     }
 
-    kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+    kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
@@ -784,10 +776,12 @@ void arch_svm_do_resume(struct vcpu *v)
 
 void svm_migrate_timers(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
-
-    migrate_timer(&time_info->pit_timer, v->processor);
-    migrate_timer(&v->arch.hvm_svm.hlt_timer, v->processor);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    if ( pt->enabled ) {
+        migrate_timer( &pt->timer, v->processor );
+        migrate_timer( &v->arch.hvm_svm.hlt_timer, v->processor );
+    }
     if ( hvm_apic_support(v->domain) && VLAPIC( v ))
         migrate_timer( &(VLAPIC(v)->vlapic_timer ), v->processor );
 }
@@ -816,7 +810,6 @@ static int svm_do_page_fault(unsigned lo
             return 1;
 
         handle_mmio(va, va);
-        TRACE_VMEXIT(2,2);
         return 1;
     }
 
@@ -842,7 +835,6 @@ static int svm_do_page_fault(unsigned lo
             return 1;
         }
 
-        TRACE_VMEXIT (2,2);
         handle_mmio(va, gpa);
 
         return 1;
@@ -854,8 +846,6 @@ static int svm_do_page_fault(unsigned lo
         /* Let's make sure that the Guest TLB is flushed */
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
     }
-
-    TRACE_VMEXIT (2,result);
 
     return result;
 }
@@ -1901,14 +1891,8 @@ static inline void svm_do_msr_access(str
         regs->edx = 0;
         switch (regs->ecx) {
         case MSR_IA32_TIME_STAMP_COUNTER:
-        {
-            struct hvm_time_info *time_info;
-
-            rdtscll(msr_content);
-            time_info = &v->domain->arch.hvm_domain.vpit.time_info;
-            msr_content += time_info->cache_tsc_offset;
+            msr_content = hvm_get_guest_time(v);
             break;
-        }
         case MSR_IA32_SYSENTER_CS:
             msr_content = vmcb->sysenter_cs;
             break;
@@ -1975,7 +1959,7 @@ static inline void svm_vmexit_do_hlt(str
 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
 {
     struct vcpu *v = current;
-    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     s_time_t  next_pit = -1, next_wakeup;
 
     __update_guest_eip(vmcb, 1);
@@ -1985,7 +1969,7 @@ static inline void svm_vmexit_do_hlt(str
        return; 
 
     if ( !v->vcpu_id )
-        next_pit = get_pit_scheduled(v, vpit);
+        next_pit = get_scheduled(v, pt->irq, pt);
     next_wakeup = get_apictime_scheduled(v);
     if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
         next_wakeup = next_pit;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Fri May 26 13:41:49 2006 -0600
@@ -442,19 +442,17 @@ void svm_do_resume(struct vcpu *v)
 void svm_do_resume(struct vcpu *v) 
 {
     struct domain *d = v->domain;
-    struct hvm_virpit *vpit = &d->arch.hvm_domain.vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &d->arch.hvm_domain.pl_time.periodic_tm;
 
     svm_stts(v);
 
     /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( time_info->first_injected ) {
-        if ( v->domain->arch.hvm_domain.guest_time ) {
-            svm_set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
-            time_info->count_point = NOW();
-            v->domain->arch.hvm_domain.guest_time = 0;
+    if ( pt->enabled && pt->first_injected ) {
+        if ( v->arch.hvm_vcpu.guest_time ) {
+            svm_set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+            v->arch.hvm_vcpu.guest_time = 0;
         }
-        pickup_deactive_ticks(vpit);
+        pickup_deactive_ticks(pt);
     }
 
     if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/vmx/io.c
--- a/xen/arch/x86/hvm/vmx/io.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/io.c Fri May 26 13:41:49 2006 -0600
@@ -49,45 +49,33 @@ void __set_tsc_offset(u64  offset)
 #endif
 }
 
-u64 get_guest_time(struct vcpu *v)
-{
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
-    u64    host_tsc;
-    
-    rdtscll(host_tsc);
-    return host_tsc + time_info->cache_tsc_offset;
-}
-
 void set_guest_time(struct vcpu *v, u64 gtime)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
     u64    host_tsc;
    
     rdtscll(host_tsc);
     
-    time_info->cache_tsc_offset = gtime - host_tsc;
-    __set_tsc_offset(time_info->cache_tsc_offset);
+    v->arch.hvm_vcpu.cache_tsc_offset = gtime - host_tsc;
+    __set_tsc_offset(v->arch.hvm_vcpu.cache_tsc_offset);
 }
 
 static inline void
 interrupt_post_injection(struct vcpu * v, int vector, int type)
 {
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
 
     if ( is_pit_irq(v, vector, type) ) {
-        if ( !time_info->first_injected ) {
-            time_info->pending_intr_nr = 0;
-            time_info->last_pit_gtime = get_guest_time(v);
-            time_info->first_injected = 1;
+        if ( !pt->first_injected ) {
+            pt->pending_intr_nr = 0;
+            pt->last_plt_gtime = hvm_get_guest_time(v);
+            pt->scheduled = NOW() + pt->period;
+            set_timer(&pt->timer, pt->scheduled);
+            pt->first_injected = 1;
         } else {
-            time_info->pending_intr_nr--;
-        }
-        time_info->count_advance = 0;
-        time_info->count_point = NOW();
-
-        time_info->last_pit_gtime += time_info->period_cycles;
-        set_guest_time(v, time_info->last_pit_gtime);
+            pt->pending_intr_nr--;
+            pt->last_plt_gtime += pt->period_cycles;
+            set_guest_time(v, pt->last_plt_gtime);
+        }
     }
 
     switch(type)
@@ -151,7 +139,7 @@ asmlinkage void vmx_intr_assist(void)
     unsigned long eflags;
     struct vcpu *v = current;
     struct hvm_domain *plat=&v->domain->arch.hvm_domain;
-    struct hvm_time_info *time_info = &plat->vpit.time_info;
+    struct periodic_time *pt = &plat->pl_time.periodic_tm;
     struct hvm_virpic *pic= &plat->vpic;
     unsigned int idtv_info_field;
     unsigned long inst_len;
@@ -160,9 +148,9 @@ asmlinkage void vmx_intr_assist(void)
     if ( v->vcpu_id == 0 )
         hvm_pic_assist(v);
 
-    if ( (v->vcpu_id == 0) && time_info->pending_intr_nr ) {
-        pic_set_irq(pic, 0, 0);
-        pic_set_irq(pic, 0, 1);
+    if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) {
+        pic_set_irq(pic, pt->irq, 0);
+        pic_set_irq(pic, pt->irq, 1);
     }
 
     has_ext_irq = cpu_has_pending_irq(v);
@@ -232,19 +220,17 @@ void vmx_do_resume(struct vcpu *v)
 void vmx_do_resume(struct vcpu *v)
 {
     struct domain *d = v->domain;
-    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
-    struct hvm_time_info *time_info = &vpit->time_info;
+    struct periodic_time *pt = &v->domain->arch.hvm_domain.pl_time.periodic_tm;
 
     vmx_stts();
 
     /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( time_info->first_injected ) {
-        if ( v->domain->arch.hvm_domain.guest_time ) {
-            time_info->count_point = NOW();
-            set_guest_time(v, v->domain->arch.hvm_domain.guest_time);
-            v->domain->arch.hvm_domain.guest_time = 0;
-        }
-        pickup_deactive_ticks(vpit);
+    if ( pt->enabled && pt->first_injected ) {
+        if ( v->arch.hvm_vcpu.guest_time ) {
+            set_guest_time(v, v->arch.hvm_vcpu.guest_time);
+            v->arch.hvm_vcpu.guest_time = 0;
+        }
+        pickup_deactive_ticks(pt);
     }
 
     if ( test_bit(iopacket_port(v), &d->shared_info->evtchn_pending[0]) ||
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Fri May 26 13:41:49 2006 -0600
@@ -47,7 +47,7 @@
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vlapic.h>
 
-static unsigned long trace_values[NR_CPUS][4];
+static unsigned long trace_values[NR_CPUS][5];
 #define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
 
 static void vmx_ctxt_switch_from(struct vcpu *v);
@@ -102,7 +102,7 @@ static void vmx_relinquish_guest_resourc
         }
     }
 
-    kill_timer(&d->arch.hvm_domain.vpit.time_info.pit_timer);
+    kill_timer(&d->arch.hvm_domain.pl_time.periodic_tm.timer);
 
     if ( d->arch.hvm_domain.shared_page_va )
         unmap_domain_page_global(
@@ -358,12 +358,11 @@ static inline int long_mode_do_msr_write
 
 static void vmx_freeze_time(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&(v->domain->arch.hvm_domain.vpit.time_info);
+    struct periodic_time *pt=&v->domain->arch.hvm_domain.pl_time.periodic_tm;
     
-    if ( time_info->first_injected && !v->domain->arch.hvm_domain.guest_time ) 
{
-        v->domain->arch.hvm_domain.guest_time = get_guest_time(v);
-        time_info->count_advance += (NOW() - time_info->count_point);
-        stop_timer(&(time_info->pit_timer));
+    if ( pt->enabled && pt->first_injected && !v->arch.hvm_vcpu.guest_time ) {
+        v->arch.hvm_vcpu.guest_time = hvm_get_guest_time(v);
+        stop_timer(&(pt->timer));
     }
 }
 
@@ -393,10 +392,12 @@ int vmx_initialize_guest_resources(struc
 
 void vmx_migrate_timers(struct vcpu *v)
 {
-    struct hvm_time_info *time_info = 
&v->domain->arch.hvm_domain.vpit.time_info;
-
-    migrate_timer(&time_info->pit_timer, v->processor);
-    migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
+
+    if ( pt->enabled ) {
+        migrate_timer(&pt->timer, v->processor);
+        migrate_timer(&v->arch.hvm_vmx.hlt_timer, v->processor);
+    }
     if ( hvm_apic_support(v->domain) && VLAPIC(v))
         migrate_timer(&(VLAPIC(v)->vlapic_timer), v->processor);
 }
@@ -1861,14 +1862,8 @@ static inline void vmx_do_msr_read(struc
                 (unsigned long)regs->edx);
     switch (regs->ecx) {
     case MSR_IA32_TIME_STAMP_COUNTER:
-    {
-        struct hvm_time_info *time_info;
-
-        rdtscll(msr_content);
-        time_info = &(v->domain->arch.hvm_domain.vpit.time_info);
-        msr_content += time_info->cache_tsc_offset;
-        break;
-    }
+        msr_content = hvm_get_guest_time(v);
+        break;
     case MSR_IA32_SYSENTER_CS:
         __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
         break;
@@ -1941,11 +1936,11 @@ void vmx_vmexit_do_hlt(void)
 void vmx_vmexit_do_hlt(void)
 {
     struct vcpu *v=current;
-    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    struct periodic_time *pt = 
&(v->domain->arch.hvm_domain.pl_time.periodic_tm);
     s_time_t   next_pit=-1,next_wakeup;
 
     if ( !v->vcpu_id )
-        next_pit = get_pit_scheduled(v,vpit);
+        next_pit = get_scheduled(v, pt->irq, pt);
     next_wakeup = get_apictime_scheduled(v);
     if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
         next_wakeup = next_pit;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/mm.c Fri May 26 13:41:49 2006 -0600
@@ -260,9 +260,42 @@ void share_xen_page_with_privileged_gues
     share_xen_page_with_guest(page, dom_xen, readonly);
 }
 
+static void __write_ptbase(unsigned long mfn)
+{
+#ifdef CONFIG_X86_PAE
+    if ( mfn >= 0x100000 )
+    {
+        l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
+        struct vcpu *v = current;
+        unsigned long flags;
+
+        /* Protects against re-entry and against __pae_flush_pgd(). */
+        local_irq_save(flags);
+
+        /* Pick an unused low-memory L3 cache slot. */
+        v->arch.lowmem_l3tab_inuse ^= 1;
+        lowmem_l3tab = v->arch.lowmem_l3tab[v->arch.lowmem_l3tab_inuse];
+        v->arch.lowmem_l3tab_high_mfn[v->arch.lowmem_l3tab_inuse] = mfn;
+
+        /* Map the guest L3 table and copy to the chosen low-memory cache. */
+        highmem_l3tab = map_domain_page(mfn);
+        memcpy(lowmem_l3tab, highmem_l3tab, sizeof(v->arch.lowmem_l3tab));
+        unmap_domain_page(highmem_l3tab);
+
+        /* Install the low-memory L3 table in CR3. */
+        write_cr3(__pa(lowmem_l3tab));
+
+        local_irq_restore(flags);
+        return;
+    }
+#endif
+
+    write_cr3(mfn << PAGE_SHIFT);
+}
+
 void write_ptbase(struct vcpu *v)
 {
-    write_cr3(pagetable_get_paddr(v->arch.monitor_table));
+    __write_ptbase(pagetable_get_pfn(v->arch.monitor_table));
 }
 
 void invalidate_shadow_ldt(struct vcpu *v)
@@ -401,6 +434,7 @@ static int get_page_and_type_from_pagenr
     return 1;
 }
 
+#ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -456,6 +490,7 @@ get_linear_pagetable(
 
     return 1;
 }
+#endif /* !CONFIG_X86_PAE */
 
 int
 get_page_from_l1e(
@@ -564,10 +599,6 @@ get_page_from_l3e(
     rc = get_page_and_type_from_pagenr(
         l3e_get_pfn(l3e),
         PGT_l2_page_table | vaddr, d);
-#if CONFIG_PAGING_LEVELS == 3
-    if ( unlikely(!rc) )
-        rc = get_linear_pagetable(l3e, pfn, d);
-#endif
     return rc;
 }
 #endif /* 3 level */
@@ -773,6 +804,50 @@ static int create_pae_xen_mappings(l3_pg
     return 1;
 }
 
+struct pae_flush_pgd {
+    unsigned long l3tab_mfn;
+    unsigned int  l3tab_idx;
+    l3_pgentry_t  nl3e;
+};
+
+static void __pae_flush_pgd(void *data)
+{
+    struct pae_flush_pgd *args = data;
+    struct vcpu *v = this_cpu(curr_vcpu);
+    int i = v->arch.lowmem_l3tab_inuse;
+    intpte_t _ol3e, _nl3e, _pl3e;
+    l3_pgentry_t *l3tab_ptr;
+
+    ASSERT(!local_irq_is_enabled());
+
+    if ( v->arch.lowmem_l3tab_high_mfn[i] != args->l3tab_mfn )
+        return;
+
+    l3tab_ptr = &v->arch.lowmem_l3tab[i][args->l3tab_idx];
+
+    _ol3e = l3e_get_intpte(*l3tab_ptr);
+    _nl3e = l3e_get_intpte(args->nl3e);
+    _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
+    BUG_ON(_pl3e != _ol3e);
+}
+
+/* Flush a pgdir update into low-memory caches. */
+static void pae_flush_pgd(
+    unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
+{
+    struct domain *d = page_get_owner(mfn_to_page(mfn));
+    struct pae_flush_pgd args = {
+        .l3tab_mfn = mfn,
+        .l3tab_idx = idx,
+        .nl3e      = nl3e };
+
+    /* If below 4GB then the pgdir is not shadowed in low memory. */
+    if ( mfn < 0x100000 )
+        return;
+
+    on_selected_cpus(d->domain_dirty_cpumask, __pae_flush_pgd, &args, 1, 1);
+}
+
 static inline int l1_backptr(
     unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
 {
@@ -787,6 +862,7 @@ static inline int l1_backptr(
 
 #elif CONFIG_X86_64
 # define create_pae_xen_mappings(pl3e) (1)
+# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
 
 static inline int l1_backptr(
     unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
@@ -886,14 +962,6 @@ static int alloc_l3_table(struct page_in
 
     ASSERT(!shadow_mode_refcounts(d));
 
-#ifdef CONFIG_X86_PAE
-    if ( pfn >= 0x100000 )
-    {
-        MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
-        return 0;
-    }
-#endif
-
     pl3e = map_domain_page(pfn);
     for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
     {
@@ -1240,6 +1308,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
 
     okay = create_pae_xen_mappings(pl3e);
     BUG_ON(!okay);
+
+    pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
 
     put_page_from_l3e(ol3e, pfn);
     return 1;
@@ -3109,7 +3179,7 @@ void ptwr_flush(struct domain *d, const 
 
     if ( unlikely(d->arch.ptwr[which].vcpu != current) )
         /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
-        write_cr3(pagetable_get_paddr(
+        __write_ptbase(pagetable_get_pfn(
             d->arch.ptwr[which].vcpu->arch.guest_table));
     else
         TOGGLE_MODE();
@@ -3220,15 +3290,16 @@ static int ptwr_emulated_update(
     /* Turn a sub-word access into a full-word access. */
     if ( bytes != sizeof(paddr_t) )
     {
-        int           rc;
-        paddr_t    full;
-        unsigned int  offset = addr & (sizeof(paddr_t)-1);
+        paddr_t      full;
+        unsigned int offset = addr & (sizeof(paddr_t)-1);
 
         /* Align address; read full word. */
         addr &= ~(sizeof(paddr_t)-1);
-        if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
-                                        sizeof(paddr_t))) )
-            return rc; 
+        if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
+        {
+            propagate_page_fault(addr, 4); /* user mode, read fault */
+            return X86EMUL_PROPAGATE_FAULT;
+        }
         /* Mask out bits provided by caller. */
         full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
         /* Shift the caller value and OR in the missing bits. */
@@ -3306,7 +3377,8 @@ static int ptwr_emulated_write(
 static int ptwr_emulated_write(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     return ptwr_emulated_update(addr, 0, val, bytes, 0);
 }
@@ -3315,7 +3387,8 @@ static int ptwr_emulated_cmpxchg(
     unsigned long addr,
     unsigned long old,
     unsigned long new,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     return ptwr_emulated_update(addr, old, new, bytes, 1);
 }
@@ -3325,7 +3398,8 @@ static int ptwr_emulated_cmpxchg8b(
     unsigned long old,
     unsigned long old_hi,
     unsigned long new,
-    unsigned long new_hi)
+    unsigned long new_hi,
+    struct x86_emulate_ctxt *ctxt)
 {
     if ( CONFIG_PAGING_LEVELS == 2 )
         return X86EMUL_UNHANDLEABLE;
@@ -3334,7 +3408,7 @@ static int ptwr_emulated_cmpxchg8b(
             addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1);
 }
 
-static struct x86_mem_emulator ptwr_mem_emulator = {
+static struct x86_emulate_ops ptwr_emulate_ops = {
     .read_std           = x86_emulate_read_std,
     .write_std          = x86_emulate_write_std,
     .read_emulated      = x86_emulate_read_std,
@@ -3353,6 +3427,7 @@ int ptwr_do_page_fault(struct domain *d,
     l2_pgentry_t    *pl2e, l2e;
     int              which, flags;
     unsigned long    l2_idx;
+    struct x86_emulate_ctxt emul_ctxt;
 
     if ( unlikely(shadow_mode_enabled(d)) )
         return 0;
@@ -3507,8 +3582,10 @@ int ptwr_do_page_fault(struct domain *d,
     return EXCRET_fault_fixed;
 
  emulate:
-    if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
-                           &ptwr_mem_emulator, X86EMUL_MODE_HOST) )
+    emul_ctxt.regs = guest_cpu_user_regs();
+    emul_ctxt.cr2  = addr;
+    emul_ctxt.mode = X86EMUL_MODE_HOST;
+    if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
         return 0;
     perfc_incrc(ptwr_emulations);
     return EXCRET_fault_fixed;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/traps.c      Fri May 26 13:41:49 2006 -0600
@@ -876,7 +876,7 @@ static int emulate_privileged_op(struct 
                     PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
                 break;
             }
-            regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+            regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
             break;
 
         case 0x6e: /* OUTSB */
@@ -902,7 +902,7 @@ static int emulate_privileged_op(struct 
                 outl_user((u32)data, (u16)regs->edx, v, regs);
                 break;
             }
-            regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
+            regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes;
             break;
         }
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Thu May 25 15:59:18 2006 -0600
+++ b/xen/arch/x86/x86_emulate.c        Fri May 26 13:41:49 2006 -0600
@@ -363,12 +363,13 @@ do{ __asm__ __volatile__ (              
 #endif /* __i386__ */
 
 /* Fetch next part of the instruction being emulated. */
-#define insn_fetch(_type, _size, _eip) \
-({ unsigned long _x; \
-   if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \
-       goto done; \
-   (_eip) += (_size); \
-   (_type)_x; \
+#define insn_fetch(_type, _size, _eip)                                  \
+({ unsigned long _x;                                                    \
+   rc = ops->read_std((unsigned long)(_eip), &_x, (_size), ctxt);       \
+   if ( rc != 0 )                                                       \
+       goto done;                                                       \
+   (_eip) += (_size);                                                   \
+   (_type)_x;                                                           \
 })
 
 /* Access/update address held in a register, based on addressing mode. */
@@ -426,12 +427,10 @@ decode_register(
     return p;
 }
 
-int 
+int
 x86_emulate_memop(
-    struct cpu_user_regs *regs,
-    unsigned long cr2,
-    struct x86_mem_emulator *ops,
-    int mode)
+    struct x86_emulate_ctxt *ctxt,
+    struct x86_emulate_ops  *ops)
 {
     uint8_t b, d, sib, twobyte = 0, rex_prefix = 0;
     uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
@@ -439,9 +438,11 @@ x86_emulate_memop(
     unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
     int rc = 0;
     struct operand src, dst;
+    unsigned long cr2 = ctxt->cr2;
+    int mode = ctxt->mode;
 
     /* Shadow copy of register state. Committed on successful emulation. */
-    struct cpu_user_regs _regs = *regs;
+    struct cpu_user_regs _regs = *ctxt->regs;
 
     switch ( mode )
     {
@@ -628,7 +629,7 @@ x86_emulate_memop(
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
              ((rc = ops->read_emulated((unsigned long)dst.ptr,
-                                       &dst.val, dst.bytes)) != 0) )
+                                       &dst.val, dst.bytes, ctxt)) != 0) )
              goto done;
         break;
     }
@@ -670,7 +671,7 @@ x86_emulate_memop(
         src.type  = OP_MEM;
         src.ptr   = (unsigned long *)cr2;
         if ( (rc = ops->read_emulated((unsigned long)src.ptr, 
-                                      &src.val, src.bytes)) != 0 )
+                                      &src.val, src.bytes, ctxt)) != 0 )
             goto done;
         src.orig_val = src.val;
         break;
@@ -776,7 +777,7 @@ x86_emulate_memop(
         if ( mode == X86EMUL_MODE_PROT64 )
             dst.bytes = 8;
         if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
-                                 &dst.val, dst.bytes)) != 0 )
+                                 &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(_regs.esp, dst.bytes);
         break;
@@ -854,12 +855,12 @@ x86_emulate_memop(
             {
                 dst.bytes = 8;
                 if ( (rc = ops->read_std((unsigned long)dst.ptr,
-                                         &dst.val, 8)) != 0 )
+                                         &dst.val, 8, ctxt)) != 0 )
                     goto done;
             }
-            register_address_increment(_regs.esp, -dst.bytes);
+            register_address_increment(_regs.esp, -(int)dst.bytes);
             if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
-                                      dst.val, dst.bytes)) != 0 )
+                                      dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
             dst.val = dst.orig_val; /* skanky: disable writeback */
             break;
@@ -887,10 +888,11 @@ x86_emulate_memop(
         case OP_MEM:
             if ( lock_prefix )
                 rc = ops->cmpxchg_emulated(
-                    (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes);
+                    (unsigned long)dst.ptr, dst.orig_val,
+                    dst.val, dst.bytes, ctxt);
             else
                 rc = ops->write_emulated(
-                    (unsigned long)dst.ptr, dst.val, dst.bytes);
+                    (unsigned long)dst.ptr, dst.val, dst.bytes, ctxt);
             if ( rc != 0 )
                 goto done;
         default:
@@ -899,7 +901,7 @@ x86_emulate_memop(
     }
 
     /* Commit shadow register state. */
-    *regs = _regs;
+    *ctxt->regs = _regs;
 
  done:
     return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
@@ -911,11 +913,11 @@ x86_emulate_memop(
     {
         if ( _regs.ecx == 0 )
         {
-            regs->eip = _regs.eip;
+            ctxt->regs->eip = _regs.eip;
             goto done;
         }
         _regs.ecx--;
-        _regs.eip = regs->eip;
+        _regs.eip = ctxt->regs->eip;
     }
     switch ( b )
     {
@@ -928,20 +930,21 @@ x86_emulate_memop(
             dst.ptr = (unsigned long *)cr2;
             if ( (rc = ops->read_std(register_address(seg ? *seg : _regs.ds,
                                                       _regs.esi),
-                                     &dst.val, dst.bytes)) != 0 )
+                                     &dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
         }
         else
         {
             /* Read fault: source is special memory. */
             dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
-            if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+            if ( (rc = ops->read_emulated(cr2, &dst.val,
+                                          dst.bytes, ctxt)) != 0 )
                 goto done;
         }
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xa6 ... 0xa7: /* cmps */
         DPRINTF("Urk! I don't handle CMPS.\n");
@@ -952,16 +955,16 @@ x86_emulate_memop(
         dst.ptr   = (unsigned long *)cr2;
         dst.val   = _regs.eax;
         register_address_increment(
-            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xac ... 0xad: /* lods */
         dst.type  = OP_REG;
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
         dst.ptr   = (unsigned long *)&_regs.eax;
-        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
+        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(
-            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
+            _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes);
         break;
     case 0xae ... 0xaf: /* scas */
         DPRINTF("Urk! I don't handle SCAS.\n");
@@ -1074,8 +1077,8 @@ x86_emulate_memop(
 #if defined(__i386__)
     {
         unsigned long old_lo, old_hi;
-        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4)) != 0) ||
-             ((rc = ops->read_emulated(cr2+4, &old_hi, 4)) != 0) )
+        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4, ctxt)) != 0) ||
+             ((rc = ops->read_emulated(cr2+4, &old_hi, 4, ctxt)) != 0) )
             goto done;
         if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
         {
@@ -1090,8 +1093,8 @@ x86_emulate_memop(
         }
         else
         {
-            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi,
-                                               _regs.ebx, _regs.ecx)) != 0 )
+            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, _regs.ebx,
+                                               _regs.ecx, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
@@ -1100,7 +1103,7 @@ x86_emulate_memop(
 #elif defined(__x86_64__)
     {
         unsigned long old, new;
-        if ( (rc = ops->read_emulated(cr2, &old, 8)) != 0 )
+        if ( (rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0 )
             goto done;
         if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
              ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
@@ -1112,7 +1115,7 @@ x86_emulate_memop(
         else
         {
             new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
-            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8)) != 0 )
+            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
@@ -1136,7 +1139,8 @@ x86_emulate_read_std(
 x86_emulate_read_std(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     *val = 0;
     if ( copy_from_user((void *)val, (void *)addr, bytes) )
@@ -1151,7 +1155,8 @@ x86_emulate_write_std(
 x86_emulate_write_std(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes)
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
 {
     if ( copy_to_user((void *)addr, (void *)&val, bytes) )
     {
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/Makefile
--- a/xen/common/Makefile       Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/Makefile       Fri May 26 13:41:49 2006 -0600
@@ -13,6 +13,7 @@ obj-y += page_alloc.o
 obj-y += page_alloc.o
 obj-y += rangeset.o
 obj-y += sched_bvt.o
+obj-y += sched_credit.o
 obj-y += sched_sedf.o
 obj-y += schedule.o
 obj-y += softirq.o
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/grant_table.c
--- a/xen/common/grant_table.c  Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/grant_table.c  Fri May 26 13:41:49 2006 -0600
@@ -505,15 +505,12 @@ gnttab_setup_table(
         goto out;
     }
 
-    if ( op.nr_frames <= NR_GRANT_FRAMES )
-    {
-        ASSERT(d->grant_table != NULL);
-        op.status = GNTST_okay;
-        for ( i = 0; i < op.nr_frames; i++ )
-        {
-            gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
-            (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
-        }
+    ASSERT(d->grant_table != NULL);
+    op.status = GNTST_okay;
+    for ( i = 0; i < op.nr_frames; i++ )
+    {
+        gmfn = gnttab_shared_gmfn(d, d->grant_table, i);
+        (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1);
     }
 
     put_domain(d);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/kernel.c
--- a/xen/common/kernel.c       Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/kernel.c       Fri May 26 13:41:49 2006 -0600
@@ -191,12 +191,11 @@ long do_xen_version(int cmd, XEN_GUEST_H
         switch ( fi.submap_idx )
         {
         case 0:
-            fi.submap = 0;
+            fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb);
             if ( shadow_mode_translate(current->domain) )
                 fi.submap |= 
                     (1U << XENFEAT_writable_page_tables) |
-                    (1U << XENFEAT_auto_translated_physmap) |
-                    (1U << XENFEAT_pae_pgdir_above_4gb);
+                    (1U << XENFEAT_auto_translated_physmap);
             if ( supervisor_mode_kernel )
                 fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
             break;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/schedule.c
--- a/xen/common/schedule.c     Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/schedule.c     Fri May 26 13:41:49 2006 -0600
@@ -50,9 +50,11 @@ struct schedule_data schedule_data[NR_CP
 
 extern struct scheduler sched_bvt_def;
 extern struct scheduler sched_sedf_def;
+extern struct scheduler sched_credit_def;
 static struct scheduler *schedulers[] = { 
     &sched_bvt_def,
     &sched_sedf_def,
+    &sched_credit_def,
     NULL
 };
 
@@ -639,6 +641,8 @@ static void t_timer_fn(void *unused)
 
     page_scrub_schedule_work();
 
+    SCHED_OP(tick, cpu);
+
     set_timer(&t_timer[cpu], NOW() + MILLISECS(10));
 }
 
@@ -681,6 +685,7 @@ void __init scheduler_init(void)
         printk("Could not find scheduler: %s\n", opt_sched);
 
     printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
+    SCHED_OP(init);
 
     if ( idle_vcpu[0] != NULL )
     {
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/trace.c
--- a/xen/common/trace.c        Thu May 25 15:59:18 2006 -0600
+++ b/xen/common/trace.c        Fri May 26 13:41:49 2006 -0600
@@ -91,6 +91,7 @@ static int alloc_trace_bufs(void)
     if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
     {
         printk("Xen trace buffers: memory allocation failed\n");
+        opt_tbuf_size = 0;
         return -EINVAL;
     }
 
@@ -135,10 +136,7 @@ static int tb_set_size(int size)
 
     opt_tbuf_size = size;
     if ( alloc_trace_bufs() != 0 )
-    {
-        opt_tbuf_size = 0;
-        return -EINVAL;
-    }
+        return -EINVAL;
 
     printk("Xen trace buffers: initialized\n");
     return 0;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/domain.h      Fri May 26 13:41:49 2006 -0600
@@ -120,6 +120,18 @@ struct arch_vcpu
     struct vcpu_guest_context guest_context
     __attribute__((__aligned__(16)));
 
+#ifdef CONFIG_X86_PAE
+    /*
+     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
+     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
+     * an L3 table while we are currently running on it (without using
+     * expensive atomic 64-bit operations).
+     */
+    l3_pgentry_t  lowmem_l3tab[2][4] __attribute__((__aligned__(32)));
+    unsigned long lowmem_l3tab_high_mfn[2]; /* The >=4GB MFN being shadowed. */
+    unsigned int  lowmem_l3tab_inuse;       /* Which lowmem_l3tab is in use? */
+#endif
+
     unsigned long      flags; /* TF_ */
 
     void (*schedule_tail) (struct vcpu *);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h  Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/domain.h  Fri May 26 13:41:49 2006 -0600
@@ -35,9 +35,9 @@ struct hvm_domain {
     unsigned int           nr_vcpus;
     unsigned int           apic_enabled;
     unsigned int           pae_enabled;
-
-    struct hvm_virpit      vpit;
-    u64                    guest_time;
+    s64                    tsc_frequency;
+    struct pl_time         pl_time;
+    
     struct hvm_virpic      vpic;
     struct hvm_vioapic     vioapic;
     struct hvm_io_handler  io_handler;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/svm/intr.h
--- a/xen/include/asm-x86/hvm/svm/intr.h        Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/svm/intr.h        Fri May 26 13:41:49 2006 -0600
@@ -21,7 +21,6 @@
 #ifndef __ASM_X86_HVM_SVM_INTR_H__
 #define __ASM_X86_HVM_SVM_INTR_H__
 
-extern void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit);
 extern void svm_intr_assist(void);
 extern void svm_intr_assist_update(struct vcpu *v, int highest_vector);
 extern void svm_intr_assist_test_valid(struct vcpu *v, 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/svm/svm.h
--- a/xen/include/asm-x86/hvm/svm/svm.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/svm/svm.h Fri May 26 13:41:49 2006 -0600
@@ -48,7 +48,6 @@ extern void svm_do_launch(struct vcpu *v
 extern void svm_do_launch(struct vcpu *v);
 extern void svm_do_resume(struct vcpu *v);
 extern void svm_set_guest_time(struct vcpu *v, u64 gtime);
-extern u64 svm_get_guest_time(struct vcpu *v);
 extern void arch_svm_do_resume(struct vcpu *v);
 extern int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa);
 /* For debugging. Remove when no longer needed. */
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/vcpu.h    Fri May 26 13:41:49 2006 -0600
@@ -32,6 +32,9 @@ struct hvm_vcpu {
     unsigned long   ioflags;
     struct mmio_op  mmio_op;
     struct vlapic   *vlapic;
+    s64             cache_tsc_offset;
+    u64             guest_time;
+
     /* For AP startup */
     unsigned long   init_sipi_sipi_state;
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri May 26 13:41:49 2006 -0600
@@ -34,7 +34,6 @@ extern void arch_vmx_do_launch(struct vc
 extern void arch_vmx_do_launch(struct vcpu *);
 extern void arch_vmx_do_resume(struct vcpu *);
 extern void set_guest_time(struct vcpu *v, u64 gtime);
-extern u64  get_guest_time(struct vcpu *v);
 
 extern unsigned int cpu_rev;
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/hvm/vpit.h
--- a/xen/include/asm-x86/hvm/vpit.h    Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/hvm/vpit.h    Fri May 26 13:41:49 2006 -0600
@@ -29,9 +29,7 @@
 #include <asm/hvm/vpic.h>
 
 #define PIT_FREQ 1193181
-
-#define PIT_BASE 0x40
-#define HVM_PIT_ACCEL_MODE 2
+#define PIT_BASE        0x40
 
 typedef struct PITChannelState {
     int count; /* can be 65536 */
@@ -48,47 +46,56 @@ typedef struct PITChannelState {
     u8 gate; /* timer start */
     s64 count_load_time;
     /* irq handling */
-    s64 next_transition_time;
-    int irq;
-    struct hvm_time_info *hvm_time;
-    u32 period; /* period(ns) based on count */
+    struct vcpu      *vcpu;
+    struct periodic_time *pt;
 } PITChannelState;
-
-struct hvm_time_info {
-    /* extra info for the mode 2 channel */
-    struct timer pit_timer;
-    struct vcpu *vcpu;          /* which vcpu the ac_timer bound to */
-    u64 period_cycles;          /* pit frequency in cpu cycles */
-    s_time_t count_advance;     /* accumulated count advance since last fire */
-    s_time_t count_point;        /* last point accumulating count advance */
-    unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
-    int first_injected;         /* flag to prevent shadow window */
-    s64 cache_tsc_offset;       /* cache of VMCS TSC_OFFSET offset */
-    u64 last_pit_gtime;         /* guest time when last pit is injected */
+   
+/*
+ * Abstract layer of periodic time, one short time.
+ */
+struct periodic_time {
+    char enabled;               /* enabled */
+    char one_shot;              /* one shot time */
+    char irq;
+    char first_injected;        /* flag to prevent shadow window */
+    u32 pending_intr_nr;        /* the couner for pending timer interrupts */
+    u32 period;                 /* frequency in ns */
+    u64 period_cycles;          /* frequency in cpu cycles */
+    s_time_t scheduled;         /* scheduled timer interrupt */
+    u64 last_plt_gtime;         /* platform time when last IRQ is injected */
+    struct timer timer;         /* ac_timer */
 };
 
-typedef struct hvm_virpit {
+typedef struct PITState {
     PITChannelState channels[3];
-    struct hvm_time_info time_info;
     int speaker_data_on;
     int dummy_refresh_clock;
-}hvm_virpit;
+} PITState;
 
+struct pl_time {    /* platform time */
+    struct periodic_time periodic_tm;
+    struct PITState      vpit;
+    /* TODO: RTC/ACPI time */
+};
 
-static __inline__ s_time_t get_pit_scheduled(
-    struct vcpu *v,
-    struct hvm_virpit *vpit)
+static __inline__ s_time_t get_scheduled(
+    struct vcpu *v, int irq,
+    struct periodic_time *pt)
 {
-    struct PITChannelState *s = &(vpit->channels[0]);
-    if ( is_irq_enabled(v, 0) ) {
-        return s->next_transition_time;
+    if ( is_irq_enabled(v, irq) ) {
+        return pt->scheduled;
     }
     else
         return -1;
 }
 
 /* to hook the ioreq packet to get the PIT initialization info */
-extern void pit_init(struct hvm_virpit *pit, struct vcpu *v);
-extern void pickup_deactive_ticks(struct hvm_virpit *vpit);
+extern void hvm_hooks_assist(struct vcpu *v);
+extern void pickup_deactive_ticks(struct periodic_time *vpit);
+extern u64 hvm_get_guest_time(struct vcpu *v);
+extern struct periodic_time *create_periodic_time(struct vcpu *v, u32 period, 
char irq, char one_shot);
+extern void destroy_periodic_time(struct periodic_time *pt);
+void pit_init(struct vcpu *v, unsigned long cpu_khz);
+void pt_timer_fn(void *data);
 
 #endif /* __ASM_X86_HVM_VPIT_H__ */
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/string.h
--- a/xen/include/asm-x86/string.h      Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/string.h      Fri May 26 13:41:49 2006 -0600
@@ -2,152 +2,6 @@
 #define __X86_STRING_H__
 
 #include <xen/config.h>
-
-#define __HAVE_ARCH_STRCPY
-static inline char *strcpy(char *dest, const char *src)
-{
-    long d0, d1, d2;
-    __asm__ __volatile__ (
-        "1: lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2)
-        : "0" (src), "1" (dest) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRNCPY
-static inline char *strncpy(char *dest, const char *src, size_t count)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "1: dec  %2        \n"
-        "   js   2f        \n"
-        "   lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        "   rep ; stosb    \n"
-        "2:                \n"
-        : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
-        : "0" (src), "1" (dest), "2" (count) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRCAT
-static inline char *strcat(char *dest, const char *src)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "   repne ; scasb  \n"
-        "   dec  %1        \n"
-        "1: lodsb          \n"
-        "   stosb          \n"
-        "   test %%al,%%al \n"
-        "   jne  1b        \n"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-        : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL) : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRNCAT
-static inline char *strncat(char *dest, const char *src, size_t count)
-{
-    long d0, d1, d2, d3;
-    __asm__ __volatile__ (
-        "   repne ; scasb   \n"
-        "   dec  %1         \n"
-        "   mov  %8,%3      \n"
-        "1: dec  %3         \n"
-        "   js   2f         \n"
-        "   lodsb           \n"
-        "   stosb           \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "2: xor  %%eax,%%eax\n"
-        "   stosb"
-        : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-        : "0" (src), "1" (dest), "2" (0UL), "3" (0xffffffffUL), "g" (count)
-        : "memory" );
-    return dest;
-}
-
-#define __HAVE_ARCH_STRCMP
-static inline int strcmp(const char *cs, const char *ct)
-{
-    long d0, d1;
-    register int __res;
-    __asm__ __volatile__ (
-        "1: lodsb           \n"
-        "   scasb           \n"
-        "   jne  2f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "   xor  %%eax,%%eax\n"
-        "   jmp  3f         \n"
-        "2: sbb  %%eax,%%eax\n"
-        "   or   $1,%%al    \n"
-        "3:                 \n"
-        : "=a" (__res), "=&S" (d0), "=&D" (d1)
-        : "1" (cs), "2" (ct) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRNCMP
-static inline int strncmp(const char *cs, const char *ct, size_t count)
-{
-    long d0, d1, d2;
-    register int __res;
-    __asm__ __volatile__ (
-        "1: dec  %3         \n"
-        "   js   2f         \n"
-        "   lodsb           \n"
-        "   scasb           \n"
-        "   jne  3f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "2: xor  %%eax,%%eax\n"
-        "   jmp  4f         \n"
-        "3: sbb  %%eax,%%eax\n"
-        "   or   $1,%%al    \n"
-        "4:                 \n"
-        : "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
-        : "1" (cs), "2" (ct), "3" (count) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRCHR
-static inline char *strchr(const char *s, int c)
-{
-    long d0;
-    register char *__res;
-    __asm__ __volatile__ (
-        "   mov  %%al,%%ah  \n"
-        "1: lodsb           \n"
-        "   cmp  %%ah,%%al  \n"
-        "   je   2f         \n"
-        "   test %%al,%%al  \n"
-        "   jne  1b         \n"
-        "   mov  $1,%1      \n"
-        "2: mov  %1,%0      \n"
-        "   dec  %0         \n"
-        : "=a" (__res), "=&S" (d0) : "1" (s), "0" (c) );
-    return __res;
-}
-
-#define __HAVE_ARCH_STRLEN
-static inline size_t strlen(const char *s)
-{
-    long d0;
-    register int __res;
-    __asm__ __volatile__ (
-        "   repne ; scasb  \n"
-        "   notl %0        \n"
-        "   decl %0        \n"
-        : "=c" (__res), "=&D" (d0) : "1" (s), "a" (0), "0" (0xffffffffUL) );
-    return __res;
-}
 
 static inline void *__variable_memcpy(void *to, const void *from, size_t n)
 {
@@ -258,22 +112,6 @@ extern void *memmove(void *dest, const v
 #define __HAVE_ARCH_MEMCMP
 #define memcmp __builtin_memcmp
 
-#define __HAVE_ARCH_MEMCHR
-static inline void *memchr(const void *cs, int c, size_t count)
-{
-    long d0;
-    register void *__res;
-    if ( count == 0 )
-        return NULL;
-    __asm__ __volatile__ (
-        "   repne ; scasb\n"
-        "   je   1f      \n"
-        "   mov  $1,%0   \n"
-        "1: dec  %0      \n"
-        : "=D" (__res), "=&c" (d0) : "a" (c), "0" (cs), "1" (count) );
-    return __res;
-}
-
 static inline void *__memset_generic(void *s, char c, size_t count)
 {
     long d0, d1;
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/asm-x86/x86_emulate.h
--- a/xen/include/asm-x86/x86_emulate.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/asm-x86/x86_emulate.h Fri May 26 13:41:49 2006 -0600
@@ -9,8 +9,10 @@
 #ifndef __X86_EMULATE_H__
 #define __X86_EMULATE_H__
 
-/*
- * x86_mem_emulator:
+struct x86_emulate_ctxt;
+
+/*
+ * x86_emulate_ops:
  * 
  * These operations represent the instruction emulator's interface to memory.
  * There are two categories of operation: those that act on ordinary memory
@@ -47,7 +49,7 @@
 #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
 #define X86EMUL_RETRY_INSTR     2 /* retry the instruction for some reason */
 #define X86EMUL_CMPXCHG_FAILED  2 /* cmpxchg did not see expected value */
-struct x86_mem_emulator
+struct x86_emulate_ops
 {
     /*
      * read_std: Read bytes of standard (non-emulated/special) memory.
@@ -59,7 +61,8 @@ struct x86_mem_emulator
     int (*read_std)(
         unsigned long addr,
         unsigned long *val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * write_std: Write bytes of standard (non-emulated/special) memory.
@@ -71,7 +74,8 @@ struct x86_mem_emulator
     int (*write_std)(
         unsigned long addr,
         unsigned long val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * read_emulated: Read bytes from emulated/special memory area.
@@ -82,7 +86,8 @@ struct x86_mem_emulator
     int (*read_emulated)(
         unsigned long addr,
         unsigned long *val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * write_emulated: Read bytes from emulated/special memory area.
@@ -93,7 +98,8 @@ struct x86_mem_emulator
     int (*write_emulated)(
         unsigned long addr,
         unsigned long val,
-        unsigned int bytes);
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
 
     /*
      * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -107,11 +113,12 @@ struct x86_mem_emulator
         unsigned long addr,
         unsigned long old,
         unsigned long new,
-        unsigned int bytes);
-
-    /*
-     * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
-     *                   emulated/special memory area.
+        unsigned int bytes,
+        struct x86_emulate_ctxt *ctxt);
+
+    /*
+     * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
+     *                     emulated/special memory area.
      *  @addr:  [IN ] Linear address to access.
      *  @old:   [IN ] Value expected to be current at @addr.
      *  @new:   [IN ] Value to write to @addr.
@@ -126,7 +133,8 @@ struct x86_mem_emulator
         unsigned long old_lo,
         unsigned long old_hi,
         unsigned long new_lo,
-        unsigned long new_hi);
+        unsigned long new_hi,
+        struct x86_emulate_ctxt *ctxt);
 };
 
 /* Standard reader/writer functions that callers may wish to use. */
@@ -134,14 +142,28 @@ x86_emulate_read_std(
 x86_emulate_read_std(
     unsigned long addr,
     unsigned long *val,
-    unsigned int bytes);
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt);
 extern int
 x86_emulate_write_std(
     unsigned long addr,
     unsigned long val,
-    unsigned int bytes);
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt);
 
 struct cpu_user_regs;
+
+struct x86_emulate_ctxt
+{
+    /* Register state before/after emulation. */
+    struct cpu_user_regs   *regs;
+
+    /* Linear faulting address (if emulating a page-faulting instruction). */
+    unsigned long           cr2;
+
+    /* Emulated execution mode, represented by an X86EMUL_MODE value. */
+    int                     mode;
+};
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0 /* Real mode.             */
@@ -159,25 +181,19 @@ struct cpu_user_regs;
 /*
  * x86_emulate_memop: Emulate an instruction that faulted attempting to
  *                    read/write a 'special' memory area.
- *  @regs: Register state at time of fault.
- *  @cr2:  Linear faulting address within an emulated/special memory area.
- *  @ops:  Interface to access special memory.
- *  @mode: Emulated execution mode, represented by an X86EMUL_MODE value.
  * Returns -1 on failure, 0 on success.
  */
-extern int
+int
 x86_emulate_memop(
-    struct cpu_user_regs *regs,
-    unsigned long cr2,
-    struct x86_mem_emulator *ops,
-    int mode);
+    struct x86_emulate_ctxt *ctxt,
+    struct x86_emulate_ops  *ops);
 
 /*
  * Given the 'reg' portion of a ModRM byte, and a register block, return a
  * pointer into the block that addresses the relevant register.
  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  */
-extern void *
+void *
 decode_register(
     uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs);
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/public/io/xenbus.h
--- a/xen/include/public/io/xenbus.h    Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/public/io/xenbus.h    Fri May 26 13:41:49 2006 -0600
@@ -9,34 +9,37 @@
 #ifndef _XEN_PUBLIC_IO_XENBUS_H
 #define _XEN_PUBLIC_IO_XENBUS_H
 
-/* The state of either end of the Xenbus, i.e. the current communication
-   status of initialisation across the bus.  States here imply nothing about
-   the state of the connection between the driver and the kernel's device
-   layers.  */
-typedef enum
-{
-  XenbusStateUnknown      = 0,
-  XenbusStateInitialising = 1,
-  XenbusStateInitWait     = 2,  /* Finished early initialisation, but waiting
-                                   for information from the peer or hotplug
-                                  scripts. */
-  XenbusStateInitialised  = 3,  /* Initialised and waiting for a connection
-                                  from the peer. */
-  XenbusStateConnected    = 4,
-  XenbusStateClosing      = 5,  /* The device is being closed due to an error
-                                  or an unplug event. */
-  XenbusStateClosed       = 6
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus.  States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+    XenbusStateUnknown       = 0,
 
-} XenbusState;
+    XenbusStateInitialising  = 1,
+
+    /*
+     * InitWait: Finished early initialisation but waiting for information
+     * from the peer or hotplug scripts.
+     */
+    XenbusStateInitWait      = 2,
+
+    /*
+     * Initialised: Waiting for a connection from the peer.
+     */
+    XenbusStateInitialised   = 3,
+
+    XenbusStateConnected     = 4,
+
+    /*
+     * Closing: The device is being closed due to an error or an unplug event.
+     */
+    XenbusStateClosing       = 5,
+
+    XenbusStateClosed       = 6
+};
+typedef enum xenbus_state XenbusState;
 
 #endif /* _XEN_PUBLIC_IO_XENBUS_H */
-
-/*
- * Local variables:
- *  c-file-style: "linux"
- *  indent-tabs-mode: t
- *  c-indent-level: 8
- *  c-basic-offset: 8
- *  tab-width: 8
- * End:
- */
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/public/sched_ctl.h
--- a/xen/include/public/sched_ctl.h    Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/public/sched_ctl.h    Fri May 26 13:41:49 2006 -0600
@@ -10,6 +10,7 @@
 /* Scheduler types. */
 #define SCHED_BVT      0
 #define SCHED_SEDF     4
+#define SCHED_CREDIT   5
 
 /* Set or get info? */
 #define SCHED_INFO_PUT 0
@@ -48,6 +49,10 @@ struct sched_adjdom_cmd {
             uint32_t extratime;
             uint32_t weight;
         } sedf;
+        struct csched_domain {
+            uint16_t weight;
+            uint16_t cap;
+        } credit;
     } u;
 };
 
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h        Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/xen/sched-if.h        Fri May 26 13:41:49 2006 -0600
@@ -58,6 +58,8 @@ struct scheduler {
     char *opt_name;         /* option name for this scheduler    */
     unsigned int sched_id;  /* ID for this scheduler             */
 
+    void         (*init)           (void);
+    void         (*tick)           (unsigned int cpu);
     int          (*alloc_task)     (struct vcpu *);
     void         (*add_task)       (struct vcpu *);
     void         (*free_task)      (struct domain *);
diff -r 9d52a66c7499 -r c073ebdbde8c xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Thu May 25 15:59:18 2006 -0600
+++ b/xen/include/xen/softirq.h Fri May 26 13:41:49 2006 -0600
@@ -26,6 +26,19 @@ asmlinkage void do_softirq(void);
 asmlinkage void do_softirq(void);
 extern void open_softirq(int nr, softirq_handler handler);
 
+static inline void cpumask_raise_softirq(cpumask_t mask, unsigned int nr)
+{
+    int cpu;
+
+    for_each_cpu_mask(cpu, mask)
+    {
+        if ( test_and_set_bit(nr, &softirq_pending(cpu)) )
+            cpu_clear(cpu, mask);
+    }
+
+    smp_send_event_check_mask(mask);
+}
+
 static inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
 {
     if ( !test_and_set_bit(nr, &softirq_pending(cpu)) )
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c       Fri May 26 
13:41:49 2006 -0600
@@ -0,0 +1,185 @@
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <xen/cpu_hotplug.h>
+#include <xen/xenbus.h>
+
+/*
+ * Set of CPUs that remote admin software will allow us to bring online.
+ * Notified to us via xenbus.
+ */
+static cpumask_t xenbus_allowed_cpumask;
+
+/* Set of CPUs that local admin will allow us to bring online. */
+static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
+
+static int local_cpu_hotplug_request(void)
+{
+       /*
+        * We assume a CPU hotplug request comes from local admin if it is made
+        * via a userspace process (i.e., one with a real mm_struct).
+        */
+       return (current->mm != NULL);
+}
+
+static void vcpu_hotplug(unsigned int cpu)
+{
+       int err;
+       char dir[32], state[32];
+
+       if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
+               return;
+
+       sprintf(dir, "cpu/%d", cpu);
+       err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
+       if (err != 1) {
+               printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+               return;
+       }
+
+       if (strcmp(state, "online") == 0) {
+               cpu_set(cpu, xenbus_allowed_cpumask);
+               (void)cpu_up(cpu);
+       } else if (strcmp(state, "offline") == 0) {
+               cpu_clear(cpu, xenbus_allowed_cpumask);
+               (void)cpu_down(cpu);
+       } else {
+               printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
+                      state, cpu);
+       }
+}
+
+static void handle_vcpu_hotplug_event(
+       struct xenbus_watch *watch, const char **vec, unsigned int len)
+{
+       int cpu;
+       char *cpustr;
+       const char *node = vec[XS_WATCH_PATH];
+
+       if ((cpustr = strstr(node, "cpu/")) != NULL) {
+               sscanf(cpustr, "cpu/%d", &cpu);
+               vcpu_hotplug(cpu);
+       }
+}
+
+static int smpboot_cpu_notify(struct notifier_block *notifier,
+                             unsigned long action, void *hcpu)
+{
+       int cpu = (long)hcpu;
+
+       /*
+        * We do this in a callback notifier rather than __cpu_disable()
+        * because local_cpu_hotplug_request() does not work in the latter
+        * as it's always executed from within a stopmachine kthread.
+        */
+       if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
+               cpu_clear(cpu, local_allowed_cpumask);
+
+       return NOTIFY_OK;
+}
+
+static int setup_cpu_watcher(struct notifier_block *notifier,
+                             unsigned long event, void *data)
+{
+       int i;
+
+       static struct xenbus_watch cpu_watch = {
+               .node = "cpu",
+               .callback = handle_vcpu_hotplug_event,
+               .flags = XBWF_new_thread };
+       (void)register_xenbus_watch(&cpu_watch);
+
+       if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
+               for_each_cpu(i)
+                       vcpu_hotplug(i);
+               printk(KERN_INFO "Brought up %ld CPUs\n",
+                      (long)num_online_cpus());
+       }
+
+       return NOTIFY_DONE;
+}
+
+static int __init setup_vcpu_hotplug_event(void)
+{
+       static struct notifier_block hotplug_cpu = {
+               .notifier_call = smpboot_cpu_notify };
+       static struct notifier_block xsn_cpu = {
+               .notifier_call = setup_cpu_watcher };
+
+       register_cpu_notifier(&hotplug_cpu);
+       register_xenstore_notifier(&xsn_cpu);
+
+       return 0;
+}
+
+arch_initcall(setup_vcpu_hotplug_event);
+
+int smp_suspend(void)
+{
+       int i, err;
+
+       lock_cpu_hotplug();
+
+       /*
+        * Take all other CPUs offline. We hold the hotplug mutex to
+        * avoid other processes bringing up CPUs under our feet.
+        */
+       while (num_online_cpus() > 1) {
+               unlock_cpu_hotplug();
+               for_each_online_cpu(i) {
+                       if (i == 0)
+                               continue;
+                       err = cpu_down(i);
+                       if (err) {
+                               printk(KERN_CRIT "Failed to take all CPUs "
+                                      "down: %d.\n", err);
+                               for_each_cpu(i)
+                                       vcpu_hotplug(i);
+                               return err;
+                       }
+               }
+               lock_cpu_hotplug();
+       }
+
+       return 0;
+}
+
+void smp_resume(void)
+{
+       int cpu;
+
+       for_each_cpu(cpu)
+               cpu_initialize_context(cpu);
+
+       unlock_cpu_hotplug();
+
+       for_each_cpu(cpu)
+               vcpu_hotplug(cpu);
+}
+
+int cpu_up_is_allowed(unsigned int cpu)
+{
+       int rc = 0;
+
+       if (local_cpu_hotplug_request()) {
+               cpu_set(cpu, local_allowed_cpumask);
+               if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
+                       printk("%s: attempt to bring up CPU %u disallowed by "
+                              "remote admin.\n", __FUNCTION__, cpu);
+                       rc = -EBUSY;
+               }
+       } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
+                  !cpu_isset(cpu, xenbus_allowed_cpumask)) {
+               rc = -EBUSY;
+       }
+
+       return rc;
+}
+
+void init_xenbus_allowed_cpumask(void)
+{
+       xenbus_allowed_cpumask = cpu_present_map;
+}
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/e820.h       Fri May 
26 13:41:49 2006 -0600
@@ -0,0 +1,63 @@
+/*
+ * structures and definitions for the int 15, ax=e820 memory map
+ * scheme.
+ *
+ * In a nutshell, setup.S populates a scratch table in the
+ * empty_zero_block that contains a list of usable address/size
+ * duples.  setup.c, this information is transferred into the e820map,
+ * and in init.c/numa.c, that new information is used to mark pages
+ * reserved or not.
+ */
+#ifndef __E820_HEADER
+#define __E820_HEADER
+
+#include <linux/mmzone.h>
+
+#define E820MAP        0x2d0           /* our map */
+#define E820MAX        128             /* number of entries in E820MAP */
+#define E820NR 0x1e8           /* # entries in E820MAP */
+
+#define E820_RAM       1
+#define E820_RESERVED  2
+#define E820_ACPI      3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS       4
+
+#define HIGH_MEMORY    (1024*1024)
+
+#define LOWMEMSIZE()   (0x9f000)
+
+#ifndef __ASSEMBLY__
+struct e820entry {
+       u64 addr;       /* start of memory segment */
+       u64 size;       /* size of memory segment */
+       u32 type;       /* type of memory segment */
+} __attribute__((packed));
+
+struct e820map {
+    int nr_map;
+       struct e820entry map[E820MAX];
+};
+
+extern unsigned long find_e820_area(unsigned long start, unsigned long end, 
+                                   unsigned size);
+extern void add_memory_region(unsigned long start, unsigned long size, 
+                             int type);
+extern void setup_memory_region(void);
+extern void contig_e820_setup(void); 
+extern unsigned long e820_end_of_ram(void);
+extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
+extern void e820_print_map(char *who);
+extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
+
+extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned 
long end);
+extern void e820_setup_gap(struct e820entry *e820, int nr_map);
+extern unsigned long e820_hole_size(unsigned long start_pfn,
+                                   unsigned long end_pfn);
+
+extern void __init parse_memopt(char *p, char **end);
+extern void __init parse_memmapopt(char *p, char **end);
+
+extern struct e820map e820;
+#endif/*!__ASSEMBLY__*/
+
+#endif/*__E820_HEADER*/
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/include/xen/cpu_hotplug.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h    Fri May 26 13:41:49 
2006 -0600
@@ -0,0 +1,42 @@
+#ifndef __XEN_CPU_HOTPLUG_H__
+#define __XEN_CPU_HOTPLUG_H__
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+
+#if defined(CONFIG_HOTPLUG_CPU)
+
+#if defined(CONFIG_X86)
+void cpu_initialize_context(unsigned int cpu);
+#else
+#define cpu_initialize_context(cpu)    ((void)0)
+#endif
+
+int cpu_up_is_allowed(unsigned int cpu);
+void init_xenbus_allowed_cpumask(void);
+int smp_suspend(void);
+void smp_resume(void);
+
+#else /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#define cpu_up_is_allowed(cpu)         (1)
+#define init_xenbus_allowed_cpumask()  ((void)0)
+
+static inline int smp_suspend(void)
+{
+       if (num_online_cpus() > 1) {
+               printk(KERN_WARNING "Can't suspend SMP guests "
+                      "without CONFIG_HOTPLUG_CPU\n");
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+static inline void smp_resume(void)
+{
+}
+
+#endif /* !defined(CONFIG_HOTPLUG_CPU) */
+
+#endif /* __XEN_CPU_HOTPLUG_H__ */
diff -r 9d52a66c7499 -r c073ebdbde8c 
patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/fix-ide-cd-pio-mode.patch Fri May 26 13:41:49 
2006 -0600
@@ -0,0 +1,18 @@
+diff -ru ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c 
./drivers/ide/ide-lib.c
+--- ../pristine-linux-2.6.16.13/drivers/ide/ide-lib.c  2006-05-02 
22:38:44.000000000 +0100
++++ ./drivers/ide/ide-lib.c    2006-05-24 18:37:05.000000000 +0100
+@@ -410,10 +410,10 @@
+ {
+       u64 addr = BLK_BOUNCE_HIGH;     /* dma64_addr_t */
+ 
+-      if (!PCI_DMA_BUS_IS_PHYS) {
+-              addr = BLK_BOUNCE_ANY;
+-      } else if (on && drive->media == ide_disk) {
+-              if (HWIF(drive)->pci_dev)
++      if (on && drive->media == ide_disk) {
++              if (!PCI_DMA_BUS_IS_PHYS)
++                      addr = BLK_BOUNCE_ANY;
++              else if (HWIF(drive)->pci_dev)
+                       addr = HWIF(drive)->pci_dev->dma_mask;
+       }
+ 
diff -r 9d52a66c7499 -r c073ebdbde8c tools/libxc/xc_csched.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_csched.c   Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,50 @@
+/****************************************************************************
+ * (C) 2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ *        File: xc_csched.c
+ *      Author: Emmanuel Ackaouy
+ *
+ * Description: XC Interface to the credit scheduler
+ *
+ */
+#include "xc_private.h"
+
+
+int
+xc_csched_domain_set(
+    int xc_handle,
+    uint32_t domid,
+    struct csched_domain *sdom)
+{
+    DECLARE_DOM0_OP;
+
+    op.cmd = DOM0_ADJUSTDOM;    
+    op.u.adjustdom.domain = (domid_t) domid;
+    op.u.adjustdom.sched_id = SCHED_CREDIT;
+    op.u.adjustdom.direction = SCHED_INFO_PUT;
+    op.u.adjustdom.u.credit = *sdom;
+
+    return do_dom0_op(xc_handle, &op);
+}
+
+int
+xc_csched_domain_get(
+    int xc_handle,
+    uint32_t domid,
+    struct csched_domain *sdom)
+{
+    DECLARE_DOM0_OP;
+    int err;
+
+    op.cmd = DOM0_ADJUSTDOM;    
+    op.u.adjustdom.domain = (domid_t) domid;
+    op.u.adjustdom.sched_id = SCHED_CREDIT;
+    op.u.adjustdom.direction = SCHED_INFO_GET;
+
+    err = do_dom0_op(xc_handle, &op);
+    if ( err == 0 )
+        *sdom = op.u.adjustdom.u.credit;
+
+    return err;
+}
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_linux.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenstore/xenstored_linux.c  Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,69 @@
+/******************************************************************************
+ *
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "xenstored_core.h"
+
+#define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
+#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
+
+evtchn_port_t xenbus_evtchn(void)
+{
+       int fd;
+       int rc;
+       evtchn_port_t port; 
+       char str[20]; 
+
+       fd = open(XENSTORED_PROC_PORT, O_RDONLY); 
+       if (fd == -1)
+               return -1;
+
+       rc = read(fd, str, sizeof(str)); 
+       if (rc == -1)
+       {
+               int err = errno;
+               close(fd);
+               errno = err;
+               return -1;
+       }
+
+       str[rc] = '\0'; 
+       port = strtoul(str, NULL, 0); 
+
+       close(fd); 
+       return port;
+}
+
+void *xenbus_map(void)
+{
+       int fd;
+       void *addr;
+
+       fd = open(XENSTORED_PROC_KVA, O_RDWR);
+       if (fd == -1)
+               return NULL;
+
+       addr = mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE,
+               MAP_SHARED, fd, 0);
+
+       if (addr == MAP_FAILED)
+               addr = NULL;
+
+       close(fd);
+
+       return addr;
+}
diff -r 9d52a66c7499 -r c073ebdbde8c xen/common/sched_credit.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/sched_credit.c Fri May 26 13:41:49 2006 -0600
@@ -0,0 +1,1233 @@
+/****************************************************************************
+ * (C) 2005-2006 - Emmanuel Ackaouy - XenSource Inc.
+ ****************************************************************************
+ *
+ *        File: common/csched_credit.c
+ *      Author: Emmanuel Ackaouy
+ *
+ * Description: Credit-based SMP CPU scheduler
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/domain.h>
+#include <xen/delay.h>
+#include <xen/event.h>
+#include <xen/time.h>
+#include <xen/perfc.h>
+#include <xen/sched-if.h>
+#include <xen/softirq.h>
+#include <asm/atomic.h>
+
+
+/*
+ * CSCHED_STATS
+ *
+ * Manage very basic counters and stats.
+ *
+ * Useful for debugging live systems. The stats are displayed
+ * with runq dumps ('r' on the Xen console).
+ */
+#define CSCHED_STATS
+
+
+/*
+ * Basic constants
+ */
+#define CSCHED_TICK             10      /* milliseconds */
+#define CSCHED_TSLICE           30      /* milliseconds */
+#define CSCHED_ACCT_NTICKS      3
+#define CSCHED_ACCT_PERIOD      (CSCHED_ACCT_NTICKS * CSCHED_TICK)
+#define CSCHED_DEFAULT_WEIGHT   256
+
+
+/*
+ * Priorities
+ */
+#define CSCHED_PRI_TS_UNDER     -1      /* time-share w/ credits */
+#define CSCHED_PRI_TS_OVER      -2      /* time-share w/o credits */
+#define CSCHED_PRI_IDLE         -64     /* idle */
+#define CSCHED_PRI_TS_PARKED    -65     /* time-share w/ capped credits */
+
+
+/*
+ * Useful macros
+ */
+#define CSCHED_PCPU(_c)     ((struct csched_pcpu 
*)schedule_data[_c].sched_priv)
+#define CSCHED_VCPU(_vcpu)  ((struct csched_vcpu *) (_vcpu)->sched_priv)
+#define CSCHED_DOM(_dom)    ((struct csched_dom *) (_dom)->sched_priv)
+#define RUNQ(_cpu)          (&(CSCHED_PCPU(_cpu)->runq))
+
+
+/*
+ * Stats
+ */
+#ifdef CSCHED_STATS
+
+#define CSCHED_STAT(_X)         (csched_priv.stats._X)
+#define CSCHED_STAT_DEFINE(_X)  uint32_t _X;
+#define CSCHED_STAT_PRINTK(_X)                                  \
+    do                                                          \
+    {                                                           \
+        printk("\t%-30s = %u\n", #_X, CSCHED_STAT(_X));  \
+    } while ( 0 );
+
+#define CSCHED_STATS_EXPAND_SCHED(_MACRO)   \
+    _MACRO(vcpu_alloc)                      \
+    _MACRO(vcpu_add)                        \
+    _MACRO(vcpu_sleep)                      \
+    _MACRO(vcpu_wake_running)               \
+    _MACRO(vcpu_wake_onrunq)                \
+    _MACRO(vcpu_wake_runnable)              \
+    _MACRO(vcpu_wake_not_runnable)          \
+    _MACRO(dom_free)                        \
+    _MACRO(schedule)                        \
+    _MACRO(tickle_local_idler)              \
+    _MACRO(tickle_local_over)               \
+    _MACRO(tickle_local_under)              \
+    _MACRO(tickle_local_other)              \
+    _MACRO(acct_run)                        \
+    _MACRO(acct_no_work)                    \
+    _MACRO(acct_balance)                    \
+    _MACRO(acct_reorder)                    \
+    _MACRO(acct_min_credit)                 \
+    _MACRO(acct_vcpu_active)                \
+    _MACRO(acct_vcpu_idle)                  \
+    _MACRO(acct_vcpu_credit_min)
+
+#define CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO)    \
+    _MACRO(vcpu_migrate)                                \
+    _MACRO(load_balance_idle)                           \
+    _MACRO(load_balance_over)                           \
+    _MACRO(load_balance_other)                          \
+    _MACRO(steal_trylock_failed)                        \
+    _MACRO(steal_peer_down)                             \
+    _MACRO(steal_peer_idle)                             \
+    _MACRO(steal_peer_running)                          \
+    _MACRO(steal_peer_pinned)                           \
+    _MACRO(tickle_idlers_none)                          \
+    _MACRO(tickle_idlers_some)
+
+#ifndef NDEBUG
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)  \
+    _MACRO(vcpu_check)
+#else
+#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+#endif
+
+#define CSCHED_STATS_EXPAND(_MACRO)                 \
+    CSCHED_STATS_EXPAND_SCHED(_MACRO)               \
+    CSCHED_STATS_EXPAND_SMP_LOAD_BALANCE(_MACRO)    \
+    CSCHED_STATS_EXPAND_CHECKS(_MACRO)
+
+#define CSCHED_STATS_RESET()                                        \
+    do                                                              \
+    {                                                               \
+        memset(&csched_priv.stats, 0, sizeof(csched_priv.stats));   \
+    } while ( 0 )
+
+#define CSCHED_STATS_DEFINE()                   \
+    struct                                      \
+    {                                           \
+        CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
+    } stats
+
+#define CSCHED_STATS_PRINTK()                   \
+    do                                          \
+    {                                           \
+        printk("stats:\n");                     \
+        CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
+    } while ( 0 )
+
+#define CSCHED_STAT_CRANK(_X)   (CSCHED_STAT(_X)++)
+
+#else /* CSCHED_STATS */
+
+#define CSCHED_STATS_RESET()    do {} while ( 0 )
+#define CSCHED_STATS_DEFINE()   do {} while ( 0 )
+#define CSCHED_STATS_PRINTK()   do {} while ( 0 )
+#define CSCHED_STAT_CRANK(_X)   do {} while ( 0 )
+
+#endif /* CSCHED_STATS */
+
+
+/*
+ * Physical CPU
+ */
+struct csched_pcpu {
+    struct list_head runq;
+    uint32_t runq_sort_last;
+};
+
+/*
+ * Virtual CPU
+ */
+struct csched_vcpu {
+    struct list_head runq_elem;
+    struct list_head active_vcpu_elem;
+    struct csched_dom *sdom;
+    struct vcpu *vcpu;
+    atomic_t credit;
+    int credit_last;
+    uint32_t credit_incr;
+    uint32_t state_active;
+    uint32_t state_idle;
+    int16_t pri;
+};
+
+/*
+ * Domain
+ */
+struct csched_dom {
+    struct list_head active_vcpu;
+    struct list_head active_sdom_elem;
+    struct domain *dom;
+    uint16_t active_vcpu_count;
+    uint16_t weight;
+    uint16_t cap;
+};
+
+/*
+ * System-wide private data
+ */
+struct csched_private {
+    spinlock_t lock;
+    struct list_head active_sdom;
+    uint32_t ncpus;
+    unsigned int master;
+    cpumask_t idlers;
+    uint32_t weight;
+    uint32_t credit;
+    int credit_balance;
+    uint32_t runq_sort;
+    CSCHED_STATS_DEFINE();
+};
+
+
+/*
+ * Global variables
+ */
+static struct csched_private csched_priv;
+
+
+
+static inline int
+__vcpu_on_runq(struct csched_vcpu *svc)
+{
+    return !list_empty(&svc->runq_elem);
+}
+
+static inline struct csched_vcpu *
+__runq_elem(struct list_head *elem)
+{
+    return list_entry(elem, struct csched_vcpu, runq_elem);
+}
+
+static inline void
+__runq_insert(unsigned int cpu, struct csched_vcpu *svc)
+{
+    const struct list_head * const runq = RUNQ(cpu);
+    struct list_head *iter;
+
+    BUG_ON( __vcpu_on_runq(svc) );
+    BUG_ON( cpu != svc->vcpu->processor );
+
+    list_for_each( iter, runq )
+    {
+        const struct csched_vcpu * const iter_svc = __runq_elem(iter);
+        if ( svc->pri > iter_svc->pri )
+            break;
+    }
+
+    list_add_tail(&svc->runq_elem, iter);
+}
+
+static inline void
+__runq_remove(struct csched_vcpu *svc)
+{
+    BUG_ON( !__vcpu_on_runq(svc) );
+    list_del_init(&svc->runq_elem);
+}
+
+static inline void
+__runq_tickle(unsigned int cpu, struct csched_vcpu *new)
+{
+    struct csched_vcpu * const cur = CSCHED_VCPU(schedule_data[cpu].curr);
+    cpumask_t mask;
+
+    ASSERT(cur);
+    cpus_clear(mask);
+
+    /* If strictly higher priority than current VCPU, signal the CPU */
+    if ( new->pri > cur->pri )
+    {
+        if ( cur->pri == CSCHED_PRI_IDLE )
+            CSCHED_STAT_CRANK(tickle_local_idler);
+        else if ( cur->pri == CSCHED_PRI_TS_OVER )
+            CSCHED_STAT_CRANK(tickle_local_over);
+        else if ( cur->pri == CSCHED_PRI_TS_UNDER )
+            CSCHED_STAT_CRANK(tickle_local_under);
+        else
+            CSCHED_STAT_CRANK(tickle_local_other);
+
+        cpu_set(cpu, mask);
+    }
+
+    /*
+     * If this CPU has at least two runnable VCPUs, we tickle any idlers to
+     * let them know there is runnable work in the system...
+     */
+    if ( cur->pri > CSCHED_PRI_IDLE )
+    {
+        if ( cpus_empty(csched_priv.idlers) )
+        {
+            CSCHED_STAT_CRANK(tickle_idlers_none);
+        }
+        else
+        {
+            CSCHED_STAT_CRANK(tickle_idlers_some);
+            cpus_or(mask, mask, csched_priv.idlers);
+        }
+    }
+
+    /* Send scheduler interrupts to designated CPUs */
+    if ( !cpus_empty(mask) )
+        cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
+}
+
+static void
+csched_pcpu_init(int cpu)
+{
+    struct csched_pcpu *spc;
+    unsigned long flags;
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    /* Initialize/update system-wide config */
+    csched_priv.credit += CSCHED_ACCT_PERIOD;
+    if ( csched_priv.ncpus <= cpu )
+        csched_priv.ncpus = cpu + 1;
+    if ( csched_priv.master >= csched_priv.ncpus )
+        csched_priv.master = cpu;
+
+    /* Allocate per-PCPU info */
+    spc = xmalloc(struct csched_pcpu);
+    BUG_ON( spc == NULL );
+    INIT_LIST_HEAD(&spc->runq);
+    spc->runq_sort_last = csched_priv.runq_sort;
+    schedule_data[cpu].sched_priv = spc;
+
+    /* Start off idling... */
+    BUG_ON( !is_idle_vcpu(schedule_data[cpu].curr) );
+    cpu_set(cpu, csched_priv.idlers);
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+}
+
+#ifndef NDEBUG
+static inline void
+__csched_vcpu_check(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    struct csched_dom * const sdom = svc->sdom;
+
+    BUG_ON( svc->vcpu != vc );
+    BUG_ON( sdom != CSCHED_DOM(vc->domain) );
+    if ( sdom )
+    {
+        BUG_ON( is_idle_vcpu(vc) );
+        BUG_ON( sdom->dom != vc->domain );
+    }
+    else
+    {
+        BUG_ON( !is_idle_vcpu(vc) );
+    }
+
+    CSCHED_STAT_CRANK(vcpu_check);
+}
+#define CSCHED_VCPU_CHECK(_vc)  (__csched_vcpu_check(_vc))
+#else
+#define CSCHED_VCPU_CHECK(_vc)
+#endif
+
+static inline int
+__csched_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
+{
+    /*
+     * Don't pick up work that's in the peer's scheduling tail. Also only pick
+     * up work that's allowed to run on our CPU.
+     */
+    if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
+    {
+        CSCHED_STAT_CRANK(steal_peer_running);
+        return 0;
+    }
+
+    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
+    {
+        CSCHED_STAT_CRANK(steal_peer_pinned);
+        return 0;
+    }
+
+    return 1;
+}
+
+static void
+csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
+{
+    struct csched_dom * const sdom = svc->sdom;
+    unsigned long flags;
+
+    /* Update credits */
+    atomic_sub(credit_dec, &svc->credit);
+
+    /* Put this VCPU and domain back on the active list if it was idling */
+    if ( list_empty(&svc->active_vcpu_elem) )
+    {
+        spin_lock_irqsave(&csched_priv.lock, flags);
+
+        if ( list_empty(&svc->active_vcpu_elem) )
+        {
+            CSCHED_STAT_CRANK(acct_vcpu_active);
+            svc->state_active++;
+
+            sdom->active_vcpu_count++;
+            list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
+            if ( list_empty(&sdom->active_sdom_elem) )
+            {
+                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+                csched_priv.weight += sdom->weight;
+            }
+        }
+
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+}
+
+static inline void
+__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    BUG_ON( list_empty(&svc->active_vcpu_elem) );
+
+    CSCHED_STAT_CRANK(acct_vcpu_idle);
+    svc->state_idle++;
+
+    sdom->active_vcpu_count--;
+    list_del_init(&svc->active_vcpu_elem);
+    if ( list_empty(&sdom->active_vcpu) )
+    {
+        BUG_ON( csched_priv.weight < sdom->weight );
+        list_del_init(&sdom->active_sdom_elem);
+        csched_priv.weight -= sdom->weight;
+    }
+
+    atomic_set(&svc->credit, 0);
+}
+
+static int
+csched_vcpu_alloc(struct vcpu *vc)
+{
+    struct domain * const dom = vc->domain;
+    struct csched_dom *sdom;
+    struct csched_vcpu *svc;
+    int16_t pri;
+
+    CSCHED_STAT_CRANK(vcpu_alloc);
+
+    /* Allocate, if appropriate, per-domain info */
+    if ( is_idle_vcpu(vc) )
+    {
+        sdom = NULL;
+        pri = CSCHED_PRI_IDLE;
+    }
+    else if ( CSCHED_DOM(dom) )
+    {
+        sdom = CSCHED_DOM(dom);
+        pri = CSCHED_PRI_TS_UNDER;
+    }
+    else 
+    {
+        sdom = xmalloc(struct csched_dom);
+        if ( !sdom )
+            return -1;
+
+        /* Initialize credit and weight */
+        INIT_LIST_HEAD(&sdom->active_vcpu);
+        sdom->active_vcpu_count = 0;
+        INIT_LIST_HEAD(&sdom->active_sdom_elem);
+        sdom->dom = dom;
+        sdom->weight = CSCHED_DEFAULT_WEIGHT;
+        sdom->cap = 0U;
+        dom->sched_priv = sdom;
+        pri = CSCHED_PRI_TS_UNDER;
+    }
+
+    /* Allocate per-VCPU info */
+    svc = xmalloc(struct csched_vcpu);
+    if ( !svc )
+        return -1;
+
+    INIT_LIST_HEAD(&svc->runq_elem);
+    INIT_LIST_HEAD(&svc->active_vcpu_elem);
+    svc->sdom = sdom;
+    svc->vcpu = vc;
+    atomic_set(&svc->credit, 0);
+    svc->credit_last = 0;
+    svc->credit_incr = 0U;
+    svc->state_active = 0U;
+    svc->state_idle = 0U;
+    svc->pri = pri;
+    vc->sched_priv = svc;
+
+    CSCHED_VCPU_CHECK(vc);
+
+    /* Attach fair-share VCPUs to the accounting list */
+    if ( likely(sdom != NULL) )
+        csched_vcpu_acct(svc, 0);
+
+    return 0;
+}
+
+static void
+csched_vcpu_add(struct vcpu *vc) 
+{
+    CSCHED_STAT_CRANK(vcpu_add);
+
+    /* Allocate per-PCPU info */
+    if ( unlikely(!CSCHED_PCPU(vc->processor)) )
+        csched_pcpu_init(vc->processor);
+
+    CSCHED_VCPU_CHECK(vc);
+}
+
+static void
+csched_vcpu_free(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    struct csched_dom * const sdom = svc->sdom;
+    unsigned long flags;
+
+    BUG_ON( sdom == NULL );
+    BUG_ON( !list_empty(&svc->runq_elem) );
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    if ( !list_empty(&svc->active_vcpu_elem) )
+        __csched_vcpu_acct_idle_locked(svc);
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    xfree(svc);
+}
+
+static void
+csched_vcpu_sleep(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+
+    CSCHED_STAT_CRANK(vcpu_sleep);
+
+    BUG_ON( is_idle_vcpu(vc) );
+
+    if ( schedule_data[vc->processor].curr == vc )
+        cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
+    else if ( __vcpu_on_runq(svc) )
+        __runq_remove(svc);
+}
+
+static void
+csched_vcpu_wake(struct vcpu *vc)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    const unsigned int cpu = vc->processor;
+
+    BUG_ON( is_idle_vcpu(vc) );
+
+    if ( unlikely(schedule_data[cpu].curr == vc) )
+    {
+        CSCHED_STAT_CRANK(vcpu_wake_running);
+        return;
+    }
+    if ( unlikely(__vcpu_on_runq(svc)) )
+    {
+        CSCHED_STAT_CRANK(vcpu_wake_onrunq);
+        return;
+    }
+
+    if ( likely(vcpu_runnable(vc)) )
+        CSCHED_STAT_CRANK(vcpu_wake_runnable);
+    else
+        CSCHED_STAT_CRANK(vcpu_wake_not_runnable);
+
+    /* Put the VCPU on the runq and tickle CPUs */
+    __runq_insert(cpu, svc);
+    __runq_tickle(cpu, svc);
+}
+
+static int
+csched_vcpu_set_affinity(struct vcpu *vc, cpumask_t *affinity)
+{
+    unsigned long flags;
+    int lcpu;
+
+    if ( vc == current )
+    {
+        /* No locking needed but also can't move on the spot... */
+        if ( !cpu_isset(vc->processor, *affinity) )
+            return -EBUSY;
+
+        vc->cpu_affinity = *affinity;
+    }
+    else
+    {
+        /* Pause, modify, and unpause. */
+        vcpu_pause(vc);
+
+        vc->cpu_affinity = *affinity;
+        if ( !cpu_isset(vc->processor, vc->cpu_affinity) )
+        {
+            /*
+             * We must grab the scheduler lock for the CPU currently owning
+             * this VCPU before changing its ownership.
+             */
+            vcpu_schedule_lock_irqsave(vc, flags);
+            lcpu = vc->processor;
+
+            vc->processor = first_cpu(vc->cpu_affinity);
+
+            spin_unlock_irqrestore(&schedule_data[lcpu].schedule_lock, flags);
+        }
+
+        vcpu_unpause(vc);
+    }
+
+    return 0;
+}
+
+static int
+csched_dom_cntl(
+    struct domain *d,
+    struct sched_adjdom_cmd *cmd)
+{
+    struct csched_dom * const sdom = CSCHED_DOM(d);
+    unsigned long flags;
+
+    if ( cmd->direction == SCHED_INFO_GET )
+    {
+        cmd->u.credit.weight = sdom->weight;
+        cmd->u.credit.cap = sdom->cap;
+    }
+    else
+    {
+        ASSERT( cmd->direction == SCHED_INFO_PUT );
+
+        spin_lock_irqsave(&csched_priv.lock, flags);
+
+        if ( cmd->u.credit.weight != 0 )
+        {
+            csched_priv.weight -= sdom->weight;
+            sdom->weight = cmd->u.credit.weight;
+            csched_priv.weight += sdom->weight;
+        }
+
+        if ( cmd->u.credit.cap != (uint16_t)~0U )
+            sdom->cap = cmd->u.credit.cap;
+
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+    }
+
+    return 0;
+}
+
+static void
+csched_dom_free(struct domain *dom)
+{
+    struct csched_dom * const sdom = CSCHED_DOM(dom);
+    int i;
+
+    CSCHED_STAT_CRANK(dom_free);
+
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+    {
+        if ( dom->vcpu[i] )
+            csched_vcpu_free(dom->vcpu[i]);
+    }
+
+    xfree(sdom);
+}
+
+/*
+ * This is a O(n) optimized sort of the runq.
+ *
+ * Time-share VCPUs can only be one of two priorities, UNDER or OVER. We walk
+ * through the runq and move up any UNDERs that are preceded by OVERS. We
+ * remember the last UNDER to make the move up operation O(1).
+ */
+static void
+csched_runq_sort(unsigned int cpu)
+{
+    struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
+    struct list_head *runq, *elem, *next, *last_under;
+    struct csched_vcpu *svc_elem;
+    unsigned long flags;
+    int sort_epoch;
+
+    sort_epoch = csched_priv.runq_sort;
+    if ( sort_epoch == spc->runq_sort_last )
+        return;
+
+    spc->runq_sort_last = sort_epoch;
+
+    spin_lock_irqsave(&schedule_data[cpu].schedule_lock, flags);
+
+    runq = &spc->runq;
+    elem = runq->next;
+    last_under = runq;
+
+    while ( elem != runq )
+    {
+        next = elem->next;
+        svc_elem = __runq_elem(elem);
+
+        if ( svc_elem->pri == CSCHED_PRI_TS_UNDER )
+        {
+            /* does elem need to move up the runq? */
+            if ( elem->prev != last_under )
+            {
+                list_del(elem);
+                list_add(elem, last_under);
+            }
+            last_under = elem;
+        }
+
+        elem = next;
+    }
+
+    spin_unlock_irqrestore(&schedule_data[cpu].schedule_lock, flags);
+}
+
+static void
+csched_acct(void)
+{
+    unsigned long flags;
+    struct list_head *iter_vcpu, *next_vcpu;
+    struct list_head *iter_sdom, *next_sdom;
+    struct csched_vcpu *svc;
+    struct csched_dom *sdom;
+    uint32_t credit_total;
+    uint32_t weight_total;
+    uint32_t weight_left;
+    uint32_t credit_fair;
+    uint32_t credit_peak;
+    int credit_balance;
+    int credit_xtra;
+    int credit;
+
+
+    spin_lock_irqsave(&csched_priv.lock, flags);
+
+    weight_total = csched_priv.weight;
+    credit_total = csched_priv.credit;
+
+    /* Converge balance towards 0 when it drops negative */
+    if ( csched_priv.credit_balance < 0 )
+    {
+        credit_total -= csched_priv.credit_balance;
+        CSCHED_STAT_CRANK(acct_balance);
+    }
+
+    if ( unlikely(weight_total == 0) )
+    {
+        csched_priv.credit_balance = 0;
+        spin_unlock_irqrestore(&csched_priv.lock, flags);
+        CSCHED_STAT_CRANK(acct_no_work);
+        return;
+    }
+
+    CSCHED_STAT_CRANK(acct_run);
+
+    weight_left = weight_total;
+    credit_balance = 0;
+    credit_xtra = 0;
+
+    list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+    {
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        BUG_ON( is_idle_domain(sdom->dom) );
+        BUG_ON( sdom->active_vcpu_count == 0 );
+        BUG_ON( sdom->weight == 0 );
+        BUG_ON( sdom->weight > weight_left );
+
+        weight_left -= sdom->weight;
+
+        /*
+         * A domain's fair share is computed using its weight in competition
+         * with that of all other active domains.
+         *
+         * At most, a domain can use credits to run all its active VCPUs
+         * for one full accounting period. We allow a domain to earn more
+         * only when the system-wide credit balance is negative.
+         */
+        credit_peak = sdom->active_vcpu_count * CSCHED_ACCT_PERIOD;
+        if ( csched_priv.credit_balance < 0 )
+        {
+            credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+                             (weight_total - 1)
+                           ) / weight_total;
+        }
+        if ( sdom->cap != 0U )
+        {
+            uint32_t credit_cap = ((sdom->cap * CSCHED_ACCT_PERIOD) + 99) / 
100;
+            if ( credit_cap < credit_peak )
+                credit_peak = credit_cap;
+        }
+
+        credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1)
+                      ) / weight_total;
+
+        if ( credit_fair < credit_peak )
+        {
+            credit_xtra = 1;
+        }
+        else
+        {
+            if ( weight_left != 0U )
+            {
+                /* Give other domains a chance at unused credits */
+                credit_total += ( ( ( credit_fair - credit_peak
+                                    ) * weight_total
+                                  ) + ( weight_left - 1 )
+                                ) / weight_left;
+            }
+
+            if ( credit_xtra )
+            {
+                /*
+                 * Lazily keep domains with extra credits at the head of
+                 * the queue to give others a chance at them in future
+                 * accounting periods.
+                 */
+                CSCHED_STAT_CRANK(acct_reorder);
+                list_del(&sdom->active_sdom_elem);
+                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+            }
+
+            credit_fair = credit_peak;
+        }
+
+        /* Compute fair share per VCPU */
+        credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 )
+                      ) / sdom->active_vcpu_count;
+
+
+        list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu )
+        {
+            svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem);
+            BUG_ON( sdom != svc->sdom );
+
+            /* Increment credit */
+            atomic_add(credit_fair, &svc->credit);
+            credit = atomic_read(&svc->credit);
+
+            /*
+             * Recompute priority or, if VCPU is idling, remove it from
+             * the active list.
+             */
+            if ( credit < 0 )
+            {
+                if ( sdom->cap == 0U )
+                    svc->pri = CSCHED_PRI_TS_OVER;
+                else
+                    svc->pri = CSCHED_PRI_TS_PARKED;
+
+                if ( credit < -CSCHED_TSLICE )
+                {
+                    CSCHED_STAT_CRANK(acct_min_credit);
+                    credit = -CSCHED_TSLICE;
+                    atomic_set(&svc->credit, credit);
+                }
+            }
+            else
+            {
+                svc->pri = CSCHED_PRI_TS_UNDER;
+
+                if ( credit > CSCHED_TSLICE )
+                    __csched_vcpu_acct_idle_locked(svc);
+            }
+
+            svc->credit_last = credit;
+            svc->credit_incr = credit_fair;
+            credit_balance += credit;
+        }
+    }
+
+    csched_priv.credit_balance = credit_balance;
+
+    spin_unlock_irqrestore(&csched_priv.lock, flags);
+
+    /* Inform each CPU that its runq needs to be sorted */
+    csched_priv.runq_sort++;
+}
+
+static void
+csched_tick(unsigned int cpu)
+{
+    struct csched_vcpu * const svc = CSCHED_VCPU(current);
+    struct csched_dom * const sdom = svc->sdom;
+
+    /*
+     * Accounting for running VCPU
+     *
+     * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
+     */
+    if ( likely(sdom != NULL) )
+    {
+        csched_vcpu_acct(svc, CSCHED_TICK);
+    }
+
+    /*
+     * Accounting duty
+     *
+     * Note: Currently, this is always done by the master boot CPU. Eventually,
+     * we could distribute or at the very least cycle the duty.
+     */
+    if ( (csched_priv.master == cpu) &&
+         (schedule_data[cpu].tick % CSCHED_ACCT_NTICKS) == 0 )
+    {
+        csched_acct();
+    }
+
+    /*
+     * Check if runq needs to be sorted
+     *
+     * Every physical CPU resorts the runq after the accounting master has
+     * modified priorities. This is a special O(n) sort and runs at most
+     * once per accounting period (currently 30 milliseconds).
+     */
+    csched_runq_sort(cpu);
+}
+
+static struct csched_vcpu *
+csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
+{
+    struct list_head *iter;
+    struct csched_vcpu *speer;
+    struct vcpu *vc;
+
+    list_for_each( iter, &spc->runq )
+    {
+        speer = __runq_elem(iter);
+
+        /*
+         * If next available VCPU here is not of higher priority than ours,
+         * this PCPU is useless to us.
+         */
+        if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
+        {
+            CSCHED_STAT_CRANK(steal_peer_idle);
+            break;
+        }
+
+        /* Is this VCPU is runnable on our PCPU? */
+        vc = speer->vcpu;
+        BUG_ON( is_idle_vcpu(vc) );
+
+        if ( __csched_vcpu_is_stealable(cpu, vc) )
+        {
+            /* We got a candidate. Grab it! */
+            __runq_remove(speer);
+            vc->processor = cpu;
+
+            return speer;
+        }
+    }
+
+    return NULL;
+}
+
+static struct csched_vcpu *
+csched_load_balance(int cpu, struct csched_vcpu *snext)
+{
+    struct csched_pcpu *spc;
+    struct csched_vcpu *speer;
+    int peer_cpu;
+
+    if ( snext->pri == CSCHED_PRI_IDLE )
+        CSCHED_STAT_CRANK(load_balance_idle);
+    else if ( snext->pri == CSCHED_PRI_TS_OVER )
+        CSCHED_STAT_CRANK(load_balance_over);
+    else
+        CSCHED_STAT_CRANK(load_balance_other);
+
+    peer_cpu = cpu;
+    BUG_ON( peer_cpu != snext->vcpu->processor );
+
+    while ( 1 )
+    {
+        /* For each PCPU in the system starting with our neighbour... */
+        peer_cpu = (peer_cpu + 1) % csched_priv.ncpus;
+        if ( peer_cpu == cpu )
+            break;
+
+        BUG_ON( peer_cpu >= csched_priv.ncpus );
+        BUG_ON( peer_cpu == cpu );
+
+        /*
+         * Get ahold of the scheduler lock for this peer CPU.
+         *
+         * Note: We don't spin on this lock but simply try it. Spinning could
+         * cause a deadlock if the peer CPU is also load balancing and trying
+         * to lock this CPU.
+         */
+        if ( spin_trylock(&schedule_data[peer_cpu].schedule_lock) )
+        {
+
+            spc = CSCHED_PCPU(peer_cpu);
+            if ( unlikely(spc == NULL) )
+            {
+                CSCHED_STAT_CRANK(steal_peer_down);
+                speer = NULL;
+            }
+            else
+            {
+                speer = csched_runq_steal(spc, cpu, snext->pri);
+            }
+
+            spin_unlock(&schedule_data[peer_cpu].schedule_lock);
+
+            /* Got one! */
+            if ( speer )
+            {
+                CSCHED_STAT_CRANK(vcpu_migrate);
+                return speer;
+            }
+        }
+        else
+        {
+            CSCHED_STAT_CRANK(steal_trylock_failed);
+        }
+    }
+
+
+    /* Failed to find more important work */
+    __runq_remove(snext);
+    return snext;
+}
+
+/*
+ * This function is in the critical path. It is designed to be simple and
+ * fast for the common case.
+ */
+static struct task_slice
+csched_schedule(s_time_t now)
+{
+    const int cpu = smp_processor_id();
+    struct list_head * const runq = RUNQ(cpu);
+    struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+    struct csched_vcpu *snext;
+    struct task_slice ret;
+
+    CSCHED_STAT_CRANK(schedule);
+    CSCHED_VCPU_CHECK(current);
+
+    /*
+     * Select next runnable local VCPU (ie top of local runq)
+     */
+    if ( vcpu_runnable(current) )
+        __runq_insert(cpu, scurr);
+    else
+        BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
+
+    snext = __runq_elem(runq->next);
+
+    /*
+     * SMP Load balance:
+     *
+     * If the next highest priority local runnable VCPU has already eaten
+     * through its credits, look on other PCPUs to see if we have more
+     * urgent work... If not, csched_load_balance() will return snext, but
+     * already removed from the runq.
+     */
+    if ( snext->pri > CSCHED_PRI_TS_OVER )
+        __runq_remove(snext);
+    else
+        snext = csched_load_balance(cpu, snext);
+
+    /*
+     * Update idlers mask if necessary. When we're idling, other CPUs
+     * will tickle us when they get extra work.
+     */
+    if ( snext->pri == CSCHED_PRI_IDLE )
+    {
+        if ( !cpu_isset(cpu, csched_priv.idlers) )
+            cpu_set(cpu, csched_priv.idlers);
+    }
+    else if ( cpu_isset(cpu, csched_priv.idlers) )
+    {
+        cpu_clear(cpu, csched_priv.idlers);
+    }
+
+    /*
+     * Return task to run next...
+     */
+    ret.time = MILLISECS(CSCHED_TSLICE);
+    ret.task = snext->vcpu;
+
+    CSCHED_VCPU_CHECK(ret.task);
+    BUG_ON( !vcpu_runnable(ret.task) );
+
+    return ret;
+}
+
+static void
+csched_dump_vcpu(struct csched_vcpu *svc)
+{
+    struct csched_dom * const sdom = svc->sdom;
+
+    printk("[%i.%i] pri=%i cpu=%i",
+            svc->vcpu->domain->domain_id,
+            svc->vcpu->vcpu_id,
+            svc->pri,
+            svc->vcpu->processor);
+
+    if ( sdom )
+    {
+        printk(" credit=%i (%d+%u) {a=%u i=%u w=%u}",
+            atomic_read(&svc->credit),
+            svc->credit_last,
+            svc->credit_incr,
+            svc->state_active,
+            svc->state_idle,
+            sdom->weight);
+    }
+
+    printk("\n");
+}
+
+static void
+csched_dump_pcpu(int cpu)
+{
+    struct list_head *runq, *iter;
+    struct csched_pcpu *spc;
+    struct csched_vcpu *svc;
+    int loop;
+
+    spc = CSCHED_PCPU(cpu);
+    runq = &spc->runq;
+
+    printk(" tick=%lu, sort=%d\n",
+            schedule_data[cpu].tick,
+            spc->runq_sort_last);
+
+    /* current VCPU */
+    svc = CSCHED_VCPU(schedule_data[cpu].curr);
+    if ( svc )
+    {
+        printk("\trun: ");
+        csched_dump_vcpu(svc);
+    }
+
+    loop = 0;
+    list_for_each( iter, runq )
+    {
+        svc = __runq_elem(iter);
+        if ( svc )
+        {
+            printk("\t%3d: ", ++loop);
+            csched_dump_vcpu(svc);
+        }
+    }
+}
+
+static void
+csched_dump(void)
+{
+    struct list_head *iter_sdom, *iter_svc;
+    int loop;
+
+    printk("info:\n"
+           "\tncpus              = %u\n"
+           "\tmaster             = %u\n"
+           "\tcredit             = %u\n"
+           "\tcredit balance     = %d\n"
+           "\tweight             = %u\n"
+           "\trunq_sort          = %u\n"
+           "\ttick               = %dms\n"
+           "\ttslice             = %dms\n"
+           "\taccounting period  = %dms\n"
+           "\tdefault-weight     = %d\n",
+           csched_priv.ncpus,
+           csched_priv.master,
+           csched_priv.credit,
+           csched_priv.credit_balance,
+           csched_priv.weight,
+           csched_priv.runq_sort,
+           CSCHED_TICK,
+           CSCHED_TSLICE,
+           CSCHED_ACCT_PERIOD,
+           CSCHED_DEFAULT_WEIGHT);
+
+    printk("idlers: 0x%lx\n", csched_priv.idlers.bits[0]);
+
+    CSCHED_STATS_PRINTK();
+
+    printk("active vcpus:\n");
+    loop = 0;
+    list_for_each( iter_sdom, &csched_priv.active_sdom )
+    {
+        struct csched_dom *sdom;
+        sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
+
+        list_for_each( iter_svc, &sdom->active_vcpu )
+        {
+            struct csched_vcpu *svc;
+            svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem);
+
+            printk("\t%3d: ", ++loop);
+            csched_dump_vcpu(svc);
+        }
+    }
+}
+
+static void
+csched_init(void)
+{
+    spin_lock_init(&csched_priv.lock);
+    INIT_LIST_HEAD(&csched_priv.active_sdom);
+    csched_priv.ncpus = 0;
+    csched_priv.master = UINT_MAX;
+    cpus_clear(csched_priv.idlers);
+    csched_priv.weight = 0U;
+    csched_priv.credit = 0U;
+    csched_priv.credit_balance = 0;
+    csched_priv.runq_sort = 0U;
+    CSCHED_STATS_RESET();
+}
+
+
+struct scheduler sched_credit_def = {
+    .name           = "SMP Credit Scheduler",
+    .opt_name       = "credit",
+    .sched_id       = SCHED_CREDIT,
+
+    .alloc_task     = csched_vcpu_alloc,
+    .add_task       = csched_vcpu_add,
+    .sleep          = csched_vcpu_sleep,
+    .wake           = csched_vcpu_wake,
+    .set_affinity   = csched_vcpu_set_affinity,
+
+    .adjdom         = csched_dom_cntl,
+    .free_task      = csched_dom_free,
+
+    .tick           = csched_tick,
+    .do_schedule    = csched_schedule,
+
+    .dump_cpu_state = csched_dump_pcpu,
+    .dump_settings  = csched_dump,
+    .init           = csched_init,
+};
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/drivers/xen/net_driver_util.c
--- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c        Thu May 25 
15:59:18 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/if_ether.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <xen/net_driver_util.h>
-
-
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
-{
-       char *s;
-       int i;
-       char *e;
-       char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL);
-       if (IS_ERR(macstr))
-               return PTR_ERR(macstr);
-       s = macstr;
-       for (i = 0; i < ETH_ALEN; i++) {
-               mac[i] = simple_strtoul(s, &e, 16);
-               if (s == e || (e[0] != ':' && e[0] != 0)) {
-                       kfree(macstr);
-                       return -ENOENT;
-               }
-               s = &e[1];
-       }
-       kfree(macstr);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(xen_net_read_mac);
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/include/asm-x86_64/e820.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/e820.h    Thu May 25 15:59:18 
2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-/*
- * structures and definitions for the int 15, ax=e820 memory map
- * scheme.
- *
- * In a nutshell, setup.S populates a scratch table in the
- * empty_zero_block that contains a list of usable address/size
- * duples.  setup.c, this information is transferred into the e820map,
- * and in init.c/numa.c, that new information is used to mark pages
- * reserved or not.
- */
-#ifndef __E820_HEADER
-#define __E820_HEADER
-
-#include <linux/mmzone.h>
-
-#define E820MAP        0x2d0           /* our map */
-#define E820MAX        128             /* number of entries in E820MAP */
-#define E820NR 0x1e8           /* # entries in E820MAP */
-
-#define E820_RAM       1
-#define E820_RESERVED  2
-#define E820_ACPI      3 /* usable as RAM once ACPI tables have been read */
-#define E820_NVS       4
-
-#define HIGH_MEMORY    (1024*1024)
-
-#define LOWMEMSIZE()   (0x9f000)
-
-#ifndef __ASSEMBLY__
-struct e820entry {
-       u64 addr;       /* start of memory segment */
-       u64 size;       /* size of memory segment */
-       u32 type;       /* type of memory segment */
-} __attribute__((packed));
-
-struct e820map {
-    int nr_map;
-       struct e820entry map[E820MAX];
-};
-
-extern unsigned long find_e820_area(unsigned long start, unsigned long end, 
-                                   unsigned size);
-extern void add_memory_region(unsigned long start, unsigned long size, 
-                             int type);
-extern void setup_memory_region(void);
-extern void contig_e820_setup(void); 
-extern unsigned long e820_end_of_ram(void);
-extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
-extern void e820_print_map(char *who);
-extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
-
-extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned 
long end);
-extern void e820_setup_gap(struct e820entry *e820, int nr_map);
-extern unsigned long e820_hole_size(unsigned long start_pfn,
-                                   unsigned long end_pfn);
-
-extern void __init parse_memopt(char *p, char **end);
-extern void __init parse_memmapopt(char *p, char **end);
-
-extern struct e820map e820;
-#endif/*!__ASSEMBLY__*/
-
-#endif/*__E820_HEADER*/
diff -r 9d52a66c7499 -r c073ebdbde8c 
linux-2.6-xen-sparse/include/xen/net_driver_util.h
--- a/linux-2.6-xen-sparse/include/xen/net_driver_util.h        Thu May 25 
15:59:18 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-/*****************************************************************************
- *
- * Utility functions for Xen network devices.
- *
- * Copyright (c) 2005 XenSource Ltd.
- * 
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject
- * to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _ASM_XEN_NET_DRIVER_UTIL_H
-#define _ASM_XEN_NET_DRIVER_UTIL_H
-
-
-#include <xen/xenbus.h>
-
-
-/**
- * Read the 'mac' node at the given device's node in the store, and parse that
- * as colon-separated octets, placing result the given mac array.  mac must be
- * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h).
- * Return 0 on success, or -errno on error.
- */
-int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
-
-
-#endif /* _ASM_XEN_NET_DRIVER_UTIL_H */
diff -r 9d52a66c7499 -r c073ebdbde8c tools/xenstore/xenstored_proc.h
--- a/tools/xenstore/xenstored_proc.h   Thu May 25 15:59:18 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-/* 
-    Copyright (C) 2005 XenSource Ltd
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-*/
-
-#ifndef _XENSTORED_PROC_H
-#define _XENSTORED_PROC_H
-
-#define XENSTORED_PROC_KVA  "/proc/xen/xsd_kva"
-#define XENSTORED_PROC_PORT "/proc/xen/xsd_port"
-
-
-#endif /* _XENSTORED_PROC_H */

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel

<Prev in Thread] Current Thread [Next in Thread>