WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 16 Jun 2006 18:41:46 +0000
Delivery-date: Fri, 16 Jun 2006 11:48:21 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID fbc0e953732ef78292d9e87ff6dd7f3432ddd014
# Parent  7f67c15e2c917dc52a3f8acc0fdb79a63b894b15
# Parent  73c73fb8875c331b8c0e6ed0317c8d71b83cdda2
merge with xen-unstable.hg
---
 tools/security/python/xensec_tools/acm_getdecision                  |   55 
 extras/mini-os/events.c                                             |   12 
 extras/mini-os/include/xenbus.h                                     |   28 
 extras/mini-os/kernel.c                                             |   23 
 extras/mini-os/xenbus/xenbus.c                                      |  202 +
 linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c                      |    5 
 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c                        |    2 
 linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c                     |  142 
 linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c                  |    6 
 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c                |   27 
 linux-2.6-xen-sparse/drivers/xen/core/gnttab.c                      |   15 
 linux-2.6-xen-sparse/drivers/xen/core/skbuff.c                      |   11 
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c                |   68 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h            |    4 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h    |   11 
 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h    |    4 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h          |    4 
 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h  |   12 
 linux-2.6-xen-sparse/include/xen/gnttab.h                           |    1 
 linux-2.6-xen-sparse/kernel/fork.c                                  | 1619 ++++++++++
 tools/console/daemon/io.c                                           |   66 
 tools/console/daemon/utils.c                                        |   26 
 tools/console/daemon/utils.h                                        |    3 
 tools/ioemu/sdl.c                                                   |    9 
 tools/ioemu/target-i386-dm/helper2.c                                |   32 
 tools/libxc/xc_elf.h                                                |    3 
 tools/libxc/xc_linux.c                                              |  118 
 tools/libxc/xc_linux_restore.c                                      |   22 
 tools/libxc/xc_load_elf.c                                           |   32 
 tools/libxc/xenctrl.h                                               |   54 
 tools/python/xen/util/security.py                                   |   41 
 tools/python/xen/xm/addlabel.py                                     |    2 
 tools/python/xen/xm/create.py                                       |    2 
 tools/python/xen/xm/main.py                                         |    3 
 tools/security/Makefile                                             |    2 
 tools/security/python/xensec_gen/cgi-bin/policy.cgi                 |    2 
 tools/security/secpol_xml2bin.c                                     |    6 
 tools/xenmon/xenbaked.c                                             |   55 
 tools/xenstat/libxenstat/src/xenstat.c                              |   23 
 tools/xenstore/fake_libxc.c                                         |    4 
 tools/xenstore/xenstored_core.c                                     |   13 
 tools/xenstore/xenstored_domain.c                                   |   79 
 tools/xm-test/tests/block-integrity/01_block_device_read_verify.py  |    4 
 tools/xm-test/tests/block-integrity/02_block_device_write_verify.py |    4 
 xen/arch/x86/traps.c                                                |    2 
 xen/common/event_channel.c                                          |   14 
 xen/include/asm-ia64/event.h                                        |    8 
 xen/include/asm-x86/event.h                                         |    8 
 xen/include/xen/elf.h                                               |    2 
 49 files changed, 2417 insertions(+), 473 deletions(-)

diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/events.c
--- a/extras/mini-os/events.c   Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/events.c   Thu Jun 15 10:23:57 2006 -0600
@@ -35,24 +35,29 @@ int do_event(u32 port, struct pt_regs *r
     ev_action_t  *action;
     if (port >= NR_EVS) {
         printk("Port number too large: %d\n", port);
-        return 0;
+        goto out;
     }
 
     action = &ev_actions[port];
     action->count++;
 
     if (!action->handler)
+    {
+        printk("Spurious event on port %d\n", port);
         goto out;
+    }
     
     if (action->status & EVS_DISABLED)
+    {
+        printk("Event on port %d disabled\n", port);
         goto out;
+    }
     
     /* call the handler */
     action->handler(port, regs);
-
-       clear_evtchn(port);
     
  out:
+       clear_evtchn(port);
     return 1;
 
 }
@@ -135,6 +140,7 @@ void init_events(void)
     {
         ev_actions[i].status  = EVS_DISABLED;
         ev_actions[i].handler = default_handler;
+        mask_evtchn(i);
     }
 }
 
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/include/xenbus.h
--- a/extras/mini-os/include/xenbus.h   Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/include/xenbus.h   Thu Jun 15 10:23:57 2006 -0600
@@ -1,6 +1,34 @@
 #ifndef XENBUS_H__
 #define XENBUS_H__
 
+/* Initialize the XenBus system. */
 void init_xenbus(void);
 
+/* Read the value associated with a path.  Returns a malloc'd error
+   string on failure and sets *value to NULL.  On success, *value is
+   set to a malloc'd copy of the value. */
+char *xenbus_read(const char *path, char **value);
+
+/* Associates a value with a path.  Returns a malloc'd error string on
+   failure. */
+char *xenbus_write(const char *path, const char *value);
+
+/* Removes the value associated with a path.  Returns a malloc'd error
+   string on failure. */
+char *xenbus_rm(const char *path);
+
+/* List the contents of a directory.  Returns a malloc'd error string
+   on failure and sets *contents to NULL.  On success, *contents is
+   set to a malloc'd array of pointers to malloc'd strings.  The array
+   is NULL terminated.  May block. */
+char *xenbus_ls(const char *prefix, char ***contents);
+
+/* Reads permissions associated with a path.  Returns a malloc'd error
+   string on failure and sets *value to NULL.  On success, *value is
+   set to a malloc'd copy of the value. */
+char *xenbus_get_perms(const char *path, char **value);
+
+/* Sets the permissions associated with a path.  Returns a malloc'd
+   error string on failure. */
+char *xenbus_set_perms(const char *path, domid_t dom, char perm);
 #endif /* XENBUS_H__ */
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/kernel.c   Thu Jun 15 10:23:57 2006 -0600
@@ -82,17 +82,6 @@ static shared_info_t *map_shared_info(un
 }
 
 
-void test_xenbus(void);
-
-/* Do initialisation from a thread once the scheduler's available */
-static void init_xs(void *ign)
-{
-    init_xenbus();
-
-    test_xenbus();
-}
-
-
 u8 xen_features[XENFEAT_NR_SUBMAPS * 32];
 
 void setup_xen_features(void)
@@ -111,10 +100,18 @@ void setup_xen_features(void)
     }
 }
 
+void test_xenbus(void);
+
+void xenbus_tester(void *p)
+{
+    test_xenbus();
+}
+
 /* This should be overridden by the application we are linked against. */
 __attribute__((weak)) int app_main(start_info_t *si)
 {
     printk("Dummy main: start_info=%p\n", si);
+    create_thread("xenbus_tester", xenbus_tester, si);
     return 0;
 }
 
@@ -183,8 +180,8 @@ void start_kernel(start_info_t *si)
     /* Init scheduler. */
     init_sched();
  
-    /* Init XenBus from a separate thread */
-    create_thread("init_xs", init_xs, NULL);
+    /* Init XenBus */
+    init_xenbus();
 
     /* Call (possibly overridden) app_main() */
     app_main(&start_info);
diff -r 7f67c15e2c91 -r fbc0e953732e extras/mini-os/xenbus/xenbus.c
--- a/extras/mini-os/xenbus/xenbus.c    Thu Jun 15 10:02:53 2006 -0600
+++ b/extras/mini-os/xenbus/xenbus.c    Thu Jun 15 10:23:57 2006 -0600
@@ -3,11 +3,12 @@
  * (C) 2006 - Cambridge University
  ****************************************************************************
  *
- *        File: mm.c
+ *        File: xenbus.c
  *      Author: Steven Smith (sos22@xxxxxxxxx) 
  *     Changes: Grzegorz Milos (gm281@xxxxxxxxx)
+ *     Changes: John D. Ramsdell
  *              
- *        Date: Mar 2006, chages Aug 2005
+ *        Date: Jun 2006, chages Aug 2005
  * 
  * Environment: Xen Minimal OS
  * Description: Minimal implementation of xenbus
@@ -167,6 +168,7 @@ void init_xenbus(void)
 void init_xenbus(void)
 {
     int err;
+    printk("Initialising xenbus\n");
     DEBUG("init_xenbus called.\n");
     xenstore_buf = mfn_to_virt(start_info.store_mfn);
     create_thread("xenstore", xenbus_thread_func, NULL);
@@ -262,15 +264,15 @@ static void xb_write(int type, int req_i
 /* Send a mesasge to xenbus, in the same fashion as xb_write, and
    block waiting for a reply.  The reply is malloced and should be
    freed by the caller. */
-static void *xenbus_msg_reply(int type,
+static struct xsd_sockmsg *
+xenbus_msg_reply(int type,
         int trans,
         struct write_req *io,
         int nr_reqs)
 {
     int id;
     DEFINE_WAIT(w);
-    void *rep;
-    struct xsd_sockmsg *repmsg;
+    struct xsd_sockmsg *rep;
 
     id = allocate_xenbus_id();
     add_waiter(w, req_info[id].waitq);
@@ -281,12 +283,26 @@ static void *xenbus_msg_reply(int type,
     wake(current);
 
     rep = req_info[id].reply;
-    repmsg = rep;
-    BUG_ON(repmsg->req_id != id);
+    BUG_ON(rep->req_id != id);
     release_xenbus_id(id);
-
     return rep;
 }
+
+static char *errmsg(struct xsd_sockmsg *rep)
+{
+    if (!rep) {
+       char msg[] = "No reply";
+       size_t len = strlen(msg) + 1;
+       return memcpy(malloc(len), msg, len);
+    }
+    if (rep->type != XS_ERROR)
+       return NULL;
+    char *res = malloc(rep->len + 1);
+    memcpy(res, rep + 1, rep->len);
+    res[rep->len] = 0;
+    free(rep);
+    return res;
+}      
 
 /* Send a debug message to xenbus.  Can block. */
 static void xenbus_debug_msg(const char *msg)
@@ -296,27 +312,29 @@ static void xenbus_debug_msg(const char 
         { "print", sizeof("print") },
         { msg, len },
         { "", 1 }};
-    void *reply;
-    struct xsd_sockmsg *repmsg;
-
-    reply = xenbus_msg_reply(XS_DEBUG, 0, req, 3);
-    repmsg = reply;
+    struct xsd_sockmsg *reply;
+
+    reply = xenbus_msg_reply(XS_DEBUG, 0, req, ARRAY_SIZE(req));
     DEBUG("Got a reply, type %d, id %d, len %d.\n",
-            repmsg->type, repmsg->req_id, repmsg->len);
+            reply->type, reply->req_id, reply->len);
 }
 
 /* List the contents of a directory.  Returns a malloc()ed array of
    pointers to malloc()ed strings.  The array is NULL terminated.  May
    block. */
-static char **xenbus_ls(const char *pre)
-{
-    void *reply;
-    struct xsd_sockmsg *repmsg;
+char *xenbus_ls(const char *pre, char ***contents)
+{
+    struct xsd_sockmsg *reply, *repmsg;
     struct write_req req[] = { { pre, strlen(pre)+1 } };
     int nr_elems, x, i;
     char **res;
 
-    repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, 1);
+    repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, ARRAY_SIZE(req));
+    char *msg = errmsg(repmsg);
+    if (msg) {
+       *contents = NULL;
+       return msg;
+    }
     reply = repmsg + 1;
     for (x = nr_elems = 0; x < repmsg->len; x++)
         nr_elems += (((char *)reply)[x] == 0);
@@ -329,20 +347,91 @@ static char **xenbus_ls(const char *pre)
     }
     res[i] = NULL;
     free(repmsg);
-    return res;
-}
-
-static char *xenbus_read(const char *path)
-{
-    struct write_req req[] = { {path, strlen(path) + 1}};
+    *contents = res;
+    return NULL;
+}
+
+char *xenbus_read(const char *path, char **value)
+{
+    struct write_req req[] = { {path, strlen(path) + 1} };
     struct xsd_sockmsg *rep;
     char *res;
-    rep = xenbus_msg_reply(XS_READ, 0, req, 1);
+    rep = xenbus_msg_reply(XS_READ, 0, req, ARRAY_SIZE(req));
+    char *msg = errmsg(rep);
+    if (msg) {
+       *value = NULL;
+       return msg;
+    }
     res = malloc(rep->len + 1);
     memcpy(res, rep + 1, rep->len);
     res[rep->len] = 0;
     free(rep);
-    return res;
+    *value = res;
+    return NULL;
+}
+
+char *xenbus_write(const char *path, const char *value)
+{
+    struct write_req req[] = { 
+       {path, strlen(path) + 1},
+       {value, strlen(value) + 1},
+    };
+    struct xsd_sockmsg *rep;
+    rep = xenbus_msg_reply(XS_WRITE, 0, req, ARRAY_SIZE(req));
+    char *msg = errmsg(rep);
+    if (msg)
+       return msg;
+    free(rep);
+    return NULL;
+}
+
+char *xenbus_rm(const char *path)
+{
+    struct write_req req[] = { {path, strlen(path) + 1} };
+    struct xsd_sockmsg *rep;
+    rep = xenbus_msg_reply(XS_RM, 0, req, ARRAY_SIZE(req));
+    char *msg = errmsg(rep);
+    if (msg)
+       return msg;
+    free(rep);
+    return NULL;
+}
+
+char *xenbus_get_perms(const char *path, char **value)
+{
+    struct write_req req[] = { {path, strlen(path) + 1} };
+    struct xsd_sockmsg *rep;
+    char *res;
+    rep = xenbus_msg_reply(XS_GET_PERMS, 0, req, ARRAY_SIZE(req));
+    char *msg = errmsg(rep);
+    if (msg) {
+       *value = NULL;
+       return msg;
+    }
+    res = malloc(rep->len + 1);
+    memcpy(res, rep + 1, rep->len);
+    res[rep->len] = 0;
+    free(rep);
+    *value = res;
+    return NULL;
+}
+
+#define PERM_MAX_SIZE 32
+char *xenbus_set_perms(const char *path, domid_t dom, char perm)
+{
+    char value[PERM_MAX_SIZE];
+    snprintf(value, PERM_MAX_SIZE, "%c%hu", perm, dom);
+    struct write_req req[] = { 
+       {path, strlen(path) + 1},
+       {value, strlen(value) + 1},
+    };
+    struct xsd_sockmsg *rep;
+    rep = xenbus_msg_reply(XS_SET_PERMS, 0, req, ARRAY_SIZE(req));
+    char *msg = errmsg(rep);
+    if (msg)
+       return msg;
+    free(rep);
+    return NULL;
 }
 
 static void do_ls_test(const char *pre)
@@ -351,7 +440,12 @@ static void do_ls_test(const char *pre)
     int x;
 
     DEBUG("ls %s...\n", pre);
-    dirs = xenbus_ls(pre);
+    char *msg = xenbus_ls(pre, &dirs);
+    if (msg) {
+       DEBUG("Error in xenbus ls: %s\n", msg);
+       free(msg);
+       return;
+    }
     for (x = 0; dirs[x]; x++) 
     {
         DEBUG("ls %s[%d] -> %s\n", pre, x, dirs[x]);
@@ -364,9 +458,38 @@ static void do_read_test(const char *pat
 {
     char *res;
     DEBUG("Read %s...\n", path);
-    res = xenbus_read(path);
+    char *msg = xenbus_read(path, &res);
+    if (msg) {
+       DEBUG("Error in xenbus read: %s\n", msg);
+       free(msg);
+       return;
+    }
     DEBUG("Read %s -> %s.\n", path, res);
     free(res);
+}
+
+static void do_write_test(const char *path, const char *val)
+{
+    DEBUG("Write %s to %s...\n", val, path);
+    char *msg = xenbus_write(path, val);
+    if (msg) {
+       DEBUG("Result %s\n", msg);
+       free(msg);
+    } else {
+       DEBUG("Success.\n");
+    }
+}
+
+static void do_rm_test(const char *path)
+{
+    DEBUG("rm %s...\n", path);
+    char *msg = xenbus_rm(path);
+    if (msg) {
+       DEBUG("Result %s\n", msg);
+       free(msg);
+    } else {
+       DEBUG("Success.\n");
+    }
 }
 
 /* Simple testing thing */
@@ -383,5 +506,22 @@ void test_xenbus(void)
     DEBUG("Doing read test.\n");
     do_read_test("device/vif/0/mac");
     do_read_test("device/vif/0/backend");
-    printk("Xenbus initialised.\n");
-}
+
+    DEBUG("Doing write test.\n");
+    do_write_test("device/vif/0/flibble", "flobble");
+    do_read_test("device/vif/0/flibble");
+    do_write_test("device/vif/0/flibble", "widget");
+    do_read_test("device/vif/0/flibble");
+
+    DEBUG("Doing rm test.\n");
+    do_rm_test("device/vif/0/flibble");
+    do_read_test("device/vif/0/flibble");
+    DEBUG("(Should have said ENOENT)\n");
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-basic-offset: 4
+ * End:
+ */
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c    Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c    Thu Jun 15 10:23:57 2006 -0600
@@ -133,6 +133,7 @@ void xen_tlb_flush(void)
        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
+EXPORT_SYMBOL(xen_tlb_flush);
 
 void xen_invlpg(unsigned long ptr)
 {
@@ -141,6 +142,7 @@ void xen_invlpg(unsigned long ptr)
        op.arg1.linear_addr = ptr & PAGE_MASK;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
+EXPORT_SYMBOL(xen_invlpg);
 
 #ifdef CONFIG_SMP
 
@@ -363,7 +365,8 @@ void xen_destroy_contiguous_region(unsig
        };
        set_xen_guest_handle(reservation.extent_start, &frame);
 
-       if (xen_feature(XENFEAT_auto_translated_physmap))
+       if (xen_feature(XENFEAT_auto_translated_physmap) ||
+           !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
                return;
 
        scrub_pages(vstart, 1 << order);
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c      Thu Jun 15 10:23:57 2006 -0600
@@ -763,7 +763,7 @@ void __init pgtable_cache_init(void)
 #endif
                                0,
                                pgd_ctor,
-                               pgd_dtor);
+                               PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
        if (!pgd_cache)
                panic("pgtable_cache_init(): Cannot create pgd cache");
 }
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c   Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c   Thu Jun 15 10:23:57 2006 -0600
@@ -300,11 +300,6 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
        unsigned long flags;
 
        if (PTRS_PER_PMD > 1) {
-               if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
-                       int rc = xen_create_contiguous_region(
-                               (unsigned long)pgd, 0, 32);
-                       BUG_ON(rc);
-               }
                if (HAVE_SHARED_KERNEL_PMD)
                        clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
                                        swapper_pg_dir + USER_PTRS_PER_PGD,
@@ -320,69 +315,105 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
        }
 }
 
+/* never called when PTRS_PER_PMD > 1 */
 void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
 {
        unsigned long flags; /* can be called from interrupt context */
 
-       if (PTRS_PER_PMD > 1) {
-               if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
-                       xen_destroy_contiguous_region((unsigned long)pgd, 0);
-       } else {
-               spin_lock_irqsave(&pgd_lock, flags);
-               pgd_list_del(pgd);
-               spin_unlock_irqrestore(&pgd_lock, flags);
-
-               pgd_test_and_unpin(pgd);
-       }
+       spin_lock_irqsave(&pgd_lock, flags);
+       pgd_list_del(pgd);
+       spin_unlock_irqrestore(&pgd_lock, flags);
+
+       pgd_test_and_unpin(pgd);
 }
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
        int i;
        pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+       pmd_t **pmd;
+       unsigned long flags;
 
        pgd_test_and_unpin(pgd);
 
        if (PTRS_PER_PMD == 1 || !pgd)
                return pgd;
 
-       for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-               pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
-               if (!pmd)
-                       goto out_oom;
-               set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
-       }
-
-       if (!HAVE_SHARED_KERNEL_PMD) {
-               unsigned long flags;
-
-               for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
+       if (HAVE_SHARED_KERNEL_PMD) {
+               for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
                        pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
                        if (!pmd)
                                goto out_oom;
                        set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
                }
-
-               spin_lock_irqsave(&pgd_lock, flags);
-               for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
-                       unsigned long v = (unsigned long)i << PGDIR_SHIFT;
-                       pgd_t *kpgd = pgd_offset_k(v);
-                       pud_t *kpud = pud_offset(kpgd, v);
-                       pmd_t *kpmd = pmd_offset(kpud, v);
-                       pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
-                       memcpy(pmd, kpmd, PAGE_SIZE);
-                       make_lowmem_page_readonly(
-                               pmd, XENFEAT_writable_page_tables);
+               return pgd;
+       }
+
+       /*
+        * We can race save/restore (if we sleep during a GFP_KERNEL memory
+        * allocation). We therefore store virtual addresses of pmds as they
+        * do not change across save/restore, and poke the machine addresses
+        * into the pgdir under the pgd_lock.
+        */
+       pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
+       if (!pmd) {
+               kmem_cache_free(pgd_cache, pgd);
+               return NULL;
+       }
+
+       /* Allocate pmds, remember virtual addresses. */
+       for (i = 0; i < PTRS_PER_PGD; ++i) {
+               pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+               if (!pmd[i])
+                       goto out_oom;
+       }
+
+       spin_lock_irqsave(&pgd_lock, flags);
+
+       /* Protect against save/restore: move below 4GB under pgd_lock. */
+       if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
+               int rc = xen_create_contiguous_region(
+                       (unsigned long)pgd, 0, 32);
+               if (rc) {
+                       spin_unlock_irqrestore(&pgd_lock, flags);
+                       goto out_oom;
                }
-               pgd_list_add(pgd);
-               spin_unlock_irqrestore(&pgd_lock, flags);
-       }
+       }
+
+       /* Copy kernel pmd contents and write-protect the new pmds. */
+       for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
+               unsigned long v = (unsigned long)i << PGDIR_SHIFT;
+               pgd_t *kpgd = pgd_offset_k(v);
+               pud_t *kpud = pud_offset(kpgd, v);
+               pmd_t *kpmd = pmd_offset(kpud, v);
+               memcpy(pmd[i], kpmd, PAGE_SIZE);
+               make_lowmem_page_readonly(
+                       pmd[i], XENFEAT_writable_page_tables);
+       }
+
+       /* It is safe to poke machine addresses of pmds under the pmd_lock. */
+       for (i = 0; i < PTRS_PER_PGD; i++)
+               set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i])));
+
+       /* Ensure this pgd gets picked up and pinned on save/restore. */
+       pgd_list_add(pgd);
+
+       spin_unlock_irqrestore(&pgd_lock, flags);
+
+       kfree(pmd);
 
        return pgd;
 
 out_oom:
-       for (i--; i >= 0; i--)
-               kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+       if (HAVE_SHARED_KERNEL_PMD) {
+               for (i--; i >= 0; i--)
+                       kmem_cache_free(pmd_cache,
+                                       (void *)__va(pgd_val(pgd[i])-1));
+       } else {
+               for (i--; i >= 0; i--)
+                       kmem_cache_free(pmd_cache, pmd[i]);
+               kfree(pmd);
+       }
        kmem_cache_free(pgd_cache, pgd);
        return NULL;
 }
@@ -391,6 +422,14 @@ void pgd_free(pgd_t *pgd)
 {
        int i;
 
+       /*
+        * After this the pgd should not be pinned for the duration of this
+        * function's execution. We should never sleep and thus never race:
+        *  1. User pmds will not become write-protected under our feet due
+        *     to a concurrent mm_pin_all().
+        *  2. The machine addresses in PGD entries will not become invalid
+        *     due to a concurrent save/restore.
+        */
        pgd_test_and_unpin(pgd);
 
        /* in the PAE case user pgd entries are overwritten before usage */
@@ -399,11 +438,13 @@ void pgd_free(pgd_t *pgd)
                        pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
                        kmem_cache_free(pmd_cache, pmd);
                }
+
                if (!HAVE_SHARED_KERNEL_PMD) {
                        unsigned long flags;
                        spin_lock_irqsave(&pgd_lock, flags);
                        pgd_list_del(pgd);
                        spin_unlock_irqrestore(&pgd_lock, flags);
+
                        for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
                                pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
                                make_lowmem_page_writable(
@@ -411,8 +452,13 @@ void pgd_free(pgd_t *pgd)
                                memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
                                kmem_cache_free(pmd_cache, pmd);
                        }
+
+                       if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
+                               xen_destroy_contiguous_region(
+                                       (unsigned long)pgd, 0);
                }
        }
+
        /* in the non-PAE case, free_pgtables() clears user pgd entries */
        kmem_cache_free(pgd_cache, pgd);
 }
@@ -588,7 +634,7 @@ void mm_pin(struct mm_struct *mm)
 void mm_pin(struct mm_struct *mm)
 {
        if (xen_feature(XENFEAT_writable_page_tables))
-           return;
+               return;
        spin_lock(&mm->page_table_lock);
        __pgd_pin(mm->pgd);
        spin_unlock(&mm->page_table_lock);
@@ -597,7 +643,7 @@ void mm_unpin(struct mm_struct *mm)
 void mm_unpin(struct mm_struct *mm)
 {
        if (xen_feature(XENFEAT_writable_page_tables))
-           return;
+               return;
        spin_lock(&mm->page_table_lock);
        __pgd_unpin(mm->pgd);
        spin_unlock(&mm->page_table_lock);
@@ -607,11 +653,17 @@ void mm_pin_all(void)
 {
        struct page *page;
        if (xen_feature(XENFEAT_writable_page_tables))
-           return;
+               return;
        for (page = pgd_list; page; page = (struct page *)page->index) {
                if (!test_bit(PG_pinned, &page->flags))
                        __pgd_pin((pgd_t *)page_address(page));
        }
+}
+
+void _arch_dup_mmap(struct mm_struct *mm)
+{
+       if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags))
+               mm_pin(mm);
 }
 
 void _arch_exit_mmap(struct mm_struct *mm)
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c        Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c        Thu Jun 15 10:23:57 2006 -0600
@@ -130,6 +130,12 @@ void mm_pin_all(void)
                                  context.unpinned));
 }
 
+void _arch_dup_mmap(struct mm_struct *mm)
+{
+    if (!mm->context.pinned)
+        mm_pin(mm);
+}
+
 void _arch_exit_mmap(struct mm_struct *mm)
 {
     struct task_struct *tsk = current;
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Thu Jun 15 10:23:57 2006 -0600
@@ -342,8 +342,20 @@ static void blkfront_closing(struct xenb
 static void blkfront_closing(struct xenbus_device *dev)
 {
        struct blkfront_info *info = dev->dev.driver_data;
+       unsigned long flags;
 
        DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
+
+       if (info->rq == NULL)
+               return;
+
+       spin_lock_irqsave(&blkif_io_lock, flags);
+       /* No more blkif_request(). */
+       blk_stop_queue(info->rq);
+       /* No more gnttab callback work. */
+       gnttab_cancel_free_callback(&info->callback);
+       flush_scheduled_work();
+       spin_unlock_irqrestore(&blkif_io_lock, flags);
 
        xlvbd_del(info);
 
@@ -407,7 +419,8 @@ static void blkif_restart_queue(void *ar
 {
        struct blkfront_info *info = (struct blkfront_info *)arg;
        spin_lock_irq(&blkif_io_lock);
-       kick_pending_request_queues(info);
+       if (info->connected == BLKIF_STATE_CONNECTED)
+               kick_pending_request_queues(info);
        spin_unlock_irq(&blkif_io_lock);
 }
 
@@ -695,6 +708,12 @@ static void blkif_free(struct blkfront_i
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
                BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+       /* No more blkif_request(). */
+       if (info->rq)
+               blk_stop_queue(info->rq);
+       /* No more gnttab callback work. */
+       gnttab_cancel_free_callback(&info->callback);
+       flush_scheduled_work();
        spin_unlock_irq(&blkif_io_lock);
 
        /* Free resources associated with old device channel. */
@@ -768,17 +787,17 @@ static void blkif_recover(struct blkfron
 
        (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
+       spin_lock_irq(&blkif_io_lock);
+
        /* Now safe for us to use the shared ring */
-       spin_lock_irq(&blkif_io_lock);
        info->connected = BLKIF_STATE_CONNECTED;
-       spin_unlock_irq(&blkif_io_lock);
 
        /* Send off requeued requests */
        flush_requests(info);
 
        /* Kick any other new requests queued since we resumed */
-       spin_lock_irq(&blkif_io_lock);
        kick_pending_request_queues(info);
+
        spin_unlock_irq(&blkif_io_lock);
 }
 
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c    Thu Jun 15 10:23:57 2006 -0600
@@ -334,6 +334,21 @@ out:
 }
 EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
 
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
+{
+       struct gnttab_free_callback **pcb;
+       unsigned long flags;
+
+       spin_lock_irqsave(&gnttab_list_lock, flags);
+       for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
+               if (*pcb == callback) {
+                       *pcb = callback->next;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+
 #ifndef __ia64__
 static int map_pte_fn(pte_t *pte, struct page *pmd_page,
                      unsigned long addr, void *data)
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/core/skbuff.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c    Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c    Thu Jun 15 10:23:57 2006 -0600
@@ -121,8 +121,15 @@ static int __init skbuff_init(void)
        for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
                size = PAGE_SIZE << order;
                sprintf(name[order], "xen-skb-%lu", size);
-               skbuff_order_cachep[order] = kmem_cache_create(
-                       name[order], size, size, 0, skbuff_ctor, skbuff_dtor);
+               if (is_running_on_xen() &&
+                   (xen_start_info->flags & SIF_PRIVILEGED))
+                       skbuff_order_cachep[order] = kmem_cache_create(
+                               name[order], size, size, 0,
+                               skbuff_ctor, skbuff_dtor);
+               else
+                       skbuff_order_cachep[order] = kmem_cache_create(
+                               name[order], size, size, 0, NULL, NULL);
+                       
        }
 
        skbuff_cachep = skbuff_order_cachep[0];
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Thu Jun 15 10:23:57 2006 -0600
@@ -1072,68 +1072,39 @@ static void xennet_set_features(struct n
 
 static void network_connect(struct net_device *dev)
 {
-       struct netfront_info *np;
+       struct netfront_info *np = netdev_priv(dev);
        int i, requeue_idx;
-       struct netif_tx_request *tx;
        struct sk_buff *skb;
 
        xennet_set_features(dev);
 
-       np = netdev_priv(dev);
        spin_lock_irq(&np->tx_lock);
        spin_lock(&np->rx_lock);
 
-       /* Recovery procedure: */
-
        /*
-        * Step 1: Rebuild the RX and TX ring contents.
-        * NB. We could just free the queued TX packets now but we hope
-        * that sending them out might do some good.  We have to rebuild
-        * the RX ring because some of our pages are currently flipped out
-        * so we can't just free the RX skbs.
-        * NB2. Freelist index entries are always going to be less than
+         * Recovery procedure:
+        *  NB. Freelist index entries are always going to be less than
         *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
-        * greater than PAGE_OFFSET: we use this property to distinguish
-        * them.
-        */
-
-       /*
-        * Rebuild the TX buffer freelist and the TX ring itself.
-        * NB. This reorders packets.  We could keep more private state
-        * to avoid this but maybe it doesn't matter so much given the
-        * interface has been down.
-        */
+        *  greater than PAGE_OFFSET: we use this property to distinguish
+        *  them.
+         */
+
+       /* Step 1: Discard all pending TX packet fragments. */
        for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
                if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
                        continue;
 
                skb = np->tx_skbs[i];
-
-               tx = RING_GET_REQUEST(&np->tx, requeue_idx);
-               requeue_idx++;
-
-               tx->id = i;
-               gnttab_grant_foreign_access_ref(
-                       np->grant_tx_ref[i], np->xbdev->otherend_id,
-                       virt_to_mfn(np->tx_skbs[i]->data),
-                       GNTMAP_readonly);
-               tx->gref = np->grant_tx_ref[i];
-               tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
-               tx->size = skb->len;
-               tx->flags = 0;
-               if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
-                       tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
-               if (skb->proto_data_valid) /* remote but checksummed? */
-                       tx->flags |= NETTXF_data_validated;
-
-               np->stats.tx_bytes += skb->len;
-               np->stats.tx_packets++;
-       }
-
-       np->tx.req_prod_pvt = requeue_idx;
-       RING_PUSH_REQUESTS(&np->tx);
-
-       /* Rebuild the RX buffer freelist and the RX ring itself. */
+               gnttab_end_foreign_access_ref(
+                       np->grant_tx_ref[i], GNTMAP_readonly);
+               gnttab_release_grant_reference(
+                       &np->gref_tx_head, np->grant_tx_ref[i]);
+               np->grant_tx_ref[i] = GRANT_INVALID_REF;
+               add_id_to_freelist(np->tx_skbs, i);
+               dev_kfree_skb_irq(skb);
+       }
+
+       /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
        for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) {
                if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET)
                        continue;
@@ -1150,7 +1121,7 @@ static void network_connect(struct net_d
        RING_PUSH_REQUESTS(&np->rx);
 
        /*
-        * Step 2: All public and private state should now be sane.  Get
+        * Step 3: All public and private state should now be sane.  Get
         * ready to start sending and receiving packets and give the driver
         * domain a kick because we've probably just requeued some
         * packets.
@@ -1158,6 +1129,7 @@ static void network_connect(struct net_d
        netif_carrier_on(dev);
        notify_remote_via_irq(np->irq);
        network_tx_buf_gc(dev);
+       network_alloc_rx_buffers(dev);
 
        spin_unlock(&np->rx_lock);
        spin_unlock_irq(&np->tx_lock);
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h  Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h  Thu Jun 15 10:23:57 2006 -0600
@@ -18,4 +18,8 @@ extern void _arch_exit_mmap(struct mm_st
 extern void _arch_exit_mmap(struct mm_struct *mm);
 #define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
 
+/* kernel/fork.c:dup_mmap hook */
+extern void _arch_dup_mmap(struct mm_struct *mm);
+#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm))
+
 #endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h  Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h  Thu Jun 15 10:23:57 2006 -0600
@@ -51,8 +51,7 @@ static inline void switch_mm(struct mm_s
        struct mmuext_op _op[2], *op = _op;
 
        if (likely(prev != next)) {
-               if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
-                       mm_pin(next);
+               BUG_ON(!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags));
 
                /* stop flush ipis for the previous mm */
                cpu_clear(cpu, prev->cpu_vm_mask);
@@ -99,7 +98,11 @@ static inline void switch_mm(struct mm_s
 #define deactivate_mm(tsk, mm) \
        asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
 
-#define activate_mm(prev, next) \
-       switch_mm((prev),(next),NULL)
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+       if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
+               mm_pin(next);
+       switch_mm(prev, next, NULL);
+}
 
 #endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h  Thu Jun 15 10:23:57 2006 -0600
@@ -25,9 +25,9 @@ static char * __init machine_specific_me
        if ( rc == -ENOSYS ) {
                memmap.nr_entries = 1;
                map[0].addr = 0ULL;
-               map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
+               map[0].size = PFN_PHYS(xen_start_info->nr_pages);
                /* 8MB slack (to balance backend allocations). */
-               map[0].size += 8 << 20;
+               map[0].size += 8ULL << 20;
                map[0].type = E820_RAM;
                rc = 0;
        }
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h        Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h        Thu Jun 15 10:23:57 2006 -0600
@@ -28,6 +28,10 @@ extern spinlock_t mm_unpinned_lock;
 /* mm/memory.c:exit_mmap hook */
 extern void _arch_exit_mmap(struct mm_struct *mm);
 #define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+
+/* kernel/fork.c:dup_mmap hook */
+extern void _arch_dup_mmap(struct mm_struct *mm);
+#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm))
 #endif
 
 #endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h        Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h        Thu Jun 15 10:23:57 2006 -0600
@@ -73,8 +73,7 @@ static inline void switch_mm(struct mm_s
        struct mmuext_op _op[3], *op = _op;
 
        if (likely(prev != next)) {
-               if (!next->context.pinned)
-                       mm_pin(next);
+               BUG_ON(!next->context.pinned);
 
                /* stop flush ipis for the previous mm */
                clear_bit(cpu, &prev->cpu_vm_mask);
@@ -127,8 +126,11 @@ static inline void switch_mm(struct mm_s
        asm volatile("movl %0,%%fs"::"r"(0));  \
 } while(0)
 
-#define activate_mm(prev, next) do {           \
-       switch_mm((prev),(next),NULL);          \
-} while (0)
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+       if (!next->context.pinned)
+               mm_pin(next);
+       switch_mm(prev, next, NULL);
+}
 
 #endif
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/include/xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/xen/gnttab.h Thu Jun 15 10:02:53 2006 -0600
+++ b/linux-2.6-xen-sparse/include/xen/gnttab.h Thu Jun 15 10:23:57 2006 -0600
@@ -100,6 +100,7 @@ void gnttab_release_grant_reference(gran
 
 void gnttab_request_free_callback(struct gnttab_free_callback *callback,
                                  void (*fn)(void *), void *arg, u16 count);
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
 
 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
                                     unsigned long frame, int readonly);
diff -r 7f67c15e2c91 -r fbc0e953732e tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/console/daemon/io.c Thu Jun 15 10:23:57 2006 -0600
@@ -24,8 +24,8 @@
 #include "io.h"
 #include <xenctrl.h>
 #include <xs.h>
-#include <xen/linux/evtchn.h>
 #include <xen/io/console.h>
+#include <xenctrl.h>
 
 #include <malloc.h>
 #include <stdlib.h>
@@ -36,7 +36,6 @@
 #include <unistd.h>
 #include <termios.h>
 #include <stdarg.h>
-#include <sys/ioctl.h>
 #include <sys/mman.h>
 
 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
@@ -64,18 +63,11 @@ struct domain
        char *conspath;
        int ring_ref;
        evtchn_port_t local_port;
-       int evtchn_fd;
+       int xce_handle;
        struct xencons_interface *interface;
 };
 
 static struct domain *dom_head;
-
-static void evtchn_notify(struct domain *dom)
-{
-       struct ioctl_evtchn_notify notify;
-       notify.port = dom->local_port;
-       (void)ioctl(dom->evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
-}
 
 static void buffer_append(struct domain *dom)
 {
@@ -106,7 +98,7 @@ static void buffer_append(struct domain 
 
        mb();
        intf->out_cons = cons;
-       evtchn_notify(dom);
+       xc_evtchn_notify(dom->xce_handle, dom->local_port);
 
        if (buffer->max_capacity &&
            buffer->size > buffer->max_capacity) {
@@ -234,7 +226,6 @@ static int domain_create_ring(struct dom
 static int domain_create_ring(struct domain *dom)
 {
        int err, remote_port, ring_ref, rc;
-       struct ioctl_evtchn_bind_interdomain bind;
 
        err = xs_gather(xs, dom->conspath,
                        "ring-ref", "%u", &ring_ref,
@@ -258,24 +249,24 @@ static int domain_create_ring(struct dom
        }
 
        dom->local_port = -1;
-       if (dom->evtchn_fd != -1)
-               close(dom->evtchn_fd);
+       if (dom->xce_handle != -1)
+               xc_evtchn_close(dom->xce_handle);
 
        /* Opening evtchn independently for each console is a bit
         * wasteful, but that's how the code is structured... */
-       dom->evtchn_fd = open("/dev/xen/evtchn", O_RDWR);
-       if (dom->evtchn_fd == -1) {
+       dom->xce_handle = xc_evtchn_open();
+       if (dom->xce_handle == -1) {
                err = errno;
                goto out;
        }
  
-       bind.remote_domain = dom->domid;
-       bind.remote_port   = remote_port;
-       rc = ioctl(dom->evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+       rc = xc_evtchn_bind_interdomain(dom->xce_handle,
+               dom->domid, remote_port);
+
        if (rc == -1) {
                err = errno;
-               close(dom->evtchn_fd);
-               dom->evtchn_fd = -1;
+               xc_evtchn_close(dom->xce_handle);
+               dom->xce_handle = -1;
                goto out;
        }
        dom->local_port = rc;
@@ -285,8 +276,8 @@ static int domain_create_ring(struct dom
 
                if (dom->tty_fd == -1) {
                        err = errno;
-                       close(dom->evtchn_fd);
-                       dom->evtchn_fd = -1;
+                       xc_evtchn_close(dom->xce_handle);
+                       dom->xce_handle = -1;
                        dom->local_port = -1;
                        goto out;
                }
@@ -344,7 +335,7 @@ static struct domain *create_domain(int 
        dom->ring_ref = -1;
        dom->local_port = -1;
        dom->interface = NULL;
-       dom->evtchn_fd = -1;
+       dom->xce_handle = -1;
 
        if (!watch_domain(dom, true))
                goto out;
@@ -409,9 +400,9 @@ static void shutdown_domain(struct domai
        if (d->interface != NULL)
                munmap(d->interface, getpagesize());
        d->interface = NULL;
-       if (d->evtchn_fd != -1)
-               close(d->evtchn_fd);
-       d->evtchn_fd = -1;
+       if (d->xce_handle != -1)
+               xc_evtchn_close(d->xce_handle);
+       d->xce_handle = -1;
        cleanup_domain(d);
 }
 
@@ -483,7 +474,7 @@ static void handle_tty_read(struct domai
                }
                wmb();
                intf->in_prod = prod;
-               evtchn_notify(dom);
+               xc_evtchn_notify(dom->xce_handle, dom->local_port);
        } else {
                close(dom->tty_fd);
                dom->tty_fd = -1;
@@ -516,14 +507,14 @@ static void handle_tty_write(struct doma
 
 static void handle_ring_read(struct domain *dom)
 {
-       evtchn_port_t v;
-
-       if (!read_sync(dom->evtchn_fd, &v, sizeof(v)))
+       evtchn_port_t port;
+
+       if ((port = xc_evtchn_pending(dom->xce_handle)) == -1)
                return;
 
        buffer_append(dom);
 
-       (void)write_sync(dom->evtchn_fd, &v, sizeof(v));
+       (void)xc_evtchn_unmask(dom->xce_handle, port);
 }
 
 static void handle_xs(void)
@@ -566,9 +557,10 @@ void handle_io(void)
                max_fd = MAX(xs_fileno(xs), max_fd);
 
                for (d = dom_head; d; d = d->next) {
-                       if (d->evtchn_fd != -1) {
-                               FD_SET(d->evtchn_fd, &readfds);
-                               max_fd = MAX(d->evtchn_fd, max_fd);
+                       if (d->xce_handle != -1) {
+                               int evtchn_fd = xc_evtchn_fd(d->xce_handle);
+                               FD_SET(evtchn_fd, &readfds);
+                               max_fd = MAX(evtchn_fd, max_fd);
                        }
 
                        if (d->tty_fd != -1) {
@@ -588,8 +580,8 @@ void handle_io(void)
 
                for (d = dom_head; d; d = n) {
                        n = d->next;
-                       if (d->evtchn_fd != -1 &&
-                           FD_ISSET(d->evtchn_fd, &readfds))
+                       if (d->xce_handle != -1 &&
+                           FD_ISSET(xc_evtchn_fd(d->xce_handle), &readfds))
                                handle_ring_read(d);
 
                        if (d->tty_fd != -1) {
diff -r 7f67c15e2c91 -r fbc0e953732e tools/console/daemon/utils.c
--- a/tools/console/daemon/utils.c      Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/console/daemon/utils.c      Thu Jun 15 10:23:57 2006 -0600
@@ -38,32 +38,6 @@
 
 struct xs_handle *xs;
 int xc;
-
-bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
-{
-       size_t offset = 0;
-       ssize_t len;
-
-       while (offset < size) {
-               if (do_read) {
-                       len = read(fd, data + offset, size - offset);
-               } else {
-                       len = write(fd, data + offset, size - offset);
-               }
-
-               if (len < 1) {
-                       if (len == -1 && (errno == EAGAIN || errno == EINTR)) {
-                               continue;
-                       } else {
-                               return false;
-                       }
-               } else {
-                       offset += len;
-               }
-       }
-
-       return true;
-}
 
 static void child_exit(int sig)
 {
diff -r 7f67c15e2c91 -r fbc0e953732e tools/console/daemon/utils.h
--- a/tools/console/daemon/utils.h      Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/console/daemon/utils.h      Thu Jun 15 10:23:57 2006 -0600
@@ -29,9 +29,6 @@
 
 void daemonize(const char *pidfile);
 bool xen_setup(void);
-#define read_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, true)
-#define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false)
-bool _read_write_sync(int fd, void *data, size_t size, bool do_read);
 
 extern struct xs_handle *xs;
 extern int xc;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/ioemu/sdl.c
--- a/tools/ioemu/sdl.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/ioemu/sdl.c Thu Jun 15 10:23:57 2006 -0600
@@ -376,13 +376,18 @@ static void sdl_update_caption(void)
 
 static void sdl_hide_cursor(void)
 {
-    SDL_SetCursor(sdl_cursor_hidden);
+    if (kbd_mouse_is_absolute()) {
+       SDL_ShowCursor(1);
+       SDL_SetCursor(sdl_cursor_hidden);
+    } else {
+       SDL_ShowCursor(0);
+    }
 }
 
 static void sdl_show_cursor(void)
 {
     if (!kbd_mouse_is_absolute()) {
-       SDL_SetCursor(sdl_cursor_normal);
+       SDL_ShowCursor(1);
     }
 }
 
diff -r 7f67c15e2c91 -r fbc0e953732e tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/ioemu/target-i386-dm/helper2.c      Thu Jun 15 10:23:57 2006 -0600
@@ -47,11 +47,9 @@
 
 #include <limits.h>
 #include <fcntl.h>
-#include <sys/ioctl.h>
 
 #include <xenctrl.h>
 #include <xen/hvm/ioreq.h>
-#include <xen/linux/evtchn.h>
 
 #include "cpu.h"
 #include "exec-all.h"
@@ -123,7 +121,7 @@ target_ulong cpu_get_phys_page_debug(CPU
 }
 
 //the evtchn fd for polling
-int evtchn_fd = -1;
+int xce_handle = -1;
 
 //which vcpu we are serving
 int send_vcpu = 0;
@@ -170,11 +168,10 @@ static ioreq_t* __cpu_get_ioreq(int vcpu
 //retval--the number of ioreq packet
 static ioreq_t* cpu_get_ioreq(void)
 {
-    int i, rc;
+    int i;
     evtchn_port_t port;
 
-    rc = read(evtchn_fd, &port, sizeof(port));
-    if ( rc == sizeof(port) ) {
+    if ( (port = xc_evtchn_pending(xce_handle)) != -1 ) {
         for ( i = 0; i < vcpus; i++ )
             if ( shared_page->vcpu_iodata[i].dm_eport == port )
                 break;
@@ -184,8 +181,7 @@ static ioreq_t* cpu_get_ioreq(void)
             exit(1);
         }
 
-        // unmask the wanted port again
-        write(evtchn_fd, &port, sizeof(port));
+       xc_evtchn_unmask(xce_handle, port);
 
         //get the io packet from shared memory
         send_vcpu = i;
@@ -436,6 +432,7 @@ int main_loop(void)
     extern int shutdown_requested;
     CPUState *env = global_env;
     int retval;
+    int evtchn_fd = xc_evtchn_fd(xce_handle);
     extern void main_loop_wait(int);
 
     /* Watch stdin (fd 0) to see when it has input. */
@@ -475,11 +472,9 @@ int main_loop(void)
         main_loop_wait(0);
 
         if (env->send_event) {
-            struct ioctl_evtchn_notify notify;
-
             env->send_event = 0;
-            notify.port = shared_page->vcpu_iodata[send_vcpu].dm_eport;
-            (void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
+            (void)xc_evtchn_notify(xce_handle,
+                 shared_page->vcpu_iodata[send_vcpu].dm_eport);
         }
     }
     destroy_hvm_domain();
@@ -511,7 +506,6 @@ CPUState * cpu_init()
 CPUState * cpu_init()
 {
     CPUX86State *env;
-    struct ioctl_evtchn_bind_interdomain bind;
     int i, rc;
 
     cpu_exec_init();
@@ -523,21 +517,19 @@ CPUState * cpu_init()
 
     cpu_single_env = env;
 
-    if (evtchn_fd != -1)//the evtchn has been opened by another cpu object
+    if (xce_handle != -1)//the evtchn has been opened by another cpu object
         return NULL;
 
-    //use nonblock reading not polling, may change in future.
-    evtchn_fd = open("/dev/xen/evtchn", O_RDWR|O_NONBLOCK);
-    if (evtchn_fd == -1) {
+    xce_handle = xc_evtchn_open();
+    if (xce_handle == -1) {
         fprintf(logfile, "open evtchn device error %d\n", errno);
         return NULL;
     }
 
     /* FIXME: how about if we overflow the page here? */
-    bind.remote_domain = domid;
     for ( i = 0; i < vcpus; i++ ) {
-        bind.remote_port = shared_page->vcpu_iodata[i].vp_eport;
-        rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+        rc = xc_evtchn_bind_interdomain(xce_handle, domid,
+            shared_page->vcpu_iodata[i].vp_eport);
         if ( rc == -1 ) {
             fprintf(logfile, "bind interdomain ioctl error %d\n", errno);
             return NULL;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_elf.h
--- a/tools/libxc/xc_elf.h      Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_elf.h      Thu Jun 15 10:23:57 2006 -0600
@@ -170,13 +170,14 @@ typedef struct {
 #define EM_PARISC      15              /* HPPA */
 #define EM_SPARC32PLUS 18              /* Enhanced instruction set SPARC */
 #define EM_PPC         20              /* PowerPC */
+#define EM_PPC64       21              /* PowerPC 64-bit */
 #define EM_ARM         40              /* Advanced RISC Machines ARM */
 #define EM_ALPHA       41              /* DEC ALPHA */
 #define EM_SPARCV9     43              /* SPARC version 9 */
 #define EM_ALPHA_EXP   0x9026          /* DEC ALPHA */
+#define EM_IA_64       50              /* Intel Merced */
 #define EM_X86_64      62              /* AMD x86-64 architecture */
 #define EM_VAX         75              /* DEC VAX */
-#define EM_NUM         15              /* number of machine types */
 
 /* Version */
 #define EV_NONE                0               /* Invalid */
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_linux.c
--- a/tools/libxc/xc_linux.c    Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_linux.c    Thu Jun 15 10:23:57 2006 -0600
@@ -103,6 +103,124 @@ int do_xen_hypercall(int xc_handle, priv
                       (unsigned long)hypercall);
 }
 
+#define EVTCHN_DEV_NAME  "/dev/xen/evtchn"
+#define EVTCHN_DEV_MAJOR 10
+#define EVTCHN_DEV_MINOR 201
+
+int xc_evtchn_open(void)
+{
+    struct stat st;
+    int fd;
+
+    /* Make sure any existing device file links to correct device. */
+    if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
+        (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
+        (void)unlink(EVTCHN_DEV_NAME);
+
+reopen:
+    if ( (fd = open(EVTCHN_DEV_NAME, O_RDWR)) == -1 )
+    {
+        if ( (errno == ENOENT) &&
+            ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
+            (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
+            makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0) )
+            goto reopen;
+
+        PERROR("Could not open event channel interface");
+        return -1;
+    }
+
+    return fd;
+}
+
+int xc_evtchn_close(int xce_handle)
+{
+    return close(xce_handle);
+}
+
+int xc_evtchn_fd(int xce_handle)
+{
+    return xce_handle;
+}
+
+int xc_evtchn_notify(int xce_handle, evtchn_port_t port)
+{
+    struct ioctl_evtchn_notify notify;
+
+    notify.port = port;
+
+    return ioctl(xce_handle, IOCTL_EVTCHN_NOTIFY, &notify);
+}
+
+evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
+    evtchn_port_t remote_port)
+{
+    struct ioctl_evtchn_bind_interdomain bind;
+
+    bind.remote_domain = domid;
+    bind.remote_port = remote_port;
+
+    return ioctl(xce_handle, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+}
+
+int xc_evtchn_unbind(int xce_handle, evtchn_port_t port)
+{
+    struct ioctl_evtchn_unbind unbind;
+
+    unbind.port = port;
+
+    return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind);
+}
+
+evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq)
+{
+    struct ioctl_evtchn_bind_virq bind;
+
+    bind.virq = virq;
+
+    return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind);
+}
+
+/* Transfer exactly 'size' bytes on fd (write if do_write, else read), retrying
+ * on EINTR.  Returns 0 on success, -1 on error or if no progress can be made. */
+static int dorw(int fd, char *data, size_t size, int do_write)
+{
+    size_t offset = 0;
+    ssize_t len;
+
+    while ( offset < size )
+    {
+        if ( do_write )
+            len = write(fd, data + offset, size - offset);
+        else
+            len = read(fd, data + offset, size - offset);
+
+        if ( (len == -1) && (errno == EINTR) )
+            continue;
+        /* No progress (e.g. EOF on read): fail rather than spin forever. */
+        if ( len <= 0 )
+            return -1;
+        offset += len;
+    }
+
+    return 0;
+}
+
+evtchn_port_t xc_evtchn_pending(int xce_handle)
+{
+    evtchn_port_t port;
+
+    if ( dorw(xce_handle, (char *)&port, sizeof(port), 0) == -1 )
+        return -1;
+
+    return port;
+}
+
+int xc_evtchn_unmask(int xce_handle, evtchn_port_t port)
+{
+    return dorw(xce_handle, (char *)&port, sizeof(port), 1);
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c    Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_linux_restore.c    Thu Jun 15 10:23:57 2006 -0600
@@ -456,6 +456,15 @@ int xc_linux_restore(int xc_handle, int 
         n+= j; /* crude stats */
     }
 
+    /*
+     * Ensure we flush all machphys updates before potential PAE-specific
+     * reallocations below.
+     */
+    if (xc_finish_mmu_updates(xc_handle, mmu)) {
+        ERR("Error doing finish_mmu_updates()");
+        goto out;
+    }
+
     DPRINTF("Received all pages (%d races)\n", nraces);
 
     if ((pt_levels == 3) && !pae_extended_cr3) {
@@ -550,14 +559,11 @@ int xc_linux_restore(int xc_handle, int 
             }
         }
 
-    }
-
-
-    if (xc_finish_mmu_updates(xc_handle, mmu)) {
-        ERR("Error doing finish_mmu_updates()");
-        goto out;
-    }
-
+        if (xc_finish_mmu_updates(xc_handle, mmu)) {
+            ERR("Error doing finish_mmu_updates()");
+            goto out;
+        }
+    }
 
     /*
      * Pin page tables. Do this after writing to them as otherwise Xen
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xc_load_elf.c Thu Jun 15 10:23:57 2006 -0600
@@ -21,6 +21,24 @@ loadelfsymtab(
 loadelfsymtab(
     const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
     struct domain_setup_info *dsi);
+
+/*
+ * Elf header attributes we require for each supported host platform.
+ * These are checked in parseelfimage().
+ */
+#if defined(__ia64__)
+#define ELFCLASS   ELFCLASS64
+#define ELFDATA    ELFDATA2LSB
+#define ELFMACHINE EM_IA_64
+#elif defined(__i386__)
+#define ELFCLASS   ELFCLASS32
+#define ELFDATA    ELFDATA2LSB
+#define ELFMACHINE EM_386
+#elif defined(__x86_64__)
+#define ELFCLASS   ELFCLASS64
+#define ELFDATA    ELFDATA2LSB
+#define ELFMACHINE EM_X86_64
+#endif
 
 int probe_elf(const char *image,
               unsigned long image_size,
@@ -61,16 +79,10 @@ static int parseelfimage(const char *ima
         return -EINVAL;
     }
 
-    if (
-#if defined(__i386__)
-        (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
-        (ehdr->e_machine != EM_386) ||
-#elif defined(__x86_64__)
-        (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
-        (ehdr->e_machine != EM_X86_64) ||
-#endif
-        (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
-        (ehdr->e_type != ET_EXEC) )
+    if ( (ehdr->e_ident[EI_CLASS] != ELFCLASS) ||
+         (ehdr->e_machine != ELFMACHINE) ||
+         (ehdr->e_ident[EI_DATA] != ELFDATA) ||
+         (ehdr->e_type != ET_EXEC) )
     {
         ERROR("Kernel not a Xen-compatible Elf image.");
         return -EINVAL;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/libxc/xenctrl.h     Thu Jun 15 10:23:57 2006 -0600
@@ -604,4 +604,58 @@ int xc_finish_mmu_updates(int xc_handle,
 
 int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size);
 
+/*
+ * Return a handle to the event channel driver, or -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_evtchn_open(void);
+
+/*
+ * Close a handle previously allocated with xc_evtchn_open().
+ */
+int xc_evtchn_close(int xce_handle);
+
+/*
+ * Return an fd that can be select()ed on for further calls to
+ * xc_evtchn_pending().
+ */
+int xc_evtchn_fd(int xce_handle);
+
+/*
+ * Notify the given event channel. Returns -1 on failure, in which case
+ * errno will be set appropriately.
+ */
+int xc_evtchn_notify(int xce_handle, evtchn_port_t port);
+
+/*
+ * Returns a new event port bound to the remote port for the given domain ID,
+ * or -1 on failure, in which case errno will be set appropriately.
+ */
+evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
+    evtchn_port_t remote_port);
+
+/*
+ * Unbind the given event channel. Returns -1 on failure, in which case errno
+ * will be set appropriately.
+ */
+int xc_evtchn_unbind(int xce_handle, evtchn_port_t port);
+
+/*
+ * Bind an event channel to the given VIRQ. Returns the event channel bound to
+ * the VIRQ, or -1 on failure, in which case errno will be set appropriately.
+ */
+evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq);
+
+/*
+ * Return the next event channel to become pending, or -1 on failure, in which
+ * case errno will be set appropriately.  
+ */
+evtchn_port_t xc_evtchn_pending(int xce_handle);
+
+/*
+ * Unmask the given event channel. Returns -1 on failure, in which case errno
+ * will be set appropriately.
+ */
+int xc_evtchn_unmask(int xce_handle, evtchn_port_t port);
+
 #endif
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/util/security.py
--- a/tools/python/xen/util/security.py Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/util/security.py Thu Jun 15 10:23:57 2006 -0600
@@ -52,7 +52,8 @@ binary_name_re = re.compile(".*[chwall|s
 binary_name_re = re.compile(".*[chwall|ste|chwall_ste].*\.bin", re.IGNORECASE)
 policy_name_re = re.compile(".*[chwall|ste|chwall_ste].*", re.IGNORECASE)
 
-
+#other global variables
+NULL_SSIDREF = 0
 
 log = logging.getLogger("xend.util.security")
 
@@ -255,6 +256,8 @@ def ssidref2label(ssidref_var):
     #2. get labelnames for both ssidref parts
     pri_ssid = ssidref & 0xffff
     sec_ssid = ssidref >> 16
+    pri_null_ssid = NULL_SSIDREF & 0xffff
+    sec_null_ssid = NULL_SSIDREF >> 16
     pri_labels = []
     sec_labels = []
     labels = []
@@ -270,7 +273,11 @@ def ssidref2label(ssidref_var):
     f.close()
 
     #3. get the label that is in both lists (combination must be a single label)
-    if secondary == "NULL":
+    if (primary == "CHWALL") and (pri_ssid == pri_null_ssid) and (sec_ssid != sec_null_ssid):
+        labels = sec_labels
+    elif (secondary == "CHWALL") and (pri_ssid != pri_null_ssid) and (sec_ssid == sec_null_ssid):
+        labels = pri_labels
+    elif secondary == "NULL":
         labels = pri_labels
     else:
         for i in pri_labels:
@@ -285,7 +292,7 @@ def ssidref2label(ssidref_var):
 
 
 
-def label2ssidref(labelname, policyname):
+def label2ssidref(labelname, policyname, type):
     """
     returns ssidref corresponding to labelname;
     maps current policy to default directory
@@ -293,6 +300,14 @@ def label2ssidref(labelname, policyname)
 
     if policyname in ['NULL', 'INACTIVE', 'DEFAULT']:
         err("Cannot translate labels for \'" + policyname + "\' policy.")
+
+    allowed_types = ['ANY']
+    if type == 'dom':
+        allowed_types.append('VM')
+    elif type == 'res':
+        allowed_types.append('RES')
+    else:
+        err("Invalid type.  Must specify 'dom' or 'res'.")
 
     (primary, secondary, f, pol_exists) = getmapfile(policyname)
 
@@ -303,11 +318,15 @@ def label2ssidref(labelname, policyname)
         l = line.split()
         if (len(l) < 5) or (l[0] != "LABEL->SSID"):
             continue
-        if primary and (l[2] == primary) and (l[3] == labelname):
+        if primary and (l[1] in allowed_types) and (l[2] == primary) and (l[3] == labelname):
             pri_ssid.append(int(l[4], 16))
-        if secondary and (l[2] == secondary) and (l[3] == labelname):
+        if secondary and (l[1] in allowed_types) and (l[2] == secondary) and (l[3] == labelname):
             sec_ssid.append(int(l[4], 16))
     f.close()
+    if (type == 'res') and (primary == "CHWALL") and (len(pri_ssid) == 0):
+        pri_ssid.append(NULL_SSIDREF)
+    elif (type == 'res') and (secondary == "CHWALL") and (len(sec_ssid) == 0):
+        sec_ssid.append(NULL_SSIDREF)
 
     #3. sanity check and composition of ssidref
     if (len(pri_ssid) == 0) or ((len(sec_ssid) == 0) and (secondary != "NULL")):
@@ -360,7 +379,7 @@ def refresh_ssidref(config):
         err("Policy \'" + policyname + "\' in label does not match active policy \'"
             + active_policy +"\'!")
 
-    new_ssidref = label2ssidref(labelname, policyname)
+    new_ssidref = label2ssidref(labelname, policyname, 'dom')
     if not new_ssidref:
         err("SSIDREF refresh failed!")
 
@@ -409,7 +428,7 @@ def get_decision(arg1, arg2):
     enables domains to retrieve access control decisions from
     the hypervisor Access Control Module.
     IN: args format = ['domid', id] or ['ssidref', ssidref]
-    or ['access_control', ['policy', policy], ['label', label]]
+    or ['access_control', ['policy', policy], ['label', label], ['type', type]]
     """
 
     if not on():
@@ -417,14 +436,14 @@ def get_decision(arg1, arg2):
 
     #translate labels before calling low-level function
     if arg1[0] == 'access_control':
-        if (arg1[1][0] != 'policy') or (arg1[2][0] != 'label') :
+        if (arg1[1][0] != 'policy') or (arg1[2][0] != 'label') or (arg1[3][0] != 'type'):
             err("Argument type not supported.")
-        ssidref = label2ssidref(arg1[2][1], arg1[1][1])
+        ssidref = label2ssidref(arg1[2][1], arg1[1][1], arg1[3][1])
         arg1 = ['ssidref', str(ssidref)]
     if arg2[0] == 'access_control':
-        if (arg2[1][0] != 'policy') or (arg2[2][0] != 'label') :
+        if (arg2[1][0] != 'policy') or (arg2[2][0] != 'label') or (arg2[3][0] != 'type'):
             err("Argument type not supported.")
-        ssidref = label2ssidref(arg2[2][1], arg2[1][1])
+        ssidref = label2ssidref(arg2[2][1], arg2[1][1], arg2[3][1])
         arg2 = ['ssidref', str(ssidref)]
 
     # accept only int or string types for domid and ssidref
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/xm/addlabel.py
--- a/tools/python/xen/xm/addlabel.py   Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/xm/addlabel.py   Thu Jun 15 10:23:57 2006 -0600
@@ -50,7 +50,7 @@ def main(argv):
             err("No active policy. Policy must be specified in command line.")
 
         #sanity checks: make sure this label can be instantiated later on
-        ssidref = label2ssidref(label, policyref)
+        ssidref = label2ssidref(label, policyref, 'dom')
 
         new_label = "access_control = ['policy=%s,label=%s']\n" % (policyref, label)
         if not os.path.isfile(configfile):
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/xm/create.py     Thu Jun 15 10:23:57 2006 -0600
@@ -541,7 +541,7 @@ def configure_security(config, vals):
         if sxp.child_value(config, 'ssidref'):
             err("ERROR: SSIDREF and access_control are mutually exclusive but both specified!")
         #else calculate ssidre from label
-        ssidref = security.label2ssidref(label, policy)
+        ssidref = security.label2ssidref(label, policy, 'dom')
         if not ssidref :
             err("ERROR calculating ssidref from access_control.")
         security_label = ['security', [ config_access_control, ['ssidref' , ssidref ] ] ]
diff -r 7f67c15e2c91 -r fbc0e953732e tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py       Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/python/xen/xm/main.py       Thu Jun 15 10:23:57 2006 -0600
@@ -1193,6 +1193,9 @@ def main(argv=sys.argv):
             else:
                 print  >>sys.stderr, "Error: %s" % ex.faultString
             sys.exit(1)
+        except (ValueError, OverflowError):
+            err("Invalid argument.")
+            usage(argv[1])
         except:
             print "Unexpected error:", sys.exc_info()[0]
             print
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/Makefile
--- a/tools/security/Makefile   Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/security/Makefile   Thu Jun 15 10:23:57 2006 -0600
@@ -33,7 +33,7 @@ OBJS_XML2BIN := $(patsubst %.c,%.o,$(fil
 
 ACM_INST_TOOLS    = xensec_tool xensec_xml2bin xensec_gen
 ACM_OBJS          = $(OBJS_TOOL) $(OBJS_XML2BIN) $(OBJS_GETD)
-ACM_SCRIPTS       = python/xensec_tools/acm_getlabel python/xensec_tools/acm_getdecision
+ACM_SCRIPTS       = python/xensec_tools/acm_getlabel
 
 ACM_CONFIG_DIR    = /etc/xen/acm-security
 ACM_POLICY_DIR    = $(ACM_CONFIG_DIR)/policies
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/python/xensec_gen/cgi-bin/policy.cgi
--- a/tools/security/python/xensec_gen/cgi-bin/policy.cgi       Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/security/python/xensec_gen/cgi-bin/policy.cgi       Thu Jun 15 10:23:57 2006 -0600
@@ -406,7 +406,7 @@ def parsePolicyXml( ):
                                        msg = msg + 'Please validate the Policy file used.'
                                        formatXmlError( msg )
 
-                                       allCSMTypes[csName][1] = csMemberList
+                               allCSMTypes[csName][1] = csMemberList
 
        if pOrder != '':
                formPolicyOrder[1] = pOrder
diff -r 7f67c15e2c91 -r fbc0e953732e tools/security/secpol_xml2bin.c
--- a/tools/security/secpol_xml2bin.c   Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/security/secpol_xml2bin.c   Thu Jun 15 10:23:57 2006 -0600
@@ -44,6 +44,8 @@
 
 #define DEBUG    0
 
+#define NULL_LABEL_NAME "__NULL_LABEL__"
+
 /* primary / secondary policy component setting */
 enum policycomponent { CHWALL, STE, NULLPOLICY }
     primary = NULLPOLICY, secondary = NULLPOLICY;
@@ -467,7 +469,7 @@ int init_ssid_queues(void)
         return -ENOMEM;
 
     /* default chwall ssid */
-    default_ssid_chwall->name = "DEFAULT";
+    default_ssid_chwall->name = NULL_LABEL_NAME;
     default_ssid_chwall->num = max_chwall_ssids++;
     default_ssid_chwall->is_ref = 0;
     default_ssid_chwall->type = ANY;
@@ -484,7 +486,7 @@ int init_ssid_queues(void)
     max_chwall_labels++;
 
     /* default ste ssid */
-    default_ssid_ste->name = "DEFAULT";
+    default_ssid_ste->name = NULL_LABEL_NAME;
     default_ssid_ste->num = max_ste_ssids++;
     default_ssid_ste->is_ref = 0;
     default_ssid_ste->type = ANY;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c   Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenmon/xenbaked.c   Thu Jun 15 10:23:57 2006 -0600
@@ -33,9 +33,6 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/ioctl.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
@@ -45,7 +42,6 @@
 #include <xen/xen.h>
 #include <string.h>
 #include <sys/select.h>
-#include <xen/linux/evtchn.h>
 
 #define PERROR(_m, _a...)                                       \
 do {                                                            \
@@ -256,51 +252,29 @@ void log_event(int event_id)
         stat_map[0].event_count++;     // other
 }
 
-#define EVTCHN_DEV_NAME  "/dev/xen/evtchn"
-#define EVTCHN_DEV_MAJOR 10
-#define EVTCHN_DEV_MINOR 201
-
 int virq_port;
-int eventchn_fd = -1;
+int xce_handle = -1;
 
 /* Returns the event channel handle. */
 /* Stolen from xenstore code */
 int eventchn_init(void)
 {
-  struct stat st;
-  struct ioctl_evtchn_bind_virq bind;
   int rc;
   
   // to revert to old way:
   if (0)
     return -1;
   
-  /* Make sure any existing device file links to correct device. */
-  if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
-      (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
-    (void)unlink(EVTCHN_DEV_NAME);
-  
- reopen:
-  eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
-  if (eventchn_fd == -1) {
-    if ((errno == ENOENT) &&
-       ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
-       (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
-              makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0))
-      goto reopen;
-    return -errno;
-  }
-  
-  if (eventchn_fd < 0)
+  xce_handle = xc_evtchn_open();
+
+  if (xce_handle < 0)
     perror("Failed to open evtchn device");
   
-  bind.virq = VIRQ_TBUF;
-  rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);
-  if (rc == -1)
+  if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1)
     perror("Failed to bind to domain exception virq port");
   virq_port = rc;
   
-  return eventchn_fd;
+  return xce_handle;
 }
 
 void wait_for_event(void)
@@ -309,27 +283,30 @@ void wait_for_event(void)
   fd_set inset;
   evtchn_port_t port;
   struct timeval tv;
+  int evtchn_fd;
   
-  if (eventchn_fd < 0) {
+  if (xce_handle < 0) {
     nanosleep(&opts.poll_sleep, NULL);
     return;
   }
 
+  evtchn_fd = xc_evtchn_fd(xce_handle);
+
   FD_ZERO(&inset);
-  FD_SET(eventchn_fd, &inset);
+  FD_SET(evtchn_fd, &inset);
   tv.tv_sec = 1;
   tv.tv_usec = 0;
   // tv = millis_to_timespec(&opts.poll_sleep);
-  ret = select(eventchn_fd+1, &inset, NULL, NULL, &tv);
+  ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv);
   
-  if ( (ret == 1) && FD_ISSET(eventchn_fd, &inset)) {
-    if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+  if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset)) {
+    if ((port = xc_evtchn_pending(xce_handle)) == -1)
       perror("Failed to read from event fd");
     
     //    if (port == virq_port)
     //      printf("got the event I was looking for\r\n");
-    
-    if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+
+    if (xc_evtchn_unmask(xce_handle, port) == -1)
       perror("Failed to write to event fd");
   }
 }
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c    Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstat/libxenstat/src/xenstat.c    Thu Jun 15 10:23:57 2006 -0600
@@ -223,18 +223,20 @@ xenstat_node *xenstat_get_node(xenstat_h
 
        num_domains = 0;
        do {
-               xenstat_domain *domain;
+               xenstat_domain *domain, *tmp;
 
                new_domains = xc_domain_getinfolist(handle->xc_handle,
                        num_domains, DOMAIN_CHUNK_SIZE, domaininfo);
 
-               node->domains = realloc(node->domains,
-                                       (num_domains + new_domains)
-                                       * sizeof(xenstat_domain));
-               if (node->domains == NULL) {
+               tmp = realloc(node->domains,
+                             (num_domains + new_domains)
+                             * sizeof(xenstat_domain));
+               if (tmp == NULL) {
+                       free(node->domains);
                        free(node);
                        return NULL;
                }
+               node->domains = tmp;
 
                domain = node->domains + num_domains;
 
@@ -582,11 +584,14 @@ static int xenstat_collect_networks(xens
                        domain->num_networks = 1;
                        domain->networks = malloc(sizeof(xenstat_network));
                } else {
+                       struct xenstat_network *tmp;
                        domain->num_networks++;
-                       domain->networks =
-                           realloc(domain->networks,
-                                   domain->num_networks *
-                                   sizeof(xenstat_network));
+                       tmp = realloc(domain->networks,
+                                     domain->num_networks *
+                                     sizeof(xenstat_network));
+                       if (tmp == NULL)
+                               free(domain->networks);
+                       domain->networks = tmp;
                }
                if (domain->networks == NULL)
                        return 0;
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstore/fake_libxc.c
--- a/tools/xenstore/fake_libxc.c       Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstore/fake_libxc.c       Thu Jun 15 10:23:57 2006 -0600
@@ -37,7 +37,7 @@ static evtchn_port_t port;
 static evtchn_port_t port;
 
 /* The event channel maps to a signal, shared page to an mmapped file. */
-void evtchn_notify(int local_port)
+void xc_evtchn_notify(int xce_handle, int local_port)
 {
        assert(local_port == port);
        if (kill(xs_test_pid, SIGUSR2) != 0)
@@ -124,7 +124,7 @@ void fake_ack_event(void)
        signal(SIGUSR2, send_to_fd);
 }
 
-int fake_open_eventchn(void)
+int xc_evtchn_open(void)
 {
        int fds[2];
 
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstore/xenstored_core.c   Thu Jun 15 10:23:57 2006 -0600
@@ -54,7 +54,7 @@
 #include "hashtable.h"
 
 
-extern int eventchn_fd; /* in xenstored_domain.c */
+extern int xce_handle; /* in xenstored_domain.c */
 
 static bool verbose = false;
 LIST_HEAD(connections);
@@ -353,8 +353,11 @@ static int initialize_set(fd_set *inset,
 
        set_fd(sock,               inset, &max);
        set_fd(ro_sock,            inset, &max);
-       set_fd(eventchn_fd,        inset, &max);
        set_fd(reopen_log_pipe[0], inset, &max);
+
+       if (xce_handle != -1)
+               set_fd(xc_evtchn_fd(xce_handle), inset, &max);
+
        list_for_each_entry(i, &connections, list) {
                if (i->domain)
                        continue;
@@ -1769,6 +1772,7 @@ int main(int argc, char *argv[])
        bool outputpid = false;
        bool no_domain_init = false;
        const char *pidfile = NULL;
+       int evtchn_fd = -1;
 
        while ((opt = getopt_long(argc, argv, "DE:F:HNPS:T:RLVW:", options,
                                  NULL)) != -1) {
@@ -1907,6 +1911,9 @@ int main(int argc, char *argv[])
        signal(SIGUSR1, stop_failtest);
 #endif
 
+       if (xce_handle != -1)
+               evtchn_fd = xc_evtchn_fd(xce_handle);
+
        /* Get ready to listen to the tools. */
        max = initialize_set(&inset, &outset, *sock, *ro_sock);
 
@@ -1934,7 +1941,7 @@ int main(int argc, char *argv[])
                if (FD_ISSET(*ro_sock, &inset))
                        accept_connection(*ro_sock, false);
 
-               if (eventchn_fd > 0 && FD_ISSET(eventchn_fd, &inset))
+               if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
                        handle_event();
 
                list_for_each_entry(i, &connections, list) {
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xenstore/xenstored_domain.c Thu Jun 15 10:23:57 2006 -0600
@@ -18,15 +18,10 @@
 */
 
 #include <stdio.h>
-#include <linux/ioctl.h>
-#include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdarg.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
 
 //#define DEBUG
 #include "utils.h"
@@ -37,12 +32,11 @@
 #include "xenstored_test.h"
 
 #include <xenctrl.h>
-#include <xen/sys/evtchn.h>
 
 static int *xc_handle;
 static evtchn_port_t virq_port;
 
-int eventchn_fd = -1; 
+int xce_handle = -1; 
 
 struct domain
 {
@@ -82,19 +76,6 @@ struct domain
 };
 
 static LIST_HEAD(domains);
-
-#ifndef TESTING
-static void evtchn_notify(int port)
-{
-       int rc; 
-
-       struct ioctl_evtchn_notify notify;
-       notify.port = port;
-       rc = ioctl(eventchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
-}
-#else
-extern void evtchn_notify(int port);
-#endif
 
 /* FIXME: Mark connection as broken (close it?) when this happens. */
 static bool check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
@@ -146,7 +127,7 @@ static int writechn(struct connection *c
        mb();
        intf->rsp_prod += len;
 
-       evtchn_notify(conn->domain->port);
+       xc_evtchn_notify(xce_handle, conn->domain->port);
 
        return len;
 }
@@ -176,7 +157,7 @@ static int readchn(struct connection *co
        mb();
        intf->req_cons += len;
 
-       evtchn_notify(conn->domain->port);
+       xc_evtchn_notify(xce_handle, conn->domain->port);
 
        return len;
 }
@@ -184,13 +165,11 @@ static int destroy_domain(void *_domain)
 static int destroy_domain(void *_domain)
 {
        struct domain *domain = _domain;
-       struct ioctl_evtchn_unbind unbind;
 
        list_del(&domain->list);
 
        if (domain->port) {
-               unbind.port = domain->port;
-               if (ioctl(eventchn_fd, IOCTL_EVTCHN_UNBIND, &unbind) == -1)
+               if (xc_evtchn_unbind(xce_handle, domain->port) == -1)
                        eprintf("> Unbinding port %i failed!\n", domain->port);
        }
 
@@ -231,14 +210,14 @@ void handle_event(void)
 {
        evtchn_port_t port;
 
-       if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+       if ((port = xc_evtchn_pending(xce_handle)) == -1)
                barf_perror("Failed to read from event fd");
 
        if (port == virq_port)
                domain_cleanup();
 
 #ifndef TESTING
-       if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port))
+       if (xc_evtchn_unmask(xce_handle, port) == -1)
                barf_perror("Failed to write to event fd");
 #endif
 }
@@ -269,7 +248,6 @@ static struct domain *new_domain(void *c
                                 int port)
 {
        struct domain *domain;
-       struct ioctl_evtchn_bind_interdomain bind;
        int rc;
 
 
@@ -283,9 +261,7 @@ static struct domain *new_domain(void *c
        talloc_set_destructor(domain, destroy_domain);
 
        /* Tell kernel we're interested in this event. */
-       bind.remote_domain = domid;
-       bind.remote_port   = port;
-       rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
+       rc = xc_evtchn_bind_interdomain(xce_handle, domid, port);
        if (rc == -1)
            return NULL;
        domain->port = rc;
@@ -490,23 +466,14 @@ static int dom0_init(void)
 
        talloc_steal(dom0->conn, dom0); 
 
-       evtchn_notify(dom0->port); 
+       xc_evtchn_notify(xce_handle, dom0->port); 
 
        return 0; 
 }
-
-
-
-#define EVTCHN_DEV_NAME  "/dev/xen/evtchn"
-#define EVTCHN_DEV_MAJOR 10
-#define EVTCHN_DEV_MINOR 201
-
 
 /* Returns the event channel handle. */
 int domain_init(void)
 {
-       struct stat st;
-       struct ioctl_evtchn_bind_virq bind;
        int rc;
 
        xc_handle = talloc(talloc_autofree_context(), int);
@@ -519,39 +486,19 @@ int domain_init(void)
 
        talloc_set_destructor(xc_handle, close_xc_handle);
 
-#ifdef TESTING
-       eventchn_fd = fake_open_eventchn();
-       (void)&st;
-#else
-       /* Make sure any existing device file links to correct device. */
-       if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
-           (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
-               (void)unlink(EVTCHN_DEV_NAME);
-
- reopen:
-       eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
-       if (eventchn_fd == -1) {
-               if ((errno == ENOENT) &&
-                   ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
-                   (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
-                          makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0))
-                       goto reopen;
-               return -errno;
-       }
-#endif
-       if (eventchn_fd < 0)
+       xce_handle = xc_evtchn_open();
+
+       if (xce_handle < 0)
                barf_perror("Failed to open evtchn device");
 
        if (dom0_init() != 0) 
                barf_perror("Failed to initialize dom0 state"); 
 
-       bind.virq = VIRQ_DOM_EXC;
-       rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);
-       if (rc == -1)
+       if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_DOM_EXC)) == -1)
                barf_perror("Failed to bind to domain exception virq port");
        virq_port = rc;
 
-       return eventchn_fd;
+       return xce_handle;
 }
 
 void domain_entry_inc(struct connection *conn)
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xm-test/tests/block-integrity/01_block_device_read_verify.py
--- a/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py        Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py        Thu Jun 15 10:23:57 2006 -0600
@@ -31,7 +31,7 @@ traceCommand("cat /dev/urandom > /dev/ra
 
 s, o = traceCommand("md5sum /dev/ram1")
 
-dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
+dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o, re.M)
 
 block_attach(domain, "phy:ram1", "hda1")
 
@@ -40,7 +40,7 @@ except ConsoleError, e:
 except ConsoleError, e:
     FAIL(str(e))
 
-domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
+domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"], re.M)
 
 domain.closeConsole()
 
diff -r 7f67c15e2c91 -r fbc0e953732e tools/xm-test/tests/block-integrity/02_block_device_write_verify.py
--- a/tools/xm-test/tests/block-integrity/02_block_device_write_verify.py       Thu Jun 15 10:02:53 2006 -0600
+++ b/tools/xm-test/tests/block-integrity/02_block_device_write_verify.py       Thu Jun 15 10:23:57 2006 -0600
@@ -37,7 +37,7 @@ except ConsoleError, e:
 except ConsoleError, e:
     FAIL(str(e))
 
-domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
+domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"], re.M)
 
 domain.closeConsole()
 
@@ -45,7 +45,7 @@ domain.stop()
 
 s, o = traceCommand("md5sum /dev/ram1")
 
-dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
+dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o, re.M)
 
 if domU_md5sum_match == None:
     FAIL("Failed to get md5sum of data written in domU.")
diff -r 7f67c15e2c91 -r fbc0e953732e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/arch/x86/traps.c      Thu Jun 15 10:23:57 2006 -0600
@@ -1279,7 +1279,7 @@ static void nmi_softirq(void)
 static void nmi_softirq(void)
 {
     /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
-    evtchn_notify(dom0->vcpu[0]);
+    vcpu_kick(dom0->vcpu[0]);
 }
 
 static void nmi_dom0_report(unsigned int reason_idx)
diff -r 7f67c15e2c91 -r fbc0e953732e xen/common/event_channel.c
--- a/xen/common/event_channel.c        Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/common/event_channel.c        Thu Jun 15 10:23:57 2006 -0600
@@ -493,10 +493,9 @@ void evtchn_set_pending(struct vcpu *v, 
 
     if ( !test_bit        (port, s->evtchn_mask) &&
          !test_and_set_bit(port / BITS_PER_LONG,
-                           &v->vcpu_info->evtchn_pending_sel) &&
-         !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
-    {
-        evtchn_notify(v);
+                           &v->vcpu_info->evtchn_pending_sel) )
+    {
+        vcpu_mark_events_pending(v);
     }
     
     /* Check if some VCPU might be polling for this event. */
@@ -682,10 +681,9 @@ static long evtchn_unmask(evtchn_unmask_
     if ( test_and_clear_bit(port, s->evtchn_mask) &&
          test_bit          (port, s->evtchn_pending) &&
          !test_and_set_bit (port / BITS_PER_LONG,
-                            &v->vcpu_info->evtchn_pending_sel) &&
-         !test_and_set_bit (0, &v->vcpu_info->evtchn_upcall_pending) )
-    {
-        evtchn_notify(v);
+                            &v->vcpu_info->evtchn_pending_sel) )
+    {
+        vcpu_mark_events_pending(v);
     }
 
     spin_unlock(&d->evtchn_lock);
diff -r 7f67c15e2c91 -r fbc0e953732e xen/include/asm-ia64/event.h
--- a/xen/include/asm-ia64/event.h      Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/include/asm-ia64/event.h      Thu Jun 15 10:23:57 2006 -0600
@@ -12,7 +12,7 @@
 #include <public/arch-ia64.h>
 #include <asm/vcpu.h>
 
-static inline void evtchn_notify(struct vcpu *v)
+static inline void vcpu_kick(struct vcpu *v)
 {
     /*
      * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
@@ -30,6 +30,12 @@ static inline void evtchn_notify(struct 
 
     if(!VMX_DOMAIN(v) && !v->arch.event_callback_ip)
         vcpu_pend_interrupt(v, v->domain->shared_info->arch.evtchn_vector);
+}
+
+static inline void vcpu_mark_events_pending(struct vcpu *v)
+{
+    if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
+        vcpu_kick(v);
 }
 
 /* Note: Bitwise operations result in fast code with no branches. */
diff -r 7f67c15e2c91 -r fbc0e953732e xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h       Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/include/asm-x86/event.h       Thu Jun 15 10:23:57 2006 -0600
@@ -9,7 +9,7 @@
 #ifndef __ASM_EVENT_H__
 #define __ASM_EVENT_H__
 
-static inline void evtchn_notify(struct vcpu *v)
+static inline void vcpu_kick(struct vcpu *v)
 {
     /*
      * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
@@ -24,6 +24,12 @@ static inline void evtchn_notify(struct 
     vcpu_unblock(v);
     if ( running )
         smp_send_event_check_cpu(v->processor);
+}
+
+static inline void vcpu_mark_events_pending(struct vcpu *v)
+{
+    if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
+        vcpu_kick(v);
 }
 
 static inline int local_events_need_delivery(void)
diff -r 7f67c15e2c91 -r fbc0e953732e xen/include/xen/elf.h
--- a/xen/include/xen/elf.h     Thu Jun 15 10:02:53 2006 -0600
+++ b/xen/include/xen/elf.h     Thu Jun 15 10:23:57 2006 -0600
@@ -178,9 +178,9 @@ typedef struct {
 #define EM_ALPHA       41              /* DEC ALPHA */
 #define EM_SPARCV9     43              /* SPARC version 9 */
 #define EM_ALPHA_EXP   0x9026          /* DEC ALPHA */
+#define EM_IA_64       50              /* Intel Merced */
 #define EM_X86_64      62              /* AMD x86-64 architecture */
 #define EM_VAX         75              /* DEC VAX */
-#define EM_NUM         15              /* number of machine types */
 
 /* Version */
 #define EV_NONE                0               /* Invalid */
diff -r 7f67c15e2c91 -r fbc0e953732e linux-2.6-xen-sparse/kernel/fork.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/kernel/fork.c        Thu Jun 15 10:23:57 2006 -0600
@@ -0,0 +1,1619 @@
+/*
+ *  linux/kernel/fork.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ *  'fork.c' contains the help-routines for the 'fork' system call
+ * (see also entry.S and others).
+ * Fork is rather simple, once you get the hang of it, but the memory
+ * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/unistd.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/completion.h>
+#include <linux/namespace.h>
+#include <linux/personality.h>
+#include <linux/mempolicy.h>
+#include <linux/sem.h>
+#include <linux/file.h>
+#include <linux/key.h>
+#include <linux/binfmts.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/security.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/jiffies.h>
+#include <linux/futex.h>
+#include <linux/rcupdate.h>
+#include <linux/ptrace.h>
+#include <linux/mount.h>
+#include <linux/audit.h>
+#include <linux/profile.h>
+#include <linux/rmap.h>
+#include <linux/acct.h>
+#include <linux/cn_proc.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Protected counters by write_lock_irq(&tasklist_lock)
+ */
+unsigned long total_forks;     /* Handle normal Linux uptimes. */
+int nr_threads;                /* The idle threads do not count.. */
+
+int max_threads;               /* tunable limit on nr_threads */
+
+DEFINE_PER_CPU(unsigned long, process_counts) = 0;
+
+ __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
+
+EXPORT_SYMBOL(tasklist_lock);
+
+int nr_processes(void)
+{
+       int cpu;
+       int total = 0;
+
+       for_each_online_cpu(cpu)
+               total += per_cpu(process_counts, cpu);
+
+       return total;
+}
+
+#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+# define alloc_task_struct()   kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
+# define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk))
+static kmem_cache_t *task_struct_cachep;
+#endif
+
+/* SLAB cache for signal_struct structures (tsk->signal) */
+kmem_cache_t *signal_cachep;
+
+/* SLAB cache for sighand_struct structures (tsk->sighand) */
+kmem_cache_t *sighand_cachep;
+
+/* SLAB cache for files_struct structures (tsk->files) */
+kmem_cache_t *files_cachep;
+
+/* SLAB cache for fs_struct structures (tsk->fs) */
+kmem_cache_t *fs_cachep;
+
+/* SLAB cache for vm_area_struct structures */
+kmem_cache_t *vm_area_cachep;
+
+/* SLAB cache for mm_struct structures (tsk->mm) */
+static kmem_cache_t *mm_cachep;
+
+void free_task(struct task_struct *tsk)
+{
+       free_thread_info(tsk->thread_info);
+       free_task_struct(tsk);
+}
+EXPORT_SYMBOL(free_task);
+
+void __put_task_struct_cb(struct rcu_head *rhp)
+{
+       struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+       WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
+       WARN_ON(atomic_read(&tsk->usage));
+       WARN_ON(tsk == current);
+
+       if (unlikely(tsk->audit_context))
+               audit_free(tsk);
+       security_task_free(tsk);
+       free_uid(tsk->user);
+       put_group_info(tsk->group_info);
+
+       if (!profile_handoff_task(tsk))
+               free_task(tsk);
+}
+
+void __init fork_init(unsigned long mempages)
+{
+#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+#ifndef ARCH_MIN_TASKALIGN
+#define ARCH_MIN_TASKALIGN     L1_CACHE_BYTES
+#endif
+       /* create a slab on which task_structs can be allocated */
+       task_struct_cachep =
+               kmem_cache_create("task_struct", sizeof(struct task_struct),
+                       ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
+#endif
+
+       /*
+        * The default maximum number of threads is set to a safe
+        * value: the thread structures can take up at most half
+        * of memory.
+        */
+       max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
+
+       /*
+        * we need to allow at least 20 threads to boot a system
+        */
+       if(max_threads < 20)
+               max_threads = 20;
+
+       init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
+       init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
+       init_task.signal->rlim[RLIMIT_SIGPENDING] =
+               init_task.signal->rlim[RLIMIT_NPROC];
+}
+
+static struct task_struct *dup_task_struct(struct task_struct *orig)
+{
+       struct task_struct *tsk;
+       struct thread_info *ti;
+
+       prepare_to_copy(orig);
+
+       tsk = alloc_task_struct();
+       if (!tsk)
+               return NULL;
+
+       ti = alloc_thread_info(tsk);
+       if (!ti) {
+               free_task_struct(tsk);
+               return NULL;
+       }
+
+       *tsk = *orig;
+       tsk->thread_info = ti;
+       setup_thread_stack(tsk, orig);
+
+       /* One for us, one for whoever does the "release_task()" (usually parent) */
+       atomic_set(&tsk->usage,2);
+       atomic_set(&tsk->fs_excl, 0);
+       return tsk;
+}
+
+#ifdef CONFIG_MMU
+static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+       struct vm_area_struct *mpnt, *tmp, **pprev;
+       struct rb_node **rb_link, *rb_parent;
+       int retval;
+       unsigned long charge;
+       struct mempolicy *pol;
+
+       down_write(&oldmm->mmap_sem);
+       flush_cache_mm(oldmm);
+       down_write(&mm->mmap_sem);
+
+       mm->locked_vm = 0;
+       mm->mmap = NULL;
+       mm->mmap_cache = NULL;
+       mm->free_area_cache = oldmm->mmap_base;
+       mm->cached_hole_size = ~0UL;
+       mm->map_count = 0;
+       cpus_clear(mm->cpu_vm_mask);
+       mm->mm_rb = RB_ROOT;
+       rb_link = &mm->mm_rb.rb_node;
+       rb_parent = NULL;
+       pprev = &mm->mmap;
+
+       for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
+               struct file *file;
+
+               if (mpnt->vm_flags & VM_DONTCOPY) {
+                       long pages = vma_pages(mpnt);
+                       mm->total_vm -= pages;
+                       vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
+                                                               -pages);
+                       continue;
+               }
+               charge = 0;
+               if (mpnt->vm_flags & VM_ACCOUNT) {
+                       unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+                       if (security_vm_enough_memory(len))
+                               goto fail_nomem;
+                       charge = len;
+               }
+               tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+               if (!tmp)
+                       goto fail_nomem;
+               *tmp = *mpnt;
+               pol = mpol_copy(vma_policy(mpnt));
+               retval = PTR_ERR(pol);
+               if (IS_ERR(pol))
+                       goto fail_nomem_policy;
+               vma_set_policy(tmp, pol);
+               tmp->vm_flags &= ~VM_LOCKED;
+               tmp->vm_mm = mm;
+               tmp->vm_next = NULL;
+               anon_vma_link(tmp);
+               file = tmp->vm_file;
+               if (file) {
+                       struct inode *inode = file->f_dentry->d_inode;
+                       get_file(file);
+                       if (tmp->vm_flags & VM_DENYWRITE)
+                               atomic_dec(&inode->i_writecount);
+      
+                       /* insert tmp into the share list, just after mpnt */
+                       spin_lock(&file->f_mapping->i_mmap_lock);
+                       tmp->vm_truncate_count = mpnt->vm_truncate_count;
+                       flush_dcache_mmap_lock(file->f_mapping);
+                       vma_prio_tree_add(tmp, mpnt);
+                       flush_dcache_mmap_unlock(file->f_mapping);
+                       spin_unlock(&file->f_mapping->i_mmap_lock);
+               }
+
+               /*
+                * Link in the new vma and copy the page table entries.
+                */
+               *pprev = tmp;
+               pprev = &tmp->vm_next;
+
+               __vma_link_rb(mm, tmp, rb_link, rb_parent);
+               rb_link = &tmp->vm_rb.rb_right;
+               rb_parent = &tmp->vm_rb;
+
+               mm->map_count++;
+               retval = copy_page_range(mm, oldmm, mpnt);
+
+               if (tmp->vm_ops && tmp->vm_ops->open)
+                       tmp->vm_ops->open(tmp);
+
+               if (retval)
+                       goto out;
+       }
+#ifdef arch_dup_mmap
+       arch_dup_mmap(mm, oldmm);
+#endif
+       retval = 0;
+out:
+       up_write(&mm->mmap_sem);
+       flush_tlb_mm(oldmm);
+       up_write(&oldmm->mmap_sem);
+       return retval;
+fail_nomem_policy:
+       kmem_cache_free(vm_area_cachep, tmp);
+fail_nomem:
+       retval = -ENOMEM;
+       vm_unacct_memory(charge);
+       goto out;
+}
+
+static inline int mm_alloc_pgd(struct mm_struct * mm)
+{
+       mm->pgd = pgd_alloc(mm);
+       if (unlikely(!mm->pgd))
+               return -ENOMEM;
+       return 0;
+}
+
+static inline void mm_free_pgd(struct mm_struct * mm)
+{
+       pgd_free(mm->pgd);
+}
+#else
+#define dup_mmap(mm, oldmm)    (0)
+#define mm_alloc_pgd(mm)       (0)
+#define mm_free_pgd(mm)
+#endif /* CONFIG_MMU */
+
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
+
+#define allocate_mm()  (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
+#define free_mm(mm)    (kmem_cache_free(mm_cachep, (mm)))
+
+#include <linux/init_task.h>
+
+static struct mm_struct * mm_init(struct mm_struct * mm)
+{
+       atomic_set(&mm->mm_users, 1);
+       atomic_set(&mm->mm_count, 1);
+       init_rwsem(&mm->mmap_sem);
+       INIT_LIST_HEAD(&mm->mmlist);
+       mm->core_waiters = 0;
+       mm->nr_ptes = 0;
+       set_mm_counter(mm, file_rss, 0);
+       set_mm_counter(mm, anon_rss, 0);
+       spin_lock_init(&mm->page_table_lock);
+       rwlock_init(&mm->ioctx_list_lock);
+       mm->ioctx_list = NULL;
+       mm->free_area_cache = TASK_UNMAPPED_BASE;
+       mm->cached_hole_size = ~0UL;
+
+       if (likely(!mm_alloc_pgd(mm))) {
+               mm->def_flags = 0;
+               return mm;
+       }
+       free_mm(mm);
+       return NULL;
+}
+
+/*
+ * Allocate and initialize an mm_struct.
+ */
+struct mm_struct * mm_alloc(void)
+{
+       struct mm_struct * mm;
+
+       mm = allocate_mm();
+       if (mm) {
+               memset(mm, 0, sizeof(*mm));
+               mm = mm_init(mm);
+       }
+       return mm;
+}
+
+/*
+ * Called when the last reference to the mm
+ * is dropped: either by a lazy thread or by
+ * mmput. Free the page directory and the mm.
+ */
+void fastcall __mmdrop(struct mm_struct *mm)
+{
+       BUG_ON(mm == &init_mm);
+       mm_free_pgd(mm);
+       destroy_context(mm);
+       free_mm(mm);
+}
+
+/*
+ * Decrement the use count and release all resources for an mm.
+ */
+void mmput(struct mm_struct *mm)
+{
+       if (atomic_dec_and_test(&mm->mm_users)) {
+               exit_aio(mm);
+               exit_mmap(mm);
+               if (!list_empty(&mm->mmlist)) {
+                       spin_lock(&mmlist_lock);
+                       list_del(&mm->mmlist);
+                       spin_unlock(&mmlist_lock);
+               }
+               put_swap_token(mm);
+               mmdrop(mm);
+       }
+}
+EXPORT_SYMBOL_GPL(mmput);
+
+/**
+ * get_task_mm - acquire a reference to the task's mm
+ *
+ * Returns %NULL if the task has no mm.  Checks PF_BORROWED_MM (meaning
+ * this kernel workthread has transiently adopted a user mm with use_mm,
+ * to do its AIO) is not set and if so returns a reference to it, after
+ * bumping up the use count.  User must release the mm via mmput()
+ * after use.  Typically used by /proc and ptrace.
+ */
+struct mm_struct *get_task_mm(struct task_struct *task)
+{
+       struct mm_struct *mm;
+
+       task_lock(task);
+       mm = task->mm;
+       if (mm) {
+               if (task->flags & PF_BORROWED_MM)
+                       mm = NULL;
+               else
+                       atomic_inc(&mm->mm_users);
+       }
+       task_unlock(task);
+       return mm;
+}
+EXPORT_SYMBOL_GPL(get_task_mm);
+
+/* Please note the differences between mmput and mm_release.
+ * mmput is called whenever we stop holding onto a mm_struct,
+ * error success whatever.
+ *
+ * mm_release is called after a mm_struct has been removed
+ * from the current process.
+ *
+ * This difference is important for error handling, when we
+ * only half set up a mm_struct for a new process and need to restore
+ * the old one.  Because we mmput the new mm_struct before
+ * restoring the old one. . .
+ * Eric Biederman 10 January 1998
+ */
+void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+       struct completion *vfork_done = tsk->vfork_done;
+
+       /* Get rid of any cached register state */
+       deactivate_mm(tsk, mm);
+
+       /* notify parent sleeping on vfork() */
+       if (vfork_done) {
+               tsk->vfork_done = NULL;
+               complete(vfork_done);
+       }
+       if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) {
+               u32 __user * tidptr = tsk->clear_child_tid;
+               tsk->clear_child_tid = NULL;
+
+               /*
+                * We don't check the error code - if userspace has
+                * not set up a proper pointer then tough luck.
+                */
+               put_user(0, tidptr);
+               sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
+       }
+}
+
+/*
+ * Allocate a new mm structure and copy contents from the
+ * mm structure of the passed in task structure.
+ */
+static struct mm_struct *dup_mm(struct task_struct *tsk)
+{
+       struct mm_struct *mm, *oldmm = current->mm;
+       int err;
+
+       if (!oldmm)
+               return NULL;
+
+       mm = allocate_mm();
+       if (!mm)
+               goto fail_nomem;
+
+       memcpy(mm, oldmm, sizeof(*mm));
+
+       if (!mm_init(mm))
+               goto fail_nomem;
+
+       if (init_new_context(tsk, mm))
+               goto fail_nocontext;
+
+       err = dup_mmap(mm, oldmm);
+       if (err)
+               goto free_pt;
+
+       mm->hiwater_rss = get_mm_rss(mm);
+       mm->hiwater_vm = mm->total_vm;
+
+       return mm;
+
+free_pt:
+       mmput(mm);
+
+fail_nomem:
+       return NULL;
+
+fail_nocontext:
+       /*
+        * If init_new_context() failed, we cannot use mmput() to free the mm
+        * because it calls destroy_context()
+        */
+       mm_free_pgd(mm);
+       free_mm(mm);
+       return NULL;
+}
+
+static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
+{
+       struct mm_struct * mm, *oldmm;
+       int retval;
+
+       tsk->min_flt = tsk->maj_flt = 0;
+       tsk->nvcsw = tsk->nivcsw = 0;
+
+       tsk->mm = NULL;
+       tsk->active_mm = NULL;
+
+       /*
+        * Are we cloning a kernel thread?
+        *
+        * We need to steal a active VM for that..
+        */
+       oldmm = current->mm;
+       if (!oldmm)
+               return 0;
+
+       if (clone_flags & CLONE_VM) {
+               atomic_inc(&oldmm->mm_users);
+               mm = oldmm;
+               goto good_mm;
+       }
+
+       retval = -ENOMEM;
+       mm = dup_mm(tsk);
+       if (!mm)
+               goto fail_nomem;
+
+good_mm:
+       tsk->mm = mm;
+       tsk->active_mm = mm;
+       return 0;
+
+fail_nomem:
+       return retval;
+}
+
+static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+{
+       struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+       /* We don't need to lock fs - think why ;-) */
+       if (fs) {
+               atomic_set(&fs->count, 1);
+               rwlock_init(&fs->lock);
+               fs->umask = old->umask;
+               read_lock(&old->lock);
+               fs->rootmnt = mntget(old->rootmnt);
+               fs->root = dget(old->root);
+               fs->pwdmnt = mntget(old->pwdmnt);
+               fs->pwd = dget(old->pwd);
+               if (old->altroot) {
+                       fs->altrootmnt = mntget(old->altrootmnt);
+                       fs->altroot = dget(old->altroot);
+               } else {
+                       fs->altrootmnt = NULL;
+                       fs->altroot = NULL;
+               }
+               read_unlock(&old->lock);
+       }
+       return fs;
+}
+
+struct fs_struct *copy_fs_struct(struct fs_struct *old)
+{
+       return __copy_fs_struct(old);
+}
+
+EXPORT_SYMBOL_GPL(copy_fs_struct);
+
+static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
+{
+       if (clone_flags & CLONE_FS) {
+               atomic_inc(&current->fs->count);
+               return 0;
+       }
+       tsk->fs = __copy_fs_struct(current->fs);
+       if (!tsk->fs)
+               return -ENOMEM;
+       return 0;
+}
+
+static int count_open_files(struct fdtable *fdt)
+{
+       int size = fdt->max_fdset;
+       int i;
+
+       /* Find the last open fd */
+       for (i = size/(8*sizeof(long)); i > 0; ) {
+               if (fdt->open_fds->fds_bits[--i])
+                       break;
+       }
+       i = (i+1) * 8 * sizeof(long);
+       return i;
+}
+
+static struct files_struct *alloc_files(void)
+{
+       struct files_struct *newf;
+       struct fdtable *fdt;
+
+       newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
+       if (!newf)
+               goto out;
+
+       atomic_set(&newf->count, 1);
+
+       spin_lock_init(&newf->file_lock);
+       fdt = &newf->fdtab;
+       fdt->next_fd = 0;
+       fdt->max_fds = NR_OPEN_DEFAULT;
+       fdt->max_fdset = __FD_SETSIZE;
+       fdt->close_on_exec = &newf->close_on_exec_init;
+       fdt->open_fds = &newf->open_fds_init;
+       fdt->fd = &newf->fd_array[0];
+       INIT_RCU_HEAD(&fdt->rcu);
+       fdt->free_files = NULL;
+       fdt->next = NULL;
+       rcu_assign_pointer(newf->fdt, fdt);
+out:
+       return newf;
+}
+
+/*
+ * Allocate a new files structure and copy contents from the
+ * passed in files structure.
+ */
+static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
+{
+       struct files_struct *newf;
+       struct file **old_fds, **new_fds;
+       int open_files, size, i, expand;
+       struct fdtable *old_fdt, *new_fdt;
+
+       newf = alloc_files();
+       if (!newf)
+               goto out;
+
+       spin_lock(&oldf->file_lock);
+       old_fdt = files_fdtable(oldf);
+       new_fdt = files_fdtable(newf);
+       size = old_fdt->max_fdset;
+       open_files = count_open_files(old_fdt);
+       expand = 0;
+
+       /*
+        * Check whether we need to allocate a larger fd array or fd set.
+        * Note: we're not a clone task, so the open count won't  change.
+        */
+       if (open_files > new_fdt->max_fdset) {
+               new_fdt->max_fdset = 0;
+               expand = 1;
+       }
+       if (open_files > new_fdt->max_fds) {
+               new_fdt->max_fds = 0;
+               expand = 1;
+       }
+
+       /* if the old fdset gets grown now, we'll only copy up to "size" fds */
+       if (expand) {
+               spin_unlock(&oldf->file_lock);
+               spin_lock(&newf->file_lock);
+               *errorp = expand_files(newf, open_files-1);
+               spin_unlock(&newf->file_lock);
+               if (*errorp < 0)
+                       goto out_release;
+               new_fdt = files_fdtable(newf);
+               /*
+                * Reacquire the oldf lock and a pointer to its fd table
+                * who knows it may have a new bigger fd table. We need
+                * the latest pointer.
+                */
+               spin_lock(&oldf->file_lock);
+               old_fdt = files_fdtable(oldf);
+       }
+
+       old_fds = old_fdt->fd;
+       new_fds = new_fdt->fd;
+
+       memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8);
+       memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8);
+
+       for (i = open_files; i != 0; i--) {
+               struct file *f = *old_fds++;
+               if (f) {
+                       get_file(f);
+               } else {
+                       /*
+                        * The fd may be claimed in the fd bitmap but not yet
+                        * instantiated in the files array if a sibling thread
+                        * is partway through open().  So make sure that this
+                        * fd is available to the new process.
+                        */
+                       FD_CLR(open_files - i, new_fdt->open_fds);
+               }
+               rcu_assign_pointer(*new_fds++, f);
+       }
+       spin_unlock(&oldf->file_lock);
+
+       /* compute the remainder to be cleared */
+       size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
+
+       /* This is long word aligned thus could use a optimized version */ 
+       memset(new_fds, 0, size); 
+
+       if (new_fdt->max_fdset > open_files) {
+               int left = (new_fdt->max_fdset-open_files)/8;
+               int start = open_files / (8 * sizeof(unsigned long));
+
+               memset(&new_fdt->open_fds->fds_bits[start], 0, left);
+               memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
+       }
+
+out:
+       return newf;
+
+out_release:
+       free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
+       free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
+       free_fd_array(new_fdt->fd, new_fdt->max_fds);
+       kmem_cache_free(files_cachep, newf);
+       return NULL;
+}
+
+static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+{
+       struct files_struct *oldf, *newf;
+       int error = 0;
+
+       /*
+        * A background process may not have any files ...
+        */
+       oldf = current->files;
+       if (!oldf)
+               goto out;
+
+       if (clone_flags & CLONE_FILES) {
+               atomic_inc(&oldf->count);
+               goto out;
+       }
+
+       /*
+        * Note: we may be using current for both targets (See exec.c)
+        * This works because we cache current->files (old) as oldf. Don't
+        * break this.
+        */
+       tsk->files = NULL;
+       error = -ENOMEM;
+       newf = dup_fd(oldf, &error);
+       if (!newf)
+               goto out;
+
+       tsk->files = newf;
+       error = 0;
+out:
+       return error;
+}
+
+/*
+ *     Helper to unshare the files of the current task.
+ *     We don't want to expose copy_files internals to
+ *     the exec layer of the kernel.
+ */
+
+int unshare_files(void)
+{
+       struct files_struct *files  = current->files;
+       int rc;
+
+       if(!files)
+               BUG();
+
+       /* This can race but the race causes us to copy when we don't
+          need to and drop the copy */
+       if(atomic_read(&files->count) == 1)
+       {
+               atomic_inc(&files->count);
+               return 0;
+       }
+       rc = copy_files(0, current);
+       if(rc)
+               current->files = files;
+       return rc;
+}
+
+EXPORT_SYMBOL(unshare_files);
+
+void sighand_free_cb(struct rcu_head *rhp)
+{
+       struct sighand_struct *sp;
+
+       sp = container_of(rhp, struct sighand_struct, rcu);
+       kmem_cache_free(sighand_cachep, sp);
+}
+
+static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
+{
+       struct sighand_struct *sig;
+
+       if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
+               atomic_inc(&current->sighand->count);
+               return 0;
+       }
+       sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+       rcu_assign_pointer(tsk->sighand, sig);
+       if (!sig)
+               return -ENOMEM;
+       spin_lock_init(&sig->siglock);
+       atomic_set(&sig->count, 1);
+       memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+       return 0;
+}
+
+static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
+{
+       struct signal_struct *sig;
+       int ret;
+
+       if (clone_flags & CLONE_THREAD) {
+               atomic_inc(&current->signal->count);
+               atomic_inc(&current->signal->live);
+               return 0;
+       }
+       sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+       tsk->signal = sig;
+       if (!sig)
+               return -ENOMEM;
+
+       ret = copy_thread_group_keys(tsk);
+       if (ret < 0) {
+               kmem_cache_free(signal_cachep, sig);
+               return ret;
+       }
+
+       atomic_set(&sig->count, 1);
+       atomic_set(&sig->live, 1);
+       init_waitqueue_head(&sig->wait_chldexit);
+       sig->flags = 0;
+       sig->group_exit_code = 0;
+       sig->group_exit_task = NULL;
+       sig->group_stop_count = 0;
+       sig->curr_target = NULL;
+       init_sigpending(&sig->shared_pending);
+       INIT_LIST_HEAD(&sig->posix_timers);
+
+       hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
+       sig->it_real_incr.tv64 = 0;
+       sig->real_timer.function = it_real_fn;
+       sig->real_timer.data = tsk;
+
+       sig->it_virt_expires = cputime_zero;
+       sig->it_virt_incr = cputime_zero;
+       sig->it_prof_expires = cputime_zero;
+       sig->it_prof_incr = cputime_zero;
+
+       sig->leader = 0;        /* session leadership doesn't inherit */
+       sig->tty_old_pgrp = 0;
+
+       sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+       sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
+       sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+       sig->sched_time = 0;
+       INIT_LIST_HEAD(&sig->cpu_timers[0]);
+       INIT_LIST_HEAD(&sig->cpu_timers[1]);
+       INIT_LIST_HEAD(&sig->cpu_timers[2]);
+
+       task_lock(current->group_leader);
+       memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
+       task_unlock(current->group_leader);
+
+       if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+               /*
+                * New sole thread in the process gets an expiry time
+                * of the whole CPU time limit.
+                */
+               tsk->it_prof_expires =
+                       secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+       }
+
+       return 0;
+}
+
+static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
+{
+       unsigned long new_flags = p->flags;
+
+       new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE);
+       new_flags |= PF_FORKNOEXEC;
+       if (!(clone_flags & CLONE_PTRACE))
+               p->ptrace = 0;
+       p->flags = new_flags;
+}
+
+asmlinkage long sys_set_tid_address(int __user *tidptr)
+{
+       current->clear_child_tid = tidptr;
+
+       return current->pid;
+}
+
+/*
+ * This creates a new process as a copy of the old one,
+ * but does not actually start it yet.
+ *
+ * It copies the registers, and all the appropriate
+ * parts of the process environment (as per the clone
+ * flags). The actual kick-off is left to the caller.
+ */
+static task_t *copy_process(unsigned long clone_flags,
+                                unsigned long stack_start,
+                                struct pt_regs *regs,
+                                unsigned long stack_size,
+                                int __user *parent_tidptr,
+                                int __user *child_tidptr,
+                                int pid)
+{
+       int retval;
+       struct task_struct *p = NULL;
+
+       if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
+               return ERR_PTR(-EINVAL);
+
+       /*
+        * Thread groups must share signals as well, and detached threads
+        * can only be started up within the thread group.
+        */
+       if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
+               return ERR_PTR(-EINVAL);
+
+       /*
+        * Shared signal handlers imply shared VM. By way of the above,
+        * thread groups also imply shared VM. Blocking this case allows
+        * for various simplifications in other code.
+        */
+       if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
+               return ERR_PTR(-EINVAL);
+
+       retval = security_task_create(clone_flags);
+       if (retval)
+               goto fork_out;
+
+       retval = -ENOMEM;
+       p = dup_task_struct(current);
+       if (!p)
+               goto fork_out;
+
+       retval = -EAGAIN;
+       if (atomic_read(&p->user->processes) >=
+                       p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+               if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
+                               p->user != &root_user)
+                       goto bad_fork_free;
+       }
+
+       atomic_inc(&p->user->__count);
+       atomic_inc(&p->user->processes);
+       get_group_info(p->group_info);
+
+       /*
+        * If multiple threads are within copy_process(), then this check
+        * triggers too late. This doesn't hurt, the check is only there
+        * to stop root fork bombs.
+        */
+       if (nr_threads >= max_threads)
+               goto bad_fork_cleanup_count;
+
+       if (!try_module_get(task_thread_info(p)->exec_domain->module))
+               goto bad_fork_cleanup_count;
+
+       if (p->binfmt && !try_module_get(p->binfmt->module))
+               goto bad_fork_cleanup_put_domain;
+
+       p->did_exec = 0;
+       copy_flags(clone_flags, p);
+       p->pid = pid;
+       retval = -EFAULT;
+       if (clone_flags & CLONE_PARENT_SETTID)
+               if (put_user(p->pid, parent_tidptr))
+                       goto bad_fork_cleanup;
+
+       p->proc_dentry = NULL;
+
+       INIT_LIST_HEAD(&p->children);
+       INIT_LIST_HEAD(&p->sibling);
+       p->vfork_done = NULL;
+       spin_lock_init(&p->alloc_lock);
+       spin_lock_init(&p->proc_lock);
+
+       clear_tsk_thread_flag(p, TIF_SIGPENDING);
+       init_sigpending(&p->pending);
+
+       p->utime = cputime_zero;
+       p->stime = cputime_zero;
+       p->sched_time = 0;
+       p->rchar = 0;           /* I/O counter: bytes read */
+       p->wchar = 0;           /* I/O counter: bytes written */
+       p->syscr = 0;           /* I/O counter: read syscalls */
+       p->syscw = 0;           /* I/O counter: write syscalls */
+       acct_clear_integrals(p);
+
+       p->it_virt_expires = cputime_zero;
+       p->it_prof_expires = cputime_zero;
+       p->it_sched_expires = 0;
+       INIT_LIST_HEAD(&p->cpu_timers[0]);
+       INIT_LIST_HEAD(&p->cpu_timers[1]);
+       INIT_LIST_HEAD(&p->cpu_timers[2]);
+
+       p->lock_depth = -1;             /* -1 = no lock */
+       do_posix_clock_monotonic_gettime(&p->start_time);
+       p->security = NULL;
+       p->io_context = NULL;
+       p->io_wait = NULL;
+       p->audit_context = NULL;
+       cpuset_fork(p);
+#ifdef CONFIG_NUMA
+       p->mempolicy = mpol_copy(p->mempolicy);
+       if (IS_ERR(p->mempolicy)) {
+               retval = PTR_ERR(p->mempolicy);
+               p->mempolicy = NULL;
+               goto bad_fork_cleanup_cpuset;
+       }
+#endif
+
+#ifdef CONFIG_DEBUG_MUTEXES
+       p->blocked_on = NULL; /* not blocked yet */
+#endif
+
+       p->tgid = p->pid;
+       if (clone_flags & CLONE_THREAD)
+               p->tgid = current->tgid;
+
+       if ((retval = security_task_alloc(p)))
+               goto bad_fork_cleanup_policy;
+       if ((retval = audit_alloc(p)))
+               goto bad_fork_cleanup_security;
+       /* copy all the process information */
+       if ((retval = copy_semundo(clone_flags, p)))
+               goto bad_fork_cleanup_audit;
+       if ((retval = copy_files(clone_flags, p)))
+               goto bad_fork_cleanup_semundo;
+       if ((retval = copy_fs(clone_flags, p)))
+               goto bad_fork_cleanup_files;
+       if ((retval = copy_sighand(clone_flags, p)))
+               goto bad_fork_cleanup_fs;
+       if ((retval = copy_signal(clone_flags, p)))
+               goto bad_fork_cleanup_sighand;
+       if ((retval = copy_mm(clone_flags, p)))
+               goto bad_fork_cleanup_signal;
+       if ((retval = copy_keys(clone_flags, p)))
+               goto bad_fork_cleanup_mm;
+       if ((retval = copy_namespace(clone_flags, p)))
+               goto bad_fork_cleanup_keys;
+       retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
+       if (retval)
+               goto bad_fork_cleanup_namespace;
+
+       p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : 
NULL;
+       /*
+        * Clear TID on mm_release()?
+        */
+       p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? 
child_tidptr: NULL;
+
+       /*
+        * sigaltstack should be cleared when sharing the same VM
+        */
+       if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
+               p->sas_ss_sp = p->sas_ss_size = 0;
+
+       /*
+        * Syscall tracing should be turned off in the child regardless
+        * of CLONE_PTRACE.
+        */
+       clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
+#ifdef TIF_SYSCALL_EMU
+       clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
+#endif
+
+       /* Our parent execution domain becomes current domain
+          These must match for thread signalling to apply */
+          
+       p->parent_exec_id = p->self_exec_id;
+
+       /* ok, now we should be set up.. */
+       p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & 
CSIGNAL);
+       p->pdeath_signal = 0;
+       p->exit_state = 0;
+
+       /*
+        * Ok, make it visible to the rest of the system.
+        * We dont wake it up yet.
+        */
+       p->group_leader = p;
+       INIT_LIST_HEAD(&p->ptrace_children);
+       INIT_LIST_HEAD(&p->ptrace_list);
+
+       /* Perform scheduler related setup. Assign this task to a CPU. */
+       sched_fork(p, clone_flags);
+
+       /* Need tasklist lock for parent etc handling! */
+       write_lock_irq(&tasklist_lock);
+
+       /*
+        * The task hasn't been attached yet, so its cpus_allowed mask will
+        * not be changed, nor will its assigned CPU.
+        *
+        * The cpus_allowed mask of the parent may have changed after it was
+        * copied first time - so re-copy it here, then check the child's CPU
+        * to ensure it is on a valid CPU (and if not, just force it back to
+        * parent's CPU). This avoids alot of nasty races.
+        */
+       p->cpus_allowed = current->cpus_allowed;
+       if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
+                       !cpu_online(task_cpu(p))))
+               set_task_cpu(p, smp_processor_id());
+
+       /*
+        * Check for pending SIGKILL! The new thread should not be allowed
+        * to slip out of an OOM kill. (or normal SIGKILL.)
+        */
+       if (sigismember(&current->pending.signal, SIGKILL)) {
+               write_unlock_irq(&tasklist_lock);
+               retval = -EINTR;
+               goto bad_fork_cleanup_namespace;
+       }
+
+       /* CLONE_PARENT re-uses the old parent */
+       if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
+               p->real_parent = current->real_parent;
+       else
+               p->real_parent = current;
+       p->parent = p->real_parent;
+
+       spin_lock(&current->sighand->siglock);
+       if (clone_flags & CLONE_THREAD) {
+               /*
+                * Important: if an exit-all has been started then
+                * do not create this new thread - the whole thread
+                * group is supposed to exit anyway.
+                */
+               if (current->signal->flags & SIGNAL_GROUP_EXIT) {
+                       spin_unlock(&current->sighand->siglock);
+                       write_unlock_irq(&tasklist_lock);
+                       retval = -EAGAIN;
+                       goto bad_fork_cleanup_namespace;
+               }
+               p->group_leader = current->group_leader;
+
+               if (current->signal->group_stop_count > 0) {
+                       /*
+                        * There is an all-stop in progress for the group.
+                        * We ourselves will stop as soon as we check signals.
+                        * Make the new thread part of that group stop too.
+                        */
+                       current->signal->group_stop_count++;
+                       set_tsk_thread_flag(p, TIF_SIGPENDING);
+               }
+
+               if (!cputime_eq(current->signal->it_virt_expires,
+                               cputime_zero) ||
+                   !cputime_eq(current->signal->it_prof_expires,
+                               cputime_zero) ||
+                   current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY 
||
+                   !list_empty(&current->signal->cpu_timers[0]) ||
+                   !list_empty(&current->signal->cpu_timers[1]) ||
+                   !list_empty(&current->signal->cpu_timers[2])) {
+                       /*
+                        * Have child wake up on its first tick to check
+                        * for process CPU timers.
+                        */
+                       p->it_prof_expires = jiffies_to_cputime(1);
+               }
+       }
+
+       /*
+        * inherit ioprio
+        */
+       p->ioprio = current->ioprio;
+
+       SET_LINKS(p);
+       if (unlikely(p->ptrace & PT_PTRACED))
+               __ptrace_link(p, current->parent);
+
+       if (thread_group_leader(p)) {
+               p->signal->tty = current->signal->tty;
+               p->signal->pgrp = process_group(current);
+               p->signal->session = current->signal->session;
+               attach_pid(p, PIDTYPE_PGID, process_group(p));
+               attach_pid(p, PIDTYPE_SID, p->signal->session);
+               if (p->pid)
+                       __get_cpu_var(process_counts)++;
+       }
+       attach_pid(p, PIDTYPE_TGID, p->tgid);
+       attach_pid(p, PIDTYPE_PID, p->pid);
+
+       nr_threads++;
+       total_forks++;
+       spin_unlock(&current->sighand->siglock);
+       write_unlock_irq(&tasklist_lock);
+       proc_fork_connector(p);
+       return p;
+
+bad_fork_cleanup_namespace:
+       exit_namespace(p);
+bad_fork_cleanup_keys:
+       exit_keys(p);
+bad_fork_cleanup_mm:
+       if (p->mm)
+               mmput(p->mm);
+bad_fork_cleanup_signal:
+       exit_signal(p);
+bad_fork_cleanup_sighand:
+       exit_sighand(p);
+bad_fork_cleanup_fs:
+       exit_fs(p); /* blocking */
+bad_fork_cleanup_files:
+       exit_files(p); /* blocking */
+bad_fork_cleanup_semundo:
+       exit_sem(p);
+bad_fork_cleanup_audit:
+       audit_free(p);
+bad_fork_cleanup_security:
+       security_task_free(p);
+bad_fork_cleanup_policy:
+#ifdef CONFIG_NUMA
+       mpol_free(p->mempolicy);
+bad_fork_cleanup_cpuset:
+#endif
+       cpuset_exit(p);
+bad_fork_cleanup:
+       if (p->binfmt)
+               module_put(p->binfmt->module);
+bad_fork_cleanup_put_domain:
+       module_put(task_thread_info(p)->exec_domain->module);
+bad_fork_cleanup_count:
+       put_group_info(p->group_info);
+       atomic_dec(&p->user->processes);
+       free_uid(p->user);
+bad_fork_free:
+       free_task(p);
+fork_out:
+       return ERR_PTR(retval);
+}
+
+/*
+ * Default register frame for a freshly forked idle task: the caller's
+ * pt_regs buffer is zero-filled and the same pointer is handed back.
+ * The definition is weak, so another definition of idle_regs() can
+ * replace it at link time.
+ */
+struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
+{
+       memset(regs, 0, sizeof(*regs));
+
+       return regs;
+}
+
+/*
+ * Create the idle task for @cpu.
+ *
+ * The task is built by copy_process() with CLONE_VM and pid 0 on top of
+ * the register frame supplied by idle_regs(), then handed to init_idle()
+ * and removed from the pid hashes with unhash_process().
+ *
+ * copy_process() reports failure through an ERR_PTR()-encoded errno (its
+ * error exit is "return ERR_PTR(retval)"), not through NULL, so the
+ * result has to be tested with IS_ERR().  A NULL test would let an error
+ * pointer through to init_idle() and unhash_process().
+ *
+ * Returns the new task, or the ERR_PTR() value produced by
+ * copy_process() when the copy fails.
+ */
+task_t * __devinit fork_idle(int cpu)
+{
+       task_t *task;
+       struct pt_regs regs;
+
+       task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
+       if (IS_ERR(task))
+               return task;    /* propagate the real error code */
+
+       init_idle(task, cpu);
+       unhash_process(task);
+       return task;
+}
+
+/*
+ * Pick the ptrace event to report for a fork made with @clone_flags.
+ *
+ * The kind of fork (vfork, clone with an exit signal other than SIGCHLD,
+ * or plain fork) selects one PT_TRACE_* option and its matching
+ * PTRACE_EVENT_* code; the event is returned only when that option is
+ * set in current->ptrace.  CLONE_UNTRACED always yields 0.
+ */
+static inline int fork_traceflag (unsigned clone_flags)
+{
+       unsigned long option;
+       int event;
+
+       if (clone_flags & CLONE_UNTRACED)
+               return 0;
+
+       if (clone_flags & CLONE_VFORK) {
+               option = PT_TRACE_VFORK;
+               event = PTRACE_EVENT_VFORK;
+       } else if ((clone_flags & CSIGNAL) != SIGCHLD) {
+               option = PT_TRACE_CLONE;
+               event = PTRACE_EVENT_CLONE;
+       } else {
+               option = PT_TRACE_FORK;
+               event = PTRACE_EVENT_FORK;
+       }
+
+       return (current->ptrace & option) ? event : 0;
+}
+
+/*
+ *  Ok, this is the main fork-routine.
+ *
+ * It copies the process, and if successful kick-starts
+ * it and waits for it to finish using the VM if required.
+ *
+ * A pid is reserved with alloc_pidmap() before the copy and passed to
+ * copy_process(); it is given back with free_pidmap() if the copy fails.
+ * When the caller is being ptraced and fork_traceflag() selects an event,
+ * CLONE_PTRACE is added to the flags and the event is reported to the
+ * tracer after the child has been started.
+ *
+ * Returns the new pid on success, -EAGAIN if no pid could be reserved,
+ * or the errno encoded in the ERR_PTR() result of copy_process().
+ */
+long do_fork(unsigned long clone_flags,
+             unsigned long stack_start,
+             struct pt_regs *regs,
+             unsigned long stack_size,
+             int __user *parent_tidptr,
+             int __user *child_tidptr)
+{
+       struct task_struct *p;
+       int trace = 0;  /* ptrace event to report, 0 = none */
+       long pid = alloc_pidmap();
+
+       if (pid < 0)
+               return -EAGAIN;
+       if (unlikely(current->ptrace)) {
+               trace = fork_traceflag (clone_flags);
+               if (trace)
+                       clone_flags |= CLONE_PTRACE;
+       }
+
+       p = copy_process(clone_flags, stack_start, regs, stack_size, 
parent_tidptr, child_tidptr, pid);
+       /*
+        * Do this prior waking up the new thread - the thread pointer
+        * might get invalid after that point, if the thread exits quickly.
+        * (This is also why the value returned is the local pid rather
+        * than something read back from p at the end.)
+        */
+       if (!IS_ERR(p)) {
+               struct completion vfork;        /* lives on this stack until the wait below returns */
+
+               if (clone_flags & CLONE_VFORK) {
+                       p->vfork_done = &vfork;
+                       init_completion(&vfork);
+               }
+
+               if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
+                       /*
+                        * We'll start up with an immediate SIGSTOP.
+                        */
+                       sigaddset(&p->pending.signal, SIGSTOP);
+                       set_tsk_thread_flag(p, TIF_SIGPENDING);
+               }
+
+               if (!(clone_flags & CLONE_STOPPED))
+                       wake_up_new_task(p, clone_flags);
+               else
+                       p->state = TASK_STOPPED;        /* not woken: child is left stopped */
+
+               if (unlikely (trace)) {
+                       current->ptrace_message = pid;  /* new pid is the event message */
+                       ptrace_notify ((trace << 8) | SIGTRAP);
+               }
+
+               if (clone_flags & CLONE_VFORK) {
+                       wait_for_completion(&vfork);    /* block until vfork_done is completed */
+                       if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+                               ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | 
SIGTRAP);
+               }
+       } else {
+               free_pidmap(pid);       /* copy failed: release the reserved pid */
+               pid = PTR_ERR(p);       /* and return the errno instead */
+       }
+       return pid;
+}
+
+#ifndef ARCH_MIN_MMSTRUCT_ALIGN
+#define ARCH_MIN_MMSTRUCT_ALIGN 0
+#endif
+
+/*
+ * Create the slab caches behind sighand_cachep, signal_cachep,
+ * files_cachep, fs_cachep, vm_area_cachep and mm_cachep.
+ *
+ * Every cache is created with SLAB_PANIC; all but the vm_area_struct
+ * cache additionally ask for SLAB_HWCACHE_ALIGN.  Only the mm_struct
+ * cache passes a non-zero alignment (ARCH_MIN_MMSTRUCT_ALIGN).
+ */
+void __init proc_caches_init(void)
+{
+       const unsigned long aligned_flags = SLAB_HWCACHE_ALIGN | SLAB_PANIC;
+
+       sighand_cachep = kmem_cache_create("sighand_cache",
+                                          sizeof(struct sighand_struct), 0,
+                                          aligned_flags, NULL, NULL);
+
+       signal_cachep = kmem_cache_create("signal_cache",
+                                         sizeof(struct signal_struct), 0,
+                                         aligned_flags, NULL, NULL);
+
+       files_cachep = kmem_cache_create("files_cache",
+                                        sizeof(struct files_struct), 0,
+                                        aligned_flags, NULL, NULL);
+
+       fs_cachep = kmem_cache_create("fs_cache",
+                                     sizeof(struct fs_struct), 0,
+                                     aligned_flags, NULL, NULL);
+
+       vm_area_cachep = kmem_cache_create("vm_area_struct",
+                                          sizeof(struct vm_area_struct), 0,
+                                          SLAB_PANIC, NULL, NULL);
+
+       mm_cachep = kmem_cache_create("mm_struct",
+                                     sizeof(struct mm_struct),
+                                     ARCH_MIN_MMSTRUCT_ALIGN,
+                                     aligned_flags, NULL, NULL);
+}
+
+
+/*
+ * Make the flag word given to the unshare system call self-consistent
+ * by adding every flag that is implied by the ones already requested.
+ * The rules are applied in order, so a flag added by one rule can
+ * trigger a later one.  The result is written back through @flags_ptr.
+ */
+static inline void check_unshare_flags(unsigned long *flags_ptr)
+{
+       unsigned long wanted = *flags_ptr;
+
+       /* Leaving a thread group requires a private vm as well. */
+       if (wanted & CLONE_THREAD)
+               wanted |= CLONE_VM;
+
+       /* A private vm requires private signal handlers as well. */
+       if (wanted & CLONE_VM)
+               wanted |= CLONE_SIGHAND;
+
+       /*
+        * Private signal handlers for a task whose signal struct has
+        * more than one user (i.e. it was created with CLONE_THREAD)
+        * require unsharing the thread too.
+        */
+       if ((wanted & CLONE_SIGHAND) &&
+           atomic_read(&current->signal->count) > 1)
+               wanted |= CLONE_THREAD;
+
+       /* A private namespace requires private filesystem information. */
+       if (wanted & CLONE_NEWNS)
+               wanted |= CLONE_FS;
+
+       *flags_ptr = wanted;
+}
+
+/*
+ * Leaving a thread group (CLONE_THREAD) is not supported yet: any
+ * request for it is refused with -EINVAL, everything else returns 0.
+ */
+static int unshare_thread(unsigned long unshare_flags)
+{
+       return (unshare_flags & CLONE_THREAD) ? -EINVAL : 0;
+}
+
+/*
+ * Prepare a private fs_struct for the current task.
+ *
+ * Nothing is done unless CLONE_FS was requested and current->fs exists
+ * with a reference count above one.  In that case a copy made by
+ * __copy_fs_struct() is stored in *new_fsp.
+ *
+ * Returns 0, or -ENOMEM if the copy could not be made.
+ */
+static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
+{
+       struct fs_struct *cur_fs = current->fs;
+
+       if (!(unshare_flags & CLONE_FS))
+               return 0;
+       if (!cur_fs || atomic_read(&cur_fs->count) <= 1)
+               return 0;       /* not shared: nothing to copy */
+
+       *new_fsp = __copy_fs_struct(current->fs);
+
+       return *new_fsp ? 0 : -ENOMEM;
+}
+
+/*
+ * Prepare a private namespace for the current task.
+ *
+ * Nothing is done unless CLONE_NEWNS was requested and
+ * current->namespace exists with a reference count above one.  The
+ * duplicate is built by dup_namespace() against @new_fs when the caller
+ * already prepared a private fs_struct, otherwise against current->fs,
+ * and is stored in *new_nsp.
+ *
+ * Returns 0, -EPERM without CAP_SYS_ADMIN, or -ENOMEM if the duplicate
+ * could not be made.
+ */
+static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
+{
+       struct namespace *cur_ns = current->namespace;
+
+       if (!(unshare_flags & CLONE_NEWNS))
+               return 0;
+       if (!cur_ns || atomic_read(&cur_ns->count) <= 1)
+               return 0;       /* not shared: nothing to duplicate */
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
+
+       return *new_nsp ? 0 : -ENOMEM;
+}
+
+/*
+ * Splitting off a sighand_struct that is really shared is not supported
+ * yet: when CLONE_SIGHAND is requested and current->sighand has more
+ * than one reference the request fails with -EINVAL.  Otherwise 0 is
+ * returned; @new_sighp is never written.
+ */
+static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
+{
+       struct sighand_struct *cur_sigh = current->sighand;
+
+       if (!(unshare_flags & CLONE_SIGHAND))
+               return 0;
+       if (!cur_sigh || atomic_read(&cur_sigh->count) <= 1)
+               return 0;
+
+       return -EINVAL;
+}
+
+/*
+ * Check whether the vm can be unshared.  When CLONE_VM is requested and
+ * current->mm has more than one user the request fails with -EINVAL;
+ * otherwise 0 is returned.  @new_mmp is never written.
+ */
+static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
+{
+       struct mm_struct *cur_mm = current->mm;
+
+       if (!(unshare_flags & CLONE_VM))
+               return 0;
+       if (!cur_mm || atomic_read(&cur_mm->mm_users) <= 1)
+               return 0;
+
+       return -EINVAL;
+}
+
+/*
+ * Prepare a private file descriptor table for the current task.
+ *
+ * Nothing is done unless CLONE_FILES was requested and current->files
+ * exists with a reference count above one.  In that case the duplicate
+ * made by dup_fd() is stored in *new_fdp.
+ *
+ * Returns 0, or the error code reported by dup_fd() when it returns
+ * no table.
+ */
+static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
+{
+       struct files_struct *cur_files = current->files;
+       int error = 0;
+
+       if (!(unshare_flags & CLONE_FILES))
+               return 0;
+       if (!cur_files || atomic_read(&cur_files->count) <= 1)
+               return 0;       /* not shared: nothing to duplicate */
+
+       *new_fdp = dup_fd(cur_files, &error);
+
+       return *new_fdp ? 0 : error;
+}
+
+/*
+ * Unsharing the semaphore undo list (CLONE_SYSVSEM) is not supported
+ * yet: any request for it is refused with -EINVAL, everything else
+ * returns 0.  @new_ulistp is never written.
+ */
+static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
+{
+       return (unshare_flags & CLONE_SYSVSEM) ? -EINVAL : 0;
+}
+
+/*
+ * unshare allows a process to 'unshare' part of the process
+ * context which was originally shared using clone.  copy_*
+ * functions used by do_fork() cannot be used here directly
+ * because they modify an inactive task_struct that is being
+ * constructed. Here we are modifying the current, active,
+ * task_struct.
+ *
+ * @unshare_flags: CLONE_* bits naming the context to stop sharing.
+ *
+ * Bits outside the set handled here are rejected with -EINVAL before
+ * anything else happens, so that a caller asking for something this
+ * implementation does not know about is told so instead of getting a
+ * silent success.
+ *
+ * Each unshare_*() helper either fails or, when a private copy is
+ * needed, leaves it in the matching new_* pointer.  Under task_lock()
+ * the copies are installed in current and every new_* pointer is
+ * repointed at the object it replaced.  The cleanup labels at the
+ * bottom are reached on success as well as on failure: on success they
+ * drop the references to the replaced objects, on failure they drop the
+ * copies that were never installed.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+asmlinkage long sys_unshare(unsigned long unshare_flags)
+{
+       int err = 0;
+       struct fs_struct *fs, *new_fs = NULL;
+       struct namespace *ns, *new_ns = NULL;
+       struct sighand_struct *sigh, *new_sigh = NULL;
+       struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
+       struct files_struct *fd, *new_fd = NULL;
+       struct sem_undo_list *new_ulist = NULL;
+
+       /* Return -EINVAL for all unsupported flags */
+       if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+                               CLONE_VM|CLONE_FILES|CLONE_SYSVSEM)) {
+               err = -EINVAL;
+               goto bad_unshare_out;
+       }
+
+       /* Add the flags implied by the ones that were requested. */
+       check_unshare_flags(&unshare_flags);
+
+       if ((err = unshare_thread(unshare_flags)))
+               goto bad_unshare_out;
+       if ((err = unshare_fs(unshare_flags, &new_fs)))
+               goto bad_unshare_cleanup_thread;
+       if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
+               goto bad_unshare_cleanup_fs;
+       if ((err = unshare_sighand(unshare_flags, &new_sigh)))
+               goto bad_unshare_cleanup_ns;
+       if ((err = unshare_vm(unshare_flags, &new_mm)))
+               goto bad_unshare_cleanup_sigh;
+       if ((err = unshare_fd(unshare_flags, &new_fd)))
+               goto bad_unshare_cleanup_vm;
+       if ((err = unshare_semundo(unshare_flags, &new_ulist)))
+               goto bad_unshare_cleanup_fd;
+
+       if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+
+               task_lock(current);
+
+               /*
+                * Install each prepared copy and keep the replaced
+                * object in new_* so the labels below release it.
+                */
+               if (new_fs) {
+                       fs = current->fs;
+                       current->fs = new_fs;
+                       new_fs = fs;
+               }
+
+               if (new_ns) {
+                       ns = current->namespace;
+                       current->namespace = new_ns;
+                       new_ns = ns;
+               }
+
+               if (new_sigh) {
+                       sigh = current->sighand;
+                       rcu_assign_pointer(current->sighand, new_sigh);
+                       new_sigh = sigh;
+               }
+
+               if (new_mm) {
+                       mm = current->mm;
+                       active_mm = current->active_mm;
+                       current->mm = new_mm;
+                       current->active_mm = new_mm;
+                       activate_mm(active_mm, new_mm);
+                       new_mm = mm;
+               }
+
+               if (new_fd) {
+                       fd = current->files;
+                       current->files = new_fd;
+                       new_fd = fd;
+               }
+
+               task_unlock(current);
+       }
+
+bad_unshare_cleanup_fd:
+       if (new_fd)
+               put_files_struct(new_fd);
+
+bad_unshare_cleanup_vm:
+       if (new_mm)
+               mmput(new_mm);
+
+bad_unshare_cleanup_sigh:
+       if (new_sigh)
+               if (atomic_dec_and_test(&new_sigh->count))
+                       kmem_cache_free(sighand_cachep, new_sigh);
+
+bad_unshare_cleanup_ns:
+       if (new_ns)
+               put_namespace(new_ns);
+
+bad_unshare_cleanup_fs:
+       if (new_fs)
+               put_fs_struct(new_fs);
+
+bad_unshare_cleanup_thread:
+bad_unshare_out:
+       return err;
+}
diff -r 7f67c15e2c91 -r fbc0e953732e 
tools/security/python/xensec_tools/acm_getdecision
--- a/tools/security/python/xensec_tools/acm_getdecision        Thu Jun 15 
10:02:53 2006 -0600
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-#  -*- mode: python; -*-
-import sys
-import traceback
-import getopt
-
-# add fallback path for non-native python path installs if needed
-sys.path.insert(-1, '/usr/lib/python')
-sys.path.insert(-1, '/usr/lib64/python')
-
-from xen.util.security import ACMError, err, get_decision, active_policy
-
-def usage():
-    print "Usage: acm_getdecision -i domainid --label labelname"
-    print "  Test program illustrating the retrieval of"
-    print "  access control decisions from Xen. At this time,"
-    print "  only sharing (STE) policy decisions are supported."
-    print "  Arguments are two paramters in any combination:"
-    print "\t -i domain_id or --domid domain_id"
-    print "\t -l labelname or --label labelname"
-    print "  Return value:"
-    print "\t PERMITTED if access is permitted"
-    print "\t DENIED if access is denied"
-    print "\t ACMError -- e.g., unknown label or domain id"
-    err("Usage")
-
-try:
-
-    if len(sys.argv) != 5:
-        usage()
-
-    decision_args = []
-
-    for idx in range(1, len(sys.argv), 2):
-        if sys.argv[idx] in ['-i', '--domid']:
-            decision_args.append(['domid', sys.argv[idx+1]])
-        elif sys.argv[idx] in ['-l', '--label']:
-            decision_args.append(['access_control',
-                                  ['policy', active_policy],
-                                  ['label', sys.argv[idx+1]]
-                                  ])
-        else:
-            print "unknown argument %s" % sys.argv[idx]
-            usage()
-
-    if len(decision_args) != 2:
-        print "too many arguments"
-        usage()
-
-    print get_decision(decision_args[0], decision_args[1])
-
-except ACMError:
-       pass
-except:
-    traceback.print_exc(limit=1)

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>