WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Merged.

# HG changeset patch
# User emellor@ewan
# Node ID 1ac39c7a043541cfa94655f0e9ab98d4503c29a2
# Parent  0e7c48861e95b738fdf96d4a4df6b0ba90a8423d
# Parent  b7dce4fe2488bf354e5718a84fdb82bed3919761
Merged.

diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Mon Oct 10 
13:46:53 2005
@@ -1327,18 +1327,14 @@
        .callback = handle_vcpu_hotplug_event
 };
 
-/* NB: Assumes xenbus_lock is held! */
 static int setup_cpu_watcher(struct notifier_block *notifier,
                              unsigned long event, void *data)
 {
-       int err = 0;
-
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
+       int err;
+
        err = register_xenbus_watch(&cpu_watch);
-
-       if (err) {
+       if (err)
                printk("Failed to register watch on /cpu\n");
-       }
 
        return NOTIFY_DONE;
 }
@@ -1368,7 +1364,7 @@
                        return;
 
                /* get the state value */
-               err = xenbus_scanf(dir, "availability", "%s", state);
+               err = xenbus_scanf(NULL, dir, "availability", "%s", state);
 
                if (err != 1) {
                        printk(KERN_ERR
@@ -1578,7 +1574,7 @@
 void smp_resume(void)
 {
        smp_intr_init();
-       local_setup_timer_irq();
+       local_setup_timer();
 }
 
 void vcpu_prepare(int vcpu)
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Mon Oct 10 13:46:53 2005
@@ -122,7 +122,7 @@
 static u64 processed_system_time;   /* System time (ns) at last processing. */
 static DEFINE_PER_CPU(u64, processed_system_time);
 
-#define NS_PER_TICK (1000000000L/HZ)
+#define NS_PER_TICK (1000000000ULL/HZ)
 
 static inline void __normalize_time(time_t *sec, s64 *nsec)
 {
@@ -800,9 +800,9 @@
                delta = j - jiffies;
                /* NB. The next check can trigger in some wrap-around cases,
                 * but that's ok: we'll just end up with a shorter timeout. */
-               if (delta < 1)
+               if (delta < 1) 
                        delta = 1;
-               st = processed_system_time + (delta * NS_PER_TICK);
+               st = processed_system_time + ((u64)delta * NS_PER_TICK);
        } while (read_seqretry(&xtime_lock, seq));
 
        return st;
@@ -816,7 +816,7 @@
 {
        unsigned int cpu = smp_processor_id();
        unsigned long j;
-
+       
        /* s390 does this /before/ checking rcu_pending(). We copy them. */
        cpu_set(cpu, nohz_cpu_mask);
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Mon Oct 10 13:46:53 2005
@@ -275,22 +275,23 @@
 {
        static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
        char *str;
+       struct xenbus_transaction *xbt;
        int err;
 
  again:
-       err = xenbus_transaction_start();
-       if (err)
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt))
                return;
-       str = (char *)xenbus_read("control", "shutdown", NULL);
+       str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
        /* Ignore read errors and empty reads. */
        if (XENBUS_IS_ERR_READ(str)) {
-               xenbus_transaction_end(1);
+               xenbus_transaction_end(xbt, 1);
                return;
        }
 
-       xenbus_write("control", "shutdown", "");
-
-       err = xenbus_transaction_end(0);
+       xenbus_write(xbt, "control", "shutdown", "");
+
+       err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN) {
                kfree(str);
                goto again;
@@ -320,23 +321,24 @@
                          unsigned int len)
 {
        char sysrq_key = '\0';
+       struct xenbus_transaction *xbt;
        int err;
 
  again:
-       err = xenbus_transaction_start();
-       if (err)
+       xbt  = xenbus_transaction_start();
+       if (IS_ERR(xbt))
                return;
-       if (!xenbus_scanf("control", "sysrq", "%c", &sysrq_key)) {
+       if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
                printk(KERN_ERR "Unable to read sysrq code in "
                       "control/sysrq\n");
-               xenbus_transaction_end(1);
+               xenbus_transaction_end(xbt, 1);
                return;
        }
 
        if (sysrq_key != '\0')
-               xenbus_printf("control", "sysrq", "%c", '\0');
-
-       err = xenbus_transaction_end(0);
+               xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+
+       err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
                goto again;
 
@@ -360,9 +362,6 @@
 
 static struct notifier_block xenstore_notifier;
 
-/* Setup our watcher
-   NB: Assumes xenbus_lock is held!
-*/
 static int setup_shutdown_watcher(struct notifier_block *notifier,
                                   unsigned long event,
                                   void *data)
@@ -371,8 +370,6 @@
 #ifdef CONFIG_MAGIC_SYSRQ
        int err2 = 0;
 #endif
-
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
 
        err1 = register_xenbus_watch(&shutdown_watch);
 #ifdef CONFIG_MAGIC_SYSRQ
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Mon Oct 10 
13:46:53 2005
@@ -357,7 +357,7 @@
        unsigned long long new_target;
        int err;
 
-       err = xenbus_scanf("memory", "target", "%llu", &new_target);
+       err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target);
        if (err != 1) {
                printk(KERN_ERR "Unable to read memory/target\n");
                return;
@@ -370,16 +370,11 @@
     
 }
 
-/* Setup our watcher
-   NB: Assumes xenbus_lock is held!
-*/
 int balloon_init_watcher(struct notifier_block *notifier,
                          unsigned long event,
                          void *data)
 {
        int err;
-
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
 
        err = register_xenbus_watch(&target_watch);
        if (err)
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Oct 10 13:46:53 2005
@@ -61,18 +61,19 @@
        unsigned long ring_ref;
        unsigned int evtchn;
        int err;
+       struct xenbus_transaction *xbt;
        struct backend_info *be
                = container_of(watch, struct backend_info, watch);
 
        /* If other end is gone, delete ourself. */
-       if (vec && !xenbus_exists(be->frontpath, "")) {
+       if (vec && !xenbus_exists(NULL, be->frontpath, "")) {
                device_unregister(&be->dev->dev);
                return;
        }
        if (be->blkif == NULL || be->blkif->status == CONNECTED)
                return;
 
-       err = xenbus_gather(be->frontpath, "ring-ref", "%lu", &ring_ref,
+       err = xenbus_gather(NULL, be->frontpath, "ring-ref", "%lu", &ring_ref,
                            "event-channel", "%u", &evtchn, NULL);
        if (err) {
                xenbus_dev_error(be->dev, err,
@@ -84,7 +85,8 @@
        /* Map the shared frame, irq etc. */
        err = blkif_map(be->blkif, ring_ref, evtchn);
        if (err) {
-               xenbus_dev_error(be->dev, err, "mapping ring-ref %lu port %u",
+               xenbus_dev_error(be->dev, err,
+                                "mapping ring-ref %lu port %u",
                                 ring_ref, evtchn);
                return;
        }
@@ -92,13 +94,13 @@
 
 again:
        /* Supply the information about the device the frontend needs */
-       err = xenbus_transaction_start();
-       if (err) {
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt)) {
                xenbus_dev_error(be->dev, err, "starting transaction");
                return;
        }
 
-       err = xenbus_printf(be->dev->nodename, "sectors", "%lu",
+       err = xenbus_printf(xbt, be->dev->nodename, "sectors", "%lu",
                            vbd_size(&be->blkif->vbd));
        if (err) {
                xenbus_dev_error(be->dev, err, "writing %s/sectors",
@@ -107,14 +109,14 @@
        }
 
        /* FIXME: use a typename instead */
-       err = xenbus_printf(be->dev->nodename, "info", "%u",
+       err = xenbus_printf(xbt, be->dev->nodename, "info", "%u",
                            vbd_info(&be->blkif->vbd));
        if (err) {
                xenbus_dev_error(be->dev, err, "writing %s/info",
                                 be->dev->nodename);
                goto abort;
        }
-       err = xenbus_printf(be->dev->nodename, "sector-size", "%lu",
+       err = xenbus_printf(xbt, be->dev->nodename, "sector-size", "%lu",
                            vbd_secsize(&be->blkif->vbd));
        if (err) {
                xenbus_dev_error(be->dev, err, "writing %s/sector-size",
@@ -122,7 +124,7 @@
                goto abort;
        }
 
-       err = xenbus_transaction_end(0);
+       err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
                goto again;
        if (err) {
@@ -136,7 +138,7 @@
        return;
 
  abort:
-       xenbus_transaction_end(1);
+       xenbus_transaction_end(xbt, 1);
 }
 
 /* 
@@ -154,7 +156,8 @@
                = container_of(watch, struct backend_info, backend_watch);
        struct xenbus_device *dev = be->dev;
 
-       err = xenbus_scanf(dev->nodename, "physical-device", "%li", &pdev);
+       err = xenbus_scanf(NULL, dev->nodename,
+                          "physical-device", "%li", &pdev);
        if (XENBUS_EXIST_ERR(err))
                return;
        if (err < 0) {
@@ -169,7 +172,7 @@
        be->pdev = pdev;
 
        /* If there's a read-only node, we're read only. */
-       p = xenbus_read(dev->nodename, "read-only", NULL);
+       p = xenbus_read(NULL, dev->nodename, "read-only", NULL);
        if (!IS_ERR(p)) {
                be->readonly = 1;
                kfree(p);
@@ -184,7 +187,8 @@
                if (IS_ERR(be->blkif)) {
                        err = PTR_ERR(be->blkif);
                        be->blkif = NULL;
-                       xenbus_dev_error(dev, err, "creating block interface");
+                       xenbus_dev_error(dev, err,
+                                        "creating block interface");
                        return;
                }
 
@@ -192,7 +196,8 @@
                if (err) {
                        blkif_put(be->blkif);
                        be->blkif = NULL;
-                       xenbus_dev_error(dev, err, "creating vbd structure");
+                       xenbus_dev_error(dev, err,
+                                        "creating vbd structure");
                        return;
                }
 
@@ -210,13 +215,14 @@
 
        be = kmalloc(sizeof(*be), GFP_KERNEL);
        if (!be) {
-               xenbus_dev_error(dev, -ENOMEM, "allocating backend structure");
+               xenbus_dev_error(dev, -ENOMEM,
+                                "allocating backend structure");
                return -ENOMEM;
        }
        memset(be, 0, sizeof(*be));
 
        frontend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "frontend-id", "%li", &be->frontend_id,
                            "frontend", NULL, &frontend,
                            NULL);
@@ -228,7 +234,7 @@
                                 dev->nodename);
                goto free_be;
        }
-       if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
+       if (strlen(frontend) == 0 || !xenbus_exists(NULL, frontend, "")) {
                /* If we can't get a frontend path and a frontend-id,
                 * then our bus-id is no longer valid and we need to
                 * destroy the backend device.
@@ -244,7 +250,8 @@
        err = register_xenbus_watch(&be->backend_watch);
        if (err) {
                be->backend_watch.node = NULL;
-               xenbus_dev_error(dev, err, "adding backend watch on %s",
+               xenbus_dev_error(dev, err,
+                                "adding backend watch on %s",
                                 dev->nodename);
                goto free_be;
        }
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Mon Oct 10 
13:46:53 2005
@@ -460,7 +460,7 @@
        if (info->connected == BLKIF_STATE_CONNECTED)
                return;
 
-       err = xenbus_gather(watch->node,
+       err = xenbus_gather(NULL, watch->node,
                            "sectors", "%lu", &sectors,
                            "info", "%u", &binfo,
                            "sector-size", "%lu", &sector_size,
@@ -532,10 +532,11 @@
 {
        char *backend;
        const char *message;
+       struct xenbus_transaction *xbt;
        int err;
 
        backend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "backend-id", "%i", &info->backend_id,
                            "backend", NULL, &backend,
                            NULL);
@@ -559,25 +560,26 @@
        }
 
 again:
-       err = xenbus_transaction_start();
-       if (err) {
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt)) {
                xenbus_dev_error(dev, err, "starting transaction");
                goto destroy_blkring;
        }
 
-       err = xenbus_printf(dev->nodename, "ring-ref","%u", info->ring_ref);
+       err = xenbus_printf(xbt, dev->nodename,
+                           "ring-ref","%u", info->ring_ref);
        if (err) {
                message = "writing ring-ref";
                goto abort_transaction;
        }
-       err = xenbus_printf(dev->nodename,
+       err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }
 
-       err = xenbus_transaction_end(0);
+       err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == -EAGAIN)
                        goto again;
@@ -598,8 +600,7 @@
        return 0;
 
  abort_transaction:
-       xenbus_transaction_end(1);
-       /* Have to do this *outside* transaction.  */
+       xenbus_transaction_end(xbt, 1);
        xenbus_dev_error(dev, err, "%s", message);
  destroy_blkring:
        blkif_free(info);
@@ -620,7 +621,8 @@
        struct blkfront_info *info;
 
        /* FIXME: Use dynamic device id if this is not set. */
-       err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice);
+       err = xenbus_scanf(NULL, dev->nodename,
+                          "virtual-device", "%i", &vdevice);
        if (XENBUS_EXIST_ERR(err))
                return err;
        if (err < 0) {
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Mon Oct 10 13:46:53 2005
@@ -160,7 +160,8 @@
 
        mi = ((major_info[index] != NULL) ? major_info[index] :
              xlbd_alloc_major_info(major, minor, index));
-       mi->usage++;
+       if (mi)
+               mi->usage++;
        return mi;
 }
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Mon Oct 10 13:46:53 2005
@@ -69,15 +69,15 @@
        int i;
 
        /* If other end is gone, delete ourself. */
-       if (vec && !xenbus_exists(be->frontpath, "")) {
-               xenbus_rm(be->dev->nodename, "");
+       if (vec && !xenbus_exists(NULL, be->frontpath, "")) {
+               xenbus_rm(NULL, be->dev->nodename, "");
                device_unregister(&be->dev->dev);
                return;
        }
        if (be->netif == NULL || be->netif->status == CONNECTED)
                return;
 
-       mac = xenbus_read(be->frontpath, "mac", NULL);
+       mac = xenbus_read(NULL, be->frontpath, "mac", NULL);
        if (IS_ERR(mac)) {
                err = PTR_ERR(mac);
                xenbus_dev_error(be->dev, err, "reading %s/mac",
@@ -98,7 +98,8 @@
        }
        kfree(mac);
 
-       err = xenbus_gather(be->frontpath, "tx-ring-ref", "%lu", &tx_ring_ref,
+       err = xenbus_gather(NULL, be->frontpath,
+                           "tx-ring-ref", "%lu", &tx_ring_ref,
                            "rx-ring-ref", "%lu", &rx_ring_ref,
                            "event-channel", "%u", &evtchn, NULL);
        if (err) {
@@ -137,7 +138,7 @@
        struct xenbus_device *dev = be->dev;
        u8 be_mac[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 };
 
-       err = xenbus_scanf(dev->nodename, "handle", "%li", &handle);
+       err = xenbus_scanf(NULL, dev->nodename, "handle", "%li", &handle);
        if (XENBUS_EXIST_ERR(err))
                return;
        if (err < 0) {
@@ -188,7 +189,7 @@
 
        key = env_vars;
        while (*key != NULL) {
-               val = xenbus_read(xdev->nodename, *key, NULL);
+               val = xenbus_read(NULL, xdev->nodename, *key, NULL);
                if (!IS_ERR(val)) {
                        char buf[strlen(*key) + 4];
                        sprintf(buf, "%s=%%s", *key);
@@ -220,7 +221,7 @@
        memset(be, 0, sizeof(*be));
 
        frontend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "frontend-id", "%li", &be->frontend_id,
                            "frontend", NULL, &frontend,
                            NULL);
@@ -232,7 +233,7 @@
                                 dev->nodename);
                goto free_be;
        }
-       if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
+       if (strlen(frontend) == 0 || !xenbus_exists(NULL, frontend, "")) {
                /* If we can't get a frontend path and a frontend-id,
                 * then our bus-id is no longer valid and we need to
                 * destroy the backend device.
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Oct 10 
13:46:53 2005
@@ -1083,10 +1083,11 @@
 {
        char *backend, *mac, *e, *s;
        const char *message;
+       struct xenbus_transaction *xbt;
        int err, i;
 
        backend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "backend-id", "%i", &info->backend_id,
                            "backend", NULL, &backend,
                            NULL);
@@ -1102,7 +1103,7 @@
                goto out;
        }
 
-       mac = xenbus_read(dev->nodename, "mac", NULL);
+       mac = xenbus_read(NULL, dev->nodename, "mac", NULL);
        if (IS_ERR(mac)) {
                err = PTR_ERR(mac);
                xenbus_dev_error(dev, err, "reading %s/mac",
@@ -1131,32 +1132,32 @@
        }
 
 again:
-       err = xenbus_transaction_start();
-       if (err) {
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt)) {
                xenbus_dev_error(dev, err, "starting transaction");
                goto destroy_ring;
        }
 
-       err = xenbus_printf(dev->nodename, "tx-ring-ref","%u",
+       err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
                            info->tx_ring_ref);
        if (err) {
                message = "writing tx ring-ref";
                goto abort_transaction;
        }
-       err = xenbus_printf(dev->nodename, "rx-ring-ref","%u",
+       err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
                            info->rx_ring_ref);
        if (err) {
                message = "writing rx ring-ref";
                goto abort_transaction;
        }
-       err = xenbus_printf(dev->nodename,
+       err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }
 
-       err = xenbus_transaction_end(0);
+       err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == -EAGAIN)
                        goto again;
@@ -1177,8 +1178,7 @@
        return 0;
 
  abort_transaction:
-       xenbus_transaction_end(1);
-       /* Have to do this *outside* transaction.  */
+       xenbus_transaction_end(xbt, 1);
        xenbus_dev_error(dev, err, "%s", message);
  destroy_ring:
        shutdown_device(info);
@@ -1201,7 +1201,7 @@
        struct netfront_info *info;
        unsigned int handle;
 
-       err = xenbus_scanf(dev->nodename, "handle", "%u", &handle);
+       err = xenbus_scanf(NULL, dev->nodename, "handle", "%u", &handle);
        if (XENBUS_EXIST_ERR(err))
                return err;
        if (err < 0) {
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Mon Oct 10 
13:46:53 2005
@@ -246,7 +246,10 @@
                                   PAGE_SHIFT);
                ret = xen_start_info->store_mfn;
 
-               /* We'll return then this will wait for daemon to answer */
+               /* 
+               ** Complete initialization of xenbus (viz. set up the 
+               ** connection to xenstored now that it has started). 
+               */
                kthread_run(do_xenbus_probe, NULL, "xenbus_probe");
        }
        break;
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Mon Oct 10 13:46:53 2005
@@ -66,12 +66,13 @@
        unsigned int evtchn;
        unsigned long ready = 1;
        int err;
+       struct xenbus_transaction *xbt;
        struct backend_info *be
                = container_of(watch, struct backend_info, watch);
 
        /* If other end is gone, delete ourself. */
-       if (vec && !xenbus_exists(be->frontpath, "")) {
-               xenbus_rm(be->dev->nodename, "");
+       if (vec && !xenbus_exists(NULL, be->frontpath, "")) {
+               xenbus_rm(NULL, be->dev->nodename, "");
                device_unregister(&be->dev->dev);
                return;
        }
@@ -79,7 +80,7 @@
        if (be->tpmif == NULL || be->tpmif->status == CONNECTED)
                return;
 
-       err = xenbus_gather(be->frontpath,
+       err = xenbus_gather(NULL, be->frontpath,
                            "ring-ref", "%lu", &ringref,
                            "event-channel", "%u", &evtchn, NULL);
        if (err) {
@@ -115,20 +116,20 @@
         * unless something bad happens
         */
 again:
-       err = xenbus_transaction_start();
-       if (err) {
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt)) {
                xenbus_dev_error(be->dev, err, "starting transaction");
                return;
        }
 
-       err = xenbus_printf(be->dev->nodename,
+       err = xenbus_printf(xbt, be->dev->nodename,
                            "ready", "%lu", ready);
        if (err) {
                xenbus_dev_error(be->dev, err, "writing 'ready'");
                goto abort;
        }
 
-       err = xenbus_transaction_end(0);
+       err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
                goto again;
        if (err) {
@@ -139,7 +140,7 @@
        xenbus_dev_ok(be->dev);
        return;
 abort:
-       xenbus_transaction_end(1);
+       xenbus_transaction_end(xbt, 1);
 }
 
 
@@ -152,7 +153,7 @@
                = container_of(watch, struct backend_info, backend_watch);
        struct xenbus_device *dev = be->dev;
 
-       err = xenbus_scanf(dev->nodename, "instance", "%li", &instance);
+       err = xenbus_scanf(NULL, dev->nodename, "instance", "%li", &instance);
        if (XENBUS_EXIST_ERR(err))
                return;
        if (err < 0) {
@@ -205,7 +206,7 @@
        memset(be, 0, sizeof(*be));
 
        frontend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "frontend-id", "%li", &be->frontend_id,
                            "frontend", NULL, &frontend,
                            NULL);
@@ -217,7 +218,7 @@
                                 dev->nodename);
                goto free_be;
        }
-       if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
+       if (strlen(frontend) == 0 || !xenbus_exists(NULL, frontend, "")) {
                /* If we can't get a frontend path and a frontend-id,
                 * then our bus-id is no longer valid and we need to
                 * destroy the backend device.
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c      Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c      Mon Oct 10 
13:46:53 2005
@@ -226,7 +226,7 @@
        if (tp->connected)
                return;
 
-       err = xenbus_gather(watch->node,
+       err = xenbus_gather(NULL, watch->node,
                            "ready", "%lu", &ready,
                            NULL);
        if (err) {
@@ -311,9 +311,10 @@
        const char *message;
        int err;
        int backend_id;
+       struct xenbus_transaction *xbt;
 
        backend = NULL;
-       err = xenbus_gather(dev->nodename,
+       err = xenbus_gather(NULL, dev->nodename,
                            "backend-id", "%i", &backend_id,
                            "backend", NULL, &backend,
                            NULL);
@@ -339,27 +340,27 @@
        }
 
 again:
-       err = xenbus_transaction_start();
-       if (err) {
+       xbt = xenbus_transaction_start();
+       if (IS_ERR(xbt)) {
                xenbus_dev_error(dev, err, "starting transaction");
                goto destroy_tpmring;
        }
 
-       err = xenbus_printf(dev->nodename,
+       err = xenbus_printf(xbt, dev->nodename,
                            "ring-ref","%u", info->ring_ref);
        if (err) {
                message = "writing ring-ref";
                goto abort_transaction;
        }
 
-       err = xenbus_printf(dev->nodename,
+       err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", my_private.evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }
 
-       err = xenbus_transaction_end(0);
+       err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
                goto again;
        if (err) {
@@ -380,8 +381,7 @@
        return 0;
 
 abort_transaction:
-       xenbus_transaction_end(1);
-       /* Have to do this *outside* transaction.  */
+       xenbus_transaction_end(xbt, 1);
        xenbus_dev_error(dev, err, "%s", message);
 destroy_tpmring:
        destroy_tpmring(info, &my_private);
@@ -399,7 +399,7 @@
        struct tpmfront_info *info;
        int handle;
 
-       err = xenbus_scanf(dev->nodename,
+       err = xenbus_scanf(NULL, dev->nodename,
                           "handle", "%i", &handle);
        if (XENBUS_EXIST_ERR(err))
                return err;
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Mon Oct 10 
13:46:53 2005
@@ -130,15 +130,10 @@
 
                wait_event(xb_waitq, output_avail(out));
 
-               /* Read, then check: not that we don't trust store.
-                * Hell, some of my best friends are daemons.  But,
-                * in this post-911 world... */
+               mb();
                h = *out;
-               mb();
-               if (!check_buffer(&h)) {
-                       set_current_state(TASK_RUNNING);
-                       return -EIO; /* ETERRORIST! */
-               }
+               if (!check_buffer(&h))
+                       return -EIO;
 
                dst = get_output_chunk(&h, out->buf, &avail);
                if (avail > len)
@@ -173,12 +168,11 @@
                const char *src;
 
                wait_event(xb_waitq, xs_input_avail());
+
+               mb();
                h = *in;
-               mb();
-               if (!check_buffer(&h)) {
-                       set_current_state(TASK_RUNNING);
+               if (!check_buffer(&h))
                        return -EIO;
-               }
 
                src = get_input_chunk(&h, in->buf, &avail);
                if (avail > len)
@@ -195,10 +189,6 @@
                        notify_remote_via_evtchn(xen_start_info->store_evtchn);
        }
 
-       /* If we left something, wake watch thread to deal with it. */
-       if (xs_input_avail())
-               wake_up(&xb_waitq);
-
        return 0;
 }
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Mon Oct 10 
13:46:53 2005
@@ -46,85 +46,113 @@
 #include <asm/hypervisor.h>
 
 struct xenbus_dev_data {
-       /* Are there bytes left to be read in this message? */
-       int bytes_left;
-       /* Are we still waiting for the reply to a message we wrote? */
-       int awaiting_reply;
-       /* Buffer for outgoing messages. */
+       int in_transaction;
+
+       /* Partial request. */
        unsigned int len;
        union {
                struct xsd_sockmsg msg;
                char buffer[PAGE_SIZE];
        } u;
+
+       /* Response queue. */
+#define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1))
+       char read_buffer[PAGE_SIZE];
+       unsigned int read_cons, read_prod;
+       wait_queue_head_t read_waitq;
 };
 
 static struct proc_dir_entry *xenbus_dev_intf;
 
-/* Reply can be long (dir, getperm): don't buffer, just examine
- * headers so we can discard rest if they die. */
 static ssize_t xenbus_dev_read(struct file *filp,
                               char __user *ubuf,
                               size_t len, loff_t *ppos)
 {
-       struct xenbus_dev_data *data = filp->private_data;
-       struct xsd_sockmsg msg;
-       int err;
-
-       /* Refill empty buffer? */
-       if (data->bytes_left == 0) {
-               if (len < sizeof(msg))
-                       return -EINVAL;
-
-               err = xb_read(&msg, sizeof(msg));
-               if (err)
-                       return err;
-               data->bytes_left = msg.len;
-               if (ubuf && copy_to_user(ubuf, &msg, sizeof(msg)) != 0)
-                       return -EFAULT;
-               /* We can receive spurious XS_WATCH_EVENT messages. */
-               if (msg.type != XS_WATCH_EVENT)
-                       data->awaiting_reply = 0;
-               return sizeof(msg);
+       struct xenbus_dev_data *u = filp->private_data;
+       int i;
+
+       if (wait_event_interruptible(u->read_waitq,
+                                    u->read_prod != u->read_cons))
+               return -EINTR;
+
+       for (i = 0; i < len; i++) {
+               if (u->read_cons == u->read_prod)
+                       break;
+               put_user(u->read_buffer[MASK_READ_IDX(u->read_cons)], ubuf+i);
+               u->read_cons++;
        }
 
-       /* Don't read over next header, or over temporary buffer. */
-       if (len > sizeof(data->u.buffer))
-               len = sizeof(data->u.buffer);
-       if (len > data->bytes_left)
-               len = data->bytes_left;
-
-       err = xb_read(data->u.buffer, len);
-       if (err)
-               return err;
-
-       data->bytes_left -= len;
-       if (ubuf && copy_to_user(ubuf, data->u.buffer, len) != 0)
-               return -EFAULT;
-       return len;
-}
-
-/* We do v. basic sanity checking so they don't screw up kernel later. */
+       return i;
+}
+
+static void queue_reply(struct xenbus_dev_data *u,
+                       char *data, unsigned int len)
+{
+       int i;
+
+       for (i = 0; i < len; i++, u->read_prod++)
+               u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];
+
+       BUG_ON((u->read_prod - u->read_cons) > sizeof(u->read_buffer));
+
+       wake_up(&u->read_waitq);
+}
+
 static ssize_t xenbus_dev_write(struct file *filp,
                                const char __user *ubuf,
                                size_t len, loff_t *ppos)
 {
-       struct xenbus_dev_data *data = filp->private_data;
-       int err;
-
-       /* We gather data in buffer until we're ready to send it. */
-       if (len > data->len + sizeof(data->u))
+       struct xenbus_dev_data *u = filp->private_data;
+       void *reply;
+       int err = 0;
+
+       if ((len + u->len) > sizeof(u->u.buffer))
                return -EINVAL;
-       if (copy_from_user(data->u.buffer + data->len, ubuf, len) != 0)
+
+       if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0)
                return -EFAULT;
-       data->len += len;
-       if (data->len >= sizeof(data->u.msg) + data->u.msg.len) {
-               err = xb_write(data->u.buffer, data->len);
-               if (err)
-                       return err;
-               data->len = 0;
-               data->awaiting_reply = 1;
+
+       u->len += len;
+       if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
+               return len;
+
+       switch (u->u.msg.type) {
+       case XS_TRANSACTION_START:
+       case XS_TRANSACTION_END:
+       case XS_DIRECTORY:
+       case XS_READ:
+       case XS_GET_PERMS:
+       case XS_RELEASE:
+       case XS_GET_DOMAIN_PATH:
+       case XS_WRITE:
+       case XS_MKDIR:
+       case XS_RM:
+       case XS_SET_PERMS:
+               reply = xenbus_dev_request_and_reply(&u->u.msg);
+               if (IS_ERR(reply))
+                       err = PTR_ERR(reply);
+               else {
+                       if (u->u.msg.type == XS_TRANSACTION_START)
+                               u->in_transaction = 1;
+                       if (u->u.msg.type == XS_TRANSACTION_END)
+                               u->in_transaction = 0;
+                       queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
+                       queue_reply(u, (char *)reply, u->u.msg.len);
+                       kfree(reply);
+               }
+               break;
+
+       default:
+               err = -EINVAL;
+               break;
        }
-       return len;
+
+       if (err == 0) {
+               u->len = 0;
+               err = len;
+       }
+
+       return err;
 }
 
 static int xenbus_dev_open(struct inode *inode, struct file *filp)
@@ -134,7 +162,6 @@
        if (xen_start_info->store_evtchn == 0)
                return -ENOENT;
 
-       /* Don't try seeking. */
        nonseekable_open(inode, filp);
 
        u = kmalloc(sizeof(*u), GFP_KERNEL);
@@ -142,28 +169,21 @@
                return -ENOMEM;
 
        memset(u, 0, sizeof(*u));
+       init_waitqueue_head(&u->read_waitq);
 
        filp->private_data = u;
 
-       down(&xenbus_lock);
-
        return 0;
 }
 
 static int xenbus_dev_release(struct inode *inode, struct file *filp)
 {
-       struct xenbus_dev_data *data = filp->private_data;
-
-       /* Discard any unread replies. */
-       while (data->bytes_left || data->awaiting_reply)
-               xenbus_dev_read(filp, NULL, sizeof(data->u.buffer), NULL);
-
-       /* Harmless if no transaction in progress. */
-       xenbus_transaction_end(1);
-
-       up(&xenbus_lock);
-
-       kfree(data);
+       struct xenbus_dev_data *u = filp->private_data;
+
+       if (u->in_transaction)
+               xenbus_transaction_end((struct xenbus_transaction *)1, 1);
+
+       kfree(u);
 
        return 0;
 }
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Mon Oct 10 
13:46:53 2005
@@ -43,6 +43,9 @@
 
 static struct notifier_block *xenstore_chain;
 
+/* Now used to protect xenbus probes against save/restore. */
+static DECLARE_MUTEX(xenbus_lock);
+
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -125,7 +128,7 @@
 
        devid = strrchr(nodename, '/') + 1;
 
-       err = xenbus_gather(nodename, "frontend-id", "%i", &domid,
+       err = xenbus_gather(NULL, nodename, "frontend-id", "%i", &domid,
                            "frontend", NULL, &frontend,
                            NULL);
        if (err)
@@ -133,7 +136,7 @@
        if (strlen(frontend) == 0)
                err = -ERANGE;
 
-       if (!err && !xenbus_exists(frontend, ""))
+       if (!err && !xenbus_exists(NULL, frontend, ""))
                err = -ENOENT;
 
        if (err) {
@@ -447,7 +450,7 @@
        if (!nodename)
                return -ENOMEM;
 
-       dir = xenbus_directory(nodename, "", &dir_n);
+       dir = xenbus_directory(NULL, nodename, "", &dir_n);
        if (IS_ERR(dir)) {
                kfree(nodename);
                return PTR_ERR(dir);
@@ -470,7 +473,7 @@
        unsigned int dir_n = 0;
        int i;
 
-       dir = xenbus_directory(bus->root, type, &dir_n);
+       dir = xenbus_directory(NULL, bus->root, type, &dir_n);
        if (IS_ERR(dir))
                return PTR_ERR(dir);
 
@@ -489,7 +492,7 @@
        char **dir;
        unsigned int i, dir_n;
 
-       dir = xenbus_directory(bus->root, "", &dir_n);
+       dir = xenbus_directory(NULL, bus->root, "", &dir_n);
        if (IS_ERR(dir))
                return PTR_ERR(dir);
 
@@ -535,7 +538,7 @@
        if (char_count(node, '/') < 2)
                return;
 
-       exists = xenbus_exists(node, "");
+       exists = xenbus_exists(NULL, node, "");
        if (!exists) {
                xenbus_cleanup_devices(node, &bus->bus);
                return;
@@ -625,12 +628,13 @@
        down(&xenbus_lock);
        bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
        bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev);
+       xs_suspend();
 }
 
 void xenbus_resume(void)
 {
        xb_init_comms();
-       reregister_xenbus_watches();
+       xs_resume();
        bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
        bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev);
        up(&xenbus_lock);
@@ -662,12 +666,16 @@
 }
 EXPORT_SYMBOL(unregister_xenstore_notifier);
 
-/* called from a thread in privcmd/privcmd.c */
+/* 
+** Called either from below xenbus_probe_init() initcall (for domUs) 
+** or, for dom0, from a thread created in privcmd/privcmd.c (after 
+** the user-space tools have invoked initDomainStore()) 
+*/
 int do_xenbus_probe(void *unused)
 {
        int err = 0;
 
-       /* Initialize xenstore comms unless already done. */
+       /* Initialize the interface to xenstore. */
        err = xs_init();
        if (err) {
                printk("XENBUS: Error initializing xenstore comms:"
@@ -685,6 +693,7 @@
        /* Notify others that xenstore is up */
        notifier_call_chain(&xenstore_chain, 0, 0);
        up(&xenbus_lock);
+
        return 0;
 }
 
@@ -698,6 +707,10 @@
        device_register(&xenbus_frontend.dev);
        device_register(&xenbus_backend.dev);
 
+       /* 
+       ** Domain0 doesn't have a store_evtchn yet - this will
+       ** be set up later by xend invoking initDomainStore() 
+       */
        if (!xen_start_info->store_evtchn)
                return 0;
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Mon Oct 10 
13:42:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Mon Oct 10 
13:46:53 2005
@@ -42,11 +42,67 @@
 
 #define streq(a, b) (strcmp((a), (b)) == 0)
 
-static char printf_buffer[4096];
+struct xs_stored_msg {
+       struct xsd_sockmsg hdr;
+
+       union {
+               /* Stored replies. */
+               struct {
+                       struct list_head list;
+                       char *body;
+               } reply;
+
+               /* Queued watch callbacks. */
+               struct {
+                       struct work_struct work;
+                       struct xenbus_watch *handle;
+                       char **vec;
+                       unsigned int vec_size;
+               } watch;
+       } u;
+};
+
+struct xs_handle {
+       /* A list of replies. Currently only one will ever be outstanding. */
+       struct list_head reply_list;
+       spinlock_t reply_lock;
+       wait_queue_head_t reply_waitq;
+
+       /* One request at a time. */
+       struct semaphore request_mutex;
+
+       /* One transaction at a time. */
+       struct semaphore transaction_mutex;
+       int transaction_pid;
+};
+
+static struct xs_handle xs_state;
+
 static LIST_HEAD(watches);
-
-DECLARE_MUTEX(xenbus_lock);
-EXPORT_SYMBOL(xenbus_lock);
+static DEFINE_SPINLOCK(watches_lock);
+
+/* Can wait on !xs_resuming for suspend/resume cycle to complete. */
+static int xs_resuming;
+static DECLARE_WAIT_QUEUE_HEAD(xs_resuming_waitq);
+
+static void request_mutex_acquire(void)
+{
+       /*
+        * We can't distinguish non-transactional from transactional
+        * requests right now. So temporarily acquire the transaction mutex
+        * if this task is outside transaction context.
+        */
+       if (xs_state.transaction_pid != current->pid)
+               down(&xs_state.transaction_mutex);
+       down(&xs_state.request_mutex);
+}
+
+static void request_mutex_release(void)
+{
+       up(&xs_state.request_mutex);
+       if (xs_state.transaction_pid != current->pid)
+               up(&xs_state.transaction_mutex);
+}
 
 static int get_error(const char *errorstring)
 {
@@ -65,29 +121,32 @@
 
 static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
 {
-       struct xsd_sockmsg msg;
-       void *ret;
-       int err;
-
-       err = xb_read(&msg, sizeof(msg));
-       if (err)
-               return ERR_PTR(err);
-
-       ret = kmalloc(msg.len + 1, GFP_KERNEL);
-       if (!ret)
-               return ERR_PTR(-ENOMEM);
-
-       err = xb_read(ret, msg.len);
-       if (err) {
-               kfree(ret);
-               return ERR_PTR(err);
-       }
-       ((char*)ret)[msg.len] = '\0';
-
-       *type = msg.type;
+       struct xs_stored_msg *msg;
+       char *body;
+
+       spin_lock(&xs_state.reply_lock);
+
+       while (list_empty(&xs_state.reply_list)) {
+               spin_unlock(&xs_state.reply_lock);
+               wait_event(xs_state.reply_waitq,
+                          !list_empty(&xs_state.reply_list));
+               spin_lock(&xs_state.reply_lock);
+       }
+
+       msg = list_entry(xs_state.reply_list.next,
+                        struct xs_stored_msg, u.reply.list);
+       list_del(&msg->u.reply.list);
+
+       spin_unlock(&xs_state.reply_lock);
+
+       *type = msg->hdr.type;
        if (len)
-               *len = msg.len;
-       return ret;
+               *len = msg->hdr.len;
+       body = msg->u.reply.body;
+
+       kfree(msg);
+
+       return body;
 }
 
 /* Emergency write. */
@@ -98,10 +157,45 @@
        msg.type = XS_DEBUG;
        msg.len = sizeof("print") + count + 1;
 
+       request_mutex_acquire();
        xb_write(&msg, sizeof(msg));
        xb_write("print", sizeof("print"));
        xb_write(str, count);
        xb_write("", 1);
+       request_mutex_release();
+}
+
+void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
+{
+       void *ret;
+       struct xsd_sockmsg req_msg = *msg;
+       int err;
+
+       if (req_msg.type == XS_TRANSACTION_START) {
+               down(&xs_state.transaction_mutex);
+               xs_state.transaction_pid = current->pid;
+       }
+
+       request_mutex_acquire();
+
+       err = xb_write(msg, sizeof(*msg) + msg->len);
+       if (err) {
+               msg->type = XS_ERROR;
+               ret = ERR_PTR(err);
+       } else {
+               ret = read_reply(&msg->type, &msg->len);
+       }
+
+       request_mutex_release();
+
+       if ((msg->type == XS_TRANSACTION_END) ||
+           ((req_msg.type == XS_TRANSACTION_START) &&
+            (msg->type == XS_ERROR))) {
+               xs_state.transaction_pid = -1;
+               up(&xs_state.transaction_mutex);
+       }
+
+       return ret;
 }
 
 /* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
@@ -115,31 +209,33 @@
        unsigned int i;
        int err;
 
-       WARN_ON(down_trylock(&xenbus_lock) == 0);
-
        msg.type = type;
        msg.len = 0;
        for (i = 0; i < num_vecs; i++)
                msg.len += iovec[i].iov_len;
 
+       request_mutex_acquire();
+
        err = xb_write(&msg, sizeof(msg));
-       if (err)
+       if (err) {
+               up(&xs_state.request_mutex);
                return ERR_PTR(err);
+       }
 
        for (i = 0; i < num_vecs; i++) {
                err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
-               if (err)
+               if (err) {
+                       request_mutex_release();
                        return ERR_PTR(err);
-       }
-
-       /* Watches can have fired before reply comes: daemon detects
-        * and re-transmits, so we can ignore this. */
-       do {
-               kfree(ret);
-               ret = read_reply(&msg.type, len);
-               if (IS_ERR(ret))
-                       return ret;
-       } while (msg.type == XS_WATCH_EVENT);
+               }
+       }
+
+       ret = read_reply(&msg.type, len);
+
+       request_mutex_release();
+
+       if (IS_ERR(ret))
+               return ret;
 
        if (msg.type == XS_ERROR) {
                err = get_error(ret);
@@ -187,8 +283,6 @@
 {
        static char buffer[4096];
 
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
-       /* XXX FIXME: might not be correct if name == "" */
        BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer));
 
        strcpy(buffer, dir);
@@ -207,7 +301,7 @@
        *num = count_strings(strings, len);
 
        /* Transfer to one big alloc for easy freeing. */
-       ret = kmalloc(*num * sizeof(char *) + len, GFP_ATOMIC);
+       ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL);
        if (!ret) {
                kfree(strings);
                return ERR_PTR(-ENOMEM);
@@ -222,7 +316,8 @@
        return ret;
 }
 
-char **xenbus_directory(const char *dir, const char *node, unsigned int *num)
+char **xenbus_directory(struct xenbus_transaction *t,
+                       const char *dir, const char *node, unsigned int *num)
 {
        char *strings;
        unsigned int len;
@@ -236,12 +331,13 @@
 EXPORT_SYMBOL(xenbus_directory);
 
 /* Check if a path exists. Return 1 if it does. */
-int xenbus_exists(const char *dir, const char *node)
+int xenbus_exists(struct xenbus_transaction *t,
+                 const char *dir, const char *node)
 {
        char **d;
        int dir_n;
 
-       d = xenbus_directory(dir, node, &dir_n);
+       d = xenbus_directory(t, dir, node, &dir_n);
        if (IS_ERR(d))
                return 0;
        kfree(d);
@@ -253,7 +349,8 @@
  * Returns a kmalloced value: call free() on it after use.
  * len indicates length in bytes.
  */
-void *xenbus_read(const char *dir, const char *node, unsigned int *len)
+void *xenbus_read(struct xenbus_transaction *t,
+                 const char *dir, const char *node, unsigned int *len)
 {
        return xs_single(XS_READ, join(dir, node), len);
 }
@@ -262,7 +359,8 @@
 /* Write the value of a single file.
  * Returns -err on failure.
  */
-int xenbus_write(const char *dir, const char *node, const char *string)
+int xenbus_write(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *string)
 {
        const char *path;
        struct kvec iovec[2];
@@ -279,14 +377,15 @@
 EXPORT_SYMBOL(xenbus_write);
 
 /* Create a new directory. */
-int xenbus_mkdir(const char *dir, const char *node)
+int xenbus_mkdir(struct xenbus_transaction *t,
+                const char *dir, const char *node)
 {
        return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL));
 }
 EXPORT_SYMBOL(xenbus_mkdir);
 
 /* Destroy a file or directory (directories must be empty). */
-int xenbus_rm(const char *dir, const char *node)
+int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node)
 {
        return xs_error(xs_single(XS_RM, join(dir, node), NULL));
 }
@@ -294,37 +393,57 @@
 
 /* Start a transaction: changes by others will not be seen during this
  * transaction, and changes will not be visible to others until end.
- * You can only have one transaction at any time.
  */
-int xenbus_transaction_start(void)
-{
-       return xs_error(xs_single(XS_TRANSACTION_START, "", NULL));
+struct xenbus_transaction *xenbus_transaction_start(void)
+{
+       int err;
+
+       down(&xs_state.transaction_mutex);
+       xs_state.transaction_pid = current->pid;
+
+       err = xs_error(xs_single(XS_TRANSACTION_START, "", NULL));
+       if (err) {
+               xs_state.transaction_pid = -1;
+               up(&xs_state.transaction_mutex);
+       }
+
+       return err ? ERR_PTR(err) : (struct xenbus_transaction *)1;
 }
 EXPORT_SYMBOL(xenbus_transaction_start);
 
 /* End a transaction.
  * If abandon is true, transaction is discarded instead of committed.
  */
-int xenbus_transaction_end(int abort)
+int xenbus_transaction_end(struct xenbus_transaction *t, int abort)
 {
        char abortstr[2];
+       int err;
+
+       BUG_ON(t == NULL);
 
        if (abort)
                strcpy(abortstr, "F");
        else
                strcpy(abortstr, "T");
-       return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL));
+
+       err = xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL));
+
+       xs_state.transaction_pid = -1;
+       up(&xs_state.transaction_mutex);
+
+       return err;
 }
 EXPORT_SYMBOL(xenbus_transaction_end);
 
 /* Single read and scanf: returns -errno or num scanned. */
-int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
+int xenbus_scanf(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *fmt, ...)
 {
        va_list ap;
        int ret;
        char *val;
 
-       val = xenbus_read(dir, node, NULL);
+       val = xenbus_read(t, dir, node, NULL);
        if (IS_ERR(val))
                return PTR_ERR(val);
 
@@ -340,18 +459,28 @@
 EXPORT_SYMBOL(xenbus_scanf);
 
 /* Single printf and write: returns -errno or 0. */
-int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
+int xenbus_printf(struct xenbus_transaction *t,
+                 const char *dir, const char *node, const char *fmt, ...)
 {
        va_list ap;
        int ret;
-
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
+#define PRINTF_BUFFER_SIZE 4096
+       char *printf_buffer;
+
+       printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
+       if (printf_buffer == NULL)
+               return -ENOMEM;
+
        va_start(ap, fmt);
-       ret = vsnprintf(printf_buffer, sizeof(printf_buffer), fmt, ap);
+       ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
        va_end(ap);
 
-       BUG_ON(ret > sizeof(printf_buffer)-1);
-       return xenbus_write(dir, node, printf_buffer);
+       BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
+       ret = xenbus_write(t, dir, node, printf_buffer);
+
+       kfree(printf_buffer);
+
+       return ret;
 }
 EXPORT_SYMBOL(xenbus_printf);
 
@@ -361,19 +490,28 @@
        va_list ap;
        int ret;
        unsigned int len;
-
-       BUG_ON(down_trylock(&xenbus_lock) == 0);
+       char *printf_buffer;
+
+       printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
+       if (printf_buffer == NULL)
+               goto fail;
 
        len = sprintf(printf_buffer, "%i ", -err);
        va_start(ap, fmt);
-       ret = vsnprintf(printf_buffer+len, sizeof(printf_buffer)-len, fmt, ap);
+       ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
        va_end(ap);
 
-       BUG_ON(len + ret > sizeof(printf_buffer)-1);
+       BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
        dev->has_error = 1;
-       if (xenbus_write(dev->nodename, "error", printf_buffer) != 0)
-               printk("xenbus: failed to write error node for %s (%s)\n",
-                      dev->nodename, printf_buffer);
+       if (xenbus_write(NULL, dev->nodename, "error", printf_buffer) != 0)
+               goto fail;
+
+       kfree(printf_buffer);
+       return;
+
+ fail:
+       printk("xenbus: failed to write error node for %s (%s)\n",
+              dev->nodename, printf_buffer);
 }
 EXPORT_SYMBOL(xenbus_dev_error);
 
@@ -381,7 +519,7 @@
 void xenbus_dev_ok(struct xenbus_device *dev)
 {
        if (dev->has_error) {
-               if (xenbus_rm(dev->nodename, "error") != 0)
+               if (xenbus_rm(NULL, dev->nodename, "error") != 0)
                        printk("xenbus: failed to clear error node for %s\n",
                               dev->nodename);
                else
@@ -391,7 +529,7 @@
 EXPORT_SYMBOL(xenbus_dev_ok);
        
 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
-int xenbus_gather(const char *dir, ...)
+int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...)
 {
        va_list ap;
        const char *name;
@@ -403,7 +541,7 @@
                void *result = va_arg(ap, void *);
                char *p;
 
-               p = xenbus_read(dir, name, NULL);
+               p = xenbus_read(t, dir, name, NULL);
                if (IS_ERR(p)) {
                        ret = PTR_ERR(p);
                        break;
@@ -432,26 +570,6 @@
        return xs_error(xs_talkv(XS_WATCH, iov, ARRAY_SIZE(iov), NULL));
 }
 
-static char **xs_read_watch(unsigned int *num)
-{
-       enum xsd_sockmsg_type type;
-       char *strings;
-       unsigned int len;
-
-       strings = read_reply(&type, &len);
-       if (IS_ERR(strings))
-               return (char **)strings;
-
-       BUG_ON(type != XS_WATCH_EVENT);
-
-       return split(strings, len, num);
-}
-
-static int xs_acknowledge_watch(const char *token)
-{
-       return xs_error(xs_single(XS_WATCH_ACK, token, NULL));
-}
-
 static int xs_unwatch(const char *path, const char *token)
 {
        struct kvec iov[2];
@@ -464,7 +582,6 @@
        return xs_error(xs_talkv(XS_UNWATCH, iov, ARRAY_SIZE(iov), NULL));
 }
 
-/* A little paranoia: we don't just trust token. */
 static struct xenbus_watch *find_watch(const char *token)
 {
        struct xenbus_watch *i, *cmp;
@@ -474,6 +591,7 @@
        list_for_each_entry(i, &watches, list)
                if (i == cmp)
                        return i;
+
        return NULL;
 }
 
@@ -485,11 +603,20 @@
        int err;
 
        sprintf(token, "%lX", (long)watch);
+
+       spin_lock(&watches_lock);
        BUG_ON(find_watch(token));
+       spin_unlock(&watches_lock);
 
        err = xs_watch(watch->node, token);
-       if (!err)
+
+       /* Ignore errors due to multiple registration. */
+       if ((err == 0) || (err == -EEXIST)) {
+               spin_lock(&watches_lock);
                list_add(&watch->list, &watches);
+               spin_unlock(&watches_lock);
+       }
+
        return err;
 }
 EXPORT_SYMBOL(register_xenbus_watch);
@@ -500,77 +627,192 @@
        int err;
 
        sprintf(token, "%lX", (long)watch);
+
+       spin_lock(&watches_lock);
        BUG_ON(!find_watch(token));
+       list_del(&watch->list);
+       spin_unlock(&watches_lock);
+
+       /* Ensure xs_resume() is not in progress (see comments there). */
+       wait_event(xs_resuming_waitq, !xs_resuming);
 
        err = xs_unwatch(watch->node, token);
-       list_del(&watch->list);
-
        if (err)
                printk(KERN_WARNING
                       "XENBUS Failed to release watch %s: %i\n",
                       watch->node, err);
+
+       /* Make sure watch is not in use. */
+       flush_scheduled_work();
 }
 EXPORT_SYMBOL(unregister_xenbus_watch);
 
-/* Re-register callbacks to all watches. */
-void reregister_xenbus_watches(void)
-{
+void xs_suspend(void)
+{
+       down(&xs_state.transaction_mutex);
+       down(&xs_state.request_mutex);
+}
+
+void xs_resume(void)
+{
+       struct list_head *ent, *prev_ent = &watches;
        struct xenbus_watch *watch;
        char token[sizeof(watch) * 2 + 1];
 
-       list_for_each_entry(watch, &watches, list) {
-               sprintf(token, "%lX", (long)watch);
-               xs_watch(watch->node, token);
-       }
-}
-
-static int watch_thread(void *unused)
-{
+       /* Protect against concurrent unregistration and freeing of watches. */
+       BUG_ON(xs_resuming);
+       xs_resuming = 1;
+
+       up(&xs_state.request_mutex);
+       up(&xs_state.transaction_mutex);
+
+       /*
+        * Iterate over the watch list re-registering each node. We must
+        * be careful about concurrent registrations and unregistrations.
+        * We search for the node immediately following the previously
+        * re-registered node. If we get no match then either we are done
+        * (previous node is last in list) or the node was unregistered, in
+        * which case we restart from the beginning of the list.
+        * register_xenbus_watch() + unregister_xenbus_watch() is safe because
+        * it will only ever move a watch node earlier in the list, so it
+        * cannot cause us to skip nodes.
+        */
        for (;;) {
-               char **vec = NULL;
-               unsigned int num;
-
-               wait_event(xb_waitq, xs_input_avail());
-
-               /* If this is a spurious wakeup caused by someone
-                * doing an op, they'll hold the lock and the buffer
-                * will be empty by the time we get there.               
-                */
-               down(&xenbus_lock);
-               if (xs_input_avail())
-                       vec = xs_read_watch(&num);
-
-               if (vec && !IS_ERR(vec)) {
-                       struct xenbus_watch *w;
-                       int err;
-
-                       err = xs_acknowledge_watch(vec[XS_WATCH_TOKEN]);
-                       if (err)
-                               printk(KERN_WARNING "XENBUS ack %s fail %i\n",
-                                      vec[XS_WATCH_TOKEN], err);
-                       w = find_watch(vec[XS_WATCH_TOKEN]);
-                       BUG_ON(!w);
-                       w->callback(w, (const char **)vec, num);
-                       kfree(vec);
-               } else if (vec)
-                       printk(KERN_WARNING "XENBUS xs_read_watch: %li\n",
-                              PTR_ERR(vec));
-               up(&xenbus_lock);
-       }
-}
-
+               spin_lock(&watches_lock);
+               list_for_each(ent, &watches)
+                       if (ent->prev == prev_ent)
+                               break;
+               spin_unlock(&watches_lock);
+
+               /* No match because prev_ent is at the end of the list? */
+               if ((ent == &watches) && (watches.prev == prev_ent))
+                        break; /* We're done! */
+
+               if ((prev_ent = ent) != &watches) {
+                       /*
+                        * Safe even with watch_lock not held. We are saved by
+                        * (xs_resumed==1) check in unregister_xenbus_watch.
+                        */
+                       watch = list_entry(ent, struct xenbus_watch, list);
+                       sprintf(token, "%lX", (long)watch);
+                       xs_watch(watch->node, token);
+               }
+       }
+
+       xs_resuming = 0;
+       wake_up(&xs_resuming_waitq);
+}
+
+static void xenbus_fire_watch(void *arg)
+{
+       struct xs_stored_msg *msg = arg;
+
+       msg->u.watch.handle->callback(msg->u.watch.handle,
+                                     (const char **)msg->u.watch.vec,
+                                     msg->u.watch.vec_size);
+
+       kfree(msg->u.watch.vec);
+       kfree(msg);
+}
+
+static int process_msg(void)
+{
+       struct xs_stored_msg *msg;
+       char *body;
+       int err;
+
+       msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+       if (msg == NULL)
+               return -ENOMEM;
+
+       err = xb_read(&msg->hdr, sizeof(msg->hdr));
+       if (err) {
+               kfree(msg);
+               return err;
+       }
+
+       body = kmalloc(msg->hdr.len + 1, GFP_KERNEL);
+       if (body == NULL) {
+               kfree(msg);
+               return -ENOMEM;
+       }
+
+       err = xb_read(body, msg->hdr.len);
+       if (err) {
+               kfree(body);
+               kfree(msg);
+               return err;
+       }
+       body[msg->hdr.len] = '\0';
+
+       if (msg->hdr.type == XS_WATCH_EVENT) {
+               INIT_WORK(&msg->u.watch.work, xenbus_fire_watch, msg);
+
+               msg->u.watch.vec = split(body, msg->hdr.len,
+                                        &msg->u.watch.vec_size);
+               if (IS_ERR(msg->u.watch.vec)) {
+                       kfree(msg);
+                       return PTR_ERR(msg->u.watch.vec);
+               }
+
+               spin_lock(&watches_lock);
+               msg->u.watch.handle = find_watch(
+                       msg->u.watch.vec[XS_WATCH_TOKEN]);
+               if (msg->u.watch.handle != NULL) {
+                       schedule_work(&msg->u.watch.work);
+               } else {
+                       kfree(msg->u.watch.vec);
+                       kfree(msg);
+               }
+               spin_unlock(&watches_lock);
+       } else {
+               msg->u.reply.body = body;
+               spin_lock(&xs_state.reply_lock);
+               list_add_tail(&msg->u.reply.list, &xs_state.reply_list);
+               spin_unlock(&xs_state.reply_lock);
+               wake_up(&xs_state.reply_waitq);
+       }
+
+       return 0;
+}
+
+static int read_thread(void *unused)
+{
+       int err;
+
+       for (;;) {
+               err = process_msg();
+               if (err)
+                       printk(KERN_WARNING "XENBUS error %d while reading "
+                              "message\n", err);
+       }
+}
+
+/*
+** Initialize the interface to xenstore. 
+*/
 int xs_init(void)
 {
        int err;
-       struct task_struct *watcher;
-
+       struct task_struct *reader;
+
+       INIT_LIST_HEAD(&xs_state.reply_list);
+       spin_lock_init(&xs_state.reply_lock);
+       init_waitqueue_head(&xs_state.reply_waitq);
+
+       init_MUTEX(&xs_state.request_mutex);
+       init_MUTEX(&xs_state.transaction_mutex);
+       xs_state.transaction_pid = -1;
+
+       /* Initialize the shared memory rings to talk to xenstored */
        err = xb_init_comms();
        if (err)
                return err;
        
-       watcher = kthread_run(watch_thread, NULL, "kxbwatch");
-       if (IS_ERR(watcher))
-               return PTR_ERR(watcher);
+       reader = kthread_run(read_thread, NULL, "xenbusd");
+       if (IS_ERR(reader))
+               return PTR_ERR(reader);
+
        return 0;
 }
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 
linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Mon Oct 10 13:42:38 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Mon Oct 10 13:46:53 2005
@@ -78,30 +78,35 @@
 int xenbus_register_backend(struct xenbus_driver *drv);
 void xenbus_unregister_driver(struct xenbus_driver *drv);
 
-/* Caller must hold this lock to call these functions: it's also held
- * across watch callbacks. */
-extern struct semaphore xenbus_lock;
+struct xenbus_transaction;
 
-char **xenbus_directory(const char *dir, const char *node, unsigned int *num);
-void *xenbus_read(const char *dir, const char *node, unsigned int *len);
-int xenbus_write(const char *dir, const char *node, const char *string);
-int xenbus_mkdir(const char *dir, const char *node);
-int xenbus_exists(const char *dir, const char *node);
-int xenbus_rm(const char *dir, const char *node);
-int xenbus_transaction_start(void);
-int xenbus_transaction_end(int abort);
+char **xenbus_directory(struct xenbus_transaction *t,
+                       const char *dir, const char *node, unsigned int *num);
+void *xenbus_read(struct xenbus_transaction *t,
+                 const char *dir, const char *node, unsigned int *len);
+int xenbus_write(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *string);
+int xenbus_mkdir(struct xenbus_transaction *t,
+                const char *dir, const char *node);
+int xenbus_exists(struct xenbus_transaction *t,
+                 const char *dir, const char *node);
+int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node);
+struct xenbus_transaction *xenbus_transaction_start(void);
+int xenbus_transaction_end(struct xenbus_transaction *t, int abort);
 
 /* Single read and scanf: returns -errno or num scanned if > 0. */
-int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
-       __attribute__((format(scanf, 3, 4)));
+int xenbus_scanf(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *fmt, ...)
+       __attribute__((format(scanf, 4, 5)));
 
 /* Single printf and write: returns -errno or 0. */
-int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
-       __attribute__((format(printf, 3, 4)));
+int xenbus_printf(struct xenbus_transaction *t,
+                 const char *dir, const char *node, const char *fmt, ...)
+       __attribute__((format(printf, 4, 5)));
 
 /* Generic read function: NULL-terminated triples of name,
  * sprintf-style type string, and pointer. Returns 0 or errno.*/
-int xenbus_gather(const char *dir, ...);
+int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...);
 
 /* Report a (negative) errno into the store, with explanation. */
 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,...);
@@ -113,7 +118,11 @@
 struct xenbus_watch
 {
        struct list_head list;
+
+       /* Path being watched. */
        char *node;
+
+       /* Callback (executed in a process context with no locks held). */
        void (*callback)(struct xenbus_watch *,
                         const char **vec, unsigned int len);
 };
@@ -124,7 +133,11 @@
 
 int register_xenbus_watch(struct xenbus_watch *watch);
 void unregister_xenbus_watch(struct xenbus_watch *watch);
-void reregister_xenbus_watches(void);
+void xs_suspend(void);
+void xs_resume(void);
+
+/* Used by xenbus_dev to borrow kernel's store connection. */
+void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg);
 
 /* Called from xen core code. */
 void xenbus_suspend(void);
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/blktap/xenbus.c
--- a/tools/blktap/xenbus.c     Mon Oct 10 13:42:38 2005
+++ b/tools/blktap/xenbus.c     Mon Oct 10 13:46:53 2005
@@ -260,10 +260,6 @@
     node  = res[XS_WATCH_PATH];
     token = res[XS_WATCH_TOKEN];
 
-    er = xs_acknowledge_watch(h, token);
-    if (er == 0)
-        warn("Couldn't acknowledge watch (%s)", token);
-
     w = find_watch(token);
     if (!w)
     {
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/console/client/main.c
--- a/tools/console/client/main.c       Mon Oct 10 13:42:38 2005
+++ b/tools/console/client/main.c       Mon Oct 10 13:46:53 2005
@@ -220,7 +220,7 @@
        if (path == NULL)
                err(ENOMEM, "realloc");
        strcat(path, "/console/tty");
-       str_pty = xs_read(xs, path, &len);
+       str_pty = xs_read(xs, NULL, path, &len);
 
        /* FIXME consoled currently does not assume domain-0 doesn't have a
           console which is good when we break domain-0 up.  To keep us
@@ -245,7 +245,7 @@
                struct timeval tv = { 0, 500 };
                select(0, NULL, NULL, NULL, &tv); /* pause briefly */
 
-               str_pty = xs_read(xs, path, &len);
+               str_pty = xs_read(xs, NULL, path, &len);
        }
 
        if (str_pty == NULL) {
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Mon Oct 10 13:42:38 2005
+++ b/tools/console/daemon/io.c Mon Oct 10 13:46:53 2005
@@ -179,7 +179,7 @@
                success = asprintf(&path, "%s/tty", dom->conspath) != -1;
                if (!success)
                        goto out;
-               success = xs_write(xs, path, slave, strlen(slave));
+               success = xs_write(xs, NULL, path, slave, strlen(slave));
                free(path);
                if (!success)
                        goto out;
@@ -187,7 +187,7 @@
                success = asprintf(&path, "%s/limit", dom->conspath) != -1;
                if (!success)
                        goto out;
-               data = xs_read(xs, path, &len);
+               data = xs_read(xs, NULL, path, &len);
                if (data) {
                        dom->buffer.max_capacity = strtoul(data, 0, 0);
                        free(data);
@@ -216,7 +216,7 @@
                char *p;
 
                asprintf(&path, "%s/%s", dir, name);
-               p = xs_read(xs, path, NULL);
+               p = xs_read(xs, NULL, path, NULL);
                free(path);
                if (p == NULL) {
                        ret = ENOENT;
@@ -505,7 +505,6 @@
                        domain_create_ring(dom);
        }
 
-       xs_acknowledge_watch(xs, vec[1]);
        free(vec);
 }
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Mon Oct 10 13:42:38 2005
+++ b/tools/python/xen/lowlevel/xs/xs.c Mon Oct 10 13:46:53 2005
@@ -80,8 +80,8 @@
 
 static PyObject *xspy_read(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", NULL };
-    static char *arg_spec = "s|";
+    static char *kwd_spec[] = { "transaction", "path", NULL };
+    static char *arg_spec = "ss";
     char *path = NULL;
 
     struct xs_handle *xh = xshandle(self);
@@ -89,13 +89,19 @@
     unsigned int xsval_n = 0;
     PyObject *val = NULL;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
-                                     &path))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_read(xh, path, &xsval_n);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_read(xh, th, path, &xsval_n);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         if (errno == ENOENT) {
@@ -123,8 +129,8 @@
 
 static PyObject *xspy_write(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", "data", NULL };
-    static char *arg_spec = "ss#";
+    static char *kwd_spec[] = { "transaction", "path", "data", NULL };
+    static char *arg_spec = "sss#";
     char *path = NULL;
     char *data = NULL;
     int data_n = 0;
@@ -133,13 +139,19 @@
     PyObject *val = NULL;
     int xsval = 0;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
-                                     &path, &data, &data_n))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_write(xh, path, data, data_n);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path, &data, &data_n))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_write(xh, th, path, data, data_n);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -162,8 +174,8 @@
 
 static PyObject *xspy_ls(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", NULL };
-    static char *arg_spec = "s|";
+    static char *kwd_spec[] = { "transaction", "path", NULL };
+    static char *arg_spec = "ss";
     char *path = NULL;
 
     struct xs_handle *xh = xshandle(self);
@@ -172,12 +184,20 @@
     unsigned int xsval_n = 0;
     int i;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_directory(xh, path, &xsval_n);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path))
+        goto exit;
+
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_directory(xh, th, path, &xsval_n);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         if (errno == ENOENT) {
@@ -205,20 +225,27 @@
 
 static PyObject *xspy_mkdir(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", NULL };
-    static char *arg_spec = "s|";
+    static char *kwd_spec[] = { "transaction", "path", NULL };
+    static char *arg_spec = "ss";
     char *path = NULL;
 
     struct xs_handle *xh = xshandle(self);
     PyObject *val = NULL;
     int xsval = 0;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_mkdir(xh, path);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_mkdir(xh, th, path);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -240,20 +267,27 @@
 
 static PyObject *xspy_rm(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", NULL };
-    static char *arg_spec = "s|";
+    static char *kwd_spec[] = { "transaction", "path", NULL };
+    static char *arg_spec = "ss";
     char *path = NULL;
 
     struct xs_handle *xh = xshandle(self);
     PyObject *val = NULL;
     int xsval = 0;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_rm(xh, path);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_rm(xh, th, path);
     Py_END_ALLOW_THREADS
     if (!xsval && errno != ENOENT) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -276,8 +310,8 @@
 static PyObject *xspy_get_permissions(PyObject *self, PyObject *args,
                                       PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", NULL };
-    static char *arg_spec = "s|";
+    static char *kwd_spec[] = { "transaction", "path", NULL };
+    static char *arg_spec = "ss";
     char *path = NULL;
 
     struct xs_handle *xh = xshandle(self);
@@ -286,12 +320,19 @@
     unsigned int perms_n = 0;
     int i;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    perms = xs_get_permissions(xh, path, &perms_n);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    perms = xs_get_permissions(xh, th, path, &perms_n);
     Py_END_ALLOW_THREADS
     if (!perms) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -321,8 +362,8 @@
 static PyObject *xspy_set_permissions(PyObject *self, PyObject *args,
                                       PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", "perms", NULL };
-    static char *arg_spec = "sO";
+    static char *kwd_spec[] = { "transaction", "path", "perms", NULL };
+    static char *arg_spec = "ssO";
     char *path = NULL;
     PyObject *perms = NULL;
     static char *perm_names[] = { "dom", "read", "write", NULL };
@@ -335,11 +376,17 @@
     PyObject *tuple0 = NULL;
     PyObject *val = NULL;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
-                                     &path, &perms))
-        goto exit;
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &path, &perms))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
     if (!PyList_Check(perms)) {
         PyErr_SetString(PyExc_RuntimeError, "perms must be a list");
         goto exit;
@@ -369,7 +416,7 @@
             xsperms[i].perms |= XS_PERM_WRITE;
     }
     Py_BEGIN_ALLOW_THREADS
-    xsval = xs_set_permissions(xh, path, xsperms, xsperms_n);
+    xsval = xs_set_permissions(xh, th, path, xsperms, xsperms_n);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -442,9 +489,6 @@
 
 #define xspy_read_watch_doc "\n"                               \
        "Read a watch notification.\n"                          \
-       "The notification must be acknowledged by passing\n"    \
-       "the token to acknowledge_watch().\n"                   \
-       " path [string]: xenstore path.\n"                      \
        "\n"                                                    \
        "Returns: [tuple] (path, token).\n"                     \
        "Raises RuntimeError on error.\n"                       \
@@ -492,44 +536,6 @@
  exit:
     if (xsval)
         free(xsval);
-    return val;
-}
-
-#define xspy_acknowledge_watch_doc "\n"                                        
\
-       "Acknowledge a watch notification that has been read.\n"        \
-       " token [string] : from the watch notification\n"               \
-       "\n"                                                            \
-       "Returns None on success.\n"                                    \
-       "Raises RuntimeError on error.\n"                               \
-       "\n"
-
-static PyObject *xspy_acknowledge_watch(PyObject *self, PyObject *args,
-                                        PyObject *kwds)
-{
-    static char *kwd_spec[] = { "token", NULL };
-    static char *arg_spec = "O";
-    PyObject *token;
-    char token_str[MAX_STRLEN(unsigned long) + 1];
-
-    struct xs_handle *xh = xshandle(self);
-    PyObject *val = NULL;
-    int xsval = 0;
-
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &token))
-        goto exit;
-    sprintf(token_str, "%li", (unsigned long)token);
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_acknowledge_watch(xh, token_str);
-    Py_END_ALLOW_THREADS
-    if (!xsval) {
-        PyErr_SetFromErrno(PyExc_RuntimeError);
-        goto exit;
-    }
-    Py_INCREF(Py_None);
-    val = Py_None;
- exit:
     return val;
 }
 
@@ -584,9 +590,8 @@
 
 #define xspy_transaction_start_doc "\n"                                \
        "Start a transaction.\n"                                \
-       "Only one transaction can be active at a time.\n"       \
        "\n"                                                    \
-       "Returns None on success.\n"                            \
+       "Returns transaction handle on success.\n"              \
        "Raises RuntimeError on error.\n"                       \
        "\n"
 
@@ -599,21 +604,23 @@
 
     struct xs_handle *xh = xshandle(self);
     PyObject *val = NULL;
-    int xsval = 0;
+    struct xs_transaction_handle *th;
+    char thstr[20];
 
     if (!xh)
         goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path))
         goto exit;
     Py_BEGIN_ALLOW_THREADS
-    xsval = xs_transaction_start(xh);
-    Py_END_ALLOW_THREADS
-    if (!xsval) {
-        PyErr_SetFromErrno(PyExc_RuntimeError);
-        goto exit;
-    }
-    Py_INCREF(Py_None);
-    val = Py_None;
+    th = xs_transaction_start(xh);
+    Py_END_ALLOW_THREADS
+    if (th == NULL) {
+        PyErr_SetFromErrno(PyExc_RuntimeError);
+        goto exit;
+    }
+
+    sprintf(thstr, "%lX", (unsigned long)th);
+    val = PyString_FromString(thstr);
  exit:
     return val;
 }
@@ -630,20 +637,27 @@
 static PyObject *xspy_transaction_end(PyObject *self, PyObject *args,
                                       PyObject *kwds)
 {
-    static char *kwd_spec[] = { "abort", NULL };
-    static char *arg_spec = "|i";
+    static char *kwd_spec[] = { "transaction", "abort", NULL };
+    static char *arg_spec = "s|i";
     int abort = 0;
 
     struct xs_handle *xh = xshandle(self);
     PyObject *val = NULL;
     int xsval = 0;
 
-    if (!xh)
-        goto exit;
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &abort))
-        goto exit;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_transaction_end(xh, abort);
+    struct xs_transaction_handle *th;
+    char *thstr;
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &thstr, &abort))
+        goto exit;
+
+    th = (struct xs_transaction_handle *)strtoul(thstr, NULL, 16);
+
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_transaction_end(xh, th, abort);
     Py_END_ALLOW_THREADS
     if (!xsval) {
        if (errno == EAGAIN) {
@@ -833,7 +847,6 @@
      XSPY_METH(set_permissions),
      XSPY_METH(watch),
      XSPY_METH(read_watch),
-     XSPY_METH(acknowledge_watch),
      XSPY_METH(unwatch),
      XSPY_METH(transaction_start),
      XSPY_METH(transaction_end),
diff -r 0e7c48861e95 -r 1ac39c7a0435 
tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py      Mon Oct 10 13:42:38 2005
+++ b/tools/python/xen/xend/xenstore/xstransact.py      Mon Oct 10 13:46:53 2005
@@ -14,29 +14,34 @@
 class xstransact:
 
     def __init__(self, path):
-        self.in_transaction = False
         self.path = path.rstrip("/")
-        xshandle().transaction_start()
+        self.transaction = xshandle().transaction_start()
         self.in_transaction = True
 
     def __del__(self):
         if self.in_transaction:
-            xshandle().transaction_end(True)
+            xshandle().transaction_end(self.transaction, True)
 
     def commit(self):
         if not self.in_transaction:
             raise RuntimeError
         self.in_transaction = False
-        return xshandle().transaction_end(False)
+        rc = xshandle().transaction_end(self.transaction, False)
+        self.transaction = "0"
+        return rc
 
     def abort(self):
+        if not self.in_transaction:
+            return True
         self.in_transaction = False
-        return xshandle().transaction_end(True)
+        rc = xshandle().transaction_end(self.transaction, True)
+        self.transaction = "0"
+        return rc
 
     def _read(self, key):
         path = "%s/%s" % (self.path, key)
         try:
-            return xshandle().read(path)
+            return xshandle().read(self.transaction, path)
         except RuntimeError, ex:
             raise RuntimeError(ex.args[0],
                                '%s, while reading %s' % (ex.args[1], path))
@@ -50,7 +55,7 @@
         instead.
         """
         if len(args) == 0:
-            return xshandle().read(self.path)
+            return xshandle().read(self.transaction, self.path)
         if len(args) == 1:
             return self._read(args[0])
         ret = []
@@ -61,7 +66,7 @@
     def _write(self, key, data):
         path = "%s/%s" % (self.path, key)
         try:
-            xshandle().write(path, data)
+            xshandle().write(self.transaction, path, data)
         except RuntimeError, ex:
             raise RuntimeError(ex.args[0],
                                ('%s, while writing %s : %s' %
@@ -93,7 +98,7 @@
 
     def _remove(self, key):
         path = "%s/%s" % (self.path, key)
-        return xshandle().rm(path)
+        return xshandle().rm(self.transaction, path)
 
     def remove(self, *args):
         """If no arguments are given, remove this transaction's path.
@@ -101,14 +106,14 @@
         path, and remove each of those instead.
         """
         if len(args) == 0:
-            xshandle().rm(self.path)
+            xshandle().rm(self.transaction, self.path)
         else:
             for key in args:
                 self._remove(key)
 
     def _list(self, key):
         path = "%s/%s" % (self.path, key)
-        l = xshandle().ls(path)
+        l = xshandle().ls(self.transaction, path)
         if l:
             return map(lambda x: key + "/" + x, l)
         return []
@@ -120,7 +125,7 @@
         path, and return the cumulative listing of each of those instead.
         """
         if len(args) == 0:
-            ret = xshandle().ls(self.path)
+            ret = xshandle().ls(self.transaction, self.path)
             if ret is None:
                 return []
             else:
@@ -136,11 +141,11 @@
         ret = []
         for key in keys:
             new_subdir = subdir + "/" + key
-            l = xshandle().ls(new_subdir)
+            l = xshandle().ls(self.transaction, new_subdir)
             if l:
                 ret.append([key, self.list_recursive_(new_subdir, l)])
             else:
-                ret.append([key, xshandle().read(new_subdir)])
+                ret.append([key, xshandle().read(self.transaction, 
new_subdir)])
         return ret
 
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/python/xen/xend/xenstore/xswatch.py
--- a/tools/python/xen/xend/xenstore/xswatch.py Mon Oct 10 13:42:38 2005
+++ b/tools/python/xen/xend/xenstore/xswatch.py Mon Oct 10 13:46:53 2005
@@ -8,6 +8,7 @@
 import select
 import threading
 from xen.lowlevel import xs
+from xen.xend.xenstore.xsutil import xshandle
 
 class xswatch:
 
@@ -27,10 +28,7 @@
         if cls.watchThread:
             cls.xslock.release()
             return
-        # XXX: When we fix xenstored to have better watch semantics,
-        # this can change to shared xshandle(). Currently that would result
-        # in duplicate watch firings, thus failed extra xs.acknowledge_watch.
-        cls.xs = xs.open()
+        cls.xs = xshandle()
         cls.watchThread = threading.Thread(name="Watcher",
                                            target=cls.watchMain)
         cls.watchThread.setDaemon(True)
@@ -43,11 +41,10 @@
         while True:
             try:
                 we = cls.xs.read_watch()
-                watch = we[1]
-                cls.xs.acknowledge_watch(watch)
             except RuntimeError, ex:
                 print ex
                 raise
+            watch = we[1]
             watch.fn(*watch.args, **watch.kwargs)
 
     watchMain = classmethod(watchMain)
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/Makefile
--- a/tools/xenstore/Makefile   Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/Makefile   Mon Oct 10 13:46:53 2005
@@ -8,7 +8,7 @@
 INSTALL_DIR     = $(INSTALL) -d -m0755
 
 PROFILE=#-pg
-BASECFLAGS=-Wall -W -g -Werror
+BASECFLAGS=-Wall -g -Werror
 # Make gcc generate dependencies.
 BASECFLAGS += -Wp,-MD,.$(@F).d
 PROG_DEP = .*.d
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/testsuite/07watch.test
--- a/tools/xenstore/testsuite/07watch.test     Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/07watch.test     Mon Oct 10 13:46:53 2005
@@ -5,7 +5,6 @@
 2 write /test contents2
 expect 1:/test:token
 1 waitwatch
-1 ackwatch token
 1 close
 
 # Check that reads don't set it off.
@@ -22,15 +21,12 @@
 2 mkdir /dir/newdir
 expect 1:/dir/newdir:token
 1 waitwatch
-1 ackwatch token
 2 setperm /dir/newdir 0 READ
 expect 1:/dir/newdir:token
 1 waitwatch
-1 ackwatch token
 2 rm /dir/newdir
 expect 1:/dir/newdir:token
 1 waitwatch
-1 ackwatch token
 1 close
 2 close
 
@@ -49,7 +45,6 @@
 read /dir/test
 expect /dir/test:token
 waitwatch
-ackwatch token
 close
 
 # watch priority test: all simultaneous
@@ -59,13 +54,10 @@
 write /dir/test contents
 expect 3:/dir/test:token3
 3 waitwatch
-3 ackwatch token3
 expect 2:/dir/test:token2
 2 waitwatch
-2 ackwatch token2
 expect 1:/dir/test:token1
 1 waitwatch
-1 ackwatch token1
 1 close
 2 close
 3 close
@@ -79,7 +71,6 @@
 2 close
 expect 1:/dir/test:token1
 1 waitwatch
-1 ackwatch token1
 1 close
 
 # If one dies (without reading at all), the other should still get ack.
@@ -89,7 +80,6 @@
 2 close
 expect 1:/dir/test:token1
 1 waitwatch
-1 ackwatch token1
 1 close
 2 close
 
@@ -111,7 +101,6 @@
 2 unwatch /dir token2
 expect 1:/dir/test:token1
 1 waitwatch
-1 ackwatch token1
 1 close
 2 close
 
@@ -123,14 +112,12 @@
 write /dir/test contents2
 expect 1:/dir/test:token2
 1 waitwatch
-1 ackwatch token2
 
 # check we only get notified once.
 1 watch /test token
 2 write /test contents2
 expect 1:/test:token
 1 waitwatch
-1 ackwatch token
 expect 1: waitwatch failed: Connection timed out
 1 waitwatch
 1 close
@@ -142,13 +129,10 @@
 2 write /test3 contents
 expect 1:/test1:token
 1 waitwatch
-1 ackwatch token
 expect 1:/test2:token
 1 waitwatch
-1 ackwatch token
 expect 1:/test3:token
 1 waitwatch
-1 ackwatch token
 1 close
 
 # Creation of subpaths should be covered correctly.
@@ -157,10 +141,8 @@
 2 write /test/subnode/subnode contents2
 expect 1:/test/subnode:token
 1 waitwatch
-1 ackwatch token
 expect 1:/test/subnode/subnode:token
 1 waitwatch
-1 ackwatch token
 expect 1: waitwatch failed: Connection timed out
 1 waitwatch
 1 close
@@ -171,7 +153,6 @@
 1 watchnoack / token2 0
 expect 1:/test/subnode:token
 1 waitwatch
-1 ackwatch token
 expect 1:/:token2
 1 waitwatch
 expect 1: waitwatch failed: Connection timed out
@@ -183,7 +164,6 @@
 2 rm /test
 expect 1:/test/subnode:token
 1 waitwatch
-1 ackwatch token
 
 # Watch should not double-send after we ack, even if we did something in 
between.
 1 watch /test2 token
@@ -192,6 +172,5 @@
 1 waitwatch
 expect 1:contents2
 1 read /test2/foo
-1 ackwatch token
 expect 1: waitwatch failed: Connection timed out
 1 waitwatch
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/testsuite/08transaction.test
--- a/tools/xenstore/testsuite/08transaction.test       Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/08transaction.test       Mon Oct 10 13:46:53 2005
@@ -68,7 +68,6 @@
 2 commit
 expect 1:/test/dir/sub:token
 1 waitwatch
-1 ackwatch token
 1 close
 
 # Rm inside transaction works like rm outside: children get notified.
@@ -78,7 +77,6 @@
 2 commit
 expect 1:/test/dir/sub:token
 1 waitwatch
-1 ackwatch token
 1 close
 
 # Multiple events from single transaction don't trigger assert
@@ -89,8 +87,6 @@
 2 commit
 expect 1:/test/1:token
 1 waitwatch
-1 ackwatch token
 expect 1:/test/2:token
 1 waitwatch
-1 ackwatch token
 1 close
diff -r 0e7c48861e95 -r 1ac39c7a0435 
tools/xenstore/testsuite/10domain-homedir.test
--- a/tools/xenstore/testsuite/10domain-homedir.test    Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/10domain-homedir.test    Mon Oct 10 13:46:53 2005
@@ -16,4 +16,3 @@
 write /home/foo/bar contents
 expect 1:foo/bar:token
 1 waitwatch
-1 ackwatch token
diff -r 0e7c48861e95 -r 1ac39c7a0435 
tools/xenstore/testsuite/11domain-watch.test
--- a/tools/xenstore/testsuite/11domain-watch.test      Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/11domain-watch.test      Mon Oct 10 13:46:53 2005
@@ -10,7 +10,6 @@
 write /test contents2
 expect 1:/test:token
 1 waitwatch
-1 ackwatch token
 1 unwatch /test token
 release 1
 1 close
@@ -25,7 +24,6 @@
 1 write /dir/test4 contents4
 expect 1:/dir/test:token
 1 waitwatch
-1 ackwatch token
 release 1
 1 close
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/testsuite/12readonly.test
--- a/tools/xenstore/testsuite/12readonly.test  Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/12readonly.test  Mon Oct 10 13:46:53 2005
@@ -36,4 +36,3 @@
 1 write /test contents
 expect /test:token
 waitwatch
-ackwatch token
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/testsuite/13watch-ack.test
--- a/tools/xenstore/testsuite/13watch-ack.test Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/testsuite/13watch-ack.test Mon Oct 10 13:46:53 2005
@@ -18,5 +18,4 @@
 1 waitwatch
 3 write /test/1 contents1
 4 write /test/3 contents3
-1 ackwatch token2
 1 close
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstore_client.c
--- a/tools/xenstore/xenstore_client.c  Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstore_client.c  Mon Oct 10 13:46:53 2005
@@ -32,6 +32,7 @@
 main(int argc, char **argv)
 {
     struct xs_handle *xsh;
+    struct xs_transaction_handle *xth;
     bool success;
     int ret = 0;
 #if defined(CLIENT_read) || defined(CLIENT_list)
@@ -84,13 +85,13 @@
 #endif
 
   again:
-    success = xs_transaction_start(xsh);
-    if (!success)
+    xth = xs_transaction_start(xsh);
+    if (xth == NULL)
        errx(1, "couldn't start transaction");
 
     while (optind < argc) {
 #if defined(CLIENT_read)
-       char *val = xs_read(xsh, argv[optind], NULL);
+       char *val = xs_read(xsh, xth, argv[optind], NULL);
        if (val == NULL) {
            warnx("couldn't read path %s", argv[optind]);
            ret = 1;
@@ -102,7 +103,7 @@
        free(val);
        optind++;
 #elif defined(CLIENT_write)
-       success = xs_write(xsh, argv[optind], argv[optind + 1],
+       success = xs_write(xsh, xth, argv[optind], argv[optind + 1],
                           strlen(argv[optind + 1]));
        if (!success) {
            warnx("could not write path %s", argv[optind]);
@@ -111,7 +112,7 @@
        }
        optind += 2;
 #elif defined(CLIENT_rm)
-       success = xs_rm(xsh, argv[optind]);
+       success = xs_rm(xsh, xth, argv[optind]);
        if (!success) {
            warnx("could not remove path %s", argv[optind]);
            ret = 1;
@@ -119,7 +120,7 @@
        }
        optind++;
 #elif defined(CLIENT_exists)
-       char *val = xs_read(xsh, argv[optind], NULL);
+       char *val = xs_read(xsh, xth, argv[optind], NULL);
        if (val == NULL) {
            ret = 1;
            goto out;
@@ -128,7 +129,7 @@
        optind++;
 #elif defined(CLIENT_list)
        unsigned int i, num;
-       char **list = xs_directory(xsh, argv[optind], &num);
+       char **list = xs_directory(xsh, xth, argv[optind], &num);
        if (list == NULL) {
            warnx("could not list path %s", argv[optind]);
            ret = 1;
@@ -145,7 +146,7 @@
     }
 
  out:
-    success = xs_transaction_end(xsh, ret ? true : false);
+    success = xs_transaction_end(xsh, xth, ret ? true : false);
     if (!success) {
        if (ret == 0 && errno == EAGAIN)
            goto again;
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_core.c   Mon Oct 10 13:46:53 2005
@@ -154,7 +154,6 @@
        case XS_READ: return "READ";
        case XS_GET_PERMS: return "GET_PERMS";
        case XS_WATCH: return "WATCH";
-       case XS_WATCH_ACK: return "WATCH_ACK";
        case XS_UNWATCH: return "UNWATCH";
        case XS_TRANSACTION_START: return "TRANSACTION_START";
        case XS_TRANSACTION_END: return "TRANSACTION_END";
@@ -236,52 +235,50 @@
        talloc_free(str);
 }
 
-static bool write_message(struct connection *conn)
+static bool write_messages(struct connection *conn)
 {
        int ret;
-       struct buffered_data *out = conn->out;
-
-       if (out->inhdr) {
-               if (verbose)
-                       xprintf("Writing msg %s (%s) out to %p\n",
-                               sockmsg_string(out->hdr.msg.type),
-                               out->buffer, conn);
-               ret = conn->write(conn, out->hdr.raw + out->used,
-                                 sizeof(out->hdr) - out->used);
+       struct buffered_data *out, *tmp;
+
+       list_for_each_entry_safe(out, tmp, &conn->out_list, list) {
+               if (out->inhdr) {
+                       if (verbose)
+                               xprintf("Writing msg %s (%s) out to %p\n",
+                                       sockmsg_string(out->hdr.msg.type),
+                                       out->buffer, conn);
+                       ret = conn->write(conn, out->hdr.raw + out->used,
+                                         sizeof(out->hdr) - out->used);
+                       if (ret < 0)
+                               return false;
+
+                       out->used += ret;
+                       if (out->used < sizeof(out->hdr))
+                               return true;
+
+                       out->inhdr = false;
+                       out->used = 0;
+
+                       /* Second write might block if non-zero. */
+                       if (out->hdr.msg.len && !conn->domain)
+                               return true;
+               }
+
+               ret = conn->write(conn, out->buffer + out->used,
+                                 out->hdr.msg.len - out->used);
+
                if (ret < 0)
                        return false;
 
                out->used += ret;
-               if (out->used < sizeof(out->hdr))
+               if (out->used != out->hdr.msg.len)
                        return true;
 
-               out->inhdr = false;
-               out->used = 0;
-
-               /* Second write might block if non-zero. */
-               if (out->hdr.msg.len && !conn->domain)
-                       return true;
-       }
-
-       ret = conn->write(conn, out->buffer + out->used,
-                         out->hdr.msg.len - out->used);
-
-       if (ret < 0)
-               return false;
-
-       out->used += ret;
-       if (out->used != out->hdr.msg.len)
-               return true;
-
-       trace_io(conn, "OUT", out);
-       conn->out = NULL;
-       talloc_free(out);
-
-       queue_next_event(conn);
-
-       /* No longer busy? */
-       if (!conn->out)
-               conn->state = OK;
+               trace_io(conn, "OUT", out);
+
+               list_del(&out->list);
+               talloc_free(out);
+       }
+
        return true;
 }
 
@@ -298,9 +295,9 @@
                FD_SET(conn->fd, &set);
                none.tv_sec = none.tv_usec = 0;
 
-               while (conn->out
+               while (!list_empty(&conn->out_list)
                       && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
-                       if (!write_message(conn))
+                       if (!write_messages(conn))
                                break;
                close(conn->fd);
        }
@@ -327,9 +324,8 @@
        list_for_each_entry(i, &connections, list) {
                if (i->domain)
                        continue;
-               if (i->state == OK)
-                       FD_SET(i->fd, inset);
-               if (i->out)
+               FD_SET(i->fd, inset);
+               if (!list_empty(&i->out_list))
                        FD_SET(i->fd, outset);
                if (i->fd > max)
                        max = i->fd;
@@ -595,14 +591,7 @@
        bdata->hdr.msg.len = len;
        memcpy(bdata->buffer, data, len);
 
-       /* There might be an event going out now.  Queue behind it. */
-       if (conn->out) {
-               assert(conn->out->hdr.msg.type == XS_WATCH_EVENT);
-               assert(!conn->waiting_reply);
-               conn->waiting_reply = bdata;
-       } else
-               conn->out = bdata;
-       conn->state = BUSY;
+       list_add_tail(&bdata->list, &conn->out_list);
 }
 
 /* Some routines (write, mkdir, etc) just need a non-error return */
@@ -1103,10 +1092,6 @@
                do_watch(conn, in);
                break;
 
-       case XS_WATCH_ACK:
-               do_watch_ack(conn, onearg(in));
-               break;
-
        case XS_UNWATCH:
                do_unwatch(conn, in);
                break;
@@ -1153,8 +1138,6 @@
        enum xsd_sockmsg_type volatile type = conn->in->hdr.msg.type;
        jmp_buf talloc_fail;
 
-       assert(conn->state == OK);
-
        /* For simplicity, we kill the connection on OOM. */
        talloc_set_fail_handler(out_of_mem, &talloc_fail);
        if (setjmp(talloc_fail)) {
@@ -1168,11 +1151,6 @@
                xprintf("Got message %s len %i from %p\n",
                        sockmsg_string(type), conn->in->hdr.msg.len, conn);
 
-       /* We might get a command while waiting for an ack: this means
-        * the other end discarded it: we will re-transmit. */
-       if (type != XS_WATCH_ACK)
-               conn->waiting_for_ack = NULL;
-
        /* Careful: process_message may free connection.  We detach
         * "in" beforehand and allocate the new buffer to avoid
         * touching conn after process_message.
@@ -1196,10 +1174,7 @@
 static void handle_input(struct connection *conn)
 {
        int bytes;
-       struct buffered_data *in;
-
-       assert(conn->state == OK);
-       in = conn->in;
+       struct buffered_data *in = conn->in;
 
        /* Not finished header yet? */
        if (in->inhdr) {
@@ -1247,7 +1222,7 @@
 
 static void handle_output(struct connection *conn)
 {
-       if (!write_message(conn))
+       if (!write_messages(conn))
                talloc_free(conn);
 }
 
@@ -1264,9 +1239,6 @@
        if (!new)
                return NULL;
 
-       new->state = OK;
-       new->out = new->waiting_reply = NULL;
-       new->waiting_for_ack = NULL;
        new->fd = -1;
        new->id = 0;
        new->domain = NULL;
@@ -1274,6 +1246,7 @@
        new->write = write;
        new->read = read;
        new->can_write = true;
+       INIT_LIST_HEAD(&new->out_list);
        INIT_LIST_HEAD(&new->watches);
 
        talloc_set_fail_handler(out_of_mem, &talloc_fail);
@@ -1328,23 +1301,17 @@
        list_for_each_entry(i, &connections, list) {
                printf("Connection %p:\n", i);
                printf("    state = %s\n",
-                      i->state == OK ? "OK"
-                      : i->state == BUSY ? "BUSY"
-                      : "INVALID");
+                      list_empty(&i->out_list) ? "OK" : "BUSY");
                if (i->id)
                        printf("    id = %i\n", i->id);
                if (!i->in->inhdr || i->in->used)
                        printf("    got %i bytes of %s\n",
                               i->in->used, i->in->inhdr ? "header" : "data");
+#if 0
                if (i->out)
                        printf("    sending message %s (%s) out\n",
                               sockmsg_string(i->out->hdr.msg.type),
                               i->out->buffer);
-               if (i->waiting_reply)
-                       printf("    ... and behind is queued %s (%s)\n",
-                              sockmsg_string(i->waiting_reply->hdr.msg.type),
-                              i->waiting_reply->buffer);
-#if 0
                if (i->transaction)
                        dump_transaction(i);
                if (i->domain)
@@ -1615,3 +1582,13 @@
                max = initialize_set(&inset, &outset, *sock, *ro_sock);
        }
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_core.h
--- a/tools/xenstore/xenstored_core.h   Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_core.h   Mon Oct 10 13:46:53 2005
@@ -31,14 +31,19 @@
 
 struct buffered_data
 {
+       struct list_head list;
+
        /* Are we still doing the header? */
        bool inhdr;
+
        /* How far are we? */
        unsigned int used;
+
        union {
                struct xsd_sockmsg msg;
                char raw[sizeof(struct xsd_sockmsg)];
        } hdr;
+
        /* The actual data. */
        char *buffer;
 };
@@ -47,14 +52,6 @@
 typedef int connwritefn_t(struct connection *, const void *, unsigned int);
 typedef int connreadfn_t(struct connection *, void *, unsigned int);
 
-enum state
-{
-       /* Doing action, not listening */
-       BUSY,
-       /* Completed */
-       OK,
-};
-
 struct connection
 {
        struct list_head list;
@@ -62,26 +59,17 @@
        /* The file descriptor we came in on. */
        int fd;
 
-       /* Who am I?  0 for socket connections. */
+       /* Who am I? 0 for socket connections. */
        domid_t id;
-
-       /* Blocked on transaction?  Busy? */
-       enum state state;
 
        /* Is this a read-only connection? */
        bool can_write;
-
-       /* Are we waiting for a watch event ack? */
-       struct watch *waiting_for_ack;
 
        /* Buffered incoming data. */
        struct buffered_data *in;
 
        /* Buffered output data */
-       struct buffered_data *out;
-
-       /* If we had a watch fire outgoing when we needed to reply... */
-       struct buffered_data *waiting_reply;
+       struct list_head out_list;
 
        /* My transaction, if any. */
        struct transaction *transaction;
@@ -175,3 +163,13 @@
 extern int event_fd;
 
 #endif /* _XENSTORED_CORE_H */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_domain.c Mon Oct 10 13:46:53 2005
@@ -276,12 +276,13 @@
 
 bool domain_can_read(struct connection *conn)
 {
-       return conn->state == OK && buffer_has_input(conn->domain->input);
+       return buffer_has_input(conn->domain->input);
 }
 
 bool domain_can_write(struct connection *conn)
 {
-       return conn->out && buffer_has_output_room(conn->domain->output);
+       return (!list_empty(&conn->out_list) &&
+                buffer_has_output_room(conn->domain->output));
 }
 
 static struct domain *new_domain(void *context, domid_t domid,
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_transaction.c
--- a/tools/xenstore/xenstored_transaction.c    Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_transaction.c    Mon Oct 10 13:46:53 2005
@@ -154,9 +154,9 @@
                return;
        }
 
-       /* Set to NULL so fire_watches sends events, tdb_context works. */
        trans = conn->transaction;
        conn->transaction = NULL;
+
        /* Attach transaction to arg for auto-cleanup */
        talloc_steal(arg, trans);
 
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_watch.c
--- a/tools/xenstore/xenstored_watch.c  Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_watch.c  Mon Oct 10 13:46:53 2005
@@ -32,17 +32,6 @@
 #include "xenstored_test.h"
 #include "xenstored_domain.h"
 
-/* FIXME: time out unacked watches. */
-struct watch_event
-{
-       /* The events on this watch. */
-       struct list_head list;
-
-       /* Data to send (node\0token\0). */
-       unsigned int len;
-       char *data;
-};
-
 struct watch
 {
        /* Watches on this connection */
@@ -58,54 +47,17 @@
        char *node;
 };
 
-/* Look through our watches: if any of them have an event, queue it. */
-void queue_next_event(struct connection *conn)
-{
-       struct watch_event *event;
-       struct watch *watch;
-
-       /* We had a reply queued already?  Send it: other end will
-        * discard watch. */
-       if (conn->waiting_reply) {
-               conn->out = conn->waiting_reply;
-               conn->waiting_reply = NULL;
-               conn->waiting_for_ack = NULL;
-               return;
-       }
-
-       /* If we're already waiting for ack, don't queue more. */
-       if (conn->waiting_for_ack)
-               return;
-
-       list_for_each_entry(watch, &conn->watches, list) {
-               event = list_top(&watch->events, struct watch_event, list);
-               if (event) {
-                       conn->waiting_for_ack = watch;
-                       send_reply(conn,XS_WATCH_EVENT,event->data,event->len);
-                       break;
-               }
-       }
-}
-
-static int destroy_watch_event(void *_event)
-{
-       struct watch_event *event = _event;
-
-       trace_destroy(event, "watch_event");
-       return 0;
-}
-
 static void add_event(struct connection *conn,
                      struct watch *watch,
                      const char *name)
 {
-       struct watch_event *event;
+       /* Data to send (node\0token\0). */
+       unsigned int len;
+       char *data;
 
        if (!check_event_node(name)) {
                /* Can this conn load node, or see that it doesn't exist? */
-               struct node *node;
-
-               node = get_node(conn, name, XS_PERM_READ);
+               struct node *node = get_node(conn, name, XS_PERM_READ);
                if (!node && errno != ENOENT)
                        return;
        }
@@ -116,14 +68,12 @@
                        name++;
        }
 
-       event = talloc(watch, struct watch_event);
-       event->len = strlen(name) + 1 + strlen(watch->token) + 1;
-       event->data = talloc_array(event, char, event->len);
-       strcpy(event->data, name);
-       strcpy(event->data + strlen(name) + 1, watch->token);
-       talloc_set_destructor(event, destroy_watch_event);
-       list_add_tail(&event->list, &watch->events);
-       trace_create(event, "watch_event");
+       len = strlen(name) + 1 + strlen(watch->token) + 1;
+       data = talloc_array(watch, char, len);
+       strcpy(data, name);
+       strcpy(data + strlen(name) + 1, watch->token);
+        send_reply(conn, XS_WATCH_EVENT, data, len);
+       talloc_free(data);
 }
 
 /* FIXME: we fail to fire on out of memory.  Should drop connections. */
@@ -143,11 +93,6 @@
                                add_event(i, watch, name);
                        else if (recurse && is_child(watch->node, name))
                                add_event(i, watch, watch->node);
-                       else
-                               continue;
-                       /* If connection not doing anything, queue this. */
-                       if (i->state == OK)
-                               queue_next_event(i);
                }
        }
 }
@@ -181,6 +126,15 @@
                }
        }
 
+       /* Check for duplicates. */
+       list_for_each_entry(watch, &conn->watches, list) {
+               if (streq(watch->node, vec[0]) &&
+                    streq(watch->token, vec[1])) {
+                       send_error(conn, EEXIST);
+                       return;
+               }
+       }
+
        watch = talloc(conn, struct watch);
        watch->node = talloc_strdup(watch, vec[0]);
        watch->token = talloc_strdup(watch, vec[1]);
@@ -200,37 +154,6 @@
        add_event(conn, watch, watch->node);
 }
 
-void do_watch_ack(struct connection *conn, const char *token)
-{
-       struct watch_event *event;
-
-       if (!token) {
-               send_error(conn, EINVAL);
-               return;
-       }
-
-       if (!conn->waiting_for_ack) {
-               send_error(conn, ENOENT);
-               return;
-       }
-
-       if (!streq(conn->waiting_for_ack->token, token)) {
-               /* They're confused: this will cause us to send event again */
-               conn->waiting_for_ack = NULL;
-               send_error(conn, EINVAL);
-               return;
-       }
-
-       /* Remove event: after ack sent, core will call queue_next_event */
-       event = list_top(&conn->waiting_for_ack->events, struct watch_event,
-                        list);
-       list_del(&event->list);
-       talloc_free(event);
-
-       conn->waiting_for_ack = NULL;
-       send_ack(conn, XS_WATCH_ACK);
-}
-
 void do_unwatch(struct connection *conn, struct buffered_data *in)
 {
        struct watch *watch;
@@ -241,9 +164,6 @@
                return;
        }
 
-       /* We don't need to worry if we're waiting for an ack for the
-        * watch we're deleting: conn->waiting_for_ack was reset by
-        * this command in consider_message anyway. */
        node = canonicalize(conn, vec[0]);
        list_for_each_entry(watch, &conn->watches, list) {
                if (streq(watch->node, node) && streq(watch->token, vec[1])) {
@@ -260,18 +180,19 @@
 void dump_watches(struct connection *conn)
 {
        struct watch *watch;
-       struct watch_event *event;
 
-       if (conn->waiting_for_ack)
-               printf("    waiting_for_ack for watch on %s token %s\n",
-                      conn->waiting_for_ack->node,
-                      conn->waiting_for_ack->token);
-
-       list_for_each_entry(watch, &conn->watches, list) {
+       list_for_each_entry(watch, &conn->watches, list)
                printf("    watch on %s token %s\n",
                       watch->node, watch->token);
-               list_for_each_entry(event, &watch->events, list)
-                       printf("        event: %s\n", event->data);
-       }
 }
 #endif
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xenstored_watch.h
--- a/tools/xenstore/xenstored_watch.h  Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xenstored_watch.h  Mon Oct 10 13:46:53 2005
@@ -23,17 +23,9 @@
 #include "xenstored_core.h"
 
 void do_watch(struct connection *conn, struct buffered_data *in);
-void do_watch_ack(struct connection *conn, const char *token);
 void do_unwatch(struct connection *conn, struct buffered_data *in);
 
-/* Is this a watch event message for this connection? */
-bool is_watch_event(struct connection *conn, struct buffered_data *out);
-
-/* Look through our watches: if any of them have an event, queue it. */
-void queue_next_event(struct connection *conn);
-
-/* Fire all watches: recurse means all the children are affected (ie. rm).
- */
+/* Fire all watches: recurse means all the children are affected (ie. rm). */
 void fire_watches(struct connection *conn, const char *name, bool recurse);
 
 void dump_watches(struct connection *conn);
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xs.c
--- a/tools/xenstore/xs.c       Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xs.c       Mon Oct 10 13:46:53 2005
@@ -78,9 +78,33 @@
 
        /* One transaction at a time. */
        pthread_mutex_t transaction_mutex;
+       pthread_t transaction_pthread;
 };
 
+struct xs_transaction_handle {
+       int id;
+};
+
 static void *read_thread(void *arg);
+
+static void request_mutex_acquire(struct xs_handle *h)
+{
+       /*
+        * We can't distinguish non-transactional from transactional
+        * requests right now. So temporarily acquire the transaction mutex
+        * if this task is outside transaction context.
+        */
+       if (h->transaction_pthread != pthread_self())
+               pthread_mutex_lock(&h->transaction_mutex);
+       pthread_mutex_lock(&h->request_mutex);
+}
+
+static void request_mutex_release(struct xs_handle *h)
+{
+       pthread_mutex_unlock(&h->request_mutex);
+       if (h->transaction_pthread != pthread_self())
+               pthread_mutex_unlock(&h->transaction_mutex);
+}
 
 int xs_fileno(struct xs_handle *h)
 {
@@ -163,6 +187,7 @@
 
        pthread_mutex_init(&h->request_mutex, NULL);
        pthread_mutex_init(&h->transaction_mutex, NULL);
+       h->transaction_pthread = -1;
 
        if (pthread_create(&h->read_thr, NULL, read_thread, h) != 0)
                goto error;
@@ -316,7 +341,7 @@
        ignorepipe.sa_flags = 0;
        sigaction(SIGPIPE, &ignorepipe, &oldact);
 
-       pthread_mutex_lock(&h->request_mutex);
+       request_mutex_acquire(h);
 
        if (!xs_write_all(h->fd, &msg, sizeof(msg)))
                goto fail;
@@ -329,7 +354,7 @@
        if (!ret)
                goto fail;
 
-       pthread_mutex_unlock(&h->request_mutex);
+       request_mutex_release(h);
 
        sigaction(SIGPIPE, &oldact, NULL);
        if (msg.type == XS_ERROR) {
@@ -350,7 +375,7 @@
 fail:
        /* We're in a bad state, so close fd. */
        saved_errno = errno;
-       pthread_mutex_unlock(&h->request_mutex);
+       request_mutex_release(h);
        sigaction(SIGPIPE, &oldact, NULL);
 close_fd:
        close(h->fd);
@@ -386,7 +411,8 @@
        return true;
 }
 
-char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num)
+char **xs_directory(struct xs_handle *h, struct xs_transaction_handle *t,
+                   const char *path, unsigned int *num)
 {
        char *strings, *p, **ret;
        unsigned int len;
@@ -417,7 +443,8 @@
  * Returns a malloced value: call free() on it after use.
  * len indicates length in bytes, not including the nul.
  */
-void *xs_read(struct xs_handle *h, const char *path, unsigned int *len)
+void *xs_read(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path, unsigned int *len)
 {
        return xs_single(h, XS_READ, path, len);
 }
@@ -425,8 +452,8 @@
 /* Write the value of a single file.
  * Returns false on failure.
  */
-bool xs_write(struct xs_handle *h, const char *path,
-             const void *data, unsigned int len)
+bool xs_write(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path, const void *data, unsigned int len)
 {
        struct iovec iovec[2];
 
@@ -441,7 +468,8 @@
 /* Create a new directory.
  * Returns false on failure, or success if it already exists.
  */
-bool xs_mkdir(struct xs_handle *h, const char *path)
+bool xs_mkdir(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path)
 {
        return xs_bool(xs_single(h, XS_MKDIR, path, NULL));
 }
@@ -449,7 +477,8 @@
 /* Destroy a file or directory (directories must be empty).
  * Returns false on failure, or success if it doesn't exist.
  */
-bool xs_rm(struct xs_handle *h, const char *path)
+bool xs_rm(struct xs_handle *h, struct xs_transaction_handle *t,
+          const char *path)
 {
        return xs_bool(xs_single(h, XS_RM, path, NULL));
 }
@@ -458,6 +487,7 @@
  * Returns malloced array, or NULL: call free() after use.
  */
 struct xs_permissions *xs_get_permissions(struct xs_handle *h,
+                                         struct xs_transaction_handle *t,
                                          const char *path, unsigned int *num)
 {
        char *strings;
@@ -490,7 +520,9 @@
 /* Set permissions of node (must be owner).
  * Returns false on failure.
  */
-bool xs_set_permissions(struct xs_handle *h, const char *path,
+bool xs_set_permissions(struct xs_handle *h,
+                       struct xs_transaction_handle *t,
+                       const char *path,
                        struct xs_permissions *perms,
                        unsigned int num_perms)
 {
@@ -593,15 +625,6 @@
        return ret;
 }
 
-/* Acknowledge watch on node.  Watches must be acknowledged before
- * any other watches can be read.
- * Returns false on failure.
- */
-bool xs_acknowledge_watch(struct xs_handle *h, const char *token)
-{
-       return xs_bool(xs_single(h, XS_WATCH_ACK, token, NULL));
-}
-
 /* Remove a watch on a node.
  * Returns false on failure (no watch on that node).
  */
@@ -620,12 +643,22 @@
 /* Start a transaction: changes by others will not be seen during this
  * transaction, and changes will not be visible to others until end.
  * You can only have one transaction at any time.
- * Returns false on failure.
- */
-bool xs_transaction_start(struct xs_handle *h)
-{
+ * Returns NULL on failure.
+ */
+struct xs_transaction_handle *xs_transaction_start(struct xs_handle *h)
+{
+       bool rc;
+
        pthread_mutex_lock(&h->transaction_mutex);
-       return xs_bool(xs_single(h, XS_TRANSACTION_START, "", NULL));
+       h->transaction_pthread = pthread_self();
+
+       rc = xs_bool(xs_single(h, XS_TRANSACTION_START, "", NULL));
+       if (!rc) {
+               h->transaction_pthread = -1;
+               pthread_mutex_unlock(&h->transaction_mutex);
+       }
+
+       return (struct xs_transaction_handle *)rc;
 }
 
 /* End a transaction.
@@ -633,10 +666,14 @@
  * Returns false on failure, which indicates an error: transactions will
  * not fail spuriously.
  */
-bool xs_transaction_end(struct xs_handle *h, bool abort)
+bool xs_transaction_end(struct xs_handle *h, struct xs_transaction_handle *t,
+                       bool abort)
 {
        char abortstr[2];
        bool rc;
+
+       if (t == NULL)
+               return -EINVAL;
 
        if (abort)
                strcpy(abortstr, "F");
@@ -645,6 +682,7 @@
        
        rc = xs_bool(xs_single(h, XS_TRANSACTION_END, abortstr, NULL));
 
+       h->transaction_pthread = -1;
        pthread_mutex_unlock(&h->transaction_mutex);
 
        return rc;
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xs.h
--- a/tools/xenstore/xs.h       Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xs.h       Mon Oct 10 13:46:53 2005
@@ -23,6 +23,7 @@
 #include <xs_lib.h>
 
 struct xs_handle;
+struct xs_transaction_handle;
 
 /* On failure, these routines set errno. */
 
@@ -44,41 +45,47 @@
  * Returns a malloced array: call free() on it after use.
  * Num indicates size.
  */
-char **xs_directory(struct xs_handle *h, const char *path, unsigned int *num);
+char **xs_directory(struct xs_handle *h, struct xs_transaction_handle *t,
+                   const char *path, unsigned int *num);
 
 /* Get the value of a single file, nul terminated.
  * Returns a malloced value: call free() on it after use.
  * len indicates length in bytes, not including terminator.
  */
-void *xs_read(struct xs_handle *h, const char *path, unsigned int *len);
+void *xs_read(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path, unsigned int *len);
 
 /* Write the value of a single file.
  * Returns false on failure.
  */
-bool xs_write(struct xs_handle *h, const char *path, const void *data,
-             unsigned int len);
+bool xs_write(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path, const void *data, unsigned int len);
 
 /* Create a new directory.
  * Returns false on failure, or success if it already exists.
  */
-bool xs_mkdir(struct xs_handle *h, const char *path);
+bool xs_mkdir(struct xs_handle *h, struct xs_transaction_handle *t,
+             const char *path);
 
 /* Destroy a file or directory (and children).
  * Returns false on failure, or success if it doesn't exist.
  */
-bool xs_rm(struct xs_handle *h, const char *path);
+bool xs_rm(struct xs_handle *h, struct xs_transaction_handle *t,
+          const char *path);
 
 /* Get permissions of node (first element is owner, first perms is "other").
  * Returns malloced array, or NULL: call free() after use.
  */
 struct xs_permissions *xs_get_permissions(struct xs_handle *h,
+                                         struct xs_transaction_handle *t,
                                          const char *path, unsigned int *num);
 
 /* Set permissions of node (must be owner).
  * Returns false on failure.
  */
-bool xs_set_permissions(struct xs_handle *h, const char *path,
-                       struct xs_permissions *perms, unsigned int num_perms);
+bool xs_set_permissions(struct xs_handle *h, struct xs_transaction_handle *t,
+                       const char *path, struct xs_permissions *perms,
+                       unsigned int num_perms);
 
 /* Watch a node for changes (poll on fd to detect, or call read_watch()).
  * When the node (or any child) changes, fd will become readable.
@@ -96,12 +103,6 @@
  */
 char **xs_read_watch(struct xs_handle *h, unsigned int *num);
 
-/* Acknowledge watch on node.  Watches must be acknowledged before
- * any other watches can be read.
- * Returns false on failure.
- */
-bool xs_acknowledge_watch(struct xs_handle *h, const char *token);
-
 /* Remove a watch on a node: implicitly acks any outstanding watch.
  * Returns false on failure (no watch on that node).
  */
@@ -110,16 +111,17 @@
 /* Start a transaction: changes by others will not be seen during this
  * transaction, and changes will not be visible to others until end.
  * You can only have one transaction at any time.
- * Returns false on failure.
+ * Returns NULL on failure.
  */
-bool xs_transaction_start(struct xs_handle *h);
+struct xs_transaction_handle *xs_transaction_start(struct xs_handle *h);
 
 /* End a transaction.
  * If abandon is true, transaction is discarded instead of committed.
  * Returns false on failure: if errno == EAGAIN, you have to restart
  * transaction.
  */
-bool xs_transaction_end(struct xs_handle *h, bool abort);
+bool xs_transaction_end(struct xs_handle *h, struct xs_transaction_handle *t,
+                       bool abort);
 
 /* Introduce a new domain.
  * This tells the store daemon about a shared memory page, event channel
@@ -142,3 +144,13 @@
                       void *data, unsigned int len);
 
 #endif /* _XS_H */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 0e7c48861e95 -r 1ac39c7a0435 tools/xenstore/xs_test.c
--- a/tools/xenstore/xs_test.c  Mon Oct 10 13:42:38 2005
+++ b/tools/xenstore/xs_test.c  Mon Oct 10 13:46:53 2005
@@ -42,6 +42,7 @@
 #define XSTEST
 
 static struct xs_handle *handles[10] = { NULL };
+static struct xs_transaction_handle *txh[10] = { NULL };
 
 static unsigned int timeout_ms = 500;
 static bool timeout_suppressed = true;
@@ -201,7 +202,6 @@
             "  watch <path> <token>\n"
             "  watchnoack <path> <token>\n"
             "  waitwatch\n"
-            "  ackwatch <token>\n"
             "  unwatch <path> <token>\n"
             "  close\n"
             "  start <node>\n"
@@ -313,7 +313,7 @@
        char **entries;
        unsigned int i, num;
 
-       entries = xs_directory(handles[handle], path, &num);
+       entries = xs_directory(handles[handle], txh[handle], path, &num);
        if (!entries) {
                failed(handle);
                return;
@@ -332,7 +332,7 @@
        char *value;
        unsigned int len;
 
-       value = xs_read(handles[handle], path, &len);
+       value = xs_read(handles[handle], txh[handle], path, &len);
        if (!value) {
                failed(handle);
                return;
@@ -348,7 +348,7 @@
 
 static void do_write(unsigned int handle, char *path, char *data)
 {
-       if (!xs_write(handles[handle], path, data, strlen(data)))
+       if (!xs_write(handles[handle], txh[handle], path, data, strlen(data)))
                failed(handle);
 }
 
@@ -361,13 +361,13 @@
 
 static void do_mkdir(unsigned int handle, char *path)
 {
-       if (!xs_mkdir(handles[handle], path))
+       if (!xs_mkdir(handles[handle], txh[handle], path))
                failed(handle);
 }
 
 static void do_rm(unsigned int handle, char *path)
 {
-       if (!xs_rm(handles[handle], path))
+       if (!xs_rm(handles[handle], txh[handle], path))
                failed(handle);
 }
 
@@ -376,7 +376,7 @@
        unsigned int i, num;
        struct xs_permissions *perms;
 
-       perms = xs_get_permissions(handles[handle], path, &num);
+       perms = xs_get_permissions(handles[handle], txh[handle], path, &num);
        if (!perms) {
                failed(handle);
                return;
@@ -437,7 +437,7 @@
                        barf("bad flags %s\n", arg);
        }
 
-       if (!xs_set_permissions(handles[handle], path, perms, i))
+       if (!xs_set_permissions(handles[handle], txh[handle], path, perms, i))
                failed(handle);
 }
 
@@ -454,8 +454,6 @@
                if (!vec ||
                    !streq(vec[XS_WATCH_PATH], node) ||
                    !streq(vec[XS_WATCH_TOKEN], token))
-                       failed(handle);
-               if (!xs_acknowledge_watch(handles[handle], token))
                        failed(handle);
        }
 }
@@ -515,12 +513,6 @@
        free(vec);
 }
 
-static void do_ackwatch(unsigned int handle, const char *token)
-{
-       if (!xs_acknowledge_watch(handles[handle], token))
-               failed(handle);
-}
-
 static void do_unwatch(unsigned int handle, const char *node, const char 
*token)
 {
        if (!xs_unwatch(handles[handle], node, token))
@@ -529,14 +521,16 @@
 
 static void do_start(unsigned int handle)
 {
-       if (!xs_transaction_start(handles[handle]))
+       txh[handle] = xs_transaction_start(handles[handle]);
+       if (txh[handle] == NULL)
                failed(handle);
 }
 
 static void do_end(unsigned int handle, bool abort)
 {
-       if (!xs_transaction_end(handles[handle], abort))
-               failed(handle);
+       if (!xs_transaction_end(handles[handle], txh[handle], abort))
+               failed(handle);
+       txh[handle] = NULL;
 }
 
 static void do_introduce(unsigned int handle,
@@ -626,7 +620,8 @@
 
                sprintf(subnode, "%s/%s", node, dir[i]);
 
-               perms = xs_get_permissions(handles[handle], subnode,&numperms);
+               perms = xs_get_permissions(handles[handle], txh[handle],
+                                          subnode,&numperms);
                if (!perms) {
                        failed(handle);
                        return;
@@ -643,7 +638,8 @@
                output("\n");
 
                /* Even directories can have contents. */
-               contents = xs_read(handles[handle], subnode, &len);
+               contents = xs_read(handles[handle], txh[handle], 
+                                  subnode, &len);
                if (!contents) {
                        if (errno != EISDIR)
                                failed(handle);
@@ -653,7 +649,8 @@
                }                       
 
                /* Every node is a directory. */
-               subdirs = xs_directory(handles[handle], subnode, &subnum);
+               subdirs = xs_directory(handles[handle], txh[handle], 
+                                      subnode, &subnum);
                if (!subdirs) {
                        failed(handle);
                        return;
@@ -668,7 +665,7 @@
        char **subdirs;
        unsigned int subnum;
 
-       subdirs = xs_directory(handles[handle], "/", &subnum);
+       subdirs = xs_directory(handles[handle], txh[handle], "/", &subnum);
        if (!subdirs) {
                failed(handle);
                return;
@@ -746,13 +743,12 @@
                do_watch(handle, arg(line, 1), arg(line, 2), false);
        else if (streq(command, "waitwatch"))
                do_waitwatch(handle);
-       else if (streq(command, "ackwatch"))
-               do_ackwatch(handle, arg(line, 1));
        else if (streq(command, "unwatch"))
                do_unwatch(handle, arg(line, 1), arg(line, 2));
        else if (streq(command, "close")) {
                xs_daemon_close(handles[handle]);
                handles[handle] = NULL;
+               txh[handle] = NULL;
        } else if (streq(command, "start"))
                do_start(handle);
        else if (streq(command, "commit"))
@@ -836,3 +832,13 @@
 
        return 0;
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 0e7c48861e95 -r 1ac39c7a0435 xen/include/public/io/xs_wire.h
--- a/xen/include/public/io/xs_wire.h   Mon Oct 10 13:42:38 2005
+++ b/xen/include/public/io/xs_wire.h   Mon Oct 10 13:46:53 2005
@@ -30,25 +30,23 @@
 
 enum xsd_sockmsg_type
 {
-       XS_DEBUG,
-       XS_DIRECTORY,
-       XS_READ,
-       XS_GET_PERMS,
-       XS_WATCH,
-       XS_WATCH_ACK,
-       XS_UNWATCH,
-       XS_TRANSACTION_START,
-       XS_TRANSACTION_END,
-       XS_OP_READ_ONLY = XS_TRANSACTION_END,
-       XS_INTRODUCE,
-       XS_RELEASE,
-       XS_GET_DOMAIN_PATH,
-       XS_WRITE,
-       XS_MKDIR,
-       XS_RM,
-       XS_SET_PERMS,
-       XS_WATCH_EVENT,
-       XS_ERROR,
+    XS_DEBUG,
+    XS_DIRECTORY,
+    XS_READ,
+    XS_GET_PERMS,
+    XS_WATCH,
+    XS_UNWATCH,
+    XS_TRANSACTION_START,
+    XS_TRANSACTION_END,
+    XS_INTRODUCE,
+    XS_RELEASE,
+    XS_GET_DOMAIN_PATH,
+    XS_WRITE,
+    XS_MKDIR,
+    XS_RM,
+    XS_SET_PERMS,
+    XS_WATCH_EVENT,
+    XS_ERROR,
 };
 
 #define XS_WRITE_NONE "NONE"
@@ -58,38 +56,40 @@
 /* We hand errors as strings, for portability. */
 struct xsd_errors
 {
-       int errnum;
-       const char *errstring;
+    int errnum;
+    const char *errstring;
 };
 #define XSD_ERROR(x) { x, #x }
 static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
-       XSD_ERROR(EINVAL),
-       XSD_ERROR(EACCES),
-       XSD_ERROR(EEXIST),
-       XSD_ERROR(EISDIR),
-       XSD_ERROR(ENOENT),
-       XSD_ERROR(ENOMEM),
-       XSD_ERROR(ENOSPC),
-       XSD_ERROR(EIO),
-       XSD_ERROR(ENOTEMPTY),
-       XSD_ERROR(ENOSYS),
-       XSD_ERROR(EROFS),
-       XSD_ERROR(EBUSY),
-       XSD_ERROR(EAGAIN),
-       XSD_ERROR(EISCONN),
+    XSD_ERROR(EINVAL),
+    XSD_ERROR(EACCES),
+    XSD_ERROR(EEXIST),
+    XSD_ERROR(EISDIR),
+    XSD_ERROR(ENOENT),
+    XSD_ERROR(ENOMEM),
+    XSD_ERROR(ENOSPC),
+    XSD_ERROR(EIO),
+    XSD_ERROR(ENOTEMPTY),
+    XSD_ERROR(ENOSYS),
+    XSD_ERROR(EROFS),
+    XSD_ERROR(EBUSY),
+    XSD_ERROR(EAGAIN),
+    XSD_ERROR(EISCONN),
 };
 struct xsd_sockmsg
 {
-       u32 type;
-       u32 len;                /* Length of data following this. */
+    u32 type;  /* XS_??? */
+    u32 req_id;/* Request identifier, echoed in daemon's response.  */
+    u32 tx_id; /* Transaction id (0 if not related to a transaction). */
+    u32 len;   /* Length of data following this. */
 
-       /* Generally followed by nul-terminated string(s). */
+    /* Generally followed by nul-terminated string(s). */
 };
 
 enum xs_watch_type
 {
-       XS_WATCH_PATH = 0,
-       XS_WATCH_TOKEN,
+    XS_WATCH_PATH = 0,
+    XS_WATCH_TOKEN,
 };
 
 #endif /* _XS_WIRE_H */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>