WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 

xen-changelog

[Xen-changelog] Merged.

# HG changeset patch
# User emellor@xxxxxxxxxxxxxxxxxxxxxx
# Node ID c9772105fead52abf64213aa1eda6419871acf94
# Parent  76bff6c996b0250739229181e40bc9c349f80c15
# Parent  a08aef9f1c8e6f0350e89ee34ad6d3ba54027958
Merged.

diff -r 76bff6c996b0 -r c9772105fead buildconfigs/Rules.mk
--- a/buildconfigs/Rules.mk     Thu Dec  8 15:04:31 2005
+++ b/buildconfigs/Rules.mk     Thu Dec  8 15:04:41 2005
@@ -21,6 +21,7 @@
 
 # Expand Linux series to Linux version
 LINUX_SERIES   ?= 2.6
+LINUX_VER      ?= $(shell grep "^LINUX_VER" buildconfigs/mk.linux-2.6-xen | sed -e 's/.*=[ ]*//')
 
 # Setup Linux search path
 LINUX_SRC_PATH ?= .:..
@@ -109,6 +110,13 @@
 %-config:
        $(MAKE) -f buildconfigs/mk.$* config
 
+linux-2.6-xen.patch: ref-linux-$(LINUX_VER)/.valid-ref
+       rm -rf tmp-$@
+       cp -al $(<D) tmp-$@
+       ( cd linux-2.6-xen-sparse && ./mkbuildtree ../tmp-$@ )  
+       diff -Nurp $(<D) tmp-$@ > $@ || true
+       rm -rf tmp-$@
+
 %-xen.patch: ref-%/.valid-ref
        rm -rf tmp-$@
        cp -al $(<D) tmp-$@
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/events.c
--- a/extras/mini-os/events.c   Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/events.c   Thu Dec  8 15:04:41 2005
@@ -56,7 +56,7 @@
 
 }
 
-void bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) )
+int bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) )
 {
        if(ev_actions[port].handler)
         printk("WARN: Handler for port %d already registered, replacing\n",
@@ -67,6 +67,16 @@
  
        /* Finally unmask the port */
        unmask_evtchn(port);
+
+       return port;
+}
+
+void unbind_evtchn( u32 port )
+{
+       if (!ev_actions[port].handler)
+               printk("WARN: No handler for port %d when unbinding\n", port);
+       ev_actions[port].handler = NULL;
+       ev_actions[port].status |= EVS_DISABLED;
 }
 
 int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) )
@@ -90,6 +100,10 @@
        return ret;
 }
 
+void unbind_virq( u32 port )
+{
+       unbind_evtchn(port);
+}
 
 
 /*
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/events.h
--- a/extras/mini-os/include/events.h   Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/events.h   Thu Dec  8 15:04:41 2005
@@ -40,10 +40,12 @@
 /* prototypes */
 int do_event(u32 port, struct pt_regs *regs);
 int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) );
-void bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) );
+int bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) );
+void unbind_evtchn( u32 port );
 void init_events(void);
+void unbind_virq( u32 port );
 
-static inline int notify_via_evtchn(int port)
+static inline int notify_remote_via_evtchn(int port)
 {
     evtchn_op_t op;
     op.cmd = EVTCHNOP_send;
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/os.h
--- a/extras/mini-os/include/os.h       Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/os.h       Thu Dec  8 15:04:41 2005
@@ -131,9 +131,11 @@
 #if defined(__i386__)
 #define mb()    __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
 #define rmb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define wmb()  __asm__ __volatile__ ("": : :"memory")
 #elif defined(__x86_64__)
 #define mb()    __asm__ __volatile__ ("mfence":::"memory")
 #define rmb()   __asm__ __volatile__ ("lfence":::"memory")
+#define wmb()  __asm__ __volatile__ ("sfence" ::: "memory") /* From CONFIG_UNORDERED_IO (linux) */
 #endif
 
 
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/semaphore.h
--- a/extras/mini-os/include/semaphore.h        Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/semaphore.h        Thu Dec  8 15:04:41 2005
@@ -2,6 +2,7 @@
 #define _SEMAPHORE_H_
 
 #include <wait.h>
+#include <spinlock.h>
 
 /*
  * Implementation of semaphore in Mini-os is simple, because 
@@ -14,6 +15,15 @@
        struct wait_queue_head wait;
 };
 
+/*
+ * the semaphore definition
+ */
+struct rw_semaphore {
+       signed long             count;
+       spinlock_t              wait_lock;
+       struct list_head        wait_list;
+       int                     debug;
+};
 
 #define __SEMAPHORE_INITIALIZER(name, n)                            \
 {                                                                   \
@@ -31,6 +41,12 @@
 
 #define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
 
+static inline void init_MUTEX(struct semaphore *sem)
+{
+  sem->count = 1;
+  init_waitqueue_head(&sem->wait);
+}
+
 static void inline down(struct semaphore *sem)
 {
     wait_event(sem->wait, sem->count > 0);
@@ -43,4 +59,27 @@
     wake_up(&sem->wait);
 }
 
+/* FIXME! Thre read/write semaphores are unimplemented! */
+static inline void init_rwsem(struct rw_semaphore *sem)
+{
+  sem->count = 1;
+}
+
+static inline void down_read(struct rw_semaphore *sem)
+{
+}
+
+
+static inline void up_read(struct rw_semaphore *sem)
+{
+}
+
+static inline void up_write(struct rw_semaphore *sem)
+{
+}
+
+static inline void down_write(struct rw_semaphore *sem)
+{
+}
+
 #endif /* _SEMAPHORE_H */
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/wait.h
--- a/extras/mini-os/include/wait.h     Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/wait.h     Thu Dec  8 15:04:41 2005
@@ -33,6 +33,10 @@
 }
 
 
+static inline void init_waitqueue_head(struct wait_queue_head *h)
+{
+  INIT_LIST_HEAD(&h->thread_list);
+}
 
 static inline void init_waitqueue_entry(struct wait_queue *q, struct thread *thread)
 {
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/xenbus.h
--- a/extras/mini-os/include/xenbus.h   Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/xenbus.h   Thu Dec  8 15:04:41 2005
@@ -4,6 +4,7 @@
  * Talks to Xen Store to figure out what devices we have.
  *
  * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
  * 
  * This file may be distributed separately from the Linux kernel, or
  * incorporated into other software packages, subject to the following license:
@@ -30,45 +31,98 @@
 #ifndef _ASM_XEN_XENBUS_H
 #define _ASM_XEN_XENBUS_H
 
-
-/* Caller must hold this lock to call these functions: it's also held
- * across watch callbacks. */
-// TODO
-//extern struct semaphore xenbus_lock;
-
-char **xenbus_directory(const char *dir, const char *node, unsigned int *num);
-void *xenbus_read(const char *dir, const char *node, unsigned int *len);
-int xenbus_write(const char *dir, const char *node,
-                const char *string, int createflags);
-int xenbus_mkdir(const char *dir, const char *node);
-int xenbus_exists(const char *dir, const char *node);
-int xenbus_rm(const char *dir, const char *node);
-int xenbus_transaction_start(const char *subtree);
-int xenbus_transaction_end(int abort);
-
-/* Single read and scanf: returns -errno or num scanned if > 0. */
-int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
-       __attribute__((format(scanf, 3, 4)));
-
-/* Single printf and write: returns -errno or 0. */
-int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
-       __attribute__((format(printf, 3, 4)));
-
-/* Generic read function: NULL-terminated triples of name,
- * sprintf-style type string, and pointer. Returns 0 or errno.*/
-int xenbus_gather(const char *dir, ...);
+#include <errno.h>
+#include <xen/io/xenbus.h>
+#include <xen/io/xs_wire.h>
 
 /* Register callback to watch this node. */
 struct xenbus_watch
 {
        struct list_head list;
-       char *node;
-       void (*callback)(struct xenbus_watch *, const char *node);
-};
+
+       /* Path being watched. */
+       const char *node;
+
+       /* Callback (executed in a process context with no locks held). */
+       void (*callback)(struct xenbus_watch *,
+                        const char **vec, unsigned int len);
+};
+
+
+/* A xenbus device. */
+struct xenbus_device {
+       const char *devicetype;
+       const char *nodename;
+       const char *otherend;
+       int otherend_id;
+       struct xenbus_watch otherend_watch;
+       int has_error;
+       void *data;
+};
+
+struct xenbus_device_id
+{
+       /* .../device/<device_type>/<identifier> */
+       char devicetype[32];    /* General class of device. */
+};
+
+/* A xenbus driver. */
+struct xenbus_driver {
+       char *name;
+       struct module *owner;
+       const struct xenbus_device_id *ids;
+       int (*probe)(struct xenbus_device *dev,
+                    const struct xenbus_device_id *id);
+       void (*otherend_changed)(struct xenbus_device *dev,
+                                XenbusState backend_state);
+       int (*remove)(struct xenbus_device *dev);
+       int (*suspend)(struct xenbus_device *dev);
+       int (*resume)(struct xenbus_device *dev);
+       int (*hotplug)(struct xenbus_device *, char **, int, char *, int);
+       int (*read_otherend_details)(struct xenbus_device *dev);
+};
+
+int xenbus_register_frontend(struct xenbus_driver *drv);
+int xenbus_register_backend(struct xenbus_driver *drv);
+void xenbus_unregister_driver(struct xenbus_driver *drv);
+
+struct xenbus_transaction;
+
+char **xenbus_directory(struct xenbus_transaction *t,
+                       const char *dir, const char *node, unsigned int *num);
+void *xenbus_read(struct xenbus_transaction *t,
+                 const char *dir, const char *node, unsigned int *len);
+int xenbus_write(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *string);
+int xenbus_mkdir(struct xenbus_transaction *t,
+                const char *dir, const char *node);
+int xenbus_exists(struct xenbus_transaction *t,
+                 const char *dir, const char *node);
+int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node);
+struct xenbus_transaction *xenbus_transaction_start(void);
+int xenbus_transaction_end(struct xenbus_transaction *t, int abort);
+
+/* Single read and scanf: returns -errno or num scanned if > 0. */
+int xenbus_scanf(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *fmt, ...)
+       __attribute__((format(scanf, 4, 5)));
+
+/* Single printf and write: returns -errno or 0. */
+int xenbus_printf(struct xenbus_transaction *t,
+                 const char *dir, const char *node, const char *fmt, ...)
+       __attribute__((format(printf, 4, 5)));
+
+/* Generic read function: NULL-terminated triples of name,
+ * sprintf-style type string, and pointer. Returns 0 or errno.*/
+int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...);
 
 int register_xenbus_watch(struct xenbus_watch *watch);
 void unregister_xenbus_watch(struct xenbus_watch *watch);
-void reregister_xenbus_watches(void);
+void xs_suspend(void);
+void xs_resume(void);
+
+/* Used by xenbus_dev to borrow kernel's store connection. */
+void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg);
 
 /* Called from xen core code. */
 void xenbus_suspend(void);
@@ -84,6 +138,87 @@
 
 #define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE)
 
-int xs_init(void);
+
+/**
+ * Register a watch on the given path, using the given xenbus_watch structure
+ * for storage, and the given callback function as the callback.  Return 0 on
+ * success, or -errno on error.  On success, the given path will be saved as
+ * watch->node, and remains the caller's to free.  On error, watch->node will
+ * be NULL, the device will switch to XenbusStateClosing, and the error will
+ * be saved in the store.
+ */
+int xenbus_watch_path(struct xenbus_device *dev, const char *path,
+                     struct xenbus_watch *watch, 
+                     void (*callback)(struct xenbus_watch *,
+                                      const char **, unsigned int));
+
+
+/**
+ * Register a watch on the given path/path2, using the given xenbus_watch
+ * structure for storage, and the given callback function as the callback.
+ * Return 0 on success, or -errno on error.  On success, the watched path
+ * (path/path2) will be saved as watch->node, and becomes the caller's to
+ * kfree().  On error, watch->node will be NULL, so the caller has nothing to
+ * free, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
+                      const char *path2, struct xenbus_watch *watch, 
+                      void (*callback)(struct xenbus_watch *,
+                                       const char **, unsigned int));
+
+
+/**
+ * Advertise in the store a change of the given driver to the given new_state.
+ * Perform the change inside the given transaction xbt.  xbt may be NULL, in
+ * which case this is performed inside its own transaction.  Return 0 on
+ * success, or -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_switch_state(struct xenbus_device *dev,
+                       struct xenbus_transaction *xbt,
+                       XenbusState new_state);
+
+
+/**
+ * Grant access to the given ring_mfn to the peer of the given device.  Return
+ * 0 on success, or -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
+
+
+/**
+ * Allocate an event channel for the given xenbus_device, assigning the newly
+ * created local port to *port.  Return 0 on success, or -errno on error.  On
+ * error, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
+
+
+/**
+ * Return the state of the driver rooted at the given store path, or
+ * XenbusStateClosed if no state can be read.
+ */
+XenbusState xenbus_read_driver_state(const char *path);
+
+
+/***
+ * Report the given negative errno into the store, along with the given
+ * formatted message.
+ */
+void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,
+                     ...);
+
+
+/***
+ * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
+ * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly
+ * closedown of this driver and its peer.
+ */
+void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
+                     ...);
+
 
 #endif /* _ASM_XEN_XENBUS_H */
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/xmalloc.h
--- a/extras/mini-os/include/xmalloc.h  Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/xmalloc.h  Thu Dec  8 15:04:41 2005
@@ -6,6 +6,9 @@
 
 /* Allocate space for array of typed objects. */
 #define xmalloc_array(_type, _num) ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num))
+
+#define malloc(size) _xmalloc(size, 4)
+#define free(ptr) xfree(ptr)
 
 /* Free any of the above. */
 extern void xfree(const void *);
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/kernel.c   Thu Dec  8 15:04:41 2005
@@ -35,6 +35,7 @@
 #include <lib.h>
 #include <sched.h>
 #include <xenbus.h>
+#include "xenbus/xenbus_comms.h"
 
 /*
  * Shared page for communicating with the hypervisor.
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/xenbus/xenbus_comms.c
--- a/extras/mini-os/xenbus/xenbus_comms.c      Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/xenbus/xenbus_comms.c      Thu Dec  8 15:04:41 2005
@@ -33,35 +33,19 @@
 #include <events.h>
 #include <os.h>
 #include <lib.h>
+#include <xenbus.h>
+#include "xenbus_comms.h"
 
+static int xenbus_irq;
 
-#ifdef XENBUS_COMMS_DEBUG
-#define DEBUG(_f, _a...) \
-    printk("MINI_OS(file=xenbus_comms.c, line=%d) " _f "\n", __LINE__, ## _a)
-#else
-#define DEBUG(_f, _a...)    ((void)0)
-#endif
-
-
-#define RINGBUF_DATASIZE ((PAGE_SIZE / 2) - sizeof(struct ringbuf_head))
-struct ringbuf_head
-{
-       u32 write; /* Next place to write to */
-       u32 read; /* Next place to read from */
-       u8 flags;
-       char buf[0];
-} __attribute__((packed));
+extern void xenbus_probe(void *);
+extern int xenstored_ready;
 
 DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
 
-static inline struct ringbuf_head *outbuf(void)
+static inline struct xenstore_domain_interface *xenstore_domain_interface(void)
 {
        return mfn_to_virt(start_info.store_mfn);
-}
-
-static inline struct ringbuf_head *inbuf(void)
-{
-       return (struct ringbuf_head *)((char *)mfn_to_virt(start_info.store_mfn) + PAGE_SIZE/2);
 }
 
 static void wake_waiting(int port, struct pt_regs *regs)
@@ -69,138 +53,112 @@
        wake_up(&xb_waitq);
 }
 
-static int check_buffer(const struct ringbuf_head *h)
+static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
 {
-       return (h->write < RINGBUF_DATASIZE && h->read < RINGBUF_DATASIZE);
+       return ((prod - cons) <= XENSTORE_RING_SIZE);
 }
 
-/* We can't fill last byte: would look like empty buffer. */
-static void *get_output_chunk(const struct ringbuf_head *h,
-                             void *buf, u32 *len)
+static void *get_output_chunk(XENSTORE_RING_IDX cons,
+                             XENSTORE_RING_IDX prod,
+                             char *buf, uint32_t *len)
 {
-       u32 read_mark;
-
-       if (h->read == 0)
-               read_mark = RINGBUF_DATASIZE - 1;
-       else
-               read_mark = h->read - 1;
-
-       /* Here to the end of buffer, unless they haven't read some out. */
-       *len = RINGBUF_DATASIZE - h->write;
-       if (read_mark >= h->write)
-               *len = read_mark - h->write;
-       return (void *)((char *)buf + h->write);
+       *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
+       if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
+               *len = XENSTORE_RING_SIZE - (prod - cons);
+       return buf + MASK_XENSTORE_IDX(prod);
 }
 
-static const void *get_input_chunk(const struct ringbuf_head *h,
-                                  const void *buf, u32 *len)
+static const void *get_input_chunk(XENSTORE_RING_IDX cons,
+                                  XENSTORE_RING_IDX prod,
+                                  const char *buf, uint32_t *len)
 {
-       /* Here to the end of buffer, unless they haven't written some. */
-       *len = RINGBUF_DATASIZE - h->read;
-       if (h->write >= h->read)
-               *len = h->write - h->read;
-       return (void *)((char *)buf + h->read);
-}
-
-static void update_output_chunk(struct ringbuf_head *h, u32 len)
-{
-       h->write += len;
-       if (h->write == RINGBUF_DATASIZE)
-               h->write = 0;
-}
-
-static void update_input_chunk(struct ringbuf_head *h, u32 len)
-{
-       h->read += len;
-       if (h->read == RINGBUF_DATASIZE)
-               h->read = 0;
-}
-
-static int output_avail(struct ringbuf_head *out)
-{
-       unsigned int avail;
-
-       get_output_chunk(out, out->buf, &avail);
-       return avail != 0;
+       *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
+       if ((prod - cons) < *len)
+               *len = prod - cons;
+       return buf + MASK_XENSTORE_IDX(cons);
 }
 
 int xb_write(const void *data, unsigned len)
 {
-       struct ringbuf_head h;
-       struct ringbuf_head *out = outbuf();
+       struct xenstore_domain_interface *intf = xenstore_domain_interface();
+       XENSTORE_RING_IDX cons, prod;
 
-       do {
+       while (len != 0) {
                void *dst;
                unsigned int avail;
 
-               wait_event(xb_waitq, output_avail(out));
+               wait_event(xb_waitq, (intf->req_prod - intf->req_cons) !=
+                          XENSTORE_RING_SIZE);
 
-               /* Read, then check: not that we don't trust store.
-                * Hell, some of my best friends are daemons.  But,
-                * in this post-911 world... */
-               h = *out;
+               /* Read indexes, then verify. */
+               cons = intf->req_cons;
+               prod = intf->req_prod;
                mb();
-               if (!check_buffer(&h)) {
-                       return -1; /* ETERRORIST! */
-               }
+               if (!check_indexes(cons, prod))
+                       return -EIO;
 
-               dst = get_output_chunk(&h, out->buf, &avail);
+               dst = get_output_chunk(cons, prod, intf->req, &avail);
+               if (avail == 0)
+                       continue;
                if (avail > len)
                        avail = len;
+
                memcpy(dst, data, avail);
-               data = (void *)((char *)data + avail);
+               data = (void*) ( (unsigned long)data + avail );
                len -= avail;
-               update_output_chunk(out, avail);
-               notify_via_evtchn(start_info.store_evtchn);
-       } while (len != 0);
+
+               /* Other side must not see new header until data is there. */
+               wmb();
+               intf->req_prod += avail;
+
+               /* This implies mb() before other side sees interrupt. */
+               notify_remote_via_evtchn(start_info.store_evtchn);
+       }
 
        return 0;
 }
 
-int xs_input_avail(void)
-{
-       unsigned int avail;
-       struct ringbuf_head *in = inbuf();
-
-       get_input_chunk(in, in->buf, &avail);
-       return avail != 0;
-}
-
 int xb_read(void *data, unsigned len)
 {
-       struct ringbuf_head h;
-       struct ringbuf_head *in = inbuf();
-       int was_full;
+       struct xenstore_domain_interface *intf = xenstore_domain_interface();
+       XENSTORE_RING_IDX cons, prod;
 
        while (len != 0) {
                unsigned int avail;
                const char *src;
 
-               wait_event(xb_waitq, xs_input_avail());
-               h = *in;
+               wait_event(xb_waitq,
+                          intf->rsp_cons != intf->rsp_prod);
+
+               /* Read indexes, then verify. */
+               cons = intf->rsp_cons;
+               prod = intf->rsp_prod;
                mb();
-               if (!check_buffer(&h)) {
-                       return -1;
-               }
+               if (!check_indexes(cons, prod))
+                       return -EIO;
 
-               src = get_input_chunk(&h, in->buf, &avail);
+               src = get_input_chunk(cons, prod, intf->rsp, &avail);
+               if (avail == 0)
+                       continue;
                if (avail > len)
                        avail = len;
-               was_full = !output_avail(&h);
+
+               /* We must read header before we read data. */
+               rmb();
 
                memcpy(data, src, avail);
-               data = (void *)((char *)data + avail);
+               data = (void*) ( (unsigned long)data + avail );
                len -= avail;
-               update_input_chunk(in, avail);
-               DEBUG("Finished read of %i bytes (%i to go)\n", avail, len);
-               /* If it was full, tell them we've taken some. */
-               if (was_full)
-                       notify_via_evtchn(start_info.store_evtchn);
+
+               /* Other side must not see free space until we've copied out */
+               mb();
+               intf->rsp_cons += avail;
+
+               printk("Finished read of %i bytes (%i to go)\n", avail, len);
+
+               /* Implies mb(): they will see new header. */
+               notify_remote_via_evtchn(start_info.store_evtchn);
        }
-
-       /* If we left something, wake watch thread to deal with it. */
-       if (xs_input_avail())
-               wake_up(&xb_waitq);
 
        return 0;
 }
@@ -208,24 +166,19 @@
 /* Set up interrupt handler off store event channel. */
 int xb_init_comms(void)
 {
-    printk("Init xenbus comms, store event channel %d\n", start_info.store_evtchn);
-       if (!start_info.store_evtchn)
-               return 0;
-    printk("Binding virq\n");
-       bind_evtchn(start_info.store_evtchn, &wake_waiting);
+       int err;
 
-       /* FIXME zero out page -- domain builder should probably do this*/
-       memset(mfn_to_virt(start_info.store_mfn), 0, PAGE_SIZE);
-    notify_via_evtchn(start_info.store_evtchn);
+       if (xenbus_irq)
+               unbind_evtchn(xenbus_irq);
+
+       err = bind_evtchn(
+               start_info.store_evtchn, wake_waiting);
+       if (err <= 0) {
+               printk("XENBUS request irq failed %i\n", err);
+               return err;
+       }
+
+       xenbus_irq = err;
+
        return 0;
 }
-
-void xb_suspend_comms(void)
-{
-
-       if (!start_info.store_evtchn)
-               return;
-
-    // TODO
-       //unbind_evtchn_from_irqhandler(xen_start_info.store_evtchn, &xb_waitq);
-}
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/xenbus/xenbus_comms.h
--- a/extras/mini-os/xenbus/xenbus_comms.h      Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/xenbus/xenbus_comms.h      Thu Dec  8 15:04:41 2005
@@ -28,8 +28,8 @@
 #ifndef _XENBUS_COMMS_H
 #define _XENBUS_COMMS_H
 
+int xs_init(void);
 int xb_init_comms(void);
-void xb_suspend_comms(void);
 
 /* Low level routines. */
 int xb_write(const void *data, unsigned len);
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/xenbus/xenbus_xs.c
--- a/extras/mini-os/xenbus/xenbus_xs.c Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/xenbus/xenbus_xs.c Thu Dec  8 15:04:41 2005
@@ -39,15 +39,63 @@
 #include <wait.h>
 #include <sched.h>
 #include <semaphore.h>
+#include <spinlock.h>
 #include <xen/io/xs_wire.h>
 #include "xenbus_comms.h"
 
 #define streq(a, b) (strcmp((a), (b)) == 0)
 
-static char printf_buffer[4096];
+struct xs_stored_msg {
+       struct list_head list;
+
+       struct xsd_sockmsg hdr;
+
+       union {
+               /* Queued replies. */
+               struct {
+                       char *body;
+               } reply;
+
+               /* Queued watch events. */
+               struct {
+                       struct xenbus_watch *handle;
+                       char **vec;
+                       unsigned int vec_size;
+               } watch;
+       } u;
+};
+
+struct xs_handle {
+       /* A list of replies. Currently only one will ever be outstanding. */
+       struct list_head reply_list;
+       spinlock_t reply_lock;
+       struct wait_queue_head reply_waitq;
+
+       /* One request at a time. */
+       struct semaphore request_mutex;
+
+       /* Protect transactions against save/restore. */
+       struct rw_semaphore suspend_mutex;
+};
+
+static struct xs_handle xs_state;
+
+/* List of registered watches, and a lock to protect it. */
 static LIST_HEAD(watches);
-//TODO
-DECLARE_MUTEX(xenbus_lock);
+static DEFINE_SPINLOCK(watches_lock);
+
+/* List of pending watch callback events, and a lock to protect it. */
+static LIST_HEAD(watch_events);
+static DEFINE_SPINLOCK(watch_events_lock);
+
+/*
+ * Details of the xenwatch callback kernel thread. The thread waits on the
+ * watch_events_waitq for work to do (queued on watch_events list). When it
+ * wakes up it acquires the xenwatch_mutex before reading the list and
+ * carrying out work.
+ */
+/* static */ DECLARE_MUTEX(xenwatch_mutex);
+static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
 
 static int get_error(const char *errorstring)
 {
@@ -65,47 +113,82 @@
 
 static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
 {
-       struct xsd_sockmsg msg;
-       void *ret;
-       int err;
-
-       err = xb_read(&msg, sizeof(msg));
-       if (err)
-               return ERR_PTR(err);
-
-       ret = xmalloc_array(char, msg.len + 1);
-       if (!ret)
-               return ERR_PTR(-ENOMEM);
-
-       err = xb_read(ret, msg.len);
-       if (err) {
-               xfree(ret);
-               return ERR_PTR(err);
-       }
-       ((char*)ret)[msg.len] = '\0';
-
-       *type = msg.type;
+       struct xs_stored_msg *msg;
+       char *body;
+
+       spin_lock(&xs_state.reply_lock);
+
+       while (list_empty(&xs_state.reply_list)) {
+               spin_unlock(&xs_state.reply_lock);
+               wait_event(xs_state.reply_waitq,
+                          !list_empty(&xs_state.reply_list));
+               spin_lock(&xs_state.reply_lock);
+       }
+
+       msg = list_entry(xs_state.reply_list.next,
+                        struct xs_stored_msg, list);
+       list_del(&msg->list);
+
+       spin_unlock(&xs_state.reply_lock);
+
+       *type = msg->hdr.type;
        if (len)
-               *len = msg.len;
-       return ret;
+               *len = msg->hdr.len;
+       body = msg->u.reply.body;
+
+       free(msg);
+
+       return body;
 }
 
 /* Emergency write. */
 void xenbus_debug_write(const char *str, unsigned int count)
 {
-       struct xsd_sockmsg msg;
+       struct xsd_sockmsg msg = { 0 };
 
        msg.type = XS_DEBUG;
        msg.len = sizeof("print") + count + 1;
 
+       down(&xs_state.request_mutex);
        xb_write(&msg, sizeof(msg));
        xb_write("print", sizeof("print"));
        xb_write(str, count);
        xb_write("", 1);
+       up(&xs_state.request_mutex);
+}
+
+void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
+{
+       void *ret;
+       struct xsd_sockmsg req_msg = *msg;
+       int err;
+
+       if (req_msg.type == XS_TRANSACTION_START)
+               down_read(&xs_state.suspend_mutex);
+
+       down(&xs_state.request_mutex);
+
+       err = xb_write(msg, sizeof(*msg) + msg->len);
+       if (err) {
+               msg->type = XS_ERROR;
+               ret = ERR_PTR(err);
+       } else {
+               ret = read_reply(&msg->type, &msg->len);
+       }
+
+       up(&xs_state.request_mutex);
+
+       if ((msg->type == XS_TRANSACTION_END) ||
+           ((req_msg.type == XS_TRANSACTION_START) &&
+            (msg->type == XS_ERROR)))
+               up_read(&xs_state.suspend_mutex);
+
+       return ret;
 }
 
 /* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
-static void *xs_talkv(enum xsd_sockmsg_type type,
+static void *xs_talkv(struct xenbus_transaction *t,
+                     enum xsd_sockmsg_type type,
                      const struct kvec *iovec,
                      unsigned int num_vecs,
                      unsigned int *len)
@@ -115,51 +198,57 @@
        unsigned int i;
        int err;
 
-       //WARN_ON(down_trylock(&xenbus_lock) == 0);
-
+       msg.tx_id = (u32)(unsigned long)t;
+       msg.req_id = 0;
        msg.type = type;
        msg.len = 0;
        for (i = 0; i < num_vecs; i++)
                msg.len += iovec[i].iov_len;
 
+       down(&xs_state.request_mutex);
+
        err = xb_write(&msg, sizeof(msg));
-       if (err)
+       if (err) {
+               up(&xs_state.request_mutex);
                return ERR_PTR(err);
+       }
 
        for (i = 0; i < num_vecs; i++) {
-               err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
-               if (err)
+               err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
+               if (err) {
+                       up(&xs_state.request_mutex);
                        return ERR_PTR(err);
-       }
-
-       /* Watches can have fired before reply comes: daemon detects
-        * and re-transmits, so we can ignore this. */
-       do {
-               xfree(ret);
-               ret = read_reply(&msg.type, len);
-               if (IS_ERR(ret))
-                       return ret;
-       } while (msg.type == XS_WATCH_EVENT);
+               }
+       }
+
+       ret = read_reply(&msg.type, len);
+
+       up(&xs_state.request_mutex);
+
+       if (IS_ERR(ret))
+               return ret;
 
        if (msg.type == XS_ERROR) {
                err = get_error(ret);
-               xfree(ret);
+               free(ret);
                return ERR_PTR(-err);
        }
 
-       //BUG_ON(msg.type != type);
+       //      BUG_ON(msg.type != type);
        return ret;
 }
 
 /* Simplified version of xs_talkv: single message. */
-static void *xs_single(enum xsd_sockmsg_type type,
-                      const char *string, unsigned int *len)
+static void *xs_single(struct xenbus_transaction *t,
+                      enum xsd_sockmsg_type type,
+                      const char *string,
+                      unsigned int *len)
 {
        struct kvec iovec;
 
        iovec.iov_base = (void *)string;
        iovec.iov_len = strlen(string) + 1;
-       return xs_talkv(type, &iovec, 1, len);
+       return xs_talkv(t, type, &iovec, 1, len);
 }
 
 /* Many commands only need an ack, don't care what it says. */
@@ -167,7 +256,7 @@
 {
        if (IS_ERR(reply))
                return PTR_ERR(reply);
-       xfree(reply);
+       free(reply);
        return 0;
 }
 
@@ -182,60 +271,76 @@
        return num;
 }
 
-/* Return the path to dir with /name appended. */ 
+/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ 
 static char *join(const char *dir, const char *name)
 {
-       static char buffer[4096];
-
-       //BUG_ON(down_trylock(&xenbus_lock) == 0);
-       /* XXX FIXME: might not be correct if name == "" */
-       //BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer));
+       char *buffer;
+
+       buffer = malloc(strlen(dir) + strlen("/") + strlen(name) + 1);
+       if (buffer == NULL)
+               return ERR_PTR(-ENOMEM);
 
        strcpy(buffer, dir);
        if (!streq(name, "")) {
                strcat(buffer, "/");
                strcat(buffer, name);
        }
+
        return buffer;
 }
 
-char **xenbus_directory(const char *dir, const char *node, unsigned int *num)
-{
-       char *strings, *p, **ret;
-       unsigned int len;
-
-       strings = xs_single(XS_DIRECTORY, join(dir, node), &len);
-       if (IS_ERR(strings))
-               return (char **)strings;
+static char **split(char *strings, unsigned int len, unsigned int *num)
+{
+       char *p, **ret;
 
        /* Count the strings. */
        *num = count_strings(strings, len);
 
        /* Transfer to one big alloc for easy freeing. */
-       ret = (char **)xmalloc_array(char, *num * sizeof(char *) + len);
+       ret = malloc(*num * sizeof(char *) + len);
        if (!ret) {
-               xfree(strings);
+               free(strings);
                return ERR_PTR(-ENOMEM);
        }
        memcpy(&ret[*num], strings, len);
-       xfree(strings);
+       free(strings);
 
        strings = (char *)&ret[*num];
        for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
                ret[(*num)++] = p;
-       return ret;
+
+       return ret;
+}
+
+char **xenbus_directory(struct xenbus_transaction *t,
+                       const char *dir, const char *node, unsigned int *num)
+{
+       char *strings, *path;
+       unsigned int len;
+
+       path = join(dir, node);
+       if (IS_ERR(path))
+               return (char **)path;
+
+       strings = xs_single(t, XS_DIRECTORY, path, &len);
+       free(path);
+       if (IS_ERR(strings))
+               return (char **)strings;
+
+       return split(strings, len, num);
 }
 
 /* Check if a path exists. Return 1 if it does. */
-int xenbus_exists(const char *dir, const char *node)
+int xenbus_exists(struct xenbus_transaction *t,
+                 const char *dir, const char *node)
 {
        char **d;
        int dir_n;
 
-       d = xenbus_directory(dir, node, &dir_n);
+       d = xenbus_directory(t, dir, node, &dir_n);
        if (IS_ERR(d))
                return 0;
-       xfree(d);
+       free(d);
        return 1;
 }
 
@@ -243,92 +348,134 @@
  * Returns a kmalloced value: call free() on it after use.
  * len indicates length in bytes.
  */
-void *xenbus_read(const char *dir, const char *node, unsigned int *len)
-{
-       return xs_single(XS_READ, join(dir, node), len);
+void *xenbus_read(struct xenbus_transaction *t,
+                 const char *dir, const char *node, unsigned int *len)
+{
+       char *path;
+       void *ret;
+
+       path = join(dir, node);
+       if (IS_ERR(path))
+               return (void *)path;
+
+       ret = xs_single(t, XS_READ, path, len);
+       free(path);
+       return ret;
 }
 
 /* Write the value of a single file.
- * Returns -err on failure.  createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ * Returns -err on failure.
  */
-int xenbus_write(const char *dir, const char *node,
-                const char *string, int createflags)
-{
-       const char *flags, *path;
-       struct kvec iovec[3];
+int xenbus_write(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *string)
+{
+       const char *path;
+       struct kvec iovec[2];
+       int ret;
 
        path = join(dir, node);
-       /* Format: Flags (as string), path, data. */
-       if (createflags == 0)
-               flags = XS_WRITE_NONE;
-       else if (createflags == O_CREAT)
-               flags = XS_WRITE_CREATE;
-       else if (createflags == (O_CREAT|O_EXCL))
-               flags = XS_WRITE_CREATE_EXCL;
-       else
-               return -EINVAL;
+       if (IS_ERR(path))
+               return PTR_ERR(path);
 
        iovec[0].iov_base = (void *)path;
        iovec[0].iov_len = strlen(path) + 1;
-       iovec[1].iov_base = (void *)flags;
-       iovec[1].iov_len = strlen(flags) + 1;
-       iovec[2].iov_base = (void *)string;
-       iovec[2].iov_len = strlen(string);
-
-       return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+       iovec[1].iov_base = (void *)string;
+       iovec[1].iov_len = strlen(string);
+
+       ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+       free(path);
+       return ret;
 }
 
 /* Create a new directory. */
-int xenbus_mkdir(const char *dir, const char *node)
-{
-       return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL));
+int xenbus_mkdir(struct xenbus_transaction *t,
+                const char *dir, const char *node)
+{
+       char *path;
+       int ret;
+
+       path = join(dir, node);
+       if (IS_ERR(path))
+               return PTR_ERR(path);
+
+       ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
+       free(path);
+       return ret;
 }
 
 /* Destroy a file or directory (directories must be empty). */
-int xenbus_rm(const char *dir, const char *node)
-{
-       return xs_error(xs_single(XS_RM, join(dir, node), NULL));
+int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node)
+{
+       char *path;
+       int ret;
+
+       path = join(dir, node);
+       if (IS_ERR(path))
+               return PTR_ERR(path);
+
+       ret = xs_error(xs_single(t, XS_RM, path, NULL));
+       free(path);
+       return ret;
 }
 
 /* Start a transaction: changes by others will not be seen during this
  * transaction, and changes will not be visible to others until end.
- * Transaction only applies to the given subtree.
- * You can only have one transaction at any time.
  */
-int xenbus_transaction_start(const char *subtree)
-{
-       return xs_error(xs_single(XS_TRANSACTION_START, subtree, NULL));
+struct xenbus_transaction *xenbus_transaction_start(void)
+{
+       char *id_str;
+       unsigned long id;
+
+       down_read(&xs_state.suspend_mutex);
+
+       id_str = xs_single(NULL, XS_TRANSACTION_START, "", NULL);
+       if (IS_ERR(id_str)) {
+               up_read(&xs_state.suspend_mutex);
+               return (struct xenbus_transaction *)id_str;
+       }
+
+       id = simple_strtoul(id_str, NULL, 0);
+       free(id_str);
+
+       return (struct xenbus_transaction *)id;
 }
 
 /* End a transaction.
  * If abandon is true, transaction is discarded instead of committed.
  */
-int xenbus_transaction_end(int abort)
+int xenbus_transaction_end(struct xenbus_transaction *t, int abort)
 {
        char abortstr[2];
+       int err;
 
        if (abort)
                strcpy(abortstr, "F");
        else
                strcpy(abortstr, "T");
-       return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL));
+
+       err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
+
+       up_read(&xs_state.suspend_mutex);
+
+       return err;
 }
 
 /* Single read and scanf: returns -errno or num scanned. */
-int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...)
+int xenbus_scanf(struct xenbus_transaction *t,
+                const char *dir, const char *node, const char *fmt, ...)
 {
        va_list ap;
        int ret;
        char *val;
 
-       val = xenbus_read(dir, node, NULL);
+       val = xenbus_read(t, dir, node, NULL);
        if (IS_ERR(val))
                return PTR_ERR(val);
 
        va_start(ap, fmt);
        ret = vsscanf(val, fmt, ap);
        va_end(ap);
-       xfree(val);
+       free(val);
        /* Distinctive errno. */
        if (ret == 0)
                return -ERANGE;
@@ -336,23 +483,32 @@
 }
 
 /* Single printf and write: returns -errno or 0. */
-int xenbus_printf(const char *dir, const char *node, const char *fmt, ...)
+int xenbus_printf(struct xenbus_transaction *t,
+                 const char *dir, const char *node, const char *fmt, ...)
 {
        va_list ap;
        int ret;
-
-       //BUG_ON(down_trylock(&xenbus_lock) == 0);
+#define PRINTF_BUFFER_SIZE 4096
+       char *printf_buffer;
+
+       printf_buffer = malloc(PRINTF_BUFFER_SIZE);
+       if (printf_buffer == NULL)
+               return -ENOMEM;
+
        va_start(ap, fmt);
-       ret = vsnprintf(printf_buffer, sizeof(printf_buffer), fmt, ap);
+       ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
        va_end(ap);
 
-       //BUG_ON(ret > sizeof(printf_buffer)-1);
-       return xenbus_write(dir, node, printf_buffer, O_CREAT);
-}
-
-       
+       //      BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
+       ret = xenbus_write(t, dir, node, printf_buffer);
+
+       free(printf_buffer);
+
+       return ret;
+}
+
 /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
-int xenbus_gather(const char *dir, ...)
+int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...)
 {
        va_list ap;
        const char *name;
@@ -364,7 +520,7 @@
                void *result = va_arg(ap, void *);
                char *p;
 
-               p = xenbus_read(dir, name, NULL);
+               p = xenbus_read(t, dir, name, NULL);
                if (IS_ERR(p)) {
                        ret = PTR_ERR(p);
                        break;
@@ -372,7 +528,7 @@
                if (fmt) {
                        if (sscanf(p, fmt, result) == 0)
                                ret = -EINVAL;
-                       xfree(p);
+                       free(p);
                } else
                        *(char **)result = p;
        }
@@ -389,31 +545,8 @@
        iov[1].iov_base = (void *)token;
        iov[1].iov_len = strlen(token) + 1;
 
-       return xs_error(xs_talkv(XS_WATCH, iov, ARRAY_SIZE(iov), NULL));
-}
-
-static char *xs_read_watch(char **token)
-{
-       enum xsd_sockmsg_type type;
-       char *ret;
-
-       ret = read_reply(&type, NULL);
-       if (IS_ERR(ret))
-               return ret;
-
-       //BUG_ON(type != XS_WATCH_EVENT);
-       *token = ret + strlen(ret) + 1;
-       return ret;
-}
-
-static int xs_acknowledge_watch(const char *token)
-{
-#if 0
-       return xs_error(xs_single(XS_WATCH_ACK, token, NULL));
-#else
-       /* XS_WATCH_ACK is no longer available */
-       return 0;
-#endif
+       return xs_error(xs_talkv(NULL, XS_WATCH, iov,
+                                ARRAY_SIZE(iov), NULL));
 }
 
 static int xs_unwatch(const char *path, const char *token)
@@ -425,10 +558,10 @@
        iov[1].iov_base = (char *)token;
        iov[1].iov_len = strlen(token) + 1;
 
-       return xs_error(xs_talkv(XS_UNWATCH, iov, ARRAY_SIZE(iov), NULL));
-}
-
-/* A little paranoia: we don't just trust token. */
+       return xs_error(xs_talkv(NULL, XS_UNWATCH, iov,
+                                ARRAY_SIZE(iov), NULL));
+}
+
 static struct xenbus_watch *find_watch(const char *token)
 {
        struct xenbus_watch *i, *cmp;
@@ -438,6 +571,7 @@
        list_for_each_entry(i, &watches, list)
                if (i == cmp)
                        return i;
+
        return NULL;
 }
 
@@ -449,111 +583,214 @@
        int err;
 
        sprintf(token, "%lX", (long)watch);
-       //BUG_ON(find_watch(token));
-printk("Registered watch for: %s\n", token);
+
+       down_read(&xs_state.suspend_mutex);
+
+       spin_lock(&watches_lock);
+       //      BUG_ON(find_watch(token));
+       list_add(&watch->list, &watches);
+       spin_unlock(&watches_lock);
+
        err = xs_watch(watch->node, token);
-       if (!err)
-               list_add(&watch->list, &watches);
+
+       /* Ignore errors due to multiple registration. */
+       if ((err != 0) && (err != -EEXIST)) {
+               spin_lock(&watches_lock);
+               list_del(&watch->list);
+               spin_unlock(&watches_lock);
+       }
+
+       up_read(&xs_state.suspend_mutex);
+
        return err;
 }
 
 void unregister_xenbus_watch(struct xenbus_watch *watch)
 {
+       struct xs_stored_msg *msg, *tmp;
        char token[sizeof(watch) * 2 + 1];
        int err;
 
        sprintf(token, "%lX", (long)watch);
-       //BUG_ON(!find_watch(token));
+
+       down_read(&xs_state.suspend_mutex);
+
+       spin_lock(&watches_lock);
+       //      BUG_ON(!find_watch(token));
+       list_del(&watch->list);
+       spin_unlock(&watches_lock);
 
        err = xs_unwatch(watch->node, token);
-       list_del(&watch->list);
-
        if (err)
                printk("XENBUS Failed to release watch %s: %i\n",
                       watch->node, err);
-}
-
-/* Re-register callbacks to all watches. */
-void reregister_xenbus_watches(void)
+
+       up_read(&xs_state.suspend_mutex);
+
+       /* Cancel pending watch events. */
+       spin_lock(&watch_events_lock);
+       list_for_each_entry_safe(msg, tmp, &watch_events, list) {
+               if (msg->u.watch.handle != watch)
+                       continue;
+               list_del(&msg->list);
+               free(msg->u.watch.vec);
+               free(msg);
+       }
+       spin_unlock(&watch_events_lock);
+}
+
+void xs_suspend(void)
+{
+       down_write(&xs_state.suspend_mutex);
+       down(&xs_state.request_mutex);
+}
+
+void xs_resume(void)
 {
        struct xenbus_watch *watch;
        char token[sizeof(watch) * 2 + 1];
 
+       up(&xs_state.request_mutex);
+
+       /* No need for watches_lock: the suspend_mutex is sufficient. */
        list_for_each_entry(watch, &watches, list) {
                sprintf(token, "%lX", (long)watch);
                xs_watch(watch->node, token);
        }
-}
-
-void watch_thread(void *unused)
-{
+
+       up_write(&xs_state.suspend_mutex);
+}
+
+static void xenwatch_thread(void *unused)
+{
+       struct list_head *ent;
+       struct xs_stored_msg *msg;
+
        for (;;) {
-               char *token;
-               char *node = NULL;
-
-               wait_event(xb_waitq, xs_input_avail());
-
-               /* If this is a spurious wakeup caused by someone
-                * doing an op, they'll hold the lock and the buffer
-                * will be empty by the time we get there.               
-                */
-               down(&xenbus_lock);
-               if (xs_input_avail())
-                       node = xs_read_watch(&token);
-
-               if (node && !IS_ERR(node)) {
-                       struct xenbus_watch *w;
-                       int err;
-
-                       err = xs_acknowledge_watch(token);
-                       if (err)
-                               printk("XENBUS ack %s fail %i\n", node, err);
-                       w = find_watch(token);
-                       //BUG_ON(!w);
-                       w->callback(w, node);
-                       xfree(node);
-               } else
-                       printk("XENBUS xs_read_watch: %li\n", PTR_ERR(node));
-               up(&xenbus_lock);
-       }
-}
-
-
-static void ballon_changed(struct xenbus_watch *watch, const char *node)
-{
-    unsigned long new_target;
-    int err;
-    err = xenbus_scanf("memory", "target", "%lu", &new_target);
-
-    if(err != 1)
-    {
-        printk("Unable to read memory/target\n");
-        return;
-    }
-
-    printk("Memory target changed to: %ld bytes, ignoring.\n", new_target);
-}
-
-
-static struct xenbus_watch ballon_watch = {
-    .node = "memory/target",
-    .callback = ballon_changed,
-};
-
-
+               wait_event(watch_events_waitq,
+                          !list_empty(&watch_events));
+
+               down(&xenwatch_mutex);
+
+               spin_lock(&watch_events_lock);
+               ent = watch_events.next;
+               if (ent != &watch_events)
+                       list_del(ent);
+               spin_unlock(&watch_events_lock);
+
+               if (ent != &watch_events) {
+                       msg = list_entry(ent, struct xs_stored_msg, list);
+                       msg->u.watch.handle->callback(
+                               msg->u.watch.handle,
+                               (const char **)msg->u.watch.vec,
+                               msg->u.watch.vec_size);
+                       free(msg->u.watch.vec);
+                       free(msg);
+               }
+
+               up(&xenwatch_mutex);
+       }
+}
+
+static int process_msg(void)
+{
+       struct xs_stored_msg *msg;
+       char *body;
+       int err;
+
+       msg = malloc(sizeof(*msg));
+       if (msg == NULL)
+               return -ENOMEM;
+
+       err = xb_read(&msg->hdr, sizeof(msg->hdr));
+       if (err) {
+               free(msg);
+               return err;
+       }
+
+       body = malloc(msg->hdr.len + 1);
+       if (body == NULL) {
+               free(msg);
+               return -ENOMEM;
+       }
+
+       err = xb_read(body, msg->hdr.len);
+       if (err) {
+               free(body);
+               free(msg);
+               return err;
+       }
+       body[msg->hdr.len] = '\0';
+
+       if (msg->hdr.type == XS_WATCH_EVENT) {
+               msg->u.watch.vec = split(body, msg->hdr.len,
+                                        &msg->u.watch.vec_size);
+               if (IS_ERR(msg->u.watch.vec)) {
+                       free(msg);
+                       return PTR_ERR(msg->u.watch.vec);
+               }
+
+               spin_lock(&watches_lock);
+               msg->u.watch.handle = find_watch(
+                       msg->u.watch.vec[XS_WATCH_TOKEN]);
+               if (msg->u.watch.handle != NULL) {
+                       spin_lock(&watch_events_lock);
+                       list_add_tail(&msg->list, &watch_events);
+                       wake_up(&watch_events_waitq);
+                       spin_unlock(&watch_events_lock);
+               } else {
+                       free(msg->u.watch.vec);
+                       free(msg);
+               }
+               spin_unlock(&watches_lock);
+       } else {
+               msg->u.reply.body = body;
+               spin_lock(&xs_state.reply_lock);
+               list_add_tail(&msg->list, &xs_state.reply_list);
+               spin_unlock(&xs_state.reply_lock);
+               wake_up(&xs_state.reply_waitq);
+       }
+
+       return 0;
+}
+
+static void xenbus_thread(void *unused)
+{
+       int err;
+
+       for (;;) {
+               err = process_msg();
+               if (err)
+                       printk("XENBUS error %d while reading "
+                              "message\n", err);
+       }
+}
 
 int xs_init(void)
 {
        int err;
-       struct thread *watcher;
-    printk("xb_init_comms\n");
+       struct thread *kxwatcher_thread;
+       struct thread *kxenbus_thread;
+
+       INIT_LIST_HEAD(&xs_state.reply_list);
+       spin_lock_init(&xs_state.reply_lock);
+       init_waitqueue_head(&xs_state.reply_waitq);
+
+       init_MUTEX(&xs_state.request_mutex);
+       init_rwsem(&xs_state.suspend_mutex);
+
+       /* Initialize the shared memory rings to talk to xenstored */
        err = xb_init_comms();
        if (err)
                return err;
-       
-       watcher = create_thread("kxwatch", watch_thread, NULL);
-    down(&xenbus_lock);
-    register_xenbus_watch(&ballon_watch);
-    up(&xenbus_lock);
+
+       kxwatcher_thread = create_thread("kxwatch", xenwatch_thread, NULL);
+       if (IS_ERR(kxwatcher_thread))
+               return PTR_ERR(kxwatcher_thread);
+
+       kxenbus_thread = create_thread("kxenbus", xenbus_thread, NULL);
+       if (IS_ERR(kxenbus_thread))
+               return PTR_ERR(kxenbus_thread);
+
        return 0;
 }
diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Thu Dec  8 15:04:31 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Thu Dec  8 15:04:41 2005
@@ -192,8 +192,8 @@
                page = balloon_retrieve();
                BUG_ON(page == NULL);
 
-               pfn = page - mem_map;
-               BUG_ON(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
+               pfn = page_to_pfn(page);
+               BUG_ON(phys_to_machine_mapping_valid(pfn));
 
                /* Update P->M and M->P tables. */
                set_phys_to_machine(pfn, mfn_list[i]);
@@ -253,8 +253,8 @@
                        break;
                }
 
-               pfn = page - mem_map;
-               mfn_list[i] = phys_to_machine_mapping[pfn];
+               pfn = page_to_pfn(page);
+               mfn_list[i] = pfn_to_mfn(pfn);
 
                if (!PageHighMem(page)) {
                        v = phys_to_virt(pfn << PAGE_SHIFT);
@@ -444,6 +444,9 @@
 
        IPRINTK("Initialising balloon driver.\n");
 
+       if (xen_init() < 0)
+               return -1;
+
        current_pages = min(xen_start_info->nr_pages, max_pfn);
        target_pages  = current_pages;
        balloon_low   = 0;
@@ -465,7 +468,7 @@
     
        /* Initialise the balloon with excess memory space. */
        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
-               page = &mem_map[pfn];
+               page = pfn_to_page(pfn);
                if (!PageReserved(page))
                        balloon_append(page);
        }
diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Thu Dec  8 15:04:31 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Thu Dec  8 15:04:41 2005
@@ -65,6 +65,8 @@
 extern unsigned long *phys_to_machine_mapping;
 #define pfn_to_mfn(pfn)        \
 (phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL<<31))
+#define        phys_to_machine_mapping_valid(pfn) \
+       (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
 {
        unsigned long pfn;
diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Thu Dec  8 15:04:31 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Thu Dec  8 15:04:41 2005
@@ -355,34 +355,27 @@
 #endif
     return 1;
 }
+#endif
 
 static inline int
 HYPERVISOR_update_va_mapping(
     unsigned long va, pte_t new_val, unsigned long flags)
 {
-#if 0
-    int ret;
-    unsigned long ign1, ign2, ign3;
-
-    __asm__ __volatile__ (
-        TRAP_INSTR
-        : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
-       : "0" (__HYPERVISOR_update_va_mapping), 
-          "1" (va), "2" ((new_val).pte_low), "3" (flags)
-       : "memory" );
-
-    if ( unlikely(ret < 0) )
-    {
-        printk(KERN_ALERT "Failed update VA mapping: %08lx, %08lx, %08lx\n",
-               va, (new_val).pte_low, flags);
-        BUG();
-    }
-
-    return ret;
-#endif
-    return 1;
-}
-#endif
+    /* no-op */
+    return 1;
+}
+
+static inline int
+HYPERVISOR_memory_op(
+    unsigned int cmd, void *arg)
+{
+    int ret;
+    __asm__ __volatile__ ( ";; mov r14=%2 ; mov r15=%3 ; mov r2=%1 ; break 0x1000 ;; mov %0=r8 ;;"
+        : "=r" (ret)
+        : "i" (__HYPERVISOR_console_io), "r"(cmd), "r"(arg)
+        : "r14","r15","r2","r8","memory" );
+    return ret;
+}
 
 static inline int
 HYPERVISOR_event_channel_op(
diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h        Thu Dec  8 15:04:31 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h        Thu Dec  8 15:04:41 2005
@@ -52,4 +52,19 @@
 #define        mfn_to_pfn(x)   (x)
 #define machine_to_phys_mapping 0
 
+// for drivers/xen/balloon/balloon.c
+#ifdef CONFIG_XEN_SCRUB_PAGES
+#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT)
+#else
+#define scrub_pages(_p,_n) ((void)0)
+#endif
+#define        pte_mfn(_x)     pte_pfn(_x)
+#define INVALID_P2M_ENTRY      (~0UL)
+#define __pte_ma(_x)   ((pte_t) {(_x)})
+#define phys_to_machine_mapping_valid(_x)      (1)
+#define        kmap_flush_unused()     do {} while (0)
+#define set_phys_to_machine(_x,_y)     do {} while (0)
+#define xen_machphys_update(_x,_y)     do {} while (0)
+#define pfn_pte_ma(_x,_y)      __pte_ma(0)
+
 #endif /* __HYPERVISOR_H__ */
diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Thu Dec  8 15:04:31 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Thu Dec  8 15:04:41 2005
@@ -67,6 +67,8 @@
 extern unsigned long *phys_to_machine_mapping;
 #define pfn_to_mfn(pfn)        \
 (phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL << 63))
+#define        phys_to_machine_mapping_valid(pfn) \
+       (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
 static inline unsigned long mfn_to_pfn(unsigned long mfn)
 {
        unsigned long pfn;
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/shadow.c     Thu Dec  8 15:04:41 2005
@@ -1450,6 +1450,7 @@
     int changed;
     u32 min_max_shadow, min_max_snapshot;
     int min_shadow, max_shadow, min_snapshot, max_snapshot;
+    struct vcpu *v;
 
     ASSERT(shadow_lock_is_acquired(d));
 
@@ -1739,6 +1740,9 @@
 
         if ( unlikely(unshadow) )
         {
+            for_each_vcpu(d, v)
+                if(smfn == pagetable_get_pfn(v->arch.shadow_table))
+                    return need_flush;
             perfc_incrc(unshadow_l2_count);
             shadow_unpin(smfn);
 #if CONFIG_PAGING_LEVELS == 2
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/shadow32.c   Thu Dec  8 15:04:41 2005
@@ -2326,6 +2326,7 @@
     int changed;
     u32 min_max_shadow, min_max_snapshot;
     int min_shadow, max_shadow, min_snapshot, max_snapshot;
+    struct vcpu *v;
 
     ASSERT(shadow_lock_is_acquired(d));
 
@@ -2527,6 +2528,9 @@
 
         if ( unlikely(unshadow) )
         {
+            for_each_vcpu(d, v)
+                if(smfn == pagetable_get_pfn(v->arch.shadow_table))
+                    return need_flush;
             perfc_incrc(unshadow_l2_count);
             shadow_unpin(smfn);
             if ( unlikely(shadow_mode_external(d)) )
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/vmx.c        Thu Dec  8 15:04:41 2005
@@ -108,7 +108,7 @@
     destroy_vmcs(&v->arch.arch_vmx);
     free_monitor_pagetable(v);
     vpit = &v->domain->arch.vmx_platform.vmx_pit;
-    if ( vpit->ticking && active_ac_timer(&(vpit->pit_timer)) )
+    if ( active_ac_timer(&(vpit->pit_timer)) )
         rem_ac_timer(&vpit->pit_timer);
     if ( active_ac_timer(&v->arch.arch_vmx.hlt_timer) ) {
         rem_ac_timer(&v->arch.arch_vmx.hlt_timer);
@@ -905,7 +905,7 @@
 int
 vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
 {
-    unsigned long mfn, old_cr4;
+    unsigned long mfn, old_cr4, old_base_mfn;
     int error = 0;
 
     error |= __vmwrite(GUEST_RIP, c->eip);
@@ -945,7 +945,12 @@
             return 0;
         }
         mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
+        if(!get_page(pfn_to_page(mfn), v->domain))
+                return 0;
+        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
         v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+        if (old_base_mfn)
+             put_page(pfn_to_page(old_base_mfn));
         update_pagetables(v);
         /*
          * arch.shadow_table should now hold the next CR3 for shadow
@@ -1174,9 +1179,11 @@
     }
 
     if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
-        if(v->arch.arch_vmx.cpu_cr3)
+        if(v->arch.arch_vmx.cpu_cr3){
             put_page(pfn_to_page(get_mfn_from_pfn(
                       v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)));
+            v->arch.guest_table = mk_pagetable(0);
+        }
 
     /*
      * VMX does not implement real-mode virtualization. We emulate
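
The ordering in the vmx_world_restore() change matters: a reference is taken on the new CR3 base frame before the old one is released, so the old base cannot be freed and reused in between. A condensed sketch of the pattern (illustrative only; new_mfn stands for the mfn computed from the guest's CR3):

    /* Switch v->arch.guest_table to new_mfn with balanced refcounts. */
    if ( !get_page(pfn_to_page(new_mfn), v->domain) )
        return 0;                                   /* new base not usable */
    old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
    v->arch.guest_table = mk_pagetable(new_mfn << PAGE_SHIFT);
    if ( old_base_mfn )
        put_page(pfn_to_page(old_base_mfn));        /* drop old ref last */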
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c      Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/vmx_intercept.c      Thu Dec  8 15:04:41 2005
@@ -387,7 +387,6 @@
         }
         else {
             init_ac_timer(&vpit->pit_timer, pit_timer_fn, v, v->processor);
-            vpit->ticking = 1;
         }
 
         /* init count for this channel */
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/vmx_io.c     Thu Dec  8 15:04:41 2005
@@ -748,7 +748,7 @@
 {
     /* clear the event *before* checking for work. This should avoid
        the set-and-check races */
-    if (vmx_clear_pending_io_event(current))
+    if (vmx_clear_pending_io_event(v))
         vmx_io_assist(v);
 }
 
@@ -793,29 +793,39 @@
     return __fls(pintr[0]);
 }
 
+void set_tsc_shift(struct vcpu *v,struct vmx_virpit *vpit)
+{
+    u64   drift;
+
+    if ( vpit->first_injected )
+        drift = vpit->period_cycles * vpit->pending_intr_nr;
+    else 
+        drift = 0;
+    drift = v->arch.arch_vmx.tsc_offset - drift;
+    __vmwrite(TSC_OFFSET, drift);
+
+#if defined (__i386__)
+    __vmwrite(TSC_OFFSET_HIGH, (drift >> 32));
+#endif
+}
+
 #define BSP_CPU(v)    (!(v->vcpu_id))
 static inline void
 interrupt_post_injection(struct vcpu * v, int vector, int type)
 {
     struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
-    u64    drift;
 
     if ( is_pit_irq(v, vector, type) ) {
         if ( !vpit->first_injected ) {
+            vpit->pending_intr_nr = 0;
+            vpit->scheduled = NOW() + vpit->period;
+            set_ac_timer(&vpit->pit_timer, vpit->scheduled);
             vpit->first_injected = 1;
-            vpit->pending_intr_nr = 0;
         } else {
             vpit->pending_intr_nr--;
         }
         vpit->inject_point = NOW();
-        drift = vpit->period_cycles * vpit->pending_intr_nr;
-        drift = v->arch.arch_vmx.tsc_offset - drift;
-        __vmwrite(TSC_OFFSET, drift);
-
-#if defined (__i386__)
-        __vmwrite(TSC_OFFSET_HIGH, (drift >> 32));
-#endif
-
+        set_tsc_shift (v, vpit);
     }
 
     switch(type)
@@ -982,8 +992,10 @@
             vmx_wait_io();
     }
     /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( vpit->ticking )
+    if ( vpit->first_injected ) {
         pickup_deactive_ticks(vpit);
+    }
+    set_tsc_shift(v,vpit);
 
     /* We can't resume the guest if we're waiting on I/O */
     ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags));
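
The effect of set_tsc_shift() is easiest to see as arithmetic; the figures below are made up purely for illustration:

    /* With first_injected set:
     *   drift             = period_cycles * pending_intr_nr
     *   TSC_OFFSET (VMCS) = tsc_offset - drift
     *
     * Example: period_cycles = 1,000,000 and pending_intr_nr = 3 gives
     * drift = 3,000,000, so the guest-visible TSC is held back by the
     * cycles corresponding to the three PIT ticks it has not yet taken.
     */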
diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Thu Dec  8 15:04:31 2005
+++ b/xen/arch/x86/vmx_vmcs.c   Thu Dec  8 15:04:41 2005
@@ -243,9 +243,6 @@
 {
     struct vmx_platform *platform;
 
-    if (!(VMX_DOMAIN(current) && (current->vcpu_id == 0)))
-        return;
-
     vmx_map_io_shared_page(d);
     vmx_set_vcpu_nr(d);
 
@@ -290,6 +287,7 @@
 /* Update CR3, GDT, LDT, TR */
     unsigned int  error = 0;
     unsigned long cr0, cr4;
+    u64     host_tsc;
 
     if (v->vcpu_id == 0)
         vmx_setup_platform(v->domain);
@@ -337,6 +335,10 @@
     __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
 
     v->arch.schedule_tail = arch_vmx_do_resume;
+    /* init guest tsc to start from 0 */
+    rdtscll(host_tsc);
+    v->arch.arch_vmx.tsc_offset = 0 - host_tsc;
+    set_tsc_shift (v, &v->domain->arch.vmx_platform.vmx_pit);
 }
 
 /*
@@ -366,7 +368,6 @@
     error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
 
     /* TSC */
-    error |= __vmwrite(TSC_OFFSET, 0);
     error |= __vmwrite(CR3_TARGET_COUNT, 0);
 
     /* Guest Selectors */
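
The "init guest tsc to start from 0" logic above reduces to simple offset arithmetic:

    /* rdtscll(host_tsc)            reads the current host TSC, say H
     * tsc_offset = 0 - host_tsc    i.e. -H
     * guest RDTSC = host TSC + TSC_OFFSET = H + (-H) = 0 at setup time,
     * after which it advances at the host rate (adjusted by
     * set_tsc_shift() for any pending PIT ticks).
     */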
diff -r 76bff6c996b0 -r c9772105fead xen/include/asm-x86/vmx_vpit.h
--- a/xen/include/asm-x86/vmx_vpit.h    Thu Dec  8 15:04:31 2005
+++ b/xen/include/asm-x86/vmx_vpit.h    Thu Dec  8 15:04:41 2005
@@ -27,7 +27,6 @@
     unsigned int pending_intr_nr; /* the couner for pending timer interrupts */
     u32 period;                /* pit frequency in ns */
     int first_injected;                 /* flag to prevent shadow window */
-    int ticking;    /* indicating it is ticking */
 
     /* virtual PIT state for handle related I/O */
     int read_state;
@@ -51,5 +50,6 @@
     else
         return -1;
 }
+extern void set_tsc_shift(struct vcpu *v,struct vmx_virpit *vpit);
 
 #endif /* _VMX_VIRPIT_H_ */
diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/spinlock.h
--- /dev/null   Thu Dec  8 15:04:31 2005
+++ b/extras/mini-os/include/spinlock.h Thu Dec  8 15:04:41 2005
@@ -0,0 +1,121 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <lib.h>
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+       volatile unsigned int slock;
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 }
+
+#define spin_lock_init(x)      do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x)      (*(volatile signed char *)(&(x)->slock) <= 0)
+#define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+        "1:\n" \
+       LOCK \
+       "decb %0\n\t" \
+       "jns 3f\n" \
+       "2:\t" \
+       "rep;nop\n\t" \
+       "cmpb $0,%0\n\t" \
+       "jle 2b\n\t" \
+       "jmp 1b\n" \
+       "3:\n\t"
+
+#define spin_lock_string_flags \
+        "1:\n" \
+       LOCK \
+       "decb %0\n\t" \
+       "jns 4f\n\t" \
+       "2:\t" \
+       "testl $0x200, %1\n\t" \
+       "jz 3f\n\t" \
+       "#sti\n\t" \
+       "3:\t" \
+       "rep;nop\n\t" \
+       "cmpb $0, %0\n\t" \
+       "jle 3b\n\t" \
+       "#cli\n\t" \
+       "jmp 1b\n" \
+       "4:\n\t"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+
+#define spin_unlock_string \
+       "xchgb %b0, %1" \
+               :"=q" (oldval), "=m" (lock->slock) \
+               :"0" (oldval) : "memory"
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+       char oldval = 1;
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+static inline int _raw_spin_trylock(spinlock_t *lock)
+{
+       char oldval;
+       __asm__ __volatile__(
+               "xchgb %b0,%1\n"
+               :"=q" (oldval), "=m" (lock->slock)
+               :"0" (0) : "memory");
+       return oldval > 0;
+}
+
+static inline void _raw_spin_lock(spinlock_t *lock)
+{
+       __asm__ __volatile__(
+               spin_lock_string
+               :"=m" (lock->slock) : : "memory");
+}
+
+static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
+{
+       __asm__ __volatile__(
+               spin_lock_string_flags
+               :"=m" (lock->slock) : "r" (flags) : "memory");
+}
+
+#define _spin_trylock(lock)     ({_raw_spin_trylock(lock) ? \
+                                1 : ({ 0;});})
+
+#define _spin_lock(lock)        \
+do {                            \
+        _raw_spin_lock(lock);   \
+} while(0)
+
+#define _spin_unlock(lock)      \
+do {                            \
+        _raw_spin_unlock(lock); \
+} while (0)
+
+
+#define spin_lock(lock)       _spin_lock(lock)
+#define spin_unlock(lock)       _spin_unlock(lock)
+
+#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
+
+#endif
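
A minimal usage sketch for the new mini-os spinlock API (the lock and function names are illustrative only):

    #include <spinlock.h>

    static DEFINE_SPINLOCK(xmit_lock);        /* statically initialised unlocked */

    void xmit_locked(void)
    {
        spin_lock(&xmit_lock);                /* spins until acquired */
        /* ... critical section ... */
        spin_unlock(&xmit_lock);
    }

    int xmit_try(void)
    {
        if ( !_raw_spin_trylock(&xmit_lock) ) /* non-zero iff lock was taken */
            return 0;
        /* ... critical section ... */
        _raw_spin_unlock(&xmit_lock);
        return 1;
    }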

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
