WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Merge latest xen-unstable into xen-ia64-unstable

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] Merge latest xen-unstable into xen-ia64-unstable
From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 22 Sep 2005 20:02:23 +0000
Delivery-date: Thu, 22 Sep 2005 20:02:36 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID 06d84bf87159174ef040a67f4ce90fcb72469b14
# Parent  97dbd9524a7e918f2ffb2d5155a7e32c48f7f355
# Parent  2f83ff9f6bd2a7987c297b84bfce1f8e22409cae
Merge latest xen-unstable into xen-ia64-unstable

diff -r 97dbd9524a7e -r 06d84bf87159 .hgignore
--- a/.hgignore Thu Sep 22 17:34:14 2005
+++ b/.hgignore Thu Sep 22 17:42:01 2005
@@ -86,6 +86,9 @@
 ^tools/check/\..*$
 ^tools/console/xenconsoled$
 ^tools/console/xenconsole$
+^tools/debugger/gdb/gdb-6\.2\.1\.tar\.bz2$
+^tools/debugger/gdb/gdb-6\.2\.1/.*$
+^tools/debugger/gdb/gdb-6\.2\.1-linux-i386-xen/.*$
 ^tools/debugger/pdb/pdb$
 ^tools/debugger/pdb/linux-[0-9.]*-module/.*\.ko$
 ^tools/debugger/pdb/linux-[0-9.]*-module/.*\.mod.c$
@@ -136,9 +139,10 @@
 ^tools/vnet/vnet-module/\..*\.cmd$
 ^tools/vnet/vnet-module/\.tmp_versions/.*$
 ^tools/vnet/vnet-module/vnet_module\.mod\..*$
-^tools/vtpm/vtpm*
-^tools/vtpm/tpm_emulator-*
-^tools/vtpm_manager/manager/vtpm_managerd
+^tools/vtpm/tpm_emulator/.*$
+^tools/vtpm/tpm_emulator-.*\.tar\.gz$
+^tools/vtpm/vtpm/.*$
+^tools/vtpm_manager/manager/vtpm_managerd$
 ^tools/xcutils/xc_restore$
 ^tools/xcutils/xc_save$
 ^tools/xenstat/xentop/xentop$
@@ -156,6 +160,7 @@
 ^tools/xenstore/xs_stress$
 ^tools/xenstore/xs_test$
 ^tools/xenstore/xs_watch_stress$
+^tools/xentrace/xenctx$
 ^tools/xentrace/xentrace$
 ^xen/BLOG$
 ^xen/TAGS$
diff -r 97dbd9524a7e -r 06d84bf87159 Makefile
--- a/Makefile  Thu Sep 22 17:34:14 2005
+++ b/Makefile  Thu Sep 22 17:42:01 2005
@@ -98,11 +98,14 @@
        $(MAKE) -C tools clean
        $(MAKE) -C docs clean
 
-# clean, but blow away kernel build tree plus tar balls
-mrproper: clean
+# clean, but blow away kernel build tree plus tarballs
+distclean: clean
        rm -rf dist patches/tmp
        for i in $(ALLKERNELS) ; do $(MAKE) $$i-delete ; done
        for i in $(ALLSPARSETREES) ; do $(MAKE) $$i-mrproper ; done
+
+# Linux name for GNU distclean
+mrproper: distclean
 
 install-logging: LOGGING=logging-0.4.9.2
 install-logging:
@@ -142,7 +145,7 @@
        @echo 'Cleaning targets:'
        @echo '  clean            - clean the Xen, tools and docs (but not'
        @echo '                     guest kernel) trees'
-       @echo '  mrproper         - clean plus delete kernel tarballs and kernel'
+       @echo '  distclean        - clean plus delete kernel tarballs and kernel'
        @echo '                     build trees'
        @echo '  kdelete          - delete guest kernel build trees'
        @echo '  kclean           - clean guest kernel build trees'
@@ -163,27 +166,25 @@
 uninstall:
        [ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-`date +%s`
        rm -rf $(D)/etc/init.d/xend*
-       rm -rf $(D)/usr/$(LIBDIR)/libxc* $(D)/usr/$(LIBDIR)/libxutil*
-       rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/include/xen
-       rm -rf $(D)/usr/$(LIBDIR)/share/xen $(D)/usr/$(LIBDIR)/libxenstore*
+       rm -rf $(D)/etc/hotplug/xen-backend.agent
        rm -rf $(D)/var/run/xen* $(D)/var/lib/xen*
-       rm -rf $(D)/usr/include/xcs_proto.h $(D)/usr/include/xc.h
-       rm -rf $(D)/usr/include/xs_lib.h $(D)/usr/include/xs.h
-       rm -rf $(D)/usr/sbin/xcs $(D)/usr/sbin/xcsdump $(D)/usr/sbin/xen*
-       rm -rf $(D)/usr/sbin/netfix
-       rm -rf $(D)/usr/sbin/xfrd $(D)/usr/sbin/xm
-       rm -rf $(D)/usr/share/doc/xen  $(D)/usr/man/man*/xentrace*
-       rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/miniterm
        rm -rf $(D)/boot/*xen*
        rm -rf $(D)/lib/modules/*xen*
+       rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/lomount
        rm -rf $(D)/usr/bin/cpuperf-perfcntr $(D)/usr/bin/cpuperf-xen
        rm -rf $(D)/usr/bin/xc_shadow
-       rm -rf $(D)/usr/share/xen $(D)/usr/libexec/xen
+       rm -rf $(D)/usr/include/xenctrl.h
+       rm -rf $(D)/usr/include/xs_lib.h $(D)/usr/include/xs.h
+       rm -rf $(D)/usr/include/xen
+       rm -rf $(D)/usr/$(LIBDIR)/libxenctrl* $(D)/usr/$(LIBDIR)/libxenguest*
+       rm -rf $(D)/usr/$(LIBDIR)/libxenstore*
+       rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/$(LIBDIR)/xen 
+       rm -rf $(D)/usr/libexec/xen
+       rm -rf $(D)/usr/sbin/xen* $(D)/usr/sbin/netfix $(D)/usr/sbin/xm
+       rm -rf $(D)/usr/share/doc/xen
+       rm -rf $(D)/usr/share/xen
        rm -rf $(D)/usr/share/man/man1/xen*
        rm -rf $(D)/usr/share/man/man8/xen*
-       rm -rf $(D)/usr/lib/xen
-       rm -rf $(D)/etc/hotplug.d/xen-backend
-       rm -rf $(D)/etc/hotplug/xen-backend.agent
 
 # Legacy targets for compatibility
 linux24:
diff -r 97dbd9524a7e -r 06d84bf87159 docs/Makefile
--- a/docs/Makefile     Thu Sep 22 17:34:14 2005
+++ b/docs/Makefile     Thu Sep 22 17:42:01 2005
@@ -12,7 +12,7 @@
 
 pkgdocdir      := /usr/share/doc/xen
 
-DOC_TEX                := $(wildcard src/*.tex)
+DOC_TEX                := src/user.tex src/interface.tex
 DOC_PS         := $(patsubst src/%.tex,ps/%.ps,$(DOC_TEX))
 DOC_PDF                := $(patsubst src/%.tex,pdf/%.pdf,$(DOC_TEX))
 DOC_HTML       := $(patsubst src/%.tex,html/%/index.html,$(DOC_TEX))
@@ -36,11 +36,12 @@
        $(MAKE) $(DOC_HTML); fi
 
 python-dev-docs:
-       mkdir -p api/tools/python
+       @mkdir -v -p api/tools/python
        @if which $(DOXYGEN) 1>/dev/null 2>/dev/null; then         \
         echo "Running doxygen to generate Python tools APIs ... "; \
        $(DOXYGEN) Doxyfile;                                       \
-       $(MAKE) -C api/tools/python/latex ; fi
+       $(MAKE) -C api/tools/python/latex ; else                   \
+        echo "Doxygen not installed; skipping python-dev-docs."; fi
 
 clean:
        rm -rf .word_count *.aux *.dvi *.bbl *.blg *.glo *.idx *~ 
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface.tex
--- a/docs/src/interface.tex    Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface.tex    Thu Sep 22 17:42:01 2005
@@ -87,1084 +87,23 @@
 mechanism and policy within the system.
 
 
+%% chapter Virtual Architecture moved to architecture.tex
+\include{src/interface/architecture}
 
-\chapter{Virtual Architecture}
+%% chapter Memory moved to memory.tex
+\include{src/interface/memory}
 
-On a Xen-based system, the hypervisor itself runs in {\it ring 0}.  It
-has full access to the physical memory available in the system and is
-responsible for allocating portions of it to the domains.  Guest
-operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
-they see fit. Segmentation is used to prevent the guest OS from
-accessing the portion of the address space that is reserved for
-Xen. We expect most guest operating systems will use ring 1 for their
-own operation and place applications in ring 3.
+%% chapter Devices moved to devices.tex
+\include{src/interface/devices}
 
-In this chapter we consider the basic virtual architecture provided 
-by Xen: the basic CPU state, exception and interrupt handling, and
-time. Other aspects such as memory and device access are discussed 
-in later chapters. 
-
-\section{CPU state}
-
-All privileged state must be handled by Xen.  The guest OS has no
-direct access to CR3 and is not permitted to update privileged bits in
-EFLAGS. Guest OSes use \emph{hypercalls} to invoke operations in Xen; 
-these are analogous to system calls but occur from ring 1 to ring 0. 
-
-A list of all hypercalls is given in Appendix~\ref{a:hypercalls}. 
-
-
-
-\section{Exceptions}
-
-A virtual IDT is provided --- a domain can submit a table of trap
-handlers to Xen via the {\tt set\_trap\_table()} hypercall.  Most trap
-handlers are identical to native x86 handlers, although the page-fault
-handler is somewhat different.
-
-
-\section{Interrupts and events}
-
-Interrupts are virtualized by mapping them to \emph{events}, which are
-delivered asynchronously to the target domain using a callback
-supplied via the {\tt set\_callbacks()} hypercall.  A guest OS can map
-these events onto its standard interrupt dispatch mechanisms.  Xen is
-responsible for determining the target domain that will handle each
-physical interrupt source. For more details on the binding of event
-sources to events, see Chapter~\ref{c:devices}. 
-
-
-
-\section{Time}
-
-Guest operating systems need to be aware of the passage of both real
-(or wallclock) time and their own `virtual time' (the time for
-which they have been executing). Furthermore, Xen has a notion of 
-time which is used for scheduling. The following notions of 
-time are provided: 
-
-\begin{description}
-\item[Cycle counter time.]
-
-This provides a fine-grained time reference.  The cycle counter time is
-used to accurately extrapolate the other time references.  On SMP machines
-it is currently assumed that the cycle counter time is synchronized between
-CPUs.  The current x86-based implementation achieves this within inter-CPU
-communication latencies.
-
-\item[System time.]
-
-This is a 64-bit counter which holds the number of nanoseconds that
-have elapsed since system boot.
-
-
-\item[Wall clock time.]
-
-This is the time of day in a Unix-style {\tt struct timeval} (seconds
-and microseconds since 1 January 1970, adjusted by leap seconds).  An
-NTP client hosted by {\it domain 0} can keep this value accurate.  
-
-
-\item[Domain virtual time.]
-
-This progresses at the same pace as system time, but only while a
-domain is executing --- it stops while a domain is de-scheduled.
-Therefore the share of the CPU that a domain receives is indicated by
-the rate at which its virtual time increases.
-
-\end{description}
-
-
-Xen exports timestamps for system time and wall-clock time to guest
-operating systems through a shared page of memory.  Xen also provides
-the cycle counter time at the instant the timestamps were calculated,
-and the CPU frequency in Hertz.  This allows the guest to extrapolate
-system and wall-clock times accurately based on the current cycle
-counter time.
-
-Since all time stamps need to be updated and read \emph{atomically}
-two version numbers are also stored in the shared info page. The 
-first is incremented prior to an update, while the second is only
-incremented afterwards. Thus a guest can be sure that it read a consistent 
-state by checking the two version numbers are equal. 
-
-Xen includes a periodic ticker which sends a timer event to the
-currently executing domain every 10ms.  The Xen scheduler also sends a
-timer event whenever a domain is scheduled; this allows the guest OS
-to adjust for the time that has passed while it has been inactive.  In
-addition, Xen allows each domain to request that they receive a timer
-event sent at a specified system time by using the {\tt
-set\_timer\_op()} hypercall.  Guest OSes may use this timer to
-implement timeout values when they block.
-
-
-
-%% % akw: demoting this to a section -- not sure if there is any point
-%% % though, maybe just remove it.
-
-\section{Xen CPU Scheduling}
-
-Xen offers a uniform API for CPU schedulers.  It is possible to choose
-from a number of schedulers at boot and it should be easy to add more.
-The BVT, Atropos and Round Robin schedulers are part of the normal
-Xen distribution.  BVT provides proportional fair shares of the CPU to
-the running domains.  Atropos can be used to reserve absolute shares
-of the CPU for each domain.  Round-robin is provided as an example of
-Xen's internal scheduler API.
-
-\paragraph*{Note: SMP host support}
-Xen has always supported SMP host systems.  Domains are statically assigned to
-CPUs, either at creation time or when manually pinning to a particular CPU.
-The current schedulers then run locally on each CPU to decide which of the
-assigned domains should be run there. The user-level control software 
-can be used to perform coarse-grain load-balancing between CPUs. 
-
-
-%% More information on the characteristics and use of these schedulers is
-%% available in {\tt Sched-HOWTO.txt}.
-
-
-\section{Privileged operations}
-
-Xen exports an extended interface to privileged domains (viz.\ {\it
-  Domain 0}). This allows such domains to build and boot other domains 
-on the server, and provides control interfaces for managing 
-scheduling, memory, networking, and block devices. 
-
-
-\chapter{Memory}
-\label{c:memory} 
-
-Xen is responsible for managing the allocation of physical memory to
-domains, and for ensuring safe use of the paging and segmentation
-hardware.
-
-
-\section{Memory Allocation}
-
-
-Xen resides within a small fixed portion of physical memory; it also
-reserves the top 64MB of every virtual address space. The remaining
-physical memory is available for allocation to domains at a page
-granularity.  Xen tracks the ownership and use of each page, which
-allows it to enforce secure partitioning between domains.
-
-Each domain has a maximum and current physical memory allocation. 
-A guest OS may run a `balloon driver' to dynamically adjust its 
-current memory allocation up to its limit. 
-
-
-%% XXX SMH: I use machine and physical in the next section (which 
-%% is kinda required for consistency with code); wonder if this 
-%% section should use same terms? 
-%%
-%% Probably. 
-%%
-%% Merging this and below section at some point prob makes sense. 
-
-\section{Pseudo-Physical Memory}
-
-Since physical memory is allocated and freed on a page granularity,
-there is no guarantee that a domain will receive a contiguous stretch
-of physical memory. However most operating systems do not have good
-support for operating in a fragmented physical address space. To aid
-porting such operating systems to run on top of Xen, we make a
-distinction between \emph{machine memory} and \emph{pseudo-physical
-memory}.
-
-Put simply, machine memory refers to the entire amount of memory
-installed in the machine, including that reserved by Xen, in use by
-various domains, or currently unallocated. We consider machine memory
-to comprise a set of 4K \emph{machine page frames} numbered
-consecutively starting from 0. Machine frame numbers mean the same
-within Xen or any domain.
-
-Pseudo-physical memory, on the other hand, is a per-domain
-abstraction. It allows a guest operating system to consider its memory
-allocation to consist of a contiguous range of physical page frames
-starting at physical frame 0, despite the fact that the underlying
-machine page frames may be sparsely allocated and in any order.
-
-To achieve this, Xen maintains a globally readable {\it
-machine-to-physical} table which records the mapping from machine page
-frames to pseudo-physical ones. In addition, each domain is supplied
-with a {\it physical-to-machine} table which performs the inverse
-mapping. Clearly the machine-to-physical table has size proportional
-to the amount of RAM installed in the machine, while each
-physical-to-machine table has size proportional to the memory
-allocation of the given domain.
-
-Architecture dependent code in guest operating systems can then use
-the two tables to provide the abstraction of pseudo-physical
-memory. In general, only certain specialized parts of the operating
-system (such as page table management) need to understand the
-difference between machine and pseudo-physical addresses.
-
-\section{Page Table Updates}
-
-In the default mode of operation, Xen enforces read-only access to
-page tables and requires guest operating systems to explicitly request
-any modifications.  Xen validates all such requests and only applies
-updates that it deems safe.  This is necessary to prevent domains from
-adding arbitrary mappings to their page tables.
-
-To aid validation, Xen associates a type and reference count with each
-memory page. A page has one of the following
-mutually-exclusive types at any point in time: page directory ({\sf
-PD}), page table ({\sf PT}), local descriptor table ({\sf LDT}),
-global descriptor table ({\sf GDT}), or writable ({\sf RW}). Note that
-a guest OS may always create readable mappings of its own memory 
-regardless of its current type. 
-%%% XXX: possibly explain more about ref count 'lifecycle' here?
-This mechanism is used to
-maintain the invariants required for safety; for example, a domain
-cannot have a writable mapping to any part of a page table as this
-would require the page concerned to simultaneously be of types {\sf
-  PT} and {\sf RW}.
-
-
-%\section{Writable Page Tables}
-
-Xen also provides an alternative mode of operation in which guests
-have the illusion that their page tables are directly writable.  Of
-course this is not really the case, since Xen must still validate
-modifications to ensure secure partitioning. To this end, Xen traps
-any write attempt to a memory page of type {\sf PT} (i.e., that is
-currently part of a page table).  If such an access occurs, Xen
-temporarily allows write access to that page while at the same time
-{\em disconnecting} it from the page table that is currently in
-use. This allows the guest to safely make updates to the page because
-the newly-updated entries cannot be used by the MMU until Xen
-revalidates and reconnects the page.
-Reconnection occurs automatically in a number of situations: for
-example, when the guest modifies a different page-table page, when the
-domain is preempted, or whenever the guest uses Xen's explicit
-page-table update interfaces.
-
-Finally, Xen also supports a form of \emph{shadow page tables} in
-which the guest OS uses an independent copy of page tables which are
-unknown to the hardware (i.e.\ which are never pointed to by {\tt
-cr3}). Instead Xen propagates changes made to the guest's tables to the
-real ones, and vice versa. This is useful for logging page writes
-(e.g.\ for live migration or checkpoint). A full version of the shadow
-page tables also allows guest OS porting with less effort.
-
-\section{Segment Descriptor Tables}
-
-On boot a guest is supplied with a default GDT, which does not reside
-within its own memory allocation.  If the guest wishes to use other
-than the default `flat' ring-1 and ring-3 segments that this GDT
-provides, it must register a custom GDT and/or LDT with Xen,
-allocated from its own memory. Note that a number of GDT 
-entries are reserved by Xen -- any custom GDT must also include
-sufficient space for these entries. 
-
-For example, the following hypercall is used to specify a new GDT: 
-
-\begin{quote}
-int {\bf set\_gdt}(unsigned long *{\em frame\_list}, int {\em entries})
-
-{\em frame\_list}: An array of up to 16 machine page frames within
-which the GDT resides.  Any frame registered as a GDT frame may only
-be mapped read-only within the guest's address space (e.g., no
-writable mappings, no use as a page-table page, and so on).
-
-{\em entries}: The number of descriptor-entry slots in the GDT.  Note
-that the table must be large enough to contain Xen's reserved entries;
-thus we must have `{\em entries $>$ LAST\_RESERVED\_GDT\_ENTRY}\ '.
-Note also that, after registering the GDT, slots {\em FIRST\_} through
-{\em LAST\_RESERVED\_GDT\_ENTRY} are no longer usable by the guest and
-may be overwritten by Xen.
-\end{quote}
-
-The LDT is updated via the generic MMU update mechanism (i.e., via 
-the {\tt mmu\_update()} hypercall). 
-
-\section{Start of Day} 
-
-The start-of-day environment for guest operating systems is rather
-different to that provided by the underlying hardware. In particular,
-the processor is already executing in protected mode with paging
-enabled.
-
-{\it Domain 0} is created and booted by Xen itself. For all subsequent
-domains, the analogue of the boot-loader is the {\it domain builder},
-user-space software running in {\it domain 0}. The domain builder 
-is responsible for building the initial page tables for a domain  
-and loading its kernel image at the appropriate virtual address. 
-
-
-
-\chapter{Devices}
-\label{c:devices}
-
-Devices such as network and disk are exported to guests using a
-split device driver.  The device driver domain, which accesses the
-physical device directly also runs a {\em backend} driver, serving
-requests to that device from guests.  Each guest will use a simple
-{\em frontend} driver, to access the backend.  Communication between these
-domains is composed of two parts:  First, data is placed onto a shared
-memory page between the domains.  Second, an event channel between the
-two domains is used to pass notification that data is outstanding.
-This separation of notification from data transfer allows message
-batching, and results in very efficient device access.  
-
-Event channels are used extensively in device virtualization; each
-domain has a number of end-points or \emph{ports} each of which
-may be bound to one of the following \emph{event sources}:
-\begin{itemize} 
-  \item a physical interrupt from a real device, 
-  \item a virtual interrupt (callback) from Xen, or 
-  \item a signal from another domain 
-\end{itemize}
-
-Events are lightweight and do not carry much information beyond 
-the source of the notification. Hence when performing bulk data
-transfer, events are typically used as synchronization primitives
-over a shared memory transport. Event channels are managed via 
-the {\tt event\_channel\_op()} hypercall; for more details see
-Section~\ref{s:idc}. 
-
-This chapter focuses on some individual device interfaces
-available to Xen guests. 
-
-\section{Network I/O}
-
-Virtual network device services are provided by shared memory
-communication with a backend domain.  From the point of view of
-other domains, the backend may be viewed as a virtual ethernet switch
-element with each domain having one or more virtual network interfaces
-connected to it.
-
-\subsection{Backend Packet Handling}
-
-The backend driver is responsible for a variety of actions relating to
-the transmission and reception of packets from the physical device.
-With regard to transmission, the backend performs these key actions:
-
-\begin{itemize}
-\item {\bf Validation:} To ensure that domains do not attempt to
-  generate invalid (e.g. spoofed) traffic, the backend driver may
-  validate headers ensuring that source MAC and IP addresses match the
-  interface that they have been sent from.
-
-  Validation functions can be configured using standard firewall rules
-  ({\small{\tt iptables}} in the case of Linux).
-  
-\item {\bf Scheduling:} Since a number of domains can share a single
-  physical network interface, the backend must mediate access when
-  several domains each have packets queued for transmission.  This
-  general scheduling function subsumes basic shaping or rate-limiting
-  schemes.
-  
-\item {\bf Logging and Accounting:} The backend domain can be
-  configured with classifier rules that control how packets are
-  accounted or logged.  For example, log messages might be generated
-  whenever a domain attempts to send a TCP packet containing a SYN.
-\end{itemize}
-
-On receipt of incoming packets, the backend acts as a simple
-demultiplexer:  Packets are passed to the appropriate virtual
-interface after any necessary logging and accounting have been carried
-out.
-
-\subsection{Data Transfer}
-
-Each virtual interface uses two ``descriptor rings'', one for transmit,
-the other for receive.  Each descriptor identifies a block of contiguous
-physical memory allocated to the domain.  
-
-The transmit ring carries packets to transmit from the guest to the
-backend domain.  The return path of the transmit ring carries messages
-indicating that the contents have been physically transmitted and the
-backend no longer requires the associated pages of memory.
-
-To receive packets, the guest places descriptors of unused pages on
-the receive ring.  The backend will return received packets by
-exchanging these pages in the domain's memory with new pages
-containing the received data, and passing back descriptors regarding
-the new packets on the ring.  This zero-copy approach allows the
-backend to maintain a pool of free pages to receive packets into, and
-then deliver them to appropriate domains after examining their
-headers.
-
-%
-%Real physical addresses are used throughout, with the domain performing 
-%translation from pseudo-physical addresses if that is necessary.
-
-If a domain does not keep its receive ring stocked with empty buffers then 
-packets destined to it may be dropped.  This provides some defence against 
-receive livelock problems because an overloaded domain will cease to receive
-further data.  Similarly, on the transmit path, it provides the application
-with feedback on the rate at which packets are able to leave the system.
-
-
-Flow control on rings is achieved by including a pair of producer
-indexes on the shared ring page.  Each side will maintain a private
-consumer index indicating the next outstanding message.  In this
-manner, the domains cooperate to divide the ring into two message
-lists, one in each direction.  Notification is decoupled from the
-immediate placement of new messages on the ring; the event channel
-will be used to generate notification when {\em either} a certain
-number of outstanding messages are queued, {\em or} a specified number
-of nanoseconds have elapsed since the oldest message was placed on the
-ring.
-
-% Not sure if my version is any better -- here is what was here before:
-%% Synchronization between the backend domain and the guest is achieved using 
-%% counters held in shared memory that is accessible to both.  Each ring has
-%% associated producer and consumer indices indicating the area in the ring
-%% that holds descriptors that contain data.  After receiving {\it n} packets
-%% or {\t nanoseconds} after receiving the first packet, the hypervisor sends
-%% an event to the domain. 
-
-\section{Block I/O}
-
-All guest OS disk access goes through the virtual block device VBD
-interface.  This interface allows domains access to portions of block
-storage devices visible to the block backend device.  The VBD
-interface is a split driver, similar to the network interface
-described above.  A single shared memory ring is used between the
-frontend and backend drivers, across which read and write messages are
-sent.
-
-Any block device accessible to the backend domain, including
-network-based block (iSCSI, *NBD, etc), loopback and LVM/MD devices,
-can be exported as a VBD.  Each VBD is mapped to a device node in the
-guest, specified in the guest's startup configuration.
-
-Old (Xen 1.2) virtual disks are not supported under Xen 2.0, since
-similar functionality can be achieved using the more complete LVM
-system, which is already in widespread use.
-
-\subsection{Data Transfer}
-
-The single ring between the guest and the block backend supports three
-messages:
-
-\begin{description}
-\item [{\small {\tt PROBE}}:] Return a list of the VBDs available to this guest
-  from the backend.  The request includes a descriptor of a free page
-  into which the reply will be written by the backend.
-
-\item [{\small {\tt READ}}:] Read data from the specified block device.  The
-  front end identifies the device and location to read from and
-  attaches pages for the data to be copied to (typically via DMA from
-  the device).  The backend acknowledges completed read requests as
-  they finish.
-
-\item [{\small {\tt WRITE}}:] Write data to the specified block device.  This
-  functions essentially as {\small {\tt READ}}, except that the data moves to
-  the device instead of from it.
-\end{description}
-
-% um... some old text
-%% In overview, the same style of descriptor-ring that is used for
-%% network packets is used here.  Each domain has one ring that carries
-%% operation requests to the hypervisor and carries the results back
-%% again.
-
-%% Rather than copying data, the backend simply maps the domain's buffers
-%% in order to enable direct DMA to them.  The act of mapping the buffers
-%% also increases the reference counts of the underlying pages, so that
-%% the unprivileged domain cannot try to return them to the hypervisor,
-%% install them as page tables, or any other unsafe behaviour.
-%% %block API here 
-
-
-\chapter{Further Information} 
-
-
-If you have questions that are not answered by this manual, the
-sources of information listed below may be of interest to you.  Note
-that bug reports, suggestions and contributions related to the
-software (or the documentation) should be sent to the Xen developers'
-mailing list (address below).
-
-\section{Other documentation}
-
-If you are mainly interested in using (rather than developing for)
-Xen, the {\em Xen Users' Manual} is distributed in the {\tt docs/}
-directory of the Xen source distribution.  
-
-% Various HOWTOs are also available in {\tt docs/HOWTOS}.
-
-\section{Online references}
-
-The official Xen web site is found at:
-\begin{quote}
-{\tt http://www.cl.cam.ac.uk/Research/SRG/netos/xen/}
-\end{quote}
-
-This contains links to the latest versions of all on-line 
-documentation. 
-
-\section{Mailing lists}
-
-There are currently four official Xen mailing lists:
-
-\begin{description}
-\item[xen-devel@xxxxxxxxxxxxxxxxxxx] Used for development
-discussions and bug reports.  Subscribe at: \\
-{\small {\tt http://lists.xensource.com/xen-devel}}
-\item[xen-users@xxxxxxxxxxxxxxxxxxx] Used for installation and usage
-discussions and requests for help.  Subscribe at: \\
-{\small {\tt http://lists.xensource.com/xen-users}}
-\item[xen-announce@xxxxxxxxxxxxxxxxxxx] Used for announcements only.
-Subscribe at: \\
-{\small {\tt http://lists.xensource.com/xen-announce}}
-\item[xen-changelog@xxxxxxxxxxxxxxxxxxx]  Changelog feed
-from the unstable and 2.0 trees - developer oriented.  Subscribe at: \\
-{\small {\tt http://lists.xensource.com/xen-changelog}}
-\end{description}
-
-Of these, xen-devel is the most active.
-
-
+%% chapter Further Information moved to further_info.tex
+\include{src/interface/further_info}
 
 
 \appendix
 
-%\newcommand{\hypercall}[1]{\vspace{5mm}{\large\sf #1}}
-
-
-
-
-
-\newcommand{\hypercall}[1]{\vspace{2mm}{\sf #1}}
-
-
-
-
-
-
-\chapter{Xen Hypercalls}
-\label{a:hypercalls}
-
-Hypercalls represent the procedural interface to Xen; this appendix 
-categorizes and describes the current set of hypercalls. 
-
-\section{Invoking Hypercalls} 
-
-Hypercalls are invoked in a manner analogous to system calls in a
-conventional operating system; a software interrupt is issued which
-vectors to an entry point within Xen. On x86\_32 machines the
-instruction required is {\tt int \$82}; the (real) IDT is setup so
-that this may only be issued from within ring 1. The particular 
-hypercall to be invoked is contained in {\tt EAX} --- a list 
-mapping these values to symbolic hypercall names can be found 
-in {\tt xen/include/public/xen.h}. 
-
-On some occasions a set of hypercalls will be required to carry
-out a higher-level function; a good example is when a guest 
-operating system wishes to context switch to a new process which 
-requires updating various privileged CPU state. As an optimization
-for these cases, there is a generic mechanism to issue a set of 
-hypercalls as a batch: 
-
-\begin{quote}
-\hypercall{multicall(void *call\_list, int nr\_calls)}
-
-Execute a series of hypervisor calls; {\tt nr\_calls} is the length of
-the array of {\tt multicall\_entry\_t} structures pointed to by {\tt
-call\_list}. Each entry contains the hypercall operation code followed
-by up to 7 word-sized arguments.
-\end{quote}
-
-Note that multicalls are provided purely as an optimization; there is
-no requirement to use them when first porting a guest operating
-system.
-
-
-\section{Virtual CPU Setup} 
-
-At start of day, a guest operating system needs to setup the virtual
-CPU it is executing on. This includes installing vectors for the
-virtual IDT so that the guest OS can handle interrupts, page faults,
-etc. However the very first thing a guest OS must setup is a pair 
-of hypervisor callbacks: these are the entry points which Xen will
-use when it wishes to notify the guest OS of an occurrence. 
-
-\begin{quote}
-\hypercall{set\_callbacks(unsigned long event\_selector, unsigned long
-  event\_address, unsigned long failsafe\_selector, unsigned long
-  failsafe\_address) }
-
-Register the normal (``event'') and failsafe callbacks for 
-event processing. In each case the code segment selector and 
-address within that segment are provided. The selectors must
-have RPL 1; in XenLinux we simply use the kernel's CS for both 
-{\tt event\_selector} and {\tt failsafe\_selector}.
-
-The value {\tt event\_address} specifies the address of the guest OS's
-event handling and dispatch routine; the {\tt failsafe\_address}
-specifies a separate entry point which is used only if a fault occurs
-when Xen attempts to use the normal callback. 
-\end{quote} 
-
-
-After installing the hypervisor callbacks, the guest OS can 
-install a `virtual IDT' by using the following hypercall: 
-
-\begin{quote} 
-\hypercall{set\_trap\_table(trap\_info\_t *table)} 
-
-Install one or more entries into the per-domain 
-trap handler table (essentially a software version of the IDT). 
-Each entry in the array pointed to by {\tt table} includes the 
-exception vector number with the corresponding segment selector 
-and entry point. Most guest OSes can use the same handlers on 
-Xen as when running on the real hardware; an exception is the 
-page fault handler (exception vector 14) where a modified 
-stack-frame layout is used. 
-
-
-\end{quote} 
-
-
-
-\section{Scheduling and Timer}
-
-Domains are preemptively scheduled by Xen according to the 
-parameters installed by domain 0 (see Section~\ref{s:dom0ops}). 
-In addition, however, a domain may choose to explicitly 
-control certain behavior with the following hypercall: 
-
-\begin{quote} 
-\hypercall{sched\_op(unsigned long op)} 
-
-Request scheduling operation from hypervisor. The options are: {\it
-yield}, {\it block}, and {\it shutdown}.  {\it yield} keeps the
-calling domain runnable but may cause a reschedule if other domains
-are runnable.  {\it block} removes the calling domain from the run
-queue and causes it to sleep until an event is delivered to it.  {\it
-shutdown} is used to end the domain's execution; the caller can
-additionally specify whether the domain should reboot, halt or
-suspend.
-\end{quote} 
-
-To aid the implementation of a process scheduler within a guest OS,
-Xen provides a virtual programmable timer:
-
-\begin{quote}
-\hypercall{set\_timer\_op(uint64\_t timeout)} 
-
-Request a timer event to be sent at the specified system time (time 
-in nanoseconds since system boot). The hypercall actually passes the 
-64-bit timeout value as a pair of 32-bit values. 
-
-\end{quote} 
-
-Note that calling {\tt set\_timer\_op()} prior to {\tt sched\_op} 
-allows block-with-timeout semantics. 
-
-
-\section{Page Table Management} 
-
-Since guest operating systems have read-only access to their page 
-tables, Xen must be involved when making any changes. The following
-multi-purpose hypercall can be used to modify page-table entries, 
-update the machine-to-physical mapping table, flush the TLB, install 
-a new page-table base pointer, and more.
-
-\begin{quote} 
-\hypercall{mmu\_update(mmu\_update\_t *req, int count, int *success\_count)} 
-
-Update the page table for the domain; a set of {\tt count} updates are
-submitted for processing in a batch, with {\tt success\_count} being 
-updated to report the number of successful updates.  
-
-Each element of {\tt req[]} contains a pointer (address) and value; 
-the least significant 2-bits of the pointer are used to distinguish 
-the type of update requested as follows:
-\begin{description} 
-
-\item[\it MMU\_NORMAL\_PT\_UPDATE:] update a page directory entry or
-page table entry to the associated value; Xen will check that the
-update is safe, as described in Chapter~\ref{c:memory}.
-
-\item[\it MMU\_MACHPHYS\_UPDATE:] update an entry in the
-  machine-to-physical table. The calling domain must own the machine
-  page in question (or be privileged).
-
-\item[\it MMU\_EXTENDED\_COMMAND:] perform additional MMU operations.
-The set of additional MMU operations is considerable, and includes
-updating {\tt cr3} (or just re-installing it for a TLB flush),
-flushing the cache, installing a new LDT, or pinning \& unpinning
-page-table pages (to ensure their reference count doesn't drop to zero
-which would require a revalidation of all entries).
-
-Further extended commands are used to deal with granting and 
-acquiring page ownership; see Section~\ref{s:idc}. 
-
-
-\end{description}
-
-More details on the precise format of all commands can be 
-found in {\tt xen/include/public/xen.h}. 
-
-
-\end{quote}
-
-Explicitly updating batches of page table entries is extremely
-efficient, but can require a number of alterations to the guest
-OS. Using the writable page table mode (Chapter~\ref{c:memory}) is
-recommended for new OS ports.
-
-Regardless of which page table update mode is being used, however,
-there are some occasions (notably handling a demand page fault) where
-a guest OS will wish to modify exactly one PTE rather than a
-batch. This is catered for by the following:
-
-\begin{quote} 
-\hypercall{update\_va\_mapping(unsigned long page\_nr, unsigned long
-val, \\ unsigned long flags)}
-
-Update the currently installed PTE for the page {\tt page\_nr} to 
-{\tt val}. As with {\tt mmu\_update()}, Xen checks the modification 
-is safe before applying it. The {\tt flags} determine which kind
-of TLB flush, if any, should follow the update. 
-
-\end{quote} 
-
-Finally, sufficiently privileged domains may occasionally wish to manipulate 
-the pages of others: 
-\begin{quote}
-
-\hypercall{update\_va\_mapping\_otherdomain(unsigned long page\_nr,
-unsigned long val, unsigned long flags, uint16\_t domid)}
-
-Identical to {\tt update\_va\_mapping()} save that the pages being
-mapped must belong to the domain {\tt domid}. 
-
-\end{quote}
-
-This privileged operation is currently used by backend virtual device
-drivers to safely map pages containing I/O data. 
-
-
-
-\section{Segmentation Support}
-
-Xen allows guest OSes to install a custom GDT if they require it; 
-this is context switched transparently whenever a domain is 
-[de]scheduled.  The following hypercall is effectively a 
-`safe' version of {\tt lgdt}: 
-
-\begin{quote}
-\hypercall{set\_gdt(unsigned long *frame\_list, int entries)} 
-
-Install a global descriptor table for a domain; {\tt frame\_list} is
-an array of up to 16 machine page frames within which the GDT resides,
-with {\tt entries} being the actual number of descriptor-entry
-slots. All page frames must be mapped read-only within the guest's
-address space, and the table must be large enough to contain Xen's
-reserved entries (see {\tt xen/include/public/arch-x86\_32.h}).
-
-\end{quote}
-
-Many guest OSes will also wish to install LDTs; this is achieved by
-using {\tt mmu\_update()} with an extended command, passing the
-linear address of the LDT base along with the number of entries. No
-special safety checks are required; Xen needs to perform this task
-simply since {\tt lldt} requires CPL 0.
-
-
-Xen also allows guest operating systems to update just an 
-individual segment descriptor in the GDT or LDT:  
-
-\begin{quote}
-\hypercall{update\_descriptor(unsigned long ma, unsigned long word1,
-unsigned long word2)}
-
-Update the GDT/LDT entry at machine address {\tt ma}; the new
-8-byte descriptor is stored in {\tt word1} and {\tt word2}.
-Xen performs a number of checks to ensure the descriptor is 
-valid. 
-
-\end{quote}
-
-Guest OSes can use the above in place of context switching entire 
-LDTs (or the GDT) when the number of changing descriptors is small. 
-
-\section{Context Switching} 
-
-When a guest OS wishes to context switch between two processes, 
-it can use the page table and segmentation hypercalls described
-above to perform the bulk of the privileged work. In addition, 
-however, it will need to invoke Xen to switch the kernel (ring 1) 
-stack pointer: 
-
-\begin{quote} 
-\hypercall{stack\_switch(unsigned long ss, unsigned long esp)} 
-
-Request kernel stack switch from hypervisor; {\tt ss} is the new 
-stack segment, while {\tt esp} is the new stack pointer. 
-
-\end{quote} 
-
-A final useful hypercall for context switching allows ``lazy'' 
-save and restore of floating point state: 
-
-\begin{quote}
-\hypercall{fpu\_taskswitch(void)} 
-
-This call instructs Xen to set the {\tt TS} bit in the {\tt cr0}
-control register; this means that the next attempt to use floating
-point will cause a trap which the guest OS can handle. Typically it will
-then save/restore the FP state, and clear the {\tt TS} bit. 
-\end{quote} 
-
-This is provided as an optimization only; guest OSes can also choose
-to save and restore FP state on all context switches for simplicity. 
-
-
-\section{Physical Memory Management}
-
-As mentioned previously, each domain has a maximum and current 
-memory allocation. The maximum allocation, set at domain creation 
-time, cannot be modified. However a domain can choose to reduce 
-and subsequently grow its current allocation by using the
-following call: 
-
-\begin{quote} 
-\hypercall{dom\_mem\_op(unsigned int op, unsigned long *extent\_list,
-  unsigned long nr\_extents, unsigned int extent\_order)}
-
-Increase or decrease current memory allocation (as determined by 
-the value of {\tt op}). Each invocation provides a list of 
-extents each of which is $2^s$ pages in size, 
-where $s$ is the value of {\tt extent\_order}. 
-
-\end{quote} 
-
-In addition to simply reducing or increasing the current memory
-allocation via a `balloon driver', this call is also useful for 
-obtaining contiguous regions of machine memory when required (e.g. 
-for certain PCI devices, or if using superpages).  
-
-
-\section{Inter-Domain Communication}
-\label{s:idc} 
-
-Xen provides a simple asynchronous notification mechanism via
-\emph{event channels}. Each domain has a set of end-points (or
-\emph{ports}) which may be bound to an event source (e.g. a physical
-IRQ, a virtual IRQ, or a port in another domain). When a pair of
-end-points in two different domains are bound together, then a `send'
-operation on one will cause an event to be received by the destination
-domain.
-
-The control and use of event channels involves the following hypercall: 
-
-\begin{quote}
-\hypercall{event\_channel\_op(evtchn\_op\_t *op)} 
-
-Inter-domain event-channel management; {\tt op} is a discriminated 
-union which allows the following 7 operations: 
-
-\begin{description} 
-
-\item[\it alloc\_unbound:] allocate a free (unbound) local
-  port and prepare for connection from a specified domain. 
-\item[\it bind\_virq:] bind a local port to a virtual 
-IRQ; any particular VIRQ can be bound to at most one port per domain. 
-\item[\it bind\_pirq:] bind a local port to a physical IRQ;
-once more, a given pIRQ can be bound to at most one port per
-domain. Furthermore the calling domain must be sufficiently
-privileged.
-\item[\it bind\_interdomain:] construct an interdomain event 
-channel; in general, the target domain must have previously allocated 
-an unbound port for this channel, although this can be bypassed by 
-privileged domains during domain setup. 
-\item[\it close:] close an interdomain event channel. 
-\item[\it send:] send an event to the remote end of an 
-interdomain event channel. 
-\item[\it status:] determine the current status of a local port. 
-\end{description} 
-
-For more details see
-{\tt xen/include/public/event\_channel.h}. 
-
-\end{quote} 
-
-Event channels are the fundamental communication primitive between 
-Xen domains and seamlessly support SMP. However they provide little
-bandwidth for communication {\sl per se}, and hence are typically 
-married with a piece of shared memory to produce effective and 
-high-performance inter-domain communication. 
-
-Safe sharing of memory pages between guest OSes is carried out by
-granting access on a per page basis to individual domains. This is
-achieved by using the {\tt grant\_table\_op()} hypercall.
-
-\begin{quote}
-\hypercall{grant\_table\_op(unsigned int cmd, void *uop, unsigned int count)}
-
-Grant or remove access to a particular page to a particular domain. 
-
-\end{quote} 
-
-This is not currently widely in use by guest operating systems, but 
-we intend to integrate support more fully in the near future. 
-
-\section{PCI Configuration} 
-
-Domains with physical device access (i.e.\ driver domains) receive
-limited access to certain PCI devices (bus address space and
-interrupts). However many guest operating systems attempt to 
-determine the PCI configuration by directly accessing the PCI BIOS, 
-which cannot be allowed for safety. 
-
-Instead, Xen provides the following hypercall: 
-
-\begin{quote}
-\hypercall{physdev\_op(void *physdev\_op)}
-
-Perform a PCI configuration operation; depending on the value 
-of {\tt physdev\_op} this can be a PCI config read, a PCI config 
-write, or a small number of other queries. 
-
-\end{quote} 
-
-
-For examples of using {\tt physdev\_op()}, see the 
-Xen-specific PCI code in the linux sparse tree. 
-
-\section{Administrative Operations}
-\label{s:dom0ops}
-
-A large number of control operations are available to a sufficiently
-privileged domain (typically domain 0). These allow the creation and
-management of new domains, for example. A complete list is given 
-below: for more details on any or all of these, please see 
-{\tt xen/include/public/dom0\_ops.h} 
-
-
-\begin{quote}
-\hypercall{dom0\_op(dom0\_op\_t *op)} 
-
-Administrative domain operations for domain management. The options are:
-
-\begin{description} 
-\item [\it DOM0\_CREATEDOMAIN:] create a new domain
-
-\item [\it DOM0\_PAUSEDOMAIN:] remove a domain from the scheduler run 
-queue. 
-
-\item [\it DOM0\_UNPAUSEDOMAIN:] mark a paused domain as schedulable
-  once again. 
-
-\item [\it DOM0\_DESTROYDOMAIN:] deallocate all resources associated
-with a domain
-
-\item [\it DOM0\_GETMEMLIST:] get list of pages used by the domain
-
-\item [\it DOM0\_SCHEDCTL:]
-
-\item [\it DOM0\_ADJUSTDOM:] adjust scheduling priorities for domain
-
-\item [\it DOM0\_BUILDDOMAIN:] do final guest OS setup for domain
-
-\item [\it DOM0\_GETDOMAINFO:] get statistics about the domain
-
-\item [\it DOM0\_GETPAGEFRAMEINFO:] 
-
-\item [\it DOM0\_GETPAGEFRAMEINFO2:]
-
-\item [\it DOM0\_IOPL:] set I/O privilege level
-
-\item [\it DOM0\_MSR:] read or write model specific registers
-
-\item [\it DOM0\_DEBUG:] interactively invoke the debugger
-
-\item [\it DOM0\_SETTIME:] set system time
-
-\item [\it DOM0\_READCONSOLE:] read console content from hypervisor buffer ring
-
-\item [\it DOM0\_PINCPUDOMAIN:] pin domain to a particular CPU
-
-\item [\it DOM0\_GETTBUFS:] get information about the size and location of
-                      the trace buffers (only on trace-buffer enabled builds)
-
-\item [\it DOM0\_PHYSINFO:] get information about the host machine
-
-\item [\it DOM0\_PCIDEV\_ACCESS:] modify PCI device access permissions
-
-\item [\it DOM0\_SCHED\_ID:] get the ID of the current Xen scheduler
-
-\item [\it DOM0\_SHADOW\_CONTROL:] switch between shadow page-table modes
-
-\item [\it DOM0\_SETDOMAININITIALMEM:] set initial memory allocation of a domain
-
-\item [\it DOM0\_SETDOMAINMAXMEM:] set maximum memory allocation of a domain
-
-\item [\it DOM0\_SETDOMAINVMASSIST:] set domain VM assist options
-\end{description} 
-\end{quote} 
-
-Most of the above are best understood by looking at the code 
-implementing them (in {\tt xen/common/dom0\_ops.c}) and in 
-the user-space tools that use them (mostly in {\tt tools/libxc}). 
-
-\section{Debugging Hypercalls} 
-
-A few additional hypercalls are mainly useful for debugging: 
-
-\begin{quote} 
-\hypercall{console\_io(int cmd, int count, char *str)}
-
-Use Xen to interact with the console; operations are:
-
-{\it CONSOLEIO\_write}: Output count characters from buffer str.
-
-{\it CONSOLEIO\_read}: Input at most count characters into buffer str.
-\end{quote} 
-
-A pair of hypercalls allows access to the underlying debug registers: 
-\begin{quote}
-\hypercall{set\_debugreg(int reg, unsigned long value)}
-
-Set debug register {\tt reg} to {\tt value} 
-
-\hypercall{get\_debugreg(int reg)}
-
-Return the contents of the debug register {\tt reg}
-\end{quote}
-
-And finally: 
-\begin{quote}
-\hypercall{xen\_version(int cmd)}
-
-Request Xen version number.
-\end{quote} 
-
-This is useful to ensure that user-space tools are in sync 
-with the underlying hypervisor. 
-
-\section{Deprecated Hypercalls}
-
-Xen is under constant development and refinement; as such there 
-are plans to improve the way in which various pieces of functionality 
-are exposed to guest OSes. 
-
-\begin{quote} 
-\hypercall{vm\_assist(unsigned int cmd, unsigned int type)}
-
-Toggle various memory management modes (in particular writable page
-tables and superpage support). 
-
-\end{quote} 
-
-This is likely to be replaced with mode values in the shared 
-information page since this is more resilient for resumption 
-after migration or checkpoint. 
-
-
-
-
-
-
+%% chapter hypercalls moved to hypercalls.tex
+\include{src/interface/hypercalls}
 
 
 %% 
@@ -1173,279 +112,9 @@
 %% new scheduler... not clear how many of them there are...
 %%
 
-\begin{comment}
-
-\chapter{Scheduling API}  
-
-The scheduling API is used by both the schedulers described above and should
-also be used by any new schedulers.  It provides a generic interface and also
-implements much of the ``boilerplate'' code.
-
-Schedulers conforming to this API are described by the following
-structure:
-
-\begin{verbatim}
-struct scheduler
-{
-    char *name;             /* full name for this scheduler      */
-    char *opt_name;         /* option name for this scheduler    */
-    unsigned int sched_id;  /* ID for this scheduler             */
-
-    int          (*init_scheduler) ();
-    int          (*alloc_task)     (struct task_struct *);
-    void         (*add_task)       (struct task_struct *);
-    void         (*free_task)      (struct task_struct *);
-    void         (*rem_task)       (struct task_struct *);
-    void         (*wake_up)        (struct task_struct *);
-    void         (*do_block)       (struct task_struct *);
-    task_slice_t (*do_schedule)    (s_time_t);
-    int          (*control)        (struct sched_ctl_cmd *);
-    int          (*adjdom)         (struct task_struct *,
-                                    struct sched_adjdom_cmd *);
-    s32          (*reschedule)     (struct task_struct *);
-    void         (*dump_settings)  (void);
-    void         (*dump_cpu_state) (int);
-    void         (*dump_runq_el)   (struct task_struct *);
-};
-\end{verbatim}
-
-The only method that {\em must} be implemented is
-{\tt do\_schedule()}.  However, if there is not some implementation for the
-{\tt wake\_up()} method then waking tasks will not get put on the runqueue!
-
-The fields of the above structure are described in more detail below.
-
-\subsubsection{name}
-
-The name field should point to a descriptive ASCII string.
-
-\subsubsection{opt\_name}
-
-This field is the value of the {\tt sched=} boot-time option that will select
-this scheduler.
-
-\subsubsection{sched\_id}
-
-This is an integer that uniquely identifies this scheduler.  There should be a
-macro corresponding to this scheduler ID in {\tt <xen/sched-if.h>}.
-
-\subsubsection{init\_scheduler}
-
-\paragraph*{Purpose}
-
-This is a function for performing any scheduler-specific initialisation.  For
-instance, it might allocate memory for per-CPU scheduler data and initialise it
-appropriately.
-
-\paragraph*{Call environment}
-
-This function is called after the initialisation performed by the generic
-layer.  The function is called exactly once, for the scheduler that has been
-selected.
-
-\paragraph*{Return values}
-
-This should return negative on failure --- this will cause an
-immediate panic and the system will fail to boot.
-
-\subsubsection{alloc\_task}
-
-\paragraph*{Purpose}
-Called when a {\tt task\_struct} is allocated by the generic scheduler
-layer.  A particular scheduler implementation may use this method to
-allocate per-task data for this task.  It may use the {\tt
-sched\_priv} pointer in the {\tt task\_struct} to point to this data.
-
-\paragraph*{Call environment}
-The generic layer guarantees that the {\tt sched\_priv} field will
-remain intact from the time this method is called until the task is
-deallocated (so long as the scheduler implementation does not change
-it explicitly!).
-
-\paragraph*{Return values}
-Negative on failure.
-
-\subsubsection{add\_task}
-
-\paragraph*{Purpose}
-
-Called when a task is initially added by the generic layer.
-
-\paragraph*{Call environment}
-
-The fields in the {\tt task\_struct} are now filled out and available for use.
-Schedulers should implement appropriate initialisation of any per-task private
-information in this method.
-
-\subsubsection{free\_task}
-
-\paragraph*{Purpose}
-
-Schedulers should free the space used by any associated private data
-structures.
-
-\paragraph*{Call environment}
-
-This is called when a {\tt task\_struct} is about to be deallocated.
-The generic layer will have done generic task removal operations and
-(if implemented) called the scheduler's {\tt rem\_task} method before
-this method is called.
-
-\subsubsection{rem\_task}
-
-\paragraph*{Purpose}
-
-This is called when a task is being removed from scheduling (but is
-not yet being freed).
-
-\subsubsection{wake\_up}
-
-\paragraph*{Purpose}
-
-Called when a task is woken up, this method should put the task on the runqueue
-(or do the scheduler-specific equivalent action).
-
-\paragraph*{Call environment}
-
-The task is already set to state RUNNING.
-
-\subsubsection{do\_block}
-
-\paragraph*{Purpose}
-
-This function is called when a task is blocked.  This function should
-not remove the task from the runqueue.
-
-\paragraph*{Call environment}
-
-The EVENTS\_MASTER\_ENABLE\_BIT is already set and the task state changed to
-TASK\_INTERRUPTIBLE on entry to this method.  A call to the {\tt
-  do\_schedule} method will be made after this method returns, in
-order to select the next task to run.
-
-\subsubsection{do\_schedule}
-
-This method must be implemented.
-
-\paragraph*{Purpose}
-
-The method is called each time a new task must be chosen for scheduling on the
-current CPU.  The current time is passed as the single argument (the current
-task can be found using the {\tt current} macro).
-
-This method should select the next task to run on this CPU and set its minimum
-time to run as well as returning the data described below.
-
-This method should also take the appropriate action if the previous
-task has blocked, e.g. removing it from the runqueue.
-
-\paragraph*{Call environment}
-
-The other fields in the {\tt task\_struct} are updated by the generic layer,
-which also performs all Xen-specific tasks and performs the actual task switch
-(unless the previous task has been chosen again).
-
-This method is called with the {\tt schedule\_lock} held for the current CPU
-and local interrupts disabled.
-
-\paragraph*{Return values}
-
-Must return a {\tt struct task\_slice} describing what task to run and how long
-for (at maximum).
-
-\subsubsection{control}
-
-\paragraph*{Purpose}
-
-This method is called for global scheduler control operations.  It takes a
-pointer to a {\tt struct sched\_ctl\_cmd}, which it should either
-source data from or populate with data, depending on the value of the
-{\tt direction} field.
-
-\paragraph*{Call environment}
-
-The generic layer guarantees that when this method is called, the
-caller selected the correct scheduler ID, hence the scheduler's
-implementation does not need to sanity-check these parts of the call.
-
-\paragraph*{Return values}
-
-This function should return the value to be passed back to user space, hence it
-should either be 0 or an appropriate errno value.
-
-\subsubsection{sched\_adjdom}
-
-\paragraph*{Purpose}
-
-This method is called to adjust the scheduling parameters of a particular
-domain, or to query their current values.  The function should check
-the {\tt direction} field of the {\tt sched\_adjdom\_cmd} it receives in
-order to determine which of these operations is being performed.
-
-\paragraph*{Call environment}
-
-The generic layer guarantees that the caller has specified the correct
-control interface version and scheduler ID and that the supplied {\tt
-task\_struct} will not be deallocated during the call (hence it is not
-necessary to {\tt get\_task\_struct}).
-
-\paragraph*{Return values}
-
-This function should return the value to be passed back to user space, hence it
-should either be 0 or an appropriate errno value.
-
-\subsubsection{reschedule}
-
-\paragraph*{Purpose}
-
-This method is called to determine if a reschedule is required as a result of a
-particular task.
-
-\paragraph*{Call environment}
-The generic layer will cause a reschedule if the current domain is the idle
-task or it has exceeded its minimum time slice before a reschedule.  The
-generic layer guarantees that the task passed is not currently running but is
-on the runqueue.
-
-\paragraph*{Return values}
-
-Should return a mask of CPUs to cause a reschedule on.
-
-\subsubsection{dump\_settings}
-
-\paragraph*{Purpose}
-
-If implemented, this should dump any private global settings for this
-scheduler to the console.
-
-\paragraph*{Call environment}
-
-This function is called with interrupts enabled.
-
-\subsubsection{dump\_cpu\_state}
-
-\paragraph*{Purpose}
-
-This method should dump any private settings for the specified CPU.
-
-\paragraph*{Call environment}
-
-This function is called with interrupts disabled and the {\tt schedule\_lock}
-for the specified CPU held.
-
-\subsubsection{dump\_runq\_el}
-
-\paragraph*{Purpose}
-
-This method should dump any private settings for the specified task.
-
-\paragraph*{Call environment}
-
-This function is called with interrupts disabled and the {\tt schedule\_lock}
-for the task's CPU held.
-
-\end{comment} 
-
+%% \include{src/interface/scheduling}
+%% scheduling information moved to scheduling.tex
+%% still commented out
 
 
 
@@ -1457,74 +126,9 @@
 %% (and/or kip's stuff?) and write about that instead? 
 %%
 
-\begin{comment} 
-
-\chapter{Debugging}
-
-Xen provides tools for debugging both Xen and guest OSes.  Currently, the
-Pervasive Debugger provides a GDB stub, which provides facilities for symbolic
-debugging of Xen itself and of OS kernels running on top of Xen.  The Trace
-Buffer provides a lightweight means to log data about Xen's internal state and
-behaviour at runtime, for later analysis.
-
-\section{Pervasive Debugger}
-
-Information on using the pervasive debugger is available in pdb.txt.
-
-
-\section{Trace Buffer}
-
-The trace buffer provides a means to observe Xen's operation from domain 0.
-Trace events, inserted at key points in Xen's code, record data that can be
-read by the {\tt xentrace} tool.  Recording these events has a low overhead
-and hence the trace buffer may be useful for debugging timing-sensitive
-behaviours.
-
-\subsection{Internal API}
-
-To use the trace buffer functionality from within Xen, you must {\tt \#include
-<xen/trace.h>}, which contains definitions related to the trace buffer.  Trace
-events are inserted into the buffer using the {\tt TRACE\_xD} ({\tt x} = 0, 1,
-2, 3, 4 or 5) macros.  These all take an event number, plus {\tt x} additional
-(32-bit) data as their arguments.  For trace buffer-enabled builds of Xen these
-will insert the event ID and data into the trace buffer, along with the current
-value of the CPU cycle-counter.  For builds without the trace buffer enabled,
-the macros expand to no-ops and thus can be left in place without incurring
-overheads.
-
-\subsection{Trace-enabled builds}
-
-By default, the trace buffer is enabled only in debug builds (i.e. {\tt NDEBUG}
-is not defined).  It can be enabled separately by defining {\tt TRACE\_BUFFER},
-either in {\tt <xen/config.h>} or on the gcc command line.
-
-The size (in pages) of the per-CPU trace buffers can be specified using the
-{\tt tbuf\_size=n } boot parameter to Xen.  If the size is set to 0, the trace
-buffers will be disabled.
-
-\subsection{Dumping trace data}
-
-When running a trace buffer build of Xen, trace data are written continuously
-into the buffer data areas, with newer data overwriting older data.  This data
-can be captured using the {\tt xentrace} program in domain 0.
-
-The {\tt xentrace} tool uses {\tt /dev/mem} in domain 0 to map the trace
-buffers into its address space.  It then periodically polls all the buffers for
-new data, dumping out any new records from each buffer in turn.  As a result,
-for machines with multiple (logical) CPUs, the trace buffer output will not be
-in overall chronological order.
-
-The output from {\tt xentrace} can be post-processed using {\tt
-xentrace\_cpusplit} (used to split trace data out into per-cpu log files) and
-{\tt xentrace\_format} (used to pretty-print trace data).  For the predefined
-trace points, there is an example format file in {\tt tools/xentrace/formats }.
-
-For more information, see the manual pages for {\tt xentrace}, {\tt
-xentrace\_format} and {\tt xentrace\_cpusplit}.
-
-\end{comment} 
-
-
+%% \include{src/interface/debugging}
+%% debugging information moved to debugging.tex
+%% still commented out
 
 
 \end{document}
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user.tex
--- a/docs/src/user.tex Thu Sep 22 17:34:14 2005
+++ b/docs/src/user.tex Thu Sep 22 17:42:01 2005
@@ -59,1803 +59,36 @@
 \renewcommand{\floatpagefraction}{.8}
 \setstretch{1.1}
 
+
 \part{Introduction and Tutorial}
-\chapter{Introduction}
-
-Xen is a {\em paravirtualising} virtual machine monitor (VMM), or
-`hypervisor', for the x86 processor architecture.  Xen can securely
-execute multiple virtual machines on a single physical system with
-close-to-native performance.  The virtual machine technology
-facilitates enterprise-grade functionality, including:
-
-\begin{itemize}
-\item Virtual machines with performance close to native
-  hardware.
-\item Live migration of running virtual machines between physical hosts.
-\item Excellent hardware support (supports most Linux device drivers).
-\item Sandboxed, restartable device drivers.
-\end{itemize}
-
-Paravirtualisation permits very high performance virtualisation,
-even on architectures like x86 that are traditionally
-very hard to virtualise.
-The drawback of this approach is that it requires operating systems to
-be {\em ported} to run on Xen.  Porting an OS to run on Xen is similar
-to supporting a new hardware platform, however the process
-is simplified because the paravirtual machine architecture is very
-similar to the underlying native hardware. Even though operating system
-kernels must explicitly support Xen, a key feature is that user space
-applications and libraries {\em do not} require modification.
-
-Xen support is available for increasingly many operating systems:
-right now, Linux 2.4, Linux 2.6 and NetBSD are available for Xen 2.0.
-A FreeBSD port is undergoing testing and will be incorporated into the
-release soon. Other OS ports, including Plan 9, are in progress.  We
-hope that that arch-xen patches will be incorporated into the
-mainstream releases of these operating systems in due course (as has
-already happened for NetBSD).
-
-Possible usage scenarios for Xen include:
-\begin{description}
-\item [Kernel development.] Test and debug kernel modifications in a
-      sandboxed virtual machine --- no need for a separate test
-      machine.
-\item [Multiple OS configurations.] Run multiple operating systems
-      simultaneously, for instance for compatibility or QA purposes.
-\item [Server consolidation.] Move multiple servers onto a single
-      physical host with performance and fault isolation provided at
-      virtual machine boundaries. 
-\item [Cluster computing.] Management at VM granularity provides more
-      flexibility than separately managing each physical host, but
-      better control and isolation than single-system image solutions, 
-      particularly by using live migration for load balancing. 
-\item [Hardware support for custom OSes.] Allow development of new OSes
-      while benefiting from the wide-ranging hardware support of
-      existing OSes such as Linux.
-\end{description}
-
-\section{Structure of a Xen-Based System}
-
-A Xen system has multiple layers, the lowest and most privileged of
-which is Xen itself. 
-Xen in turn may host multiple {\em guest} operating systems, each of
-which is executed within a secure virtual machine (in Xen terminology,
-a {\em domain}). Domains are scheduled by Xen to make effective use of
-the available physical CPUs.  Each guest OS manages its own
-applications, which includes responsibility for scheduling each
-application within the time allotted to the VM by Xen.
-
-The first domain, {\em domain 0}, is created automatically when the
-system boots and has special management privileges. Domain 0 builds
-other domains and manages their virtual devices. It also performs
-administrative tasks such as suspending, resuming and migrating other
-virtual machines.
-
-Within domain 0, a process called \emph{xend} runs to manage the system.
-\Xend is responsible for managing virtual machines and providing access
-to their consoles.  Commands are issued to \xend over an HTTP
-interface, either from a command-line tool or from a web browser.
-
-\section{Hardware Support}
-
-Xen currently runs only on the x86 architecture, requiring a `P6' or
-newer processor (e.g. Pentium Pro, Celeron, Pentium II, Pentium III,
-Pentium IV, Xeon, AMD Athlon, AMD Duron).  Multiprocessor machines are
-supported, and we also have basic support for HyperThreading (SMT),
-although this remains a topic for ongoing research. A port
-specifically for x86/64 is in progress, although Xen already runs on
-such systems in 32-bit legacy mode. In addition a port to the IA64
-architecture is approaching completion. We hope to add other
-architectures such as PPC and ARM in due course.
-
-
-Xen can currently use up to 4GB of memory.  It is possible for x86
-machines to address up to 64GB of physical memory but there are no
-current plans to support these systems: The x86/64 port is the
-planned route to supporting larger memory sizes.
-
-Xen offloads most of the hardware support issues to the guest OS
-running in Domain~0.  Xen itself contains only the code required to
-detect and start secondary processors, set up interrupt routing, and
-perform PCI bus enumeration.  Device drivers run within a privileged
-guest OS rather than within Xen itself. This approach provides
-compatibility with the majority of device hardware supported by Linux.
-The default XenLinux build contains support for relatively modern
-server-class network and disk hardware, but you can add support for
-other hardware by configuring your XenLinux kernel in the normal way.
-
-\section{History}
-
-Xen was originally developed by the Systems Research Group at the
-University of Cambridge Computer Laboratory as part of the XenoServers
-project, funded by the UK-EPSRC.
-XenoServers aim to provide a `public infrastructure for
-global distributed computing', and Xen plays a key part in that,
-allowing us to efficiently partition a single machine to enable
-multiple independent clients to run their operating systems and
-applications in an environment providing protection, resource
-isolation and accounting.  The project web page contains further
-information along with pointers to papers and technical reports:
-\path{http://www.cl.cam.ac.uk/xeno} 
-
-Xen has since grown into a fully-fledged project in its own right,
-enabling us to investigate interesting research issues regarding the
-best techniques for virtualising resources such as the CPU, memory,
-disk and network.  The project has been bolstered by support from
-Intel Research Cambridge, and HP Labs, who are now working closely
-with us.
-
-Xen was first described in a paper presented at SOSP in
-2003\footnote{\tt
-http://www.cl.cam.ac.uk/netos/papers/2003-xensosp.pdf}, and the first
-public release (1.0) was made that October.  Since then, Xen has
-significantly matured and is now used in production scenarios on
-many sites.
-
-Xen 2.0 features greatly enhanced hardware support, configuration
-flexibility, usability and a larger complement of supported operating
-systems. This latest release takes Xen a step closer to becoming the 
-definitive open source solution for virtualisation.
-
-\chapter{Installation}
-
-The Xen distribution includes three main components: Xen itself, ports
-of Linux 2.4 and 2.6 and NetBSD to run on Xen, and the user-space
-tools required to manage a Xen-based system.  This chapter describes
-how to install the Xen 2.0 distribution from source.  Alternatively,
-there may be pre-built packages available as part of your operating
-system distribution.
-
-\section{Prerequisites}
-\label{sec:prerequisites}
-
-The following is a full list of prerequisites.  Items marked `$\dag$'
-are required by the \xend control tools, and hence required if you
-want to run more than one virtual machine; items marked `$*$' are only
-required if you wish to build from source.
-\begin{itemize}
-\item A working Linux distribution using the GRUB bootloader and
-running on a P6-class (or newer) CPU.
-\item [$\dag$] The \path{iproute2} package. 
-\item [$\dag$] The Linux bridge-utils\footnote{Available from 
-{\tt http://bridge.sourceforge.net}} (e.g., \path{/sbin/brctl})
-\item [$\dag$] An installation of Twisted v1.3 or
-above\footnote{Available from {\tt
-http://www.twistedmatrix.com}}. There may be a binary package
-available for your distribution; alternatively it can be installed by
-running `{\sl make install-twisted}' in the root of the Xen source
-tree.
-\item [$*$] Build tools (gcc v3.2.x or v3.3.x, binutils, GNU make).
-\item [$*$] Development installation of libcurl (e.g., libcurl-devel) 
-\item [$*$] Development installation of zlib (e.g., zlib-dev).
-\item [$*$] Development installation of Python v2.2 or later (e.g., python-dev).
-\item [$*$] \LaTeX and transfig are required to build the documentation.
-\end{itemize}
-
-Once you have satisfied the relevant prerequisites, you can 
-now install either a binary or source distribution of Xen. 
-
-\section{Installing from Binary Tarball} 
-
-Pre-built tarballs are available for download from the Xen 
-download page
-\begin{quote} 
-{\tt http://xen.sf.net}
-\end{quote} 
-
-Once you've downloaded the tarball, simply unpack and install: 
-\begin{verbatim}
-# tar zxvf xen-2.0-install.tgz
-# cd xen-2.0-install
-# sh ./install.sh 
-\end{verbatim} 
-
-Once you've installed the binaries you need to configure
-your system as described in Section~\ref{s:configure}. 
-
-\section{Installing from Source} 
-
-This section describes how to obtain, build, and install 
-Xen from source. 
-
-\subsection{Obtaining the Source} 
-
-The Xen source tree is available as either a compressed source tar
-ball or as a clone of our master BitKeeper repository.
-
-\begin{description} 
-\item[Obtaining the Source Tarball]\mbox{} \\  
-Stable versions (and daily snapshots) of the Xen source tree are
-available as compressed tarballs from the Xen download page
-\begin{quote} 
-{\tt http://xen.sf.net}
-\end{quote} 
-
-\item[Using BitKeeper]\mbox{} \\  
-If you wish to install Xen from a clone of our latest BitKeeper
-repository then you will need to install the BitKeeper tools.
-Download instructions for BitKeeper can be obtained by filling out the
-form at:
-
-\begin{quote} 
-{\tt http://www.bitmover.com/cgi-bin/download.cgi}
-\end{quote}
-The public master BK repository for the 2.0 release lives at: 
-\begin{quote}
-{\tt bk://xen.bkbits.net/xen-2.0.bk}  
-\end{quote} 
-You can use BitKeeper to
-download it and keep it updated with the latest features and fixes.
-
-Change to the directory in which you want to put the source code, then
-run:
-\begin{verbatim}
-# bk clone bk://xen.bkbits.net/xen-2.0.bk
-\end{verbatim}
-
-Under your current directory, a new directory named \path{xen-2.0.bk}
-has been created, which contains all the source code for Xen, the OS
-ports, and the control tools. You can update your repository with the
-latest changes at any time by running:
-\begin{verbatim}
-# cd xen-2.0.bk # to change into the local repository
-# bk pull       # to update the repository
-\end{verbatim}
-\end{description} 
-
-%\section{The distribution}
-%
-%The Xen source code repository is structured as follows:
-%
-%\begin{description}
-%\item[\path{tools/}] Xen node controller daemon (Xend), command line tools, 
-%  control libraries
-%\item[\path{xen/}] The Xen VMM.
-%\item[\path{linux-*-xen-sparse/}] Xen support for Linux.
-%\item[\path{linux-*-patches/}] Experimental patches for Linux.
-%\item[\path{netbsd-*-xen-sparse/}] Xen support for NetBSD.
-%\item[\path{docs/}] Various documentation files for users and developers.
-%\item[\path{extras/}] Bonus extras.
-%\end{description}
-
-\subsection{Building from Source} 
-
-The top-level Xen Makefile includes a target `world' that will do the
-following:
-
-\begin{itemize}
-\item Build Xen
-\item Build the control tools, including \xend
-\item Download (if necessary) and unpack the Linux 2.6 source code,
-      and patch it for use with Xen
-\item Build a Linux kernel to use in domain 0 and a smaller
-      unprivileged kernel, which can optionally be used for
-      unprivileged virtual machines.
-\end{itemize}
-
-
-After the build has completed you should have a top-level 
-directory called \path{dist/} in which all resulting targets 
-will be placed; of particular interest are the two kernels 
-XenLinux kernel images, one with a `-xen0' extension
-which contains hardware device drivers and drivers for Xen's virtual
-devices, and one with a `-xenU' extension that just contains the
-virtual ones. These are found in \path{dist/install/boot/} along
-with the image for Xen itself and the configuration files used
-during the build. 
-
-The NetBSD port can be built using: 
-\begin{quote}
-\begin{verbatim}
-# make netbsd20
-\end{verbatim} 
-\end{quote} 
-NetBSD port is built using a snapshot of the netbsd-2-0 cvs branch.
-The snapshot is downloaded as part of the build process, if it is not
-yet present in the \path{NETBSD\_SRC\_PATH} search path.  The build
-process also downloads a toolchain which includes all the tools
-necessary to build the NetBSD kernel under Linux.
-
-To customize further the set of kernels built you need to edit
-the top-level Makefile. Look for the line: 
-
-\begin{quote}
-\begin{verbatim}
-KERNELS ?= mk.linux-2.6-xen0 mk.linux-2.6-xenU
-\end{verbatim} 
-\end{quote} 
-
-You can edit this line to include any set of operating system kernels
-which have configurations in the top-level \path{buildconfigs/}
-directory, for example \path{mk.linux-2.4-xenU} to build a Linux 2.4
-kernel containing only virtual device drivers.
-
-%% Inspect the Makefile if you want to see what goes on during a build.
-%% Building Xen and the tools is straightforward, but XenLinux is more
-%% complicated.  The makefile needs a `pristine' Linux kernel tree to which
-%% it will then add the Xen architecture files.  You can tell the
-%% makefile the location of the appropriate Linux compressed tar file by
-%% setting the LINUX\_SRC environment variable, e.g. \\
-%% \verb!# LINUX_SRC=/tmp/linux-2.6.11.tar.bz2 make world! \\ or by
-%% placing the tar file somewhere in the search path of {\tt
-%% LINUX\_SRC\_PATH} which defaults to `{\tt .:..}'.  If the makefile
-%% can't find a suitable kernel tar file it attempts to download it from
-%% kernel.org (this won't work if you're behind a firewall).
-
-%% After untaring the pristine kernel tree, the makefile uses the {\tt
-%% mkbuildtree} script to add the Xen patches to the kernel. 
-
-
-%% The procedure is similar to build the Linux 2.4 port: \\
-%% \verb!# LINUX_SRC=/path/to/linux2.4/source make linux24!
-
-
-%% \framebox{\parbox{5in}{
-%% {\bf Distro specific:} \\
-%% {\it Gentoo} --- if not using udev (most installations, currently), you'll need
-%% to enable devfs and devfs mount at boot time in the xen0 config.
-%% }}
-
-\subsection{Custom XenLinux Builds}
-
-% If you have an SMP machine you may wish to give the {\tt '-j4'}
-% argument to make to get a parallel build.
-
-If you wish to build a customized XenLinux kernel (e.g. to support
-additional devices or enable distribution-required features), you can
-use the standard Linux configuration mechanisms, specifying that the
-architecture being built for is \path{xen}, e.g:
-\begin{quote}
-\begin{verbatim} 
-# cd linux-2.6.11-xen0 
-# make ARCH=xen xconfig 
-# cd ..
-# make
-\end{verbatim} 
-\end{quote} 
-
-You can also copy an existing Linux configuration (\path{.config}) 
-into \path{linux-2.6.11-xen0} and execute:  
-\begin{quote}
-\begin{verbatim} 
-# make ARCH=xen oldconfig 
-\end{verbatim} 
-\end{quote} 
-
-You may be prompted with some Xen-specific options; we 
-advise accepting the defaults for these options.
-
-Note that the only difference between the two types of Linux kernel
-that are built is the configuration file used for each.  The "U"
-suffixed (unprivileged) versions don't contain any of the physical
-hardware device drivers, leading to a 30\% reduction in size; hence
-you may prefer these for your non-privileged domains.  The `0'
-suffixed privileged versions can be used to boot the system, as well
-as in driver domains and unprivileged domains.
-
-
-\subsection{Installing the Binaries}
-
-
-The files produced by the build process are stored under the
-\path{dist/install/} directory. To install them in their default
-locations, do:
-\begin{quote}
-\begin{verbatim}
-# make install
-\end{verbatim} 
-\end{quote}
-
-
-Alternatively, users with special installation requirements may wish
-to install them manually by copying the files to their appropriate
-destinations.
-
-%% Files in \path{install/boot/} include:
-%% \begin{itemize}
-%% \item \path{install/boot/xen-2.0.gz} Link to the Xen 'kernel'
-%% \item \path{install/boot/vmlinuz-2.6-xen0}  Link to domain 0 XenLinux kernel
-%% \item \path{install/boot/vmlinuz-2.6-xenU}  Link to unprivileged XenLinux kernel
-%% \end{itemize}
-
-The \path{dist/install/boot} directory will also contain the config files
-used for building the XenLinux kernels, and also versions of Xen and
-XenLinux kernels that contain debug symbols (\path{xen-syms-2.0.6} and
-\path{vmlinux-syms-2.6.11.11-xen0}) which are essential for interpreting crash
-dumps.  Retain these files as the developers may wish to see them if
-you post on the mailing list.
-
-
-
-
-
-\section{Configuration}
-\label{s:configure}
-Once you have built and installed the Xen distribution, it is 
-simple to prepare the machine for booting and running Xen. 
-
-\subsection{GRUB Configuration}
-
-An entry should be added to \path{grub.conf} (often found under
-\path{/boot/} or \path{/boot/grub/}) to allow Xen / XenLinux to boot.
-This file is sometimes called \path{menu.lst}, depending on your
-distribution.  The entry should look something like the following:
-
-{\small
-\begin{verbatim}
-title Xen 2.0 / XenLinux 2.6
-  kernel /boot/xen-2.0.gz dom0_mem=131072
-  module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro console=tty0
-\end{verbatim}
-}
-
-The kernel line tells GRUB where to find Xen itself and what boot
-parameters should be passed to it (in this case, setting domain 0's
-memory allocation in kilobytes and the settings for the serial port). For more
-details on the various Xen boot parameters see Section~\ref{s:xboot}. 
-
-The module line of the configuration describes the location of the
-XenLinux kernel that Xen should start and the parameters that should
-be passed to it (these are standard Linux parameters, identifying the
-root device and specifying it be initially mounted read only and
-instructing that console output be sent to the screen).  Some
-distributions such as SuSE do not require the \path{ro} parameter.
-
-%% \framebox{\parbox{5in}{
-%% {\bf Distro specific:} \\
-%% {\it SuSE} --- Omit the {\tt ro} option from the XenLinux kernel
-%% command line, since the partition won't be remounted rw during boot.
-%% }}
-
-
-If you want to use an initrd, just add another \path{module} line to
-the configuration, as usual:
-{\small
-\begin{verbatim}
-  module /boot/my_initrd.gz
-\end{verbatim}
-}
-
-As always when installing a new kernel, it is recommended that you do
-not delete existing menu options from \path{menu.lst} --- you may want
-to boot your old Linux kernel in future, particularly if you
-have problems.
-
-
-\subsection{Serial Console (optional)}
-
-%%   kernel /boot/xen-2.0.gz dom0_mem=131072 com1=115200,8n1
-%%   module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro 
-
-
-In order to configure Xen serial console output, it is necessary to add 
-an boot option to your GRUB config; e.g. replace the above kernel line 
-with: 
-\begin{quote}
-{\small
-\begin{verbatim}
-   kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1
-\end{verbatim}}
-\end{quote}
-
-This configures Xen to output on COM1 at 115,200 baud, 8 data bits, 
-1 stop bit and no parity. Modify these parameters for your set up. 
-
-One can also configure XenLinux to share the serial console; to 
-achieve this append ``\path{console=ttyS0}'' to your 
-module line. 
-
-
-If you wish to be able to log in over the XenLinux serial console it
-is necessary to add a line into \path{/etc/inittab}, just as per 
-regular Linux. Simply add the line:
-\begin{quote}
-{\small 
-{\tt c:2345:respawn:/sbin/mingetty ttyS0}
-}
-\end{quote} 
-
-and you should be able to log in. Note that to successfully log in 
-as root over the serial line will require adding \path{ttyS0} to
-\path{/etc/securetty} in most modern distributions. 
-
-\subsection{TLS Libraries}
-
-Users of the XenLinux 2.6 kernel should disable Thread Local Storage
-(e.g.\ by doing a \path{mv /lib/tls /lib/tls.disabled}) before
-attempting to run with a XenLinux kernel\footnote{If you boot without first
-disabling TLS, you will get a warning message during the boot
-process. In this case, simply perform the rename after the machine is
-up and then run \texttt{/sbin/ldconfig} to make it take effect.}.  You can
-always reenable it by restoring the directory to its original location
-(i.e.\ \path{mv /lib/tls.disabled /lib/tls}).
-
-The reason for this is that the current TLS implementation uses
-segmentation in a way that is not permissible under Xen.  If TLS is
-not disabled, an emulation mode is used within Xen which reduces
-performance substantially.
-
-We hope that this issue can be resolved by working with Linux
-distribution vendors to implement a minor backward-compatible change
-to the TLS library.
-
-\section{Booting Xen} 
-
-It should now be possible to restart the system and use Xen.  Reboot
-as usual but choose the new Xen option when the Grub screen appears.
-
-What follows should look much like a conventional Linux boot.  The
-first portion of the output comes from Xen itself, supplying low level
-information about itself and the machine it is running on.  The
-following portion of the output comes from XenLinux.
-
-You may see some errors during the XenLinux boot.  These are not
-necessarily anything to worry about --- they may result from kernel
-configuration differences between your XenLinux kernel and the one you
-usually use.
-
-When the boot completes, you should be able to log into your system as
-usual.  If you are unable to log in to your system running Xen, you
-should still be able to reboot with your normal Linux kernel.
-
-
-\chapter{Starting Additional Domains}
-
-The first step in creating a new domain is to prepare a root
-filesystem for it to boot off.  Typically, this might be stored in a
-normal partition, an LVM or other volume manager partition, a disk
-file or on an NFS server.  A simple way to do this is simply to boot
-from your standard OS install CD and install the distribution into
-another partition on your hard drive.
-
-To start the \xend control daemon, type
-\begin{quote}
-\verb!# xend start!
-\end{quote}
-If you
-wish the daemon to start automatically, see the instructions in
-Section~\ref{s:xend}. Once the daemon is running, you can use the
-\path{xm} tool to monitor and maintain the domains running on your
-system. This chapter provides only a brief tutorial: we provide full
-details of the \path{xm} tool in the next chapter. 
-
-%\section{From the web interface}
-%
-%Boot the Xen machine and start Xensv (see Chapter~\ref{cha:xensv} for
-%more details) using the command: \\
-%\verb_# xensv start_ \\
-%This will also start Xend (see Chapter~\ref{cha:xend} for more information).
-%
-%The domain management interface will then be available at {\tt
-%http://your\_machine:8080/}.  This provides a user friendly wizard for
-%starting domains and functions for managing running domains.
-%
-%\section{From the command line}
-
-
-\section{Creating a Domain Configuration File} 
-
-Before you can start an additional domain, you must create a
-configuration file. We provide two example files which you 
-can use as a starting point: 
-\begin{itemize} 
-  \item \path{/etc/xen/xmexample1} is a simple template configuration file
-    for describing a single VM.
-
-  \item \path{/etc/xen/xmexample2} file is a template description that
-    is intended to be reused for multiple virtual machines.  Setting
-    the value of the \path{vmid} variable on the \path{xm} command line
-    fills in parts of this template.
-\end{itemize} 
-
-Copy one of these files and edit it as appropriate.
-Typical values you may wish to edit include: 
-
-\begin{quote}
-\begin{description}
-\item[kernel] Set this to the path of the kernel you compiled for use
-              with Xen (e.g.\  \path{kernel = '/boot/vmlinuz-2.6-xenU'})
-\item[memory] Set this to the size of the domain's memory in
-megabytes (e.g.\ \path{memory = 64})
-\item[disk] Set the first entry in this list to calculate the offset
-of the domain's root partition, based on the domain ID.  Set the
-second to the location of \path{/usr} if you are sharing it between
-domains (e.g.\ \path{disk = ['phy:your\_hard\_drive\%d,sda1,w' \%
-(base\_partition\_number + vmid), 'phy:your\_usr\_partition,sda6,r' ]}
-\item[dhcp] Uncomment the dhcp variable, so that the domain will
-receive its IP address from a DHCP server (e.g.\ \path{dhcp='dhcp'})
-\end{description}
-\end{quote}
-
-You may also want to edit the {\bf vif} variable in order to choose
-the MAC address of the virtual ethernet interface yourself.  For
-example: 
-\begin{quote}
-\verb_vif = ['mac=00:06:AA:F6:BB:B3']_
-\end{quote}
-If you do not set this variable, \xend will automatically generate a
-random MAC address from an unused range.
-
-
-\section{Booting the Domain}
-
-The \path{xm} tool provides a variety of commands for managing domains.
-Use the \path{create} command to start new domains. Assuming you've 
-created a configuration file \path{myvmconf} based around
-\path{/etc/xen/xmexample2}, to start a domain with virtual 
-machine ID~1 you should type: 
-
-\begin{quote}
-\begin{verbatim}
-# xm create -c myvmconf vmid=1
-\end{verbatim}
-\end{quote}
-
-
-The \path{-c} switch causes \path{xm} to turn into the domain's
-console after creation.  The \path{vmid=1} sets the \path{vmid}
-variable used in the \path{myvmconf} file. 
-
-
-You should see the console boot messages from the new domain 
-appearing in the terminal in which you typed the command, 
-culminating in a login prompt. 
-
-
-\section{Example: ttylinux}
-
-Ttylinux is a very small Linux distribution, designed to require very
-few resources.  We will use it as a concrete example of how to start a
-Xen domain.  Most users will probably want to install a full-featured
-distribution once they have mastered the basics\footnote{ttylinux is
-maintained by Pascal Schmidt. You can download source packages from
-the distribution's home page: {\tt http://www.minimalinux.org/ttylinux/}}.
-
-\begin{enumerate}
-\item Download and extract the ttylinux disk image from the Files
-section of the project's SourceForge site (see 
-\path{http://sf.net/projects/xen/}).
-\item Create a configuration file like the following:
-\begin{verbatim}
-kernel = "/boot/vmlinuz-2.6-xenU"
-memory = 64
-name = "ttylinux"
-nics = 1
-ip = "1.2.3.4"
-disk = ['file:/path/to/ttylinux/rootfs,sda1,w']
-root = "/dev/sda1 ro"
-\end{verbatim}
-\item Now start the domain and connect to its console:
-\begin{verbatim}
-xm create configfile -c
-\end{verbatim}
-\item Login as root, password root.
-\end{enumerate}
-
-
-\section{Starting / Stopping Domains Automatically}
-
-It is possible to have certain domains start automatically at boot
-time and to have dom0 wait for all running domains to shutdown before
-it shuts down the system.
-
-To specify a domain is to start at boot-time, place its
-configuration file (or a link to it) under \path{/etc/xen/auto/}.
-
-A Sys-V style init script for RedHat and LSB-compliant systems is
-provided and will be automatically copied to \path{/etc/init.d/}
-during install.  You can then enable it in the appropriate way for
-your distribution.
-
-For instance, on RedHat:
-
-\begin{quote}
-\verb_# chkconfig --add xendomains_
-\end{quote}
-
-By default, this will start the boot-time domains in runlevels 3, 4
-and 5.
-
-You can also use the \path{service} command to run this script
-manually, e.g:
-
-\begin{quote}
-\verb_# service xendomains start_
-
-Starts all the domains with config files under /etc/xen/auto/.
-\end{quote}
-
-
-\begin{quote}
-\verb_# service xendomains stop_
-
-Shuts down ALL running Xen domains.
-\end{quote}
-
-\chapter{Domain Management Tools}
-
-The previous chapter described a simple example of how to configure
-and start a domain.  This chapter summarises the tools available to
-manage running domains.
-
-\section{Command-line Management}
-
-Command line management tasks are also performed using the \path{xm}
-tool.  For online help for the commands available, type:
-\begin{quote}
-\verb_# xm help_
-\end{quote}
-
-You can also type \path{xm help $<$command$>$} for more information 
-on a given command. 
-
-\subsection{Basic Management Commands}
-
-The most important \path{xm} commands are: 
-\begin{quote}
-\verb_# xm list_: Lists all domains running.\\
-\verb_# xm consoles_ : Gives information about the domain consoles.\\
-\verb_# xm console_: Opens a console to a domain (e.g.\
-  \verb_# xm console myVM_
-\end{quote}
-
-\subsection{\tt xm list}
-
-The output of \path{xm list} is in rows of the following format:
-\begin{center}
-{\tt name domid memory cpu state cputime console}
-\end{center}
-
-\begin{quote}
-\begin{description}
-\item[name]  The descriptive name of the virtual machine.
-\item[domid] The number of the domain ID this virtual machine is running in.
-\item[memory] Memory size in megabytes.
-\item[cpu]   The CPU this domain is running on.
-\item[state] Domain state consists of 5 fields:
-  \begin{description}
-  \item[r] running
-  \item[b] blocked
-  \item[p] paused
-  \item[s] shutdown
-  \item[c] crashed
-  \end{description}
-\item[cputime] How much CPU time (in seconds) the domain has used so far.
-\item[console] TCP port accepting connections to the domain's console.
-\end{description}
-\end{quote}
-
-The \path{xm list} command also supports a long output format when the
-\path{-l} switch is used.  This outputs the fulls details of the
-running domains in \xend's SXP configuration format.
-
-For example, suppose the system is running the ttylinux domain as
-described earlier.  The list command should produce output somewhat
-like the following:
-\begin{verbatim}
-# xm list
-Name              Id  Mem(MB)  CPU  State  Time(s)  Console
-Domain-0           0      251    0  r----    172.2        
-ttylinux           5       63    0  -b---      3.0    9605
-\end{verbatim}
-
-Here we can see the details for the ttylinux domain, as well as for
-domain 0 (which, of course, is always running).  Note that the console
-port for the ttylinux domain is 9605.  This can be connected to by TCP
-using a terminal program (e.g. \path{telnet} or, better, 
-\path{xencons}).  The simplest way to connect is to use the \path{xm console}
-command, specifying the domain name or ID.  To connect to the console
-of the ttylinux domain, we could use any of the following: 
-\begin{verbatim}
-# xm console ttylinux
-# xm console 5
-# xencons localhost 9605
-\end{verbatim}
-
-\section{Domain Save and Restore}
-
-The administrator of a Xen system may suspend a virtual machine's
-current state into a disk file in domain 0, allowing it to be resumed
-at a later time.
-
-The ttylinux domain described earlier can be suspended to disk using
-the command:
-\begin{verbatim}
-# xm save ttylinux ttylinux.xen
-\end{verbatim}
-
-This will stop the domain named `ttylinux' and save its current state
-into a file called \path{ttylinux.xen}.
-
-To resume execution of this domain, use the \path{xm restore} command:
-\begin{verbatim}
-# xm restore ttylinux.xen
-\end{verbatim}
-
-This will restore the state of the domain and restart it.  The domain
-will carry on as before and the console may be reconnected using the
-\path{xm console} command, as above.
-
-\section{Live Migration}
-
-Live migration is used to transfer a domain between physical hosts
-whilst that domain continues to perform its usual activities --- from
-the user's perspective, the migration should be imperceptible.
-
-To perform a live migration, both hosts must be running Xen / \xend and
-the destination host must have sufficient resources (e.g. memory
-capacity) to accommodate the domain after the move. Furthermore we
-currently require both source and destination machines to be on the 
-same L2 subnet. 
-
-Currently, there is no support for providing automatic remote access
-to filesystems stored on local disk when a domain is migrated.
-Administrators should choose an appropriate storage solution
-(e.g. SAN, NAS, etc.) to ensure that domain filesystems are also
-available on their destination node. GNBD is a good method for
-exporting a volume from one machine to another. iSCSI can do a similar
-job, but is more complex to set up.
-
-When a domain migrates, its MAC and IP addresses move with it, thus it
-is only possible to migrate VMs within the same layer-2 network and IP
-subnet. If the destination node is on a different subnet, the
-administrator would need to manually configure a suitable etherip or
-IP tunnel in the domain 0 of the remote node. 
-
-A domain may be migrated using the \path{xm migrate} command.  To
-live migrate a domain to another machine, we would use
-the command:
-
-\begin{verbatim}
-# xm migrate --live mydomain destination.ournetwork.com
-\end{verbatim}
-
-Without the \path{--live} flag, \xend simply stops the domain and
-copies the memory image over to the new node and restarts it. Since
-domains can have large allocations this can be quite time consuming,
-even on a Gigabit network. With the \path{--live} flag \xend attempts
-to keep the domain running while the migration is in progress,
-resulting in typical `downtimes' of just 60--300ms.
-
-For now it will be necessary to reconnect to the domain's console on
-the new machine using the \path{xm console} command.  If a migrated
-domain has any open network connections then they will be preserved,
-so SSH connections do not have this limitation.
-
-\section{Managing Domain Memory}
-
-XenLinux domains have the ability to relinquish / reclaim machine
-memory at the request of the administrator or the user of the domain.
-
-\subsection{Setting memory footprints from dom0}
-
-The machine administrator can request that a domain alter its memory
-footprint using the \path{xm set-mem} command.  For instance, we can
-request that our example ttylinux domain reduce its memory footprint
-to 32 megabytes.
-
-\begin{verbatim}
-# xm set-mem ttylinux 32
-\end{verbatim}
-
-We can now see the result of this in the output of \path{xm list}:
-
-\begin{verbatim}
-# xm list
-Name              Id  Mem(MB)  CPU  State  Time(s)  Console
-Domain-0           0      251    0  r----    172.2        
-ttylinux           5       31    0  -b---      4.3    9605
-\end{verbatim}
-
-The domain has responded to the request by returning memory to Xen. We
-can restore the domain to its original size using the command line:
-
-\begin{verbatim}
-# xm set-mem ttylinux 64
-\end{verbatim}
-
-\subsection{Setting memory footprints from within a domain}
-
-The virtual file \path{/proc/xen/balloon} allows the owner of a
-domain to adjust their own memory footprint.  Reading the file
-(e.g. \path{cat /proc/xen/balloon}) prints out the current
-memory footprint of the domain.  Writing the file
-(e.g. \path{echo new\_target > /proc/xen/balloon}) requests
-that the kernel adjust the domain's memory footprint to a new value.
-
-\subsection{Setting memory limits}
-
-Xen associates a memory size limit with each domain.  By default, this
-is the amount of memory the domain is originally started with,
-preventing the domain from ever growing beyond this size.  To permit a
-domain to grow beyond its original allocation or to prevent a domain
-you've shrunk from reclaiming the memory it relinquished, use the 
-\path{xm maxmem} command.
-
-\chapter{Domain Filesystem Storage}
-
-It is possible to directly export any Linux block device in dom0 to
-another domain, or to export filesystems / devices to virtual machines
-using standard network protocols (e.g. NBD, iSCSI, NFS, etc).  This
-chapter covers some of the possibilities.
-
-
-\section{Exporting Physical Devices as VBDs} 
-\label{s:exporting-physical-devices-as-vbds}
-
-One of the simplest configurations is to directly export 
-individual partitions from domain 0 to other domains. To 
-achieve this use the \path{phy:} specifier in your domain 
-configuration file. For example a line like
-\begin{quote}
-\verb_disk = ['phy:hda3,sda1,w']_
-\end{quote}
-specifies that the partition \path{/dev/hda3} in domain 0 
-should be exported read-write to the new domain as \path{/dev/sda1}; 
-one could equally well export it as \path{/dev/hda} or 
-\path{/dev/sdb5} should one wish. 
-
-In addition to local disks and partitions, it is possible to export
-any device that Linux considers to be ``a disk'' in the same manner.
-For example, if you have iSCSI disks or GNBD volumes imported into
-domain 0 you can export these to other domains using the \path{phy:}
-disk syntax. E.g.:
-\begin{quote}
-\verb_disk = ['phy:vg/lvm1,sda2,w']_
-\end{quote}
-
-
-
-\begin{center}
-\framebox{\bf Warning: Block device sharing}
-\end{center}
-\begin{quote}
-Block devices should typically only be shared between domains in a
-read-only fashion otherwise the Linux kernel's file systems will get
-very confused as the file system structure may change underneath them
-(having the same ext3 partition mounted rw twice is a sure fire way to
-cause irreparable damage)!  \Xend will attempt to prevent you from
-doing this by checking that the device is not mounted read-write in
-domain 0, and hasn't already been exported read-write to another
-domain.
-If you want read-write sharing, export the directory to other domains
-via NFS from domain0 (or use a cluster file system such as GFS or
-ocfs2).
-
-\end{quote}
-
-
-\section{Using File-backed VBDs}
-
-It is also possible to use a file in Domain 0 as the primary storage
-for a virtual machine.  As well as being convenient, this also has the
-advantage that the virtual block device will be {\em sparse} --- space
-will only really be allocated as parts of the file are used.  So if a
-virtual machine uses only half of its disk space then the file really
-takes up half of the size allocated.
-
-For example, to create a 2GB sparse file-backed virtual block device
-(actually only consumes 1KB of disk):
-\begin{quote}
-\verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=1_
-\end{quote}
-
-Make a file system in the disk file: 
-\begin{quote}
-\verb_# mkfs -t ext3 vm1disk_
-\end{quote}
-
-(when the tool asks for confirmation, answer `y')
-
-Populate the file system e.g. by copying from the current root:
-\begin{quote}
-\begin{verbatim}
-# mount -o loop vm1disk /mnt
-# cp -ax /{root,dev,var,etc,usr,bin,sbin,lib} /mnt
-# mkdir /mnt/{proc,sys,home,tmp}
-\end{verbatim}
-\end{quote}
-
-Tailor the file system by editing \path{/etc/fstab},
-\path{/etc/hostname}, etc (don't forget to edit the files in the
-mounted file system, instead of your domain 0 filesystem, e.g. you
-would edit \path{/mnt/etc/fstab} instead of \path{/etc/fstab} ).  For
-this example put \path{/dev/sda1} to root in fstab.
-
-Now unmount (this is important!):
-\begin{quote}
-\verb_# umount /mnt_
-\end{quote}
-
-In the configuration file set:
-\begin{quote}
-\verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
-\end{quote}
-
-As the virtual machine writes to its `disk', the sparse file will be
-filled in and consume more space up to the original 2GB.
-
-{\bf Note that file-backed VBDs may not be appropriate for backing
-I/O-intensive domains.}  File-backed VBDs are known to experience
-substantial slowdowns under heavy I/O workloads, due to the I/O handling
-by the loopback block device used to support file-backed VBDs in dom0.
-Better I/O performance can be achieved by using either LVM-backed VBDs
-(Section~\ref{s:using-lvm-backed-vbds}) or physical devices as VBDs
-(Section~\ref{s:exporting-physical-devices-as-vbds}).
-
-Linux supports a maximum of eight file-backed VBDs across all domains by
-default.  This limit can be statically increased by using the {\em
-max\_loop} module parameter if CONFIG\_BLK\_DEV\_LOOP is compiled as a
-module in the dom0 kernel, or by using the {\em max\_loop=n} boot option
-if CONFIG\_BLK\_DEV\_LOOP is compiled directly into the dom0 kernel.
-
-
-\section{Using LVM-backed VBDs}
-\label{s:using-lvm-backed-vbds}
-
-A particularly appealing solution is to use LVM volumes 
-as backing for domain file-systems since this allows dynamic
-growing/shrinking of volumes as well as snapshot and other 
-features. 
-
-To initialise a partition to support LVM volumes:
-\begin{quote}
-\begin{verbatim} 
-# pvcreate /dev/sda10           
-\end{verbatim} 
-\end{quote}
-
-Create a volume group named `vg' on the physical partition:
-\begin{quote}
-\begin{verbatim} 
-# vgcreate vg /dev/sda10
-\end{verbatim} 
-\end{quote}
-
-Create a logical volume of size 4GB named `myvmdisk1':
-\begin{quote}
-\begin{verbatim} 
-# lvcreate -L4096M -n myvmdisk1 vg
-\end{verbatim} 
-\end{quote}
-
-You should now see that you have a \path{/dev/vg/myvmdisk1}.
-Make a filesystem, mount it and populate it, e.g.:
-\begin{quote}
-\begin{verbatim} 
-# mkfs -t ext3 /dev/vg/myvmdisk1
-# mount /dev/vg/myvmdisk1 /mnt
-# cp -ax / /mnt
-# umount /mnt
-\end{verbatim} 
-\end{quote}
-
-Now configure your VM with the following disk configuration:
-\begin{quote}
-\begin{verbatim} 
- disk = [ 'phy:vg/myvmdisk1,sda1,w' ]
-\end{verbatim} 
-\end{quote}
-
-LVM enables you to grow the size of logical volumes, but you'll need
-to resize the corresponding file system to make use of the new
-space. Some file systems (e.g. ext3) now support on-line resize.  See
-the LVM manuals for more details.
-
-You can also use LVM for creating copy-on-write clones of LVM
-volumes (known as writable persistent snapshots in LVM
-terminology). This facility is new in Linux 2.6.8, so isn't as
-stable as one might hope. In particular, using lots of CoW LVM
-disks consumes a lot of dom0 memory, and error conditions such as
-running out of disk space are not handled well. Hopefully this
-will improve in future.
-
-To create two copy-on-write clones of the above file system you
-would use the following commands:
-
-\begin{quote}
-\begin{verbatim} 
-# lvcreate -s -L1024M -n myclonedisk1 /dev/vg/myvmdisk1
-# lvcreate -s -L1024M -n myclonedisk2 /dev/vg/myvmdisk1
-\end{verbatim} 
-\end{quote}
-
-Each of these can grow to have 1GB of differences from the master
-volume. You can grow the amount of space for storing the
-differences using the lvextend command, e.g.:
-\begin{quote}
-\begin{verbatim} 
-# lvextend -L+100M /dev/vg/myclonedisk1
-\end{verbatim} 
-\end{quote}
-
-Don't let the `differences volume' ever fill up otherwise LVM gets
-rather confused. It may be possible to automate the growing
-process by using \path{dmsetup wait} to spot the volume getting full
-and then issue an \path{lvextend}.
-
-In principle, it is possible to continue writing to the volume
-that has been cloned (the changes will not be visible to the
-clones), but we wouldn't recommend this: have the cloned volume
-as a `pristine' file system install that isn't mounted directly
-by any of the virtual machines.
-
-
-\section{Using NFS Root}
-
-First, populate a root filesystem in a directory on the server
-machine. This can be on a distinct physical machine, or simply 
-run within a virtual machine on the same node.
-
-Now configure the NFS server to export this filesystem over the
-network by adding a line to \path{/etc/exports}, for instance:
-
-\begin{quote}
-\begin{small}
-\begin{verbatim}
-/export/vm1root      1.2.3.4/24(rw,sync,no_root_squash)
-\end{verbatim}
-\end{small}
-\end{quote}
-
-Finally, configure the domain to use NFS root.  In addition to the
-normal variables, you should make sure to set the following values in
-the domain's configuration file:
-
-\begin{quote}
-\begin{small}
-\begin{verbatim}
-root       = '/dev/nfs'
-nfs_server = '2.3.4.5'       # substitute IP address of server 
-nfs_root   = '/path/to/root' # path to root FS on the server
-\end{verbatim}
-\end{small}
-\end{quote}
-
-The domain will need network access at boot time, so either statically
-configure an IP address (Using the config variables \path{ip}, 
-\path{netmask}, \path{gateway}, \path{hostname}) or enable DHCP (
-\path{dhcp='dhcp'}).
-
-Note that the Linux NFS root implementation is known to have stability
-problems under high load (this is not a Xen-specific problem), so this
-configuration may not be appropriate for critical servers.
+
+%% Chapter Introduction moved to introduction.tex
+\include{src/user/introduction}
+
+%% Chapter Installation moved to installation.tex
+\include{src/user/installation}
+
+%% Chapter Starting Additional Domains  moved to start_addl_dom.tex
+\include{src/user/start_addl_dom}
+
+%% Chapter Domain Management Tools moved to domain_mgmt.tex
+\include{src/user/domain_mgmt}
+
+%% Chapter Domain Filesystem Storage moved to domain_filesystem.tex
+\include{src/user/domain_filesystem}
+
 
 
 \part{User Reference Documentation}
 
-\chapter{Control Software} 
-
-The Xen control software includes the \xend node control daemon (which 
-must be running), the xm command line tools, and the prototype 
-xensv web interface. 
-
-\section{\Xend (node control daemon)}
-\label{s:xend}
-
-The Xen Daemon (\Xend) performs system management functions related to
-virtual machines.  It forms a central point of control for a machine
-and can be controlled using an HTTP-based protocol.  \Xend must be
-running in order to start and manage virtual machines.
-
-\Xend must be run as root because it needs access to privileged system
-management functions.  A small set of commands may be issued on the
-\xend command line:
-
-\begin{tabular}{ll}
-\verb!# xend start! & start \xend, if not already running \\
-\verb!# xend stop!  & stop \xend if already running       \\
-\verb!# xend restart! & restart \xend if running, otherwise start it \\
-% \verb!# xend trace_start! & start \xend, with very detailed debug logging \\
-\verb!# xend status! & indicates \xend status by its return code
-\end{tabular}
-
-A SysV init script called {\tt xend} is provided to start \xend at boot
-time.  {\tt make install} installs this script in \path{/etc/init.d}.
-To enable it, you have to make symbolic links in the appropriate
-runlevel directories or use the {\tt chkconfig} tool, where available.
-
-Once \xend is running, more sophisticated administration can be done
-using the xm tool (see Section~\ref{s:xm}) and the experimental
-Xensv web interface (see Section~\ref{s:xensv}).
-
-As \xend runs, events will be logged to \path{/var/log/xend.log} and, 
-if the migration assistant daemon (\path{xfrd}) has been started, 
-\path{/var/log/xfrd.log}. These may be of use for troubleshooting
-problems.
-
-\section{Xm (command line interface)}
-\label{s:xm}
-
-The xm tool is the primary tool for managing Xen from the console.
-The general format of an xm command line is:
-
-\begin{verbatim}
-# xm command [switches] [arguments] [variables]
-\end{verbatim}
-
-The available {\em switches} and {\em arguments} are dependent on the
-{\em command} chosen.  The {\em variables} may be set using
-declarations of the form {\tt variable=value} and command line
-declarations override any of the values in the configuration file
-being used, including the standard variables described above and any
-custom variables (for instance, the \path{xmdefconfig} file uses a
-{\tt vmid} variable).
-
-The available commands are as follows:
-
-\begin{description}
-\item[set-mem] Request a domain to adjust its memory footprint.
-\item[create] Create a new domain.
-\item[destroy] Kill a domain immediately.
-\item[list] List running domains.
-\item[shutdown] Ask a domain to shutdown.
-\item[dmesg] Fetch the Xen (not Linux!) boot output.
-\item[consoles] Lists the available consoles.
-\item[console] Connect to the console for a domain.
-\item[help] Get help on xm commands.
-\item[save] Suspend a domain to disk.
-\item[restore] Restore a domain from disk.
-\item[pause] Pause a domain's execution.
-\item[unpause] Unpause a domain.
-\item[pincpu] Pin a domain to a CPU.
-\item[bvt] Set BVT scheduler parameters for a domain.
-\item[bvt\_ctxallow] Set the BVT context switching allowance for the system.
-\item[atropos] Set the atropos parameters for a domain.
-\item[rrobin] Set the round robin time slice for the system.
-\item[info] Get information about the Xen host.
-\item[call] Call a \xend HTTP API function directly.
-\end{description}
-
-For a detailed overview of switches, arguments and variables to each command
-try
-\begin{quote}
-\begin{verbatim}
-# xm help command
-\end{verbatim}
-\end{quote}
-
-\section{Xensv (web control interface)}
-\label{s:xensv}
-
-Xensv is the experimental web control interface for managing a Xen
-machine.  It can be used to perform some (but not yet all) of the
-management tasks that can be done using the xm tool.
-
-It can be started using:
-\begin{quote}
-\verb_# xensv start_
-\end{quote}
-and stopped using: 
-\begin{quote}
-\verb_# xensv stop_
-\end{quote}
-
-By default, Xensv will serve out the web interface on port 8080.  This
-can be changed by editing 
-\path{/usr/lib/python2.3/site-packages/xen/sv/params.py}.
-
-Once Xensv is running, the web interface can be used to create and
-manage running domains.
-
-
-
-
-\chapter{Domain Configuration}
-\label{cha:config}
-
-The following contains the syntax of the domain configuration 
-files and description of how to further specify networking, 
-driver domain and general scheduling behaviour. 
-
-\section{Configuration Files}
-\label{s:cfiles}
-
-Xen configuration files contain the following standard variables.
-Unless otherwise stated, configuration items should be enclosed in
-quotes: see \path{/etc/xen/xmexample1} and \path{/etc/xen/xmexample2} 
-for concrete examples of the syntax.
-
-\begin{description}
-\item[kernel] Path to the kernel image 
-\item[ramdisk] Path to a ramdisk image (optional).
-% \item[builder] The name of the domain build function (e.g. {\tt'linux'} or {\tt'netbsd'}.
-\item[memory] Memory size in megabytes.
-\item[cpu] CPU to run this domain on, or {\tt -1} for
-  auto-allocation. 
-\item[console] Port to export the domain console on (default 9600 + domain ID).
-\item[nics] Number of virtual network interfaces.
-\item[vif] List of MAC addresses (random addresses are assigned if not
-  given) and bridges to use for the domain's network interfaces, e.g.
-\begin{verbatim}
-vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0',
-        'bridge=xen-br1' ]
-\end{verbatim}
-  to assign a MAC address and bridge to the first interface and assign
-  a different bridge to the second interface, leaving \xend to choose
-  the MAC address.
-\item[disk] List of block devices to export to the domain,  e.g. \\
-  \verb_disk = [ 'phy:hda1,sda1,r' ]_ \\
-  exports physical device \path{/dev/hda1} to the domain 
-  as \path{/dev/sda1} with read-only access. Exporting a disk read-write 
-  which is currently mounted is dangerous -- if you are \emph{certain}
-  you wish to do this, you can specify \path{w!} as the mode. 
-\item[dhcp] Set to {\tt 'dhcp'} if you want to use DHCP to configure
-  networking. 
-\item[netmask] Manually configured IP netmask.
-\item[gateway] Manually configured IP gateway. 
-\item[hostname] Set the hostname for the virtual machine.
-\item[root] Specify the root device parameter on the kernel command
-  line. 
-\item[nfs\_server] IP address for the NFS server (if any). 
-\item[nfs\_root] Path of the root filesystem on the NFS server (if any).
-\item[extra] Extra string to append to the kernel command line (if
-  any) 
-\item[restart] Three possible options:
-  \begin{description}
-  \item[always] Always restart the domain, no matter what
-                its exit code is.
-  \item[never]  Never restart the domain.
-  \item[onreboot] Restart the domain iff it requests reboot.
-  \end{description}
-\end{description}
-
-For additional flexibility, it is also possible to include Python
-scripting commands in configuration files.  An example of this is the
-\path{xmexample2} file, which uses Python code to handle the 
-\path{vmid} variable.
-
-
-%\part{Advanced Topics}
-
-\section{Network Configuration}
-
-For many users, the default installation should work `out of the box'.
-More complicated network setups, for instance with multiple ethernet
-interfaces and/or existing bridging setups will require some
-special configuration.
-
-The purpose of this section is to describe the mechanisms provided by
-\xend to allow a flexible configuration for Xen's virtual networking.
-
-\subsection{Xen virtual network topology}
-
-Each domain network interface is connected to a virtual network
-interface in dom0 by a point to point link (effectively a `virtual
-crossover cable').  These devices are named {\tt
-vif$<$domid$>$.$<$vifid$>$} (e.g. {\tt vif1.0} for the first interface
-in domain 1, {\tt vif3.1} for the second interface in domain 3).
-
-Traffic on these virtual interfaces is handled in domain 0 using
-standard Linux mechanisms for bridging, routing, rate limiting, etc.
-Xend calls on two shell scripts to perform initial configuration of
-the network and configuration of new virtual interfaces.  By default,
-these scripts configure a single bridge for all the virtual
-interfaces.  Arbitrary routing / bridging configurations can be
-configured by customising the scripts, as described in the following
-section.
-
-\subsection{Xen networking scripts}
-
-Xen's virtual networking is configured by two shell scripts (by
-default \path{network} and \path{vif-bridge}).  These are
-called automatically by \xend when certain events occur, with
-arguments to the scripts providing further contextual information.
-These scripts are found by default in \path{/etc/xen/scripts}.  The
-names and locations of the scripts can be configured in
-\path{/etc/xen/xend-config.sxp}.
-
-\begin{description} 
-
-\item[network:] This script is called whenever \xend is started or
-stopped to respectively initialise or tear down the Xen virtual
-network. In the default configuration initialisation creates the
-bridge `xen-br0' and moves eth0 onto that bridge, modifying the
-routing accordingly. When \xend exits, it deletes the Xen bridge and
-removes eth0, restoring the normal IP and routing configuration.
-
-%% In configurations where the bridge already exists, this script could
-%% be replaced with a link to \path{/bin/true} (for instance).
-
-\item[vif-bridge:] This script is called for every domain virtual
-interface and can configure firewalling rules and add the vif 
-to the appropriate bridge. By default, this adds and removes 
-VIFs on the default Xen bridge.
-
-\end{description} 
-
-For more complex network setups (e.g. where routing is required, or
-where integration with existing bridges is needed) these scripts may be
-replaced with customised variants for your site's preferred configuration.
-
-%% There are two possible types of privileges:  IO privileges and
-%% administration privileges.
-
-\section{Driver Domain Configuration} 
-
-I/O privileges can be assigned to allow a domain to directly access
-PCI devices itself.  This is used to support driver domains.
-
-Setting backend privileges is currently only supported in SXP format
-config files.  To allow a domain to function as a backend for others,
-somewhere within the {\tt vm} element of its configuration file must
-be a {\tt backend} element of the form {\tt (backend ({\em type}))}
-where {\tt \em type} may be either {\tt netif} or {\tt blkif},
-according to the type of virtual device this domain will service.
-%% After this domain has been built, \xend will connect all new and
-%% existing {\em virtual} devices (of the appropriate type) to that
-%% backend.
-
-Note that a block backend cannot currently import virtual block
-devices from other domains, and a network backend cannot import
-virtual network devices from other domains.  Thus (particularly in the
-case of block backends, which cannot import a virtual block device as
-their root filesystem), you may need to boot a backend domain from a
-ramdisk or a network device.
-
-Access to PCI devices may be configured on a per-device basis.  Xen
-will assign the minimal set of hardware privileges to a domain that
-are required to control its devices.  This can be configured in either
-format of configuration file:
-
-\begin{itemize}
-\item SXP Format: Include device elements of the form: \\
-\centerline{  {\tt (device (pci (bus {\em x}) (dev {\em y}) (func {\em z})))}} \\
-  inside the top-level {\tt vm} element.  Each one specifies the address
-  of a device this domain is allowed to access ---
-  the numbers {\em x},{\em y} and {\em z} may be in either decimal or
-  hexadecimal format.
-\item Flat Format: Include a list of PCI device addresses of the
-  format: \\ 
-\centerline{{\tt pci = ['x,y,z', ...]}} \\ 
-where each element in the
-  list is a string specifying the components of the PCI device
-  address, separated by commas.  The components ({\tt \em x}, {\tt \em
-  y} and {\tt \em z}) of the list may be formatted as either decimal
-  or hexadecimal.
-\end{itemize}
-
-%% \section{Administration Domains}
-
-%% Administration privileges allow a domain to use the `dom0
-%% operations' (so called because they are usually available only to
-%% domain 0).  A privileged domain can build other domains, set scheduling
-%% parameters, etc.
-
-% Support for other administrative domains is not yet available...  perhaps
-% we should plumb it in some time
-
-
-
-
-
-\section{Scheduler Configuration}
-\label{s:sched} 
-
-
-Xen offers a boot time choice between multiple schedulers.  To select
-a scheduler, pass the boot parameter {\em sched=sched\_name} to Xen,
-substituting the appropriate scheduler name.  Details of the schedulers
-and their parameters are included below; future versions of the tools
-will provide a higher-level interface to these tools.
-
-It is expected that system administrators configure their system to
-use the scheduler most appropriate to their needs.  Currently, the BVT
-scheduler is the recommended choice. 
-
-\subsection{Borrowed Virtual Time}
-
-{\tt sched=bvt} (the default) \\ 
-
-BVT provides proportional fair shares of the CPU time.  It has been
-observed to penalise domains that block frequently (e.g. I/O intensive
-domains), but this can be compensated for by using warping. 
-
-\subsubsection{Global Parameters}
-
-\begin{description}
-\item[ctx\_allow]
-  the context switch allowance is similar to the `quantum'
-  in traditional schedulers.  It is the minimum time that
-  a scheduled domain will be allowed to run before being
-  pre-empted. 
-\end{description}
-
-\subsubsection{Per-domain parameters}
-
-\begin{description}
-\item[mcuadv]
-  the MCU (Minimum Charging Unit) advance determines the
-  proportional share of the CPU that a domain receives.  It
-  is set inversely proportionally to a domain's sharing weight.
-\item[warp]
-  the amount of `virtual time' the domain is allowed to warp
-  backwards
-\item[warpl]
-  the warp limit is the maximum time a domain can run warped for
-\item[warpu]
-  the unwarp requirement is the minimum time a domain must
-  run unwarped for before it can warp again
-\end{description}
-
-\subsection{Atropos}
-
-{\tt sched=atropos} \\
-
-Atropos is a soft real time scheduler.  It provides guarantees about
-absolute shares of the CPU, with a facility for sharing
-slack CPU time on a best-effort basis. It can provide timeliness
-guarantees for latency-sensitive domains.
-
-Every domain has an associated period and slice.  The domain should
-receive `slice' nanoseconds every `period' nanoseconds.  This allows
-the administrator to configure both the absolute share of the CPU a
-domain receives and the frequency with which it is scheduled. 
-
-%%  When
-%% domains unblock, their period is reduced to the value of the latency
-%% hint (the slice is scaled accordingly so that they still get the same
-%% proportion of the CPU).  For each subsequent period, the slice and
-%% period times are doubled until they reach their original values.
-
-Note: don't overcommit the CPU when using Atropos (i.e. don't reserve
-more CPU than is available --- the utilisation should be kept to
-slightly less than 100\% in order to ensure predictable behaviour).
-
-\subsubsection{Per-domain parameters}
-
-\begin{description}
-\item[period] The regular time interval during which a domain is
-  guaranteed to receive its allocation of CPU time.
-\item[slice]
-  The length of time per period that a domain is guaranteed to run
-  for (in the absence of voluntary yielding of the CPU). 
-\item[latency]
-  The latency hint is used to control how soon after
-  waking up a domain it should be scheduled.
-\item[xtratime] This is a boolean flag that specifies whether a domain
-  should be allowed a share of the system slack time.
-\end{description}
-
-\subsection{Round Robin}
-
-{\tt sched=rrobin} \\
-
-The round robin scheduler is included as a simple demonstration of
-Xen's internal scheduler API.  It is not intended for production use. 
-
-\subsubsection{Global Parameters}
-
-\begin{description}
-\item[rr\_slice]
-  The maximum time each domain runs before the next
-  scheduling decision is made.
-\end{description}
-
-
-
-
-
-
-
-
-
-
-
-
-\chapter{Build, Boot and Debug options} 
-
-This chapter describes the build- and boot-time options 
-which may be used to tailor your Xen system. 
-
-\section{Xen Build Options}
-
-Xen provides a number of build-time options which should be 
-set as environment variables or passed on make's command-line.  
-
-\begin{description} 
-\item[verbose=y] Enable debugging messages when Xen detects an unexpected condition.
-Also enables console output from all domains.
-\item[debug=y] 
-Enable debug assertions.  Implies {\bf verbose=y}.
-(Primarily useful for tracing bugs in Xen).       
-\item[debugger=y] 
-Enable the in-Xen debugger. This can be used to debug 
-Xen, guest OSes, and applications.
-\item[perfc=y] 
-Enable performance counters for significant events
-within Xen. The counts can be reset or displayed
-on Xen's console via console control keys.
-\item[trace=y] 
-Enable per-cpu trace buffers which log a range of
-events within Xen for collection by control
-software. 
-\end{description} 
-
-\section{Xen Boot Options}
-\label{s:xboot}
-
-These options are used to configure Xen's behaviour at runtime.  They
-should be appended to Xen's command line, either manually or by
-editing \path{grub.conf}.
-
-\begin{description}
-\item [noreboot ] 
- Don't reboot the machine automatically on errors.  This is
- useful to catch debug output if you aren't catching console messages
- via the serial line. 
-
-\item [nosmp ] 
- Disable SMP support.
- This option is implied by `ignorebiostables'. 
-
-\item [watchdog ] 
- Enable NMI watchdog which can report certain failures. 
-
-\item [noirqbalance ] 
- Disable software IRQ balancing and affinity. This can be used on
- systems such as Dell 1850/2850 that have workarounds in hardware for
- IRQ-routing issues.
-
-\item [badpage=$<$page number$>$,$<$page number$>$, \ldots ] 
- Specify a list of pages not to be allocated for use 
- because they contain bad bytes. For example, if your
- memory tester says that byte 0x12345678 is bad, you would
- place `badpage=0x12345' on Xen's command line. 
-
-\item [com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
- com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\ 
- Xen supports up to two 16550-compatible serial ports.
- For example: `com1=9600, 8n1, 0x408, 5' maps COM1 to a
- 9600-baud port, 8 data bits, no parity, 1 stop bit,
- I/O port base 0x408, IRQ 5.
- If some configuration options are standard (e.g., I/O base and IRQ),
- then only a prefix of the full configuration string need be
- specified. If the baud rate is pre-configured (e.g., by the
- bootloader) then you can specify `auto' in place of a numeric baud
- rate. 
-
-\item [console=$<$specifier list$>$ ] 
- Specify the destination for Xen console I/O.
- This is a comma-separated list of, for example:
-\begin{description}
- \item[vga]  use VGA console and allow keyboard input
- \item[com1] use serial port com1
- \item[com2H] use serial port com2. Transmitted chars will
-   have the MSB set. Received chars must have
-   MSB set.
- \item[com2L] use serial port com2. Transmitted chars will
-   have the MSB cleared. Received chars must
-   have MSB cleared.
-\end{description}
- The latter two examples allow a single port to be
- shared by two subsystems (e.g. console and
- debugger). Sharing is controlled by MSB of each
- transmitted/received character.
- [NB. Default for this option is `com1,vga'] 
-
-\item [sync\_console ]
- Force synchronous console output. This is useful if you system fails
- unexpectedly before it has sent all available output to the
- console. In most cases Xen will automatically enter synchronous mode
- when an exceptional event occurs, but this option provides a manual
- fallback.
-
-\item [conswitch=$<$switch-char$><$auto-switch-char$>$ ] 
- Specify how to switch serial-console input between
- Xen and DOM0. The required sequence is CTRL-$<$switch-char$>$
- pressed three times. Specifying the backtick character 
- disables switching.
- The $<$auto-switch-char$>$ specifies whether Xen should
- auto-switch input to DOM0 when it boots --- if it is `x'
- then auto-switching is disabled.  Any other value, or
- omitting the character, enables auto-switching.
- [NB. default switch-char is `a'] 
-
-\item [nmi=xxx ] 
- Specify what to do with an NMI parity or I/O error. \\
- `nmi=fatal':  Xen prints a diagnostic and then hangs. \\
- `nmi=dom0':   Inform DOM0 of the NMI. \\
- `nmi=ignore': Ignore the NMI. 
-
-\item [mem=xxx ]
- Set the physical RAM address limit. Any RAM appearing beyond this
- physical address in the memory map will be ignored. This parameter
- may be specified with a B, K, M or G suffix, representing bytes,
- kilobytes, megabytes and gigabytes respectively. The
- default unit, if no suffix is specified, is kilobytes.
-
-\item [dom0\_mem=xxx ] 
- Set the amount of memory to be allocated to domain0. In Xen 3.x the parameter
- may be specified with a B, K, M or G suffix, representing bytes,
- kilobytes, megabytes and gigabytes respectively; if no suffix is specified, 
- the parameter defaults to kilobytes. In previous versions of Xen, suffixes
- were not supported and the value is always interpreted as kilobytes. 
-
-\item [tbuf\_size=xxx ] 
- Set the size of the per-cpu trace buffers, in pages
- (default 1).  Note that the trace buffers are only
- enabled in debug builds.  Most users can ignore
- this feature completely. 
-
-\item [sched=xxx ] 
- Select the CPU scheduler Xen should use.  The current
- possibilities are `bvt' (default), `atropos' and `rrobin'. 
- For more information see Section~\ref{s:sched}. 
-
-\item [apic\_verbosity=debug,verbose ]
- Print more detailed information about local APIC and IOAPIC configuration.
-
-\item [lapic ]
- Force use of local APIC even when left disabled by uniprocessor BIOS.
-
-\item [nolapic ]
- Ignore local APIC in a uniprocessor system, even if enabled by the BIOS.
-
-\item [apic=bigsmp,default,es7000,summit ]
- Specify NUMA platform. This can usually be probed automatically.
-
-\end{description} 
-
-In addition, the following options may be specified on the Xen command
-line. Since domain 0 shares responsibility for booting the platform,
-Xen will automatically propagate these options to its command
-line. These options are taken from Linux's command-line syntax with
-unchanged semantics.
-
-\begin{description}
-\item [acpi=off,force,strict,ht,noirq,\ldots ] 
- Modify how Xen (and domain 0) parses the BIOS ACPI tables.
-
-\item [acpi\_skip\_timer\_override ]
- Instruct Xen (and domain 0) to ignore timer-interrupt override
- instructions specified by the BIOS ACPI tables.
-
-\item [noapic ]
- Instruct Xen (and domain 0) to ignore any IOAPICs that are present in
- the system, and instead continue to use the legacy PIC.
-
-\end{description} 
-
-\section{XenLinux Boot Options}
-
-In addition to the standard Linux kernel boot options, we support: 
-\begin{description} 
-\item[xencons=xxx ] Specify the device node to which the Xen virtual
-console driver is attached. The following options are supported:
-\begin{center}
-\begin{tabular}{l}
-`xencons=off': disable virtual console \\ 
-`xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
-`xencons=ttyS': attach console to /dev/ttyS0
-\end{tabular}
-\end{center}
-The default is ttyS for dom0 and tty for all other domains.
-\end{description} 
-
-
-
-\section{Debugging}
-\label{s:keys} 
-
-Xen has a set of debugging features that can be useful to try and
-figure out what's going on. Hit 'h' on the serial line (if you
-specified a baud rate on the Xen command line) or ScrollLock-h on the
-keyboard to get a list of supported commands.
-
-If you have a crash you'll likely get a crash dump containing an EIP
-(PC) which, along with an \path{objdump -d image}, can be useful in
-figuring out what's happened.  Debug a Xenlinux image just as you
-would any other Linux kernel.
-
-%% We supply a handy debug terminal program which you can find in
-%% \path{/usr/local/src/xen-2.0.bk/tools/misc/miniterm/}
-%% This should be built and executed on another machine that is connected
-%% via a null modem cable. Documentation is included.
-%% Alternatively, if the Xen machine is connected to a serial-port server
-%% then we supply a dumb TCP terminal client, {\tt xencons}.
-
-
+%% Chapter Control Software moved to control_software.tex
+\include{src/user/control_software}
+
+%% Chapter Domain Configuration moved to domain_configuration.tex
+\include{src/user/domain_configuration}
+
+%% Chapter Build, Boot and Debug Options moved to build.tex
+\include{src/user/build}
 
 
 \chapter{Further Support}
@@ -1875,6 +108,7 @@
 %Various HOWTOs are available in \path{docs/HOWTOS} but this content is
 %being integrated into this manual.
 
+
 \section{Online References}
 
 The official Xen web site is found at:
@@ -1884,6 +118,7 @@
 
 This contains links to the latest versions of all on-line 
 documentation (including the lateset version of the FAQ). 
+
 
 \section{Mailing Lists}
 
@@ -1905,326 +140,18 @@
 \end{description}
 
 
+
 \appendix
 
-
-\chapter{Installing Xen / XenLinux on Debian}
-
-The Debian project provides a tool called \path{debootstrap} which
-allows a base Debian system to be installed into a filesystem without
-requiring the host system to have any Debian-specific software (such
-as \path{apt}. 
-
-Here's some info how to install Debian 3.1 (Sarge) for an unprivileged
-Xen domain:
-
-\begin{enumerate}
-\item Set up Xen 2.0 and test that it's working, as described earlier in
-      this manual.
-
-\item Create disk images for root-fs and swap (alternatively, you
-      might create dedicated partitions, LVM logical volumes, etc. if
-      that suits your setup).
-\begin{small}\begin{verbatim}  
-dd if=/dev/zero of=/path/diskimage bs=1024k count=size_in_mbytes
-dd if=/dev/zero of=/path/swapimage bs=1024k count=size_in_mbytes
-\end{verbatim}\end{small}
-      If you're going to use this filesystem / disk image only as a
-      `template' for other vm disk images, something like 300 MB should
-      be enough.. (of course it depends what kind of packages you are
-      planning to install to the template)
-
-\item Create the filesystem and initialise the swap image
-\begin{small}\begin{verbatim}
-mkfs.ext3 /path/diskimage
-mkswap /path/swapimage
-\end{verbatim}\end{small}
-
-\item Mount the disk image for installation
-\begin{small}\begin{verbatim}
-mount -o loop /path/diskimage /mnt/disk
-\end{verbatim}\end{small}
-
-\item Install \path{debootstrap}
-
-Make sure you have debootstrap installed on the host.  If you are
-running Debian sarge (3.1 / testing) or unstable you can install it by
-running \path{apt-get install debootstrap}.  Otherwise, it can be
-downloaded from the Debian project website.
-
-\item Install Debian base to the disk image:
-\begin{small}\begin{verbatim}
-debootstrap --arch i386 sarge /mnt/disk  \
-            http://ftp.<countrycode>.debian.org/debian
-\end{verbatim}\end{small}
-
-You can use any other Debian http/ftp mirror you want.
-
-\item When debootstrap completes successfully, modify settings:
-\begin{small}\begin{verbatim}
-chroot /mnt/disk /bin/bash
-\end{verbatim}\end{small}
-
-Edit the following files using vi or nano and make needed changes:
-\begin{small}\begin{verbatim}
-/etc/hostname
-/etc/hosts
-/etc/resolv.conf
-/etc/network/interfaces
-/etc/networks
-\end{verbatim}\end{small}
-
-Set up access to the services, edit:
-\begin{small}\begin{verbatim}
-/etc/hosts.deny
-/etc/hosts.allow
-/etc/inetd.conf
-\end{verbatim}\end{small}
-
-Add Debian mirror to:   
-\begin{small}\begin{verbatim}
-/etc/apt/sources.list
-\end{verbatim}\end{small}
-
-Create fstab like this:
-\begin{small}\begin{verbatim}
-/dev/sda1       /       ext3    errors=remount-ro       0       1
-/dev/sda2       none    swap    sw                      0       0
-proc            /proc   proc    defaults                0       0
-\end{verbatim}\end{small}
-
-Logout
-
-\item      Unmount the disk image
-\begin{small}\begin{verbatim}
-umount /mnt/disk
-\end{verbatim}\end{small}
-
-\item Create Xen 2.0 configuration file for the new domain. You can
-        use the example-configurations coming with Xen as a template.
-
-        Make sure you have the following set up:
-\begin{small}\begin{verbatim}
-disk = [ 'file:/path/diskimage,sda1,w', 'file:/path/swapimage,sda2,w' ]
-root = "/dev/sda1 ro"
-\end{verbatim}\end{small}
-
-\item Start the new domain
-\begin{small}\begin{verbatim}
-xm create -f domain_config_file
-\end{verbatim}\end{small}
-
-Check that the new domain is running:
-\begin{small}\begin{verbatim}
-xm list
-\end{verbatim}\end{small}
-
-\item   Attach to the console of the new domain.
-        You should see something like this when starting the new domain:
-
-\begin{small}\begin{verbatim}
-Started domain testdomain2, console on port 9626
-\end{verbatim}\end{small}
-        
-        There you can see the ID of the console: 26. You can also list
-        the consoles with \path{xm consoles} (ID is the last two
-        digits of the port number.)
-
-        Attach to the console:
-
-\begin{small}\begin{verbatim}
-xm console 26
-\end{verbatim}\end{small}
-
-        or by telnetting to the port 9626 of localhost (the xm console
-        program works better).
-
-\item   Log in and run base-config
-
-        As a default there's no password for the root.
-
-        Check that everything looks OK, and the system started without
-        errors.  Check that the swap is active, and the network settings are
-        correct.
-
-        Run \path{/usr/sbin/base-config} to set up the Debian settings.
-
-        Set up the password for root using passwd.
-
-\item     Done. You can exit the console by pressing \path{Ctrl + ]}
-
-\end{enumerate}
-
-If you need to create new domains, you can just copy the contents of
-the `template'-image to the new disk images, either by mounting the
-template and the new image, and using \path{cp -a} or \path{tar} or by
-simply copying the image file.  Once this is done, modify the
-image-specific settings (hostname, network settings, etc).
-
-\chapter{Installing Xen / XenLinux on Redhat or Fedora Core}
-
-When using Xen / XenLinux on a standard Linux distribution there are
-a couple of things to watch out for:
-
-Note that, because domains>0 don't have any privileged access at all,
-certain commands in the default boot sequence will fail e.g. attempts
-to update the hwclock, change the console font, update the keytable
-map, start apmd (power management), or gpm (mouse cursor).  Either
-ignore the errors (they should be harmless), or remove them from the
-startup scripts.  Deleting the following links are a good start:
-{\path{S24pcmcia}}, {\path{S09isdn}},
-{\path{S17keytable}}, {\path{S26apmd}},
-{\path{S85gpm}}.
-
-If you want to use a single root file system that works cleanly for
-both domain 0 and unprivileged domains, a useful trick is to use
-different 'init' run levels. For example, use
-run level 3 for domain 0, and run level 4 for other domains. This
-enables different startup scripts to be run in depending on the run
-level number passed on the kernel command line.
-
-If using NFS root files systems mounted either from an
-external server or from domain0 there are a couple of other gotchas.
-The default {\path{/etc/sysconfig/iptables}} rules block NFS, so part
-way through the boot sequence things will suddenly go dead.
-
-If you're planning on having a separate NFS {\path{/usr}} partition, the
-RH9 boot scripts don't make life easy - they attempt to mount NFS file
-systems way to late in the boot process. The easiest way I found to do
-this was to have a {\path{/linuxrc}} script run ahead of
-{\path{/sbin/init}} that mounts {\path{/usr}}:
-
-\begin{quote}
-\begin{small}\begin{verbatim}
- #!/bin/bash
- /sbin/ipconfig lo 127.0.0.1
- /sbin/portmap
- /bin/mount /usr
- exec /sbin/init "$@" <>/dev/console 2>&1
-\end{verbatim}\end{small}
-\end{quote}
-
-%$ XXX SMH: font lock fix :-)  
-
-The one slight complication with the above is that
-{\path{/sbin/portmap}} is dynamically linked against
-{\path{/usr/lib/libwrap.so.0}} Since this is in
-{\path{/usr}}, it won't work. This can be solved by copying the
-file (and link) below the /usr mount point, and just let the file be
-'covered' when the mount happens.
-
-In some installations, where a shared read-only {\path{/usr}} is
-being used, it may be desirable to move other large directories over
-into the read-only {\path{/usr}}. For example, you might replace
-{\path{/bin}}, {\path{/lib}} and {\path{/sbin}} with
-links into {\path{/usr/root/bin}}, {\path{/usr/root/lib}}
-and {\path{/usr/root/sbin}} respectively. This creates other
-problems for running the {\path{/linuxrc}} script, requiring
-bash, portmap, mount, ifconfig, and a handful of other shared
-libraries to be copied below the mount point --- a simple
-statically-linked C program would solve this problem.
-
-
-
-
-\chapter{Glossary of Terms}
-
-\begin{description}
-\item[Atropos]             One of the CPU schedulers provided by Xen.
-                           Atropos provides domains with absolute shares
-                           of the CPU, with timeliness guarantees and a
-                           mechanism for sharing out `slack time'.
-
-\item[BVT]                 The BVT scheduler is used to give proportional
-                           fair shares of the CPU to domains.
-
-\item[Exokernel]           A minimal piece of privileged code, similar to
-                           a {\bf microkernel} but providing a more
-                           `hardware-like' interface to the tasks it
-                           manages.  This is similar to a paravirtualising
-                           VMM like {\bf Xen} but was designed as a new
-                           operating system structure, rather than
-                           specifically to run multiple conventional OSs.
-
-\item[Domain]              A domain is the execution context that
-                           contains a running {\bf virtual machine}.
-                           The relationship between virtual machines
-                           and domains on Xen is similar to that between
-                           programs and processes in an operating
-                           system: a virtual machine is a persistent
-                           entity that resides on disk (somewhat like
-                           a program).  When it is loaded for execution,
-                           it runs in a domain.  Each domain has a
-                           {\bf domain ID}.
-
-\item[Domain 0]            The first domain to be started on a Xen
-                           machine.  Domain 0 is responsible for managing
-                           the system.
-
-\item[Domain ID]           A unique identifier for a {\bf domain},
-                           analogous to a process ID in an operating
-                           system.
-
-\item[Full virtualisation] An approach to virtualisation which
-                           requires no modifications to the hosted
-                           operating system, providing the illusion of
-                           a complete system of real hardware devices.
-
-\item[Hypervisor]          An alternative term for {\bf VMM}, used
-                           because it means `beyond supervisor',
-                           since it is responsible for managing multiple
-                           `supervisor' kernels.
-
-\item[Live migration]      A technique for moving a running virtual
-                           machine to another physical host, without
-                           stopping it or the services running on it.
-
-\item[Microkernel]         A small base of code running at the highest
-                           hardware privilege level.  A microkernel is
-                           responsible for sharing CPU and memory (and
-                           sometimes other devices) between less
-                           privileged tasks running on the system.
-                           This is similar to a VMM, particularly a
-                           {\bf paravirtualising} VMM but typically
-                           addressing a different problem space and
-                           providing different kind of interface.
-
-\item[NetBSD/Xen]          A port of NetBSD to the Xen architecture.
-
-\item[Paravirtualisation]  An approach to virtualisation which requires
-                           modifications to the operating system in
-                           order to run in a virtual machine.  Xen
-                           uses paravirtualisation but preserves
-                           binary compatibility for user space
-                           applications.
-
-\item[Shadow pagetables]   A technique for hiding the layout of machine
-                           memory from a virtual machine's operating
-                           system.  Used in some {\bf VMMs} to provide
-                           the illusion of contiguous physical memory,
-                           in Xen this is used during
-                           {\bf live migration}.
-
-\item[Virtual Machine]     The environment in which a hosted operating
-                           system runs, providing the abstraction of a
-                           dedicated machine.  A virtual machine may
-                           be identical to the underlying hardware (as
-                           in {\bf full virtualisation}, or it may
-                           differ, as in {\bf paravirtualisation}.
-
-\item[VMM]                 Virtual Machine Monitor - the software that
-                           allows multiple virtual machines to be
-                           multiplexed on a single physical machine.
-
-\item[Xen]                 Xen is a paravirtualising virtual machine
-                           monitor, developed primarily by the
-                           Systems Research Group at the University
-                           of Cambridge Computer Laboratory.
-
-\item[XenLinux]            Official name for the port of the Linux kernel
-                           that runs on Xen.
-
-\end{description}
+%% Chapter Installing Xen / XenLinux on Debian moved to debian.tex
+\include{src/user/debian}
+
+%% Chapter Installing Xen on Red Hat moved to redhat.tex
+\include{src/user/redhat}
+
+
+%% Chapter Glossary of Terms moved to glossary.tex
+\include{src/user/glossary}
 
 
 \end{document}
diff -r 97dbd9524a7e -r 06d84bf87159 extras/mini-os/xenbus/xenbus_xs.c
--- a/extras/mini-os/xenbus/xenbus_xs.c Thu Sep 22 17:34:14 2005
+++ b/extras/mini-os/xenbus/xenbus_xs.c Thu Sep 22 17:42:01 2005
@@ -127,7 +127,7 @@
                return ERR_PTR(err);
 
        for (i = 0; i < num_vecs; i++) {
-               err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
+               err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
                if (err)
                        return ERR_PTR(err);
        }
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig     Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig     Thu Sep 22 17:42:01 2005
@@ -73,6 +73,8 @@
 config XEN_TPMDEV_FRONTEND
         bool "TPM-device frontend driver"
         default n
+       select TCG_TPM
+       select TCG_XEN
         help
           The TPM-device frontend driver.
 
@@ -108,13 +110,6 @@
          network interfaces within another guest OS. Unless you are building a
          dedicated device-driver domain, or your master control domain
          (domain 0), then you almost certainly want to say Y here.
-
-config XEN_NETDEV_GRANT
-        bool "Grant table substrate for network drivers (DANGEROUS)"
-        default n
-        help
-          This introduces the use of grant tables as a data exhange mechanism
-          between the frontend and backend network drivers.
 
 config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
        bool "Pipelined transmitter (DANGEROUS)"
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32       Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32       Thu Sep 22 17:42:01 2005
@@ -19,7 +19,6 @@
 # CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64       Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64       Thu Sep 22 17:42:01 2005
@@ -19,7 +19,6 @@
 # CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32       Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32       Thu Sep 22 17:42:01 2005
@@ -16,7 +16,6 @@
 # CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64       Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64       Thu Sep 22 17:42:01 2005
@@ -16,7 +16,6 @@
 # CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32        Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32        Thu Sep 22 17:42:01 2005
@@ -19,7 +19,6 @@
 # CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -372,7 +371,7 @@
 #
 CONFIG_ISAPNP=y
 # CONFIG_PNPBIOS is not set
-CONFIG_PNPACPI=y
+# CONFIG_PNPACPI is not set
 
 #
 # Block devices
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64        Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64        Thu Sep 22 17:42:01 2005
@@ -19,7 +19,6 @@
 # CONFIG_XEN_TPMDEV_BACKEND is not set
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
-CONFIG_XEN_NETDEV_GRANT=y
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Thu Sep 22 17:42:01 2005
@@ -45,12 +45,12 @@
        return 0;
 }
 
-int direct_remap_pfn_range(struct mm_struct *mm,
-                           unsigned long address, 
-                           unsigned long mfn,
-                           unsigned long size, 
-                           pgprot_t prot,
-                           domid_t  domid)
+static int __direct_remap_pfn_range(struct mm_struct *mm,
+                                   unsigned long address, 
+                                   unsigned long mfn,
+                                   unsigned long size, 
+                                   pgprot_t prot,
+                                   domid_t  domid)
 {
        int i;
        unsigned long start_address;
@@ -98,6 +98,20 @@
        return 0;
 }
 
+int direct_remap_pfn_range(struct vm_area_struct *vma,
+                          unsigned long address, 
+                          unsigned long mfn,
+                          unsigned long size, 
+                          pgprot_t prot,
+                          domid_t  domid)
+{
+       /* Same as remap_pfn_range(). */
+       vma->vm_flags |= VM_IO | VM_RESERVED;
+
+       return __direct_remap_pfn_range(
+               vma->vm_mm, address, mfn, size, prot, domid);
+}
+
 EXPORT_SYMBOL(direct_remap_pfn_range);
 
 
@@ -221,8 +235,9 @@
 #ifdef __x86_64__
        flags |= _PAGE_USER;
 #endif
-       if (direct_remap_pfn_range(&init_mm, (unsigned long) addr, phys_addr>>PAGE_SHIFT,
-                                   size, __pgprot(flags), domid)) {
+       if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
+                                    phys_addr>>PAGE_SHIFT,
+                                    size, __pgprot(flags), domid)) {
                vunmap((void __force *) addr);
                return NULL;
        }
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/i386/pci/i386.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/i386.c     Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/i386.c     Thu Sep 22 17:42:01 2005
@@ -295,7 +295,7 @@
        /* Write-combine setting is ignored, it is changed via the mtrr
         * interfaces on this platform.
         */
-       if (direct_remap_pfn_range(vma->vm_mm, vma->vm_start, vma->vm_pgoff,
+       if (direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                                   vma->vm_end - vma->vm_start,
                                   vma->vm_page_prot, DOMID_IO))
                return -EAGAIN;
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/kernel/devmem.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/devmem.c     Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/devmem.c     Thu Sep 22 17:42:01 2005
@@ -90,22 +90,10 @@
 
 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
 {
-       int uncached;
-
-       uncached = uncached_access(file);
-       if (uncached)
+       if (uncached_access(file))
                vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-       /* Don't try to swap out physical pages.. */
-       vma->vm_flags |= VM_RESERVED;
-
-       /*
-        * Don't dump addresses that are not real memory to a core file.
-        */
-       if (uncached)
-               vma->vm_flags |= VM_IO;
-
-       if (direct_remap_pfn_range(vma->vm_mm, vma->vm_start, vma->vm_pgoff,
+       if (direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                                   vma->vm_end - vma->vm_start,
                                   vma->vm_page_prot, DOMID_IO))
                return -EAGAIN;
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Thu Sep 22 17:42:01 2005
@@ -182,14 +182,14 @@
 }
 
 int
-gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
+gnttab_grant_foreign_transfer(domid_t domid)
 {
     int ref;
 
     if ( unlikely((ref = get_free_entry()) == -1) )
         return -ENOSPC;
 
-    shared[ref].frame = pfn;
+    shared[ref].frame = 0;
     shared[ref].domid = domid;
     wmb();
     shared[ref].flags = GTF_accept_transfer;
@@ -198,10 +198,9 @@
 }
 
 void
-gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
-                                 unsigned long pfn)
-{
-    shared[ref].frame = pfn;
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid)
+{
+    shared[ref].frame = 0;
     shared[ref].domid = domid;
     wmb();
     shared[ref].flags = GTF_accept_transfer;
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Thu Sep 22 17:42:01 2005
@@ -334,7 +334,7 @@
        return;
     }
 
-    xenbus_write("control", "shutdown", "", O_CREAT);
+    xenbus_write("control", "shutdown", "");
 
     err = xenbus_transaction_end(0);
     if (err == -ETIMEDOUT) {
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/Makefile Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Thu Sep 22 17:42:01 2005
@@ -1,4 +1,5 @@
 
+obj-y  += util.o
 
 obj-y  += console/
 obj-y  += evtchn/
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c        Thu Sep 22 17:42:01 2005
@@ -362,7 +362,10 @@
                return;
        } 
         
-       set_new_target(new_target >> PAGE_SHIFT);
+       /* The given memory/target value is in KiB, so it needs converting to
+          pages.  PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+       */
+       set_new_target(new_target >> (PAGE_SHIFT - 10));
     
 }
 
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Thu Sep 22 17:42:01 2005
@@ -28,12 +28,12 @@
 #define BATCH_PER_DOMAIN 16
 
 static unsigned long mmap_vstart;
-#define MMAP_PAGES                                              \
-    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg)                                   \
-    (mmap_vstart +                                              \
-     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
-     ((_seg) * PAGE_SIZE))
+#define MMAP_PAGES                                             \
+       (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg)                                          \
+       (mmap_vstart +                                                  \
+        ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +        \
+        ((_seg) * PAGE_SIZE))
 
 /*
  * Each outstanding request that we've passed to the lower device layers has a 
@@ -42,12 +42,12 @@
  * response queued for it, with the saved 'id' passed back.
  */
 typedef struct {
-    blkif_t       *blkif;
-    unsigned long  id;
-    int            nr_pages;
-    atomic_t       pendcnt;
-    unsigned short operation;
-    int            status;
+       blkif_t       *blkif;
+       unsigned long  id;
+       int            nr_pages;
+       atomic_t       pendcnt;
+       unsigned short operation;
+       int            status;
 } pending_req_t;
 
 /*
@@ -68,14 +68,13 @@
 static request_queue_t *plugged_queue;
 static inline void flush_plugged_queue(void)
 {
-    request_queue_t *q = plugged_queue;
-    if ( q != NULL )
-    {
-        if ( q->unplug_fn != NULL )
-            q->unplug_fn(q);
-        blk_put_queue(q);
-        plugged_queue = NULL;
-    }
+       request_queue_t *q = plugged_queue;
+       if (q != NULL) {
+               if ( q->unplug_fn != NULL )
+                       q->unplug_fn(q);
+               blk_put_queue(q);
+               plugged_queue = NULL;
+       }
 }
 
 /* When using grant tables to map a frame for device access then the
@@ -106,24 +105,23 @@
 
 static void fast_flush_area(int idx, int nr_pages)
 {
-    struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    unsigned int i, invcount = 0;
-    u16 handle;
-
-    for ( i = 0; i < nr_pages; i++ )
-    {
-        if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) )
-        {
-            unmap[i].host_addr      = MMAP_VADDR(idx, i);
-            unmap[i].dev_bus_addr   = 0;
-            unmap[i].handle         = handle;
-            pending_handle(idx, i)  = BLKBACK_INVALID_HANDLE;
-            invcount++;
-        }
-    }
-    if ( unlikely(HYPERVISOR_grant_table_op(
-                    GNTTABOP_unmap_grant_ref, unmap, invcount)))
-        BUG();
+       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       unsigned int i, invcount = 0;
+       u16 handle;
+
+       for (i = 0; i < nr_pages; i++) {
+               handle = pending_handle(idx, i);
+               if (handle == BLKBACK_INVALID_HANDLE)
+                       continue;
+               unmap[i].host_addr      = MMAP_VADDR(idx, i);
+               unmap[i].dev_bus_addr   = 0;
+               unmap[i].handle         = handle;
+               pending_handle(idx, i)  = BLKBACK_INVALID_HANDLE;
+               invcount++;
+       }
+
+       BUG_ON(HYPERVISOR_grant_table_op(
+               GNTTABOP_unmap_grant_ref, unmap, invcount));
 }
 
 
@@ -136,34 +134,38 @@
 
 static int __on_blkdev_list(blkif_t *blkif)
 {
-    return blkif->blkdev_list.next != NULL;
+       return blkif->blkdev_list.next != NULL;
 }
 
 static void remove_from_blkdev_list(blkif_t *blkif)
 {
-    unsigned long flags;
-    if ( !__on_blkdev_list(blkif) ) return;
-    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-    if ( __on_blkdev_list(blkif) )
-    {
-        list_del(&blkif->blkdev_list);
-        blkif->blkdev_list.next = NULL;
-        blkif_put(blkif);
-    }
-    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+       unsigned long flags;
+
+       if (!__on_blkdev_list(blkif))
+               return;
+
+       spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+       if (__on_blkdev_list(blkif)) {
+               list_del(&blkif->blkdev_list);
+               blkif->blkdev_list.next = NULL;
+               blkif_put(blkif);
+       }
+       spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
 }
 
 static void add_to_blkdev_list_tail(blkif_t *blkif)
 {
-    unsigned long flags;
-    if ( __on_blkdev_list(blkif) ) return;
-    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
-    {
-        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
-        blkif_get(blkif);
-    }
-    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+       unsigned long flags;
+
+       if (__on_blkdev_list(blkif))
+               return;
+
+       spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+       if (!__on_blkdev_list(blkif) && (blkif->status == CONNECTED)) {
+               list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+               blkif_get(blkif);
+       }
+       spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
 }
 
 
@@ -175,54 +177,53 @@
 
 static int blkio_schedule(void *arg)
 {
-    DECLARE_WAITQUEUE(wq, current);
-
-    blkif_t          *blkif;
-    struct list_head *ent;
-
-    daemonize("xenblkd");
-
-    for ( ; ; )
-    {
-        /* Wait for work to do. */
-        add_wait_queue(&blkio_schedule_wait, &wq);
-        set_current_state(TASK_INTERRUPTIBLE);
-        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
-             list_empty(&blkio_schedule_list) )
-            schedule();
-        __set_current_state(TASK_RUNNING);
-        remove_wait_queue(&blkio_schedule_wait, &wq);
-
-        /* Queue up a batch of requests. */
-        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
-                !list_empty(&blkio_schedule_list) )
-        {
-            ent = blkio_schedule_list.next;
-            blkif = list_entry(ent, blkif_t, blkdev_list);
-            blkif_get(blkif);
-            remove_from_blkdev_list(blkif);
-            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
-                add_to_blkdev_list_tail(blkif);
-            blkif_put(blkif);
-        }
-
-        /* Push the batch through to disc. */
-        flush_plugged_queue();
-    }
+       DECLARE_WAITQUEUE(wq, current);
+
+       blkif_t          *blkif;
+       struct list_head *ent;
+
+       daemonize("xenblkd");
+
+       for (;;) {
+               /* Wait for work to do. */
+               add_wait_queue(&blkio_schedule_wait, &wq);
+               set_current_state(TASK_INTERRUPTIBLE);
+               if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
+                    list_empty(&blkio_schedule_list) )
+                       schedule();
+               __set_current_state(TASK_RUNNING);
+               remove_wait_queue(&blkio_schedule_wait, &wq);
+
+               /* Queue up a batch of requests. */
+               while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+                      !list_empty(&blkio_schedule_list)) {
+                       ent = blkio_schedule_list.next;
+                       blkif = list_entry(ent, blkif_t, blkdev_list);
+                       blkif_get(blkif);
+                       remove_from_blkdev_list(blkif);
+                       if (do_block_io_op(blkif, BATCH_PER_DOMAIN))
+                               add_to_blkdev_list_tail(blkif);
+                       blkif_put(blkif);
+               }
+
+               /* Push the batch through to disc. */
+               flush_plugged_queue();
+       }
 }
 
 static void maybe_trigger_blkio_schedule(void)
 {
-    /*
-     * Needed so that two processes, who together make the following predicate
-     * true, don't both read stale values and evaluate the predicate
-     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
-     */
-    smp_mb();
-
-    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
-         !list_empty(&blkio_schedule_list) )
-        wake_up(&blkio_schedule_wait);
+       /*
+        * Needed so that two processes, which together make the following
+        * predicate true, don't both read stale values and evaluate the
+        * predicate incorrectly. Incredibly unlikely to stall the scheduler
+        * on x86, but...
+        */
+       smp_mb();
+
+       if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+           !list_empty(&blkio_schedule_list))
+               wake_up(&blkio_schedule_wait);
 }
 
 
@@ -233,36 +234,34 @@
 
 static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
 {
-    unsigned long flags;
-
-    /* An error fails the entire request. */
-    if ( !uptodate )
-    {
-        DPRINTK("Buffer not up-to-date at end of operation\n");
-        pending_req->status = BLKIF_RSP_ERROR;
-    }
-
-    if ( atomic_dec_and_test(&pending_req->pendcnt) )
-    {
-        int pending_idx = pending_req - pending_reqs;
-        fast_flush_area(pending_idx, pending_req->nr_pages);
-        make_response(pending_req->blkif, pending_req->id,
-                      pending_req->operation, pending_req->status);
-        blkif_put(pending_req->blkif);
-        spin_lock_irqsave(&pend_prod_lock, flags);
-        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-        spin_unlock_irqrestore(&pend_prod_lock, flags);
-        maybe_trigger_blkio_schedule();
-    }
+       unsigned long flags;
+
+       /* An error fails the entire request. */
+       if (!uptodate) {
+               DPRINTK("Buffer not up-to-date at end of operation\n");
+               pending_req->status = BLKIF_RSP_ERROR;
+       }
+
+       if (atomic_dec_and_test(&pending_req->pendcnt)) {
+               int pending_idx = pending_req - pending_reqs;
+               fast_flush_area(pending_idx, pending_req->nr_pages);
+               make_response(pending_req->blkif, pending_req->id,
+                             pending_req->operation, pending_req->status);
+               blkif_put(pending_req->blkif);
+               spin_lock_irqsave(&pend_prod_lock, flags);
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+               spin_unlock_irqrestore(&pend_prod_lock, flags);
+               maybe_trigger_blkio_schedule();
+       }
 }
 
 static int end_block_io_op(struct bio *bio, unsigned int done, int error)
 {
-    if ( bio->bi_size != 0 )
-        return 1;
-    __end_block_io_op(bio->bi_private, !error);
-    bio_put(bio);
-    return error;
+       if (bio->bi_size != 0)
+               return 1;
+       __end_block_io_op(bio->bi_private, !error);
+       bio_put(bio);
+       return error;
 }
 
 
@@ -272,10 +271,10 @@
 
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
 {
-    blkif_t *blkif = dev_id;
-    add_to_blkdev_list_tail(blkif);
-    maybe_trigger_blkio_schedule();
-    return IRQ_HANDLED;
+       blkif_t *blkif = dev_id;
+       add_to_blkdev_list_tail(blkif);
+       maybe_trigger_blkio_schedule();
+       return IRQ_HANDLED;
 }
 
 
@@ -286,183 +285,174 @@
 
 static int do_block_io_op(blkif_t *blkif, int max_to_do)
 {
-    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
-    blkif_request_t *req;
-    RING_IDX i, rp;
-    int more_to_do = 0;
-
-    rp = blk_ring->sring->req_prod;
-    rmb(); /* Ensure we see queued requests up to 'rp'. */
-
-    for ( i = blk_ring->req_cons; 
-         (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
-          i++ )
-    {
-        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
-        {
-            more_to_do = 1;
-            break;
-        }
+       blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+       blkif_request_t *req;
+       RING_IDX i, rp;
+       int more_to_do = 0;
+
+       rp = blk_ring->sring->req_prod;
+       rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+       for (i = blk_ring->req_cons; 
+            (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
+            i++) {
+               if ((max_to_do-- == 0) ||
+                   (NR_PENDING_REQS == MAX_PENDING_REQS)) {
+                       more_to_do = 1;
+                       break;
+               }
         
-        req = RING_GET_REQUEST(blk_ring, i);
-        switch ( req->operation )
-        {
-        case BLKIF_OP_READ:
-        case BLKIF_OP_WRITE:
-            dispatch_rw_block_io(blkif, req);
-            break;
-
-        default:
-            DPRINTK("error: unknown block io operation [%d]\n",
-                    req->operation);
-            make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
-            break;
-        }
-    }
-
-    blk_ring->req_cons = i;
-    return more_to_do;
+               req = RING_GET_REQUEST(blk_ring, i);
+               switch (req->operation) {
+               case BLKIF_OP_READ:
+               case BLKIF_OP_WRITE:
+                       dispatch_rw_block_io(blkif, req);
+                       break;
+
+               default:
+                       DPRINTK("error: unknown block io operation [%d]\n",
+                               req->operation);
+                       make_response(blkif, req->id, req->operation,
+                                     BLKIF_RSP_ERROR);
+                       break;
+               }
+       }
+
+       blk_ring->req_cons = i;
+       return more_to_do;
 }
 
 static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
 {
-    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
-    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
-    unsigned long fas = 0;
-    int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-    pending_req_t *pending_req;
-    struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    struct phys_req preq;
-    struct { 
-        unsigned long buf; unsigned int nsec;
-    } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    unsigned int nseg;
-    struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    int nbio = 0;
-    request_queue_t *q;
-
-    /* Check that number of segments is sane. */
-    nseg = req->nr_segments;
-    if ( unlikely(nseg == 0) || 
-         unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
-    {
-        DPRINTK("Bad number of segments in request (%d)\n", nseg);
-        goto bad_descriptor;
-    }
-
-    preq.dev           = req->handle;
-    preq.sector_number = req->sector_number;
-    preq.nr_sects      = 0;
-
-    for ( i = 0; i < nseg; i++ )
-    {
-        fas         = req->frame_and_sects[i];
-        seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
-
-        if ( seg[i].nsec <= 0 )
-            goto bad_descriptor;
-        preq.nr_sects += seg[i].nsec;
-
-        map[i].host_addr = MMAP_VADDR(pending_idx, i);
-        map[i].dom = blkif->domid;
-        map[i].ref = blkif_gref_from_fas(fas);
-        map[i].flags = GNTMAP_host_map;
-        if ( operation == WRITE )
-            map[i].flags |= GNTMAP_readonly;
-    }
-
-    if ( unlikely(HYPERVISOR_grant_table_op(
-                    GNTTABOP_map_grant_ref, map, nseg)))
-        BUG();
-
-    for ( i = 0; i < nseg; i++ )
-    {
-        if ( unlikely(map[i].handle < 0) )
-        {
-            DPRINTK("invalid buffer -- could not remap it\n");
-            fast_flush_area(pending_idx, nseg);
-            goto bad_descriptor;
-        }
-
-        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
-            FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
-
-        pending_handle(pending_idx, i) = map[i].handle;
-    }
-
-    for ( i = 0; i < nseg; i++ )
-    {
-        fas         = req->frame_and_sects[i];
-        seg[i].buf  = map[i].dev_bus_addr | (blkif_first_sect(fas) << 9);
-    }
-
-    if ( vbd_translate(&preq, blkif, operation) != 0 )
-    {
-        DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
-                operation == READ ? "read" : "write", preq.sector_number,
-                preq.sector_number + preq.nr_sects, preq.dev); 
-        goto bad_descriptor;
-    }
-
-    pending_req = &pending_reqs[pending_idx];
-    pending_req->blkif     = blkif;
-    pending_req->id        = req->id;
-    pending_req->operation = operation;
-    pending_req->status    = BLKIF_RSP_OKAY;
-    pending_req->nr_pages  = nseg;
-
-    for ( i = 0; i < nseg; i++ )
-    {
-        if ( ((int)preq.sector_number|(int)seg[i].nsec) &
-             ((bdev_hardsect_size(preq.bdev) >> 9) - 1) )
-        {
-            DPRINTK("Misaligned I/O request from domain %d", blkif->domid);
-            goto cleanup_and_fail;
-        }
-
-        while ( (bio == NULL) ||
-                (bio_add_page(bio,
-                              virt_to_page(MMAP_VADDR(pending_idx, i)),
-                              seg[i].nsec << 9,
-                              seg[i].buf & ~PAGE_MASK) == 0) )
-        {
-            bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
-            if ( unlikely(bio == NULL) )
-            {
-            cleanup_and_fail:
-                for ( i = 0; i < (nbio-1); i++ )
-                    bio_put(biolist[i]);
-                fast_flush_area(pending_idx, nseg);
-                goto bad_descriptor;
-            }
+       extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
+       int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
+       unsigned long fas = 0;
+       int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+       pending_req_t *pending_req;
+       struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct phys_req preq;
+       struct { 
+               unsigned long buf; unsigned int nsec;
+       } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       unsigned int nseg;
+       struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       int nbio = 0;
+       request_queue_t *q;
+
+       /* Check that number of segments is sane. */
+       nseg = req->nr_segments;
+       if (unlikely(nseg == 0) || 
+           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+               DPRINTK("Bad number of segments in request (%d)\n", nseg);
+               goto bad_descriptor;
+       }
+
+       preq.dev           = req->handle;
+       preq.sector_number = req->sector_number;
+       preq.nr_sects      = 0;
+
+       for (i = 0; i < nseg; i++) {
+               fas         = req->frame_and_sects[i];
+               seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
+
+               if (seg[i].nsec <= 0)
+                       goto bad_descriptor;
+               preq.nr_sects += seg[i].nsec;
+
+               map[i].host_addr = MMAP_VADDR(pending_idx, i);
+               map[i].dom = blkif->domid;
+               map[i].ref = blkif_gref_from_fas(fas);
+               map[i].flags = GNTMAP_host_map;
+               if ( operation == WRITE )
+                       map[i].flags |= GNTMAP_readonly;
+       }
+
+       BUG_ON(HYPERVISOR_grant_table_op(
+               GNTTABOP_map_grant_ref, map, nseg));
+
+       for (i = 0; i < nseg; i++) {
+               if (unlikely(map[i].handle < 0)) {
+                       DPRINTK("invalid buffer -- could not remap it\n");
+                       fast_flush_area(pending_idx, nseg);
+                       goto bad_descriptor;
+               }
+
+               phys_to_machine_mapping[__pa(MMAP_VADDR(
+                       pending_idx, i)) >> PAGE_SHIFT] =
+                       FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
+
+               pending_handle(pending_idx, i) = map[i].handle;
+       }
+
+       for (i = 0; i < nseg; i++) {
+               fas         = req->frame_and_sects[i];
+               seg[i].buf  = map[i].dev_bus_addr | 
+                       (blkif_first_sect(fas) << 9);
+       }
+
+       if (vbd_translate(&preq, blkif, operation) != 0) {
+               DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
+                       operation == READ ? "read" : "write",
+                       preq.sector_number,
+                       preq.sector_number + preq.nr_sects, preq.dev); 
+               goto bad_descriptor;
+       }
+
+       pending_req = &pending_reqs[pending_idx];
+       pending_req->blkif     = blkif;
+       pending_req->id        = req->id;
+       pending_req->operation = operation;
+       pending_req->status    = BLKIF_RSP_OKAY;
+       pending_req->nr_pages  = nseg;
+
+       for (i = 0; i < nseg; i++) {
+               if (((int)preq.sector_number|(int)seg[i].nsec) &
+                   ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
+                       DPRINTK("Misaligned I/O request from domain %d",
+                               blkif->domid);
+                       goto cleanup_and_fail;
+               }
+
+               while ((bio == NULL) ||
+                      (bio_add_page(bio,
+                                    virt_to_page(MMAP_VADDR(pending_idx, i)),
+                                    seg[i].nsec << 9,
+                                    seg[i].buf & ~PAGE_MASK) == 0)) {
+                       bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
+                       if (unlikely(bio == NULL)) {
+                       cleanup_and_fail:
+                               for (i = 0; i < (nbio-1); i++)
+                                       bio_put(biolist[i]);
+                               fast_flush_area(pending_idx, nseg);
+                               goto bad_descriptor;
+                       }
                 
-            bio->bi_bdev    = preq.bdev;
-            bio->bi_private = pending_req;
-            bio->bi_end_io  = end_block_io_op;
-            bio->bi_sector  = preq.sector_number;
-        }
-
-        preq.sector_number += seg[i].nsec;
-    }
-
-    if ( (q = bdev_get_queue(bio->bi_bdev)) != plugged_queue )
-    {
-        flush_plugged_queue();
-        blk_get_queue(q);
-        plugged_queue = q;
-    }
-
-    atomic_set(&pending_req->pendcnt, nbio);
-    pending_cons++;
-    blkif_get(blkif);
-
-    for ( i = 0; i < nbio; i++ )
-        submit_bio(operation, biolist[i]);
-
-    return;
+                       bio->bi_bdev    = preq.bdev;
+                       bio->bi_private = pending_req;
+                       bio->bi_end_io  = end_block_io_op;
+                       bio->bi_sector  = preq.sector_number;
+               }
+
+               preq.sector_number += seg[i].nsec;
+       }
+
+       if ((q = bdev_get_queue(bio->bi_bdev)) != plugged_queue) {
+               flush_plugged_queue();
+               blk_get_queue(q);
+               plugged_queue = q;
+       }
+
+       atomic_set(&pending_req->pendcnt, nbio);
+       pending_cons++;
+       blkif_get(blkif);
+
+       for (i = 0; i < nbio; i++)
+               submit_bio(operation, biolist[i]);
+
+       return;
 
  bad_descriptor:
-    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+       make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
 } 
 
 
@@ -475,66 +465,71 @@
 static void make_response(blkif_t *blkif, unsigned long id, 
                           unsigned short op, int st)
 {
-    blkif_response_t *resp;
-    unsigned long     flags;
-    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
-
-    /* Place on the response ring for the relevant domain. */ 
-    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
-    resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
-    resp->id        = id;
-    resp->operation = op;
-    resp->status    = st;
-    wmb(); /* Ensure other side can see the response fields. */
-    blk_ring->rsp_prod_pvt++;
-    RING_PUSH_RESPONSES(blk_ring);
-    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
-
-    /* Kick the relevant domain. */
-    notify_via_evtchn(blkif->evtchn);
+       blkif_response_t *resp;
+       unsigned long     flags;
+       blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+
+       /* Place on the response ring for the relevant domain. */ 
+       spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+       resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
+       resp->id        = id;
+       resp->operation = op;
+       resp->status    = st;
+       wmb(); /* Ensure other side can see the response fields. */
+       blk_ring->rsp_prod_pvt++;
+       RING_PUSH_RESPONSES(blk_ring);
+       spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+       /* Kick the relevant domain. */
+       notify_via_evtchn(blkif->evtchn);
 }
 
 void blkif_deschedule(blkif_t *blkif)
 {
-    remove_from_blkdev_list(blkif);
+       remove_from_blkdev_list(blkif);
 }
 
 static int __init blkif_init(void)
 {
-    int i;
-    struct page *page;
-
-    if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
-         !(xen_start_info->flags & SIF_BLK_BE_DOMAIN) )
-        return 0;
-
-    blkif_interface_init();
-
-    page = balloon_alloc_empty_page_range(MMAP_PAGES);
-    BUG_ON(page == NULL);
-    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
-    pending_cons = 0;
-    pending_prod = MAX_PENDING_REQS;
-    memset(pending_reqs, 0, sizeof(pending_reqs));
-    for ( i = 0; i < MAX_PENDING_REQS; i++ )
-        pending_ring[i] = i;
+       int i;
+       struct page *page;
+
+       if (!(xen_start_info->flags & SIF_INITDOMAIN) &&
+           !(xen_start_info->flags & SIF_BLK_BE_DOMAIN))
+               return 0;
+
+       blkif_interface_init();
+
+       page = balloon_alloc_empty_page_range(MMAP_PAGES);
+       BUG_ON(page == NULL);
+       mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+       pending_cons = 0;
+       pending_prod = MAX_PENDING_REQS;
+       memset(pending_reqs, 0, sizeof(pending_reqs));
+       for (i = 0; i < MAX_PENDING_REQS; i++)
+               pending_ring[i] = i;
     
-    spin_lock_init(&blkio_schedule_list_lock);
-    INIT_LIST_HEAD(&blkio_schedule_list);
-
-    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
-        BUG();
-
-    blkif_xenbus_init();
-
-    memset( pending_grant_handles,  BLKBACK_INVALID_HANDLE, MMAP_PAGES );
-
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
-    printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
-#endif
-
-    return 0;
+       spin_lock_init(&blkio_schedule_list_lock);
+       INIT_LIST_HEAD(&blkio_schedule_list);
+
+       BUG_ON(kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0);
+
+       blkif_xenbus_init();
+
+       memset(pending_grant_handles,  BLKBACK_INVALID_HANDLE, MMAP_PAGES);
+
+       return 0;
 }
 
 __initcall(blkif_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Thu Sep 22 17:42:01 2005
@@ -17,6 +17,7 @@
 #include <asm-xen/xen-public/io/blkif.h>
 #include <asm-xen/xen-public/io/ring.h>
 #include <asm-xen/gnttab.h>
+#include <asm-xen/driver_util.h>
 
 #if 0
 #define ASSERT(_p) \
@@ -30,39 +31,39 @@
 #endif
 
 struct vbd {
-    blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
-    unsigned char  readonly;    /* Non-zero -> read-only */
-    unsigned char  type;        /* VDISK_xxx */
-    u32            pdevice;     /* phys device that this vbd maps to */
-    struct block_device *bdev;
+       blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
+       unsigned char  readonly;    /* Non-zero -> read-only */
+       unsigned char  type;        /* VDISK_xxx */
+       u32            pdevice;     /* phys device that this vbd maps to */
+       struct block_device *bdev;
 }; 
 
 typedef struct blkif_st {
-    /* Unique identifier for this interface. */
-    domid_t           domid;
-    unsigned int      handle;
-    /* Physical parameters of the comms window. */
-    unsigned long     shmem_frame;
-    unsigned int      evtchn;
-    unsigned int      remote_evtchn;
-    /* Comms information. */
-    blkif_back_ring_t blk_ring;
-    /* VBDs attached to this interface. */
-    struct vbd        vbd;
-    /* Private fields. */
-    enum { DISCONNECTED, CONNECTED } status;
+       /* Unique identifier for this interface. */
+       domid_t           domid;
+       unsigned int      handle;
+       /* Physical parameters of the comms window. */
+       unsigned int      evtchn;
+       unsigned int      remote_evtchn;
+       /* Comms information. */
+       blkif_back_ring_t blk_ring;
+       struct vm_struct *blk_ring_area;
+       /* VBDs attached to this interface. */
+       struct vbd        vbd;
+       /* Private fields. */
+       enum { DISCONNECTED, CONNECTED } status;
 #ifdef CONFIG_XEN_BLKDEV_TAP_BE
-    /* Is this a blktap frontend */
-    unsigned int     is_blktap;
+       /* Is this a blktap frontend */
+       unsigned int     is_blktap;
 #endif
-    struct list_head blkdev_list;
-    spinlock_t       blk_ring_lock;
-    atomic_t         refcnt;
+       struct list_head blkdev_list;
+       spinlock_t       blk_ring_lock;
+       atomic_t         refcnt;
 
-    struct work_struct free_work;
-    u16 shmem_handle;
-    unsigned long shmem_vaddr;
-    grant_ref_t shmem_ref;
+       struct work_struct free_work;
+
+       u16         shmem_handle;
+       grant_ref_t shmem_ref;
 } blkif_t;
 
 blkif_t *alloc_blkif(domid_t domid);
@@ -70,11 +71,11 @@
 int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
 
 #define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define blkif_put(_b)                             \
-    do {                                          \
-        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            free_blkif_callback(_b);             \
-    } while (0)
+#define blkif_put(_b)                                  \
+       do {                                            \
+               if (atomic_dec_and_test(&(_b)->refcnt)) \
+                       free_blkif_callback(_b);        \
+       } while (0)
 
 /* Create a vbd. */
 int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, u32 pdevice,
@@ -86,10 +87,10 @@
 unsigned long vbd_secsize(struct vbd *vbd);
 
 struct phys_req {
-    unsigned short       dev;
-    unsigned short       nr_sects;
-    struct block_device *bdev;
-    blkif_sector_t       sector_number;
+       unsigned short       dev;
+       unsigned short       nr_sects;
+       struct block_device *bdev;
+       blkif_sector_t       sector_number;
 };
 
 int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 
@@ -103,3 +104,13 @@
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
 
 #endif /* __BLKIF__BACKEND__COMMON_H__ */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c      Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c      Thu Sep 22 17:42:01 2005
@@ -13,131 +13,144 @@
 
 blkif_t *alloc_blkif(domid_t domid)
 {
-    blkif_t *blkif;
+       blkif_t *blkif;
 
-    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
-    if (!blkif)
-           return ERR_PTR(-ENOMEM);
+       blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+       if (!blkif)
+               return ERR_PTR(-ENOMEM);
 
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid = domid;
-    blkif->status = DISCONNECTED;
-    spin_lock_init(&blkif->blk_ring_lock);
-    atomic_set(&blkif->refcnt, 1);
+       memset(blkif, 0, sizeof(*blkif));
+       blkif->domid = domid;
+       blkif->status = DISCONNECTED;
+       spin_lock_init(&blkif->blk_ring_lock);
+       atomic_set(&blkif->refcnt, 1);
 
-    return blkif;
+       return blkif;
 }
 
-static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
-                            unsigned long shared_page)
+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
 {
-    struct gnttab_map_grant_ref op;
-    op.host_addr = localaddr;
-    op.flags = GNTMAP_host_map;
-    op.ref = shared_page;
-    op.dom = blkif->domid;
+       struct gnttab_map_grant_ref op;
 
-    BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+       op.host_addr = (unsigned long)blkif->blk_ring_area->addr;
+       op.flags     = GNTMAP_host_map;
+       op.ref       = shared_page;
+       op.dom       = blkif->domid;
 
-    if (op.handle < 0) {
-       DPRINTK(" Grant table operation failure !\n");
-       return op.handle;
-    }
+       lock_vm_area(blkif->blk_ring_area);
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
+       unlock_vm_area(blkif->blk_ring_area);
 
-    blkif->shmem_ref = shared_page;
-    blkif->shmem_handle = op.handle;
-    blkif->shmem_vaddr = localaddr;
-    return 0;
+       if (op.handle < 0) {
+               DPRINTK(" Grant table operation failure !\n");
+               return op.handle;
+       }
+
+       blkif->shmem_ref = shared_page;
+       blkif->shmem_handle = op.handle;
+
+       return 0;
 }
 
 static void unmap_frontend_page(blkif_t *blkif)
 {
-    struct gnttab_unmap_grant_ref op;
+       struct gnttab_unmap_grant_ref op;
 
-    op.host_addr = blkif->shmem_vaddr;
-    op.handle = blkif->shmem_handle;
-    op.dev_bus_addr = 0;
-    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+       op.host_addr    = (unsigned long)blkif->blk_ring_area->addr;
+       op.handle       = blkif->shmem_handle;
+       op.dev_bus_addr = 0;
+
+       lock_vm_area(blkif->blk_ring_area);
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+       unlock_vm_area(blkif->blk_ring_area);
 }
 
 int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
 {
-    struct vm_struct *vma;
-    blkif_sring_t *sring;
-    evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
-    int err;
+       blkif_sring_t *sring;
+       evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+       int err;
 
-    BUG_ON(blkif->remote_evtchn);
+       BUG_ON(blkif->remote_evtchn);
 
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
-       return -ENOMEM;
+       if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
+               return -ENOMEM;
 
-    err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page);
-    if (err) {
-        vfree(vma->addr);
-       return err;
-    }
+       err = map_frontend_page(blkif, shared_page);
+       if (err) {
+               free_vm_area(blkif->blk_ring_area);
+               return err;
+       }
 
-    op.u.bind_interdomain.dom1 = DOMID_SELF;
-    op.u.bind_interdomain.dom2 = blkif->domid;
-    op.u.bind_interdomain.port1 = 0;
-    op.u.bind_interdomain.port2 = evtchn;
-    err = HYPERVISOR_event_channel_op(&op);
-    if (err) {
-       unmap_frontend_page(blkif);
-       vfree(vma->addr);
-       return err;
-    }
+       op.u.bind_interdomain.dom1 = DOMID_SELF;
+       op.u.bind_interdomain.dom2 = blkif->domid;
+       op.u.bind_interdomain.port1 = 0;
+       op.u.bind_interdomain.port2 = evtchn;
+       err = HYPERVISOR_event_channel_op(&op);
+       if (err) {
+               unmap_frontend_page(blkif);
+               free_vm_area(blkif->blk_ring_area);
+               return err;
+       }
 
-    blkif->evtchn = op.u.bind_interdomain.port1;
-    blkif->remote_evtchn = evtchn;
+       blkif->evtchn = op.u.bind_interdomain.port1;
+       blkif->remote_evtchn = evtchn;
 
-    sring = (blkif_sring_t *)vma->addr;
-    SHARED_RING_INIT(sring);
-    BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
+       sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
+       SHARED_RING_INIT(sring);
+       BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
 
-    bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend",
-                             blkif);
-    blkif->status        = CONNECTED;
-    blkif->shmem_frame   = shared_page;
+       bind_evtchn_to_irqhandler(
+               blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
+       blkif->status = CONNECTED;
 
-    return 0;
+       return 0;
 }
 
 static void free_blkif(void *arg)
 {
-    evtchn_op_t op = { .cmd = EVTCHNOP_close };
-    blkif_t *blkif = (blkif_t *)arg;
+       evtchn_op_t op = { .cmd = EVTCHNOP_close };
+       blkif_t *blkif = (blkif_t *)arg;
 
-    op.u.close.port = blkif->evtchn;
-    op.u.close.dom = DOMID_SELF;
-    HYPERVISOR_event_channel_op(&op);
-    op.u.close.port = blkif->remote_evtchn;
-    op.u.close.dom = blkif->domid;
-    HYPERVISOR_event_channel_op(&op);
+       op.u.close.port = blkif->evtchn;
+       op.u.close.dom = DOMID_SELF;
+       HYPERVISOR_event_channel_op(&op);
+       op.u.close.port = blkif->remote_evtchn;
+       op.u.close.dom = blkif->domid;
+       HYPERVISOR_event_channel_op(&op);
 
-    vbd_free(&blkif->vbd);
+       vbd_free(&blkif->vbd);
 
-    if (blkif->evtchn)
-        unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
+       if (blkif->evtchn)
+               unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
 
-    if (blkif->blk_ring.sring) {
-       unmap_frontend_page(blkif);
-       vfree(blkif->blk_ring.sring);
-       blkif->blk_ring.sring = NULL;
-    }
+       if (blkif->blk_ring.sring) {
+               unmap_frontend_page(blkif);
+               free_vm_area(blkif->blk_ring_area);
+               blkif->blk_ring.sring = NULL;
+       }
 
-    kmem_cache_free(blkif_cachep, blkif);
+       kmem_cache_free(blkif_cachep, blkif);
 }
 
 void free_blkif_callback(blkif_t *blkif)
 {
-    INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
-    schedule_work(&blkif->free_work);
+       INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
+       schedule_work(&blkif->free_work);
 }
 
 void __init blkif_interface_init(void)
 {
-    blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
-                                     0, 0, NULL, NULL);
+       blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
+                                        0, 0, NULL, NULL);
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c    Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c    Thu Sep 22 17:42:01 2005
@@ -11,10 +11,10 @@
 
 static inline dev_t vbd_map_devnum(u32 cookie)
 {
-    return MKDEV(BLKIF_MAJOR(cookie), BLKIF_MINOR(cookie));
+       return MKDEV(BLKIF_MAJOR(cookie), BLKIF_MINOR(cookie));
 }
-#define vbd_sz(_v)   ((_v)->bdev->bd_part ? \
-    (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
+#define vbd_sz(_v)   ((_v)->bdev->bd_part ?                            \
+       (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
 #define bdev_put(_b) blkdev_put(_b)
 
 unsigned long vbd_size(struct vbd *vbd)
@@ -35,63 +35,73 @@
 int vbd_create(blkif_t *blkif, blkif_vdev_t handle,
               u32 pdevice, int readonly)
 {
-    struct vbd *vbd;
+       struct vbd *vbd;
 
-    vbd = &blkif->vbd;
-    vbd->handle   = handle; 
-    vbd->readonly = readonly;
-    vbd->type     = 0;
+       vbd = &blkif->vbd;
+       vbd->handle   = handle; 
+       vbd->readonly = readonly;
+       vbd->type     = 0;
 
-    vbd->pdevice  = pdevice;
+       vbd->pdevice  = pdevice;
 
-    vbd->bdev = open_by_devnum(
-        vbd_map_devnum(vbd->pdevice),
-        vbd->readonly ? FMODE_READ : FMODE_WRITE);
-    if ( IS_ERR(vbd->bdev) )
-    {
-        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-        return -ENOENT;
-    }
+       vbd->bdev = open_by_devnum(
+               vbd_map_devnum(vbd->pdevice),
+               vbd->readonly ? FMODE_READ : FMODE_WRITE);
+       if (IS_ERR(vbd->bdev)) {
+               DPRINTK("vbd_creat: device %08x doesn't exist.\n",
+                       vbd->pdevice);
+               return -ENOENT;
+       }
 
-    if ( (vbd->bdev->bd_disk == NULL) )
-    {
-        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-       vbd_free(vbd);
-        return -ENOENT;
-    }
+       if (vbd->bdev->bd_disk == NULL) {
+               DPRINTK("vbd_creat: device %08x doesn't exist.\n",
+                       vbd->pdevice);
+               vbd_free(vbd);
+               return -ENOENT;
+       }
 
-    if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
-        vbd->type |= VDISK_CDROM;
-    if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
-        vbd->type |= VDISK_REMOVABLE;
+       if (vbd->bdev->bd_disk->flags & GENHD_FL_CD)
+               vbd->type |= VDISK_CDROM;
+       if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
+               vbd->type |= VDISK_REMOVABLE;
 
-    DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
-            handle, blkif->domid);
-    return 0;
+       DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+               handle, blkif->domid);
+       return 0;
 }
 
 void vbd_free(struct vbd *vbd)
 {
-    if (vbd->bdev)
-       bdev_put(vbd->bdev);
-    vbd->bdev = NULL;
+       if (vbd->bdev)
+               bdev_put(vbd->bdev);
+       vbd->bdev = NULL;
 }
 
 int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
 {
-    struct vbd *vbd = &blkif->vbd;
-    int rc = -EACCES;
+       struct vbd *vbd = &blkif->vbd;
+       int rc = -EACCES;
 
-    if ((operation == WRITE) && vbd->readonly)
-        goto out;
+       if ((operation == WRITE) && vbd->readonly)
+               goto out;
 
-    if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
-        goto out;
+       if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
+               goto out;
 
-    req->dev  = vbd->pdevice;
-    req->bdev = vbd->bdev;
-    rc = 0;
+       req->dev  = vbd->pdevice;
+       req->bdev = vbd->bdev;
+       rc = 0;
 
  out:
-    return rc;
+       return rc;
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu Sep 22 17:42:01 2005
@@ -124,7 +124,7 @@
 
        return;
 
-abort:
+ abort:
        xenbus_transaction_end(1);
 }
 
@@ -228,6 +228,7 @@
        be->dev = dev;
        be->backend_watch.node = dev->nodename;
        be->backend_watch.callback = backend_changed;
+       /* Will implicitly call backend_changed once. */
        err = register_xenbus_watch(&be->backend_watch);
        if (err) {
                be->backend_watch.node = NULL;
@@ -249,8 +250,6 @@
        }
 
        dev->data = be;
-
-       backend_changed(&be->backend_watch, dev->nodename);
        return 0;
 
  free_be:
@@ -279,3 +278,13 @@
 {
        xenbus_register_backend(&blkback);
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Thu Sep 22 17:42:01 2005
@@ -146,4 +146,15 @@
 int xlvbd_add(blkif_sector_t capacity, int device,
              u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
 void xlvbd_del(struct blkfront_info *info);
+
 #endif /* __XEN_DRIVERS_BLOCK_H__ */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Thu Sep 22 17:42:01 2005
@@ -65,7 +65,7 @@
 };
 
 static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
-                                         NUM_VBD_MAJORS];
+                                        NUM_VBD_MAJORS];
 
 #define XLBD_MAJOR_IDE_START   0
 #define XLBD_MAJOR_SCSI_START  (NUM_IDE_MAJORS)
@@ -309,3 +309,13 @@
 
        bdput(bd);
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c  Thu Sep 22 17:42:01 2005
@@ -4,7 +4,6 @@
  * This is a modified version of the block backend driver that remaps requests
  * to a user-space memory region.  It is intended to be used to write 
  * application-level servers that provide block interfaces to client VMs.
- * 
  */
 
 #include <linux/kernel.h>
@@ -67,20 +66,19 @@
 
 static inline int BLKTAP_MODE_VALID(unsigned long arg)
 {
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) );
+       return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
+               (arg == BLKTAP_MODE_INTERCEPT_FE) ||
+               (arg == BLKTAP_MODE_INTERPOSE   ));
 /*
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
-        );
+  return (
+  ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
+  ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
+  ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
+  ( arg == BLKTAP_MODE_INTERPOSE    ) ||
+  ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
+  ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
+  ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
+  );
 */
 }
 
@@ -110,14 +108,12 @@
 unsigned long rings_vstart; /* start of mmaped vma               */
 unsigned long user_vstart;  /* start of user mappings            */
 
-#define MMAP_PAGES                                              \
-    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#define MMAP_VADDR(_start, _req,_seg)                           \
-    (_start +                                                   \
-     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
-     ((_seg) * PAGE_SIZE))
-
-
+#define MMAP_PAGES                                             \
+       (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define MMAP_VADDR(_start, _req,_seg)                                  \
+       (_start +                                                       \
+        ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +        \
+        ((_seg) * PAGE_SIZE))
 
 /*
  * Each outstanding request that we've passed to the lower device layers has a 
@@ -126,12 +122,12 @@
  * response queued for it, with the saved 'id' passed back.
  */
 typedef struct {
-    blkif_t       *blkif;
-    unsigned long  id;
-    int            nr_pages;
-    atomic_t       pendcnt;
-    unsigned short operation;
-    int            status;
+       blkif_t       *blkif;
+       unsigned long  id;
+       int            nr_pages;
+       atomic_t       pendcnt;
+       unsigned short operation;
+       int            status;
 } pending_req_t;
 
 /*
@@ -156,17 +152,17 @@
 
 static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
 {
-    return ( (fe_dom << 16) | MASK_PEND_IDX(idx) );
+       return ((fe_dom << 16) | MASK_PEND_IDX(idx));
 }
 
 extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id) 
 { 
-    return (PEND_RING_IDX)( id & 0x0000ffff );
+       return (PEND_RING_IDX)(id & 0x0000ffff);
 }
 
 extern inline domid_t ID_TO_DOM(unsigned long id) 
 { 
-    return (domid_t)(id >> 16); 
+       return (domid_t)(id >> 16); 
 }
 
 
@@ -181,8 +177,8 @@
  */
 struct grant_handle_pair
 {
-    u16  kernel;
-    u16  user;
+       u16  kernel;
+       u16  user;
 };
 static struct grant_handle_pair pending_grant_handles[MMAP_PAGES];
 #define pending_handle(_idx, _i) \
@@ -199,21 +195,20 @@
  */
 
 static struct page *blktap_nopage(struct vm_area_struct *vma,
-                                             unsigned long address,
-                                             int *type)
-{
-    /*
-     * if the page has not been mapped in by the driver then generate
-     * a SIGBUS to the domain.
-     */
-
-    force_sig(SIGBUS, current);
-
-    return 0;
+                                 unsigned long address,
+                                 int *type)
+{
+       /*
+        * if the page has not been mapped in by the driver then generate
+        * a SIGBUS to the domain.
+        */
+       force_sig(SIGBUS, current);
+
+       return 0;
 }
 
 struct vm_operations_struct blktap_vm_ops = {
-    nopage:   blktap_nopage,
+       nopage:   blktap_nopage,
 };
 
 /******************************************************************
@@ -222,44 +217,45 @@
 
 static int blktap_open(struct inode *inode, struct file *filp)
 {
-    blkif_sring_t *sring;
+       blkif_sring_t *sring;
+
+       if (test_and_set_bit(0, &blktap_dev_inuse))
+               return -EBUSY;
     
-    if ( test_and_set_bit(0, &blktap_dev_inuse) )
-        return -EBUSY;
+       /* Allocate the fe ring. */
+       sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+       if (sring == NULL)
+               goto fail_nomem;
+
+       SetPageReserved(virt_to_page(sring));
     
-    /* Allocate the fe ring. */
-    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
-    if (sring == NULL)
-        goto fail_nomem;
-
-    SetPageReserved(virt_to_page(sring));
-    
-    SHARED_RING_INIT(sring);
-    FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
-
-    return 0;
+       SHARED_RING_INIT(sring);
+       FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
+
+       return 0;
 
  fail_nomem:
-    return -ENOMEM;
+       return -ENOMEM;
 }
 
 static int blktap_release(struct inode *inode, struct file *filp)
 {
-    blktap_dev_inuse = 0;
-    blktap_ring_ok = 0;
-
-    /* Free the ring page. */
-    ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
-    free_page((unsigned long) blktap_ufe_ring.sring);
-
-    /* Clear any active mappings and free foreign map table */
-    if (blktap_vma != NULL) {
-        zap_page_range(blktap_vma, blktap_vma->vm_start, 
-                       blktap_vma->vm_end - blktap_vma->vm_start, NULL);
-        blktap_vma = NULL;
-    }
-
-    return 0;
+       blktap_dev_inuse = 0;
+       blktap_ring_ok = 0;
+
+       /* Free the ring page. */
+       ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+       free_page((unsigned long) blktap_ufe_ring.sring);
+
+       /* Clear any active mappings and free foreign map table */
+       if (blktap_vma != NULL) {
+               zap_page_range(
+                       blktap_vma, blktap_vma->vm_start, 
+                       blktap_vma->vm_end - blktap_vma->vm_start, NULL);
+               blktap_vma = NULL;
+       }
+
+       return 0;
 }
 
 
@@ -283,128 +279,124 @@
  */
 static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
 {
-    int size;
-    struct page **map;
-    int i;
-
-    DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n",
-           vma->vm_start, vma->vm_end);
-
-    vma->vm_flags |= VM_RESERVED;
-    vma->vm_ops = &blktap_vm_ops;
-
-    size = vma->vm_end - vma->vm_start;
-    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
-        printk(KERN_INFO 
-               "blktap: you _must_ map exactly %d pages!\n",
-               MMAP_PAGES + RING_PAGES);
-        return -EAGAIN;
-    }
-
-    size >>= PAGE_SHIFT;
-    DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
+       int size;
+       struct page **map;
+       int i;
+
+       DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+               vma->vm_start, vma->vm_end);
+
+       vma->vm_flags |= VM_RESERVED;
+       vma->vm_ops = &blktap_vm_ops;
+
+       size = vma->vm_end - vma->vm_start;
+       if (size != ((MMAP_PAGES + RING_PAGES) << PAGE_SHIFT)) {
+               printk(KERN_INFO 
+                      "blktap: you _must_ map exactly %d pages!\n",
+                      MMAP_PAGES + RING_PAGES);
+               return -EAGAIN;
+       }
+
+       size >>= PAGE_SHIFT;
+       DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
     
-    rings_vstart = vma->vm_start;
-    user_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+       rings_vstart = vma->vm_start;
+       user_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
     
-    /* Map the ring pages to the start of the region and reserve it. */
-
-    /* not sure if I really need to do this... */
-    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-    if (remap_pfn_range(vma, vma->vm_start, 
-                         __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 
-                         PAGE_SIZE, vma->vm_page_prot)) 
-    {
-        WPRINTK("Mapping user ring failed!\n");
-        goto fail;
-    }
-
-    /* Mark this VM as containing foreign pages, and set up mappings. */
-    map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
-                  * sizeof(struct page_struct*),
-                  GFP_KERNEL);
-    if (map == NULL) 
-    {
-        WPRINTK("Couldn't alloc VM_FOREIGH map.\n");
-        goto fail;
-    }
-
-    for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
-        map[i] = NULL;
+       /* Map the ring pages to the start of the region and reserve it. */
+
+       /* not sure if I really need to do this... */
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+       if (remap_pfn_range(vma, vma->vm_start, 
+                           __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 
+                           PAGE_SIZE, vma->vm_page_prot)) {
+               WPRINTK("Mapping user ring failed!\n");
+               goto fail;
+       }
+
+       /* Mark this VM as containing foreign pages, and set up mappings. */
+       map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
+                     * sizeof(struct page_struct*),
+                     GFP_KERNEL);
+       if (map == NULL) {
+               WPRINTK("Couldn't alloc VM_FOREIGH map.\n");
+               goto fail;
+       }
+
+       for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
+               map[i] = NULL;
     
-    vma->vm_private_data = map;
-    vma->vm_flags |= VM_FOREIGN;
-
-    blktap_vma = vma;
-    blktap_ring_ok = 1;
-
-    return 0;
+       vma->vm_private_data = map;
+       vma->vm_flags |= VM_FOREIGN;
+
+       blktap_vma = vma;
+       blktap_ring_ok = 1;
+
+       return 0;
  fail:
-    /* Clear any active mappings. */
-    zap_page_range(vma, vma->vm_start, 
-                   vma->vm_end - vma->vm_start, NULL);
-
-    return -ENOMEM;
+       /* Clear any active mappings. */
+       zap_page_range(vma, vma->vm_start, 
+                      vma->vm_end - vma->vm_start, NULL);
+
+       return -ENOMEM;
 }
 
 static int blktap_ioctl(struct inode *inode, struct file *filp,
                         unsigned int cmd, unsigned long arg)
 {
-    switch(cmd) {
-    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
-        return blktap_read_ufe_ring();
-
-    case BLKTAP_IOCTL_SETMODE:
-        if (BLKTAP_MODE_VALID(arg)) {
-            blktap_mode = arg;
-            /* XXX: may need to flush rings here. */
-            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
-            return 0;
-        }
-    case BLKTAP_IOCTL_PRINT_IDXS:
+       switch(cmd) {
+       case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
+               return blktap_read_ufe_ring();
+
+       case BLKTAP_IOCTL_SETMODE:
+               if (BLKTAP_MODE_VALID(arg)) {
+                       blktap_mode = arg;
+                       /* XXX: may need to flush rings here. */
+                       printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+                       return 0;
+               }
+       case BLKTAP_IOCTL_PRINT_IDXS:
         {
-            //print_fe_ring_idxs();
-            WPRINTK("User Rings: \n-----------\n");
-            WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
-                            "| req_prod: %2d, rsp_prod: %2d\n",
-                            blktap_ufe_ring.rsp_cons,
-                            blktap_ufe_ring.req_prod_pvt,
-                            blktap_ufe_ring.sring->req_prod,
-                            blktap_ufe_ring.sring->rsp_prod);
+               //print_fe_ring_idxs();
+               WPRINTK("User Rings: \n-----------\n");
+               WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
+                       "| req_prod: %2d, rsp_prod: %2d\n",
+                       blktap_ufe_ring.rsp_cons,
+                       blktap_ufe_ring.req_prod_pvt,
+                       blktap_ufe_ring.sring->req_prod,
+                       blktap_ufe_ring.sring->rsp_prod);
             
         }
-    }
-    return -ENOIOCTLCMD;
+       }
+       return -ENOIOCTLCMD;
 }
 
 static unsigned int blktap_poll(struct file *file, poll_table *wait)
 {
-        poll_wait(file, &blktap_wait, wait);
-        if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) ) 
-        {
-            flush_tlb_all();
-
-            RING_PUSH_REQUESTS(&blktap_ufe_ring);
-            return POLLIN | POLLRDNORM;
-        }
-
-        return 0;
+       poll_wait(file, &blktap_wait, wait);
+       if (RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)) {
+               flush_tlb_all();
+               RING_PUSH_REQUESTS(&blktap_ufe_ring);
+               return POLLIN | POLLRDNORM;
+       }
+
+       return 0;
 }
 
 void blktap_kick_user(void)
 {
-    /* blktap_ring->req_prod = blktap_req_prod; */
-    wake_up_interruptible(&blktap_wait);
+       /* blktap_ring->req_prod = blktap_req_prod; */
+       wake_up_interruptible(&blktap_wait);
 }
 
 static struct file_operations blktap_fops = {
-    owner:    THIS_MODULE,
-    poll:     blktap_poll,
-    ioctl:    blktap_ioctl,
-    open:     blktap_open,
-    release:  blktap_release,
-    mmap:     blktap_mmap,
+       owner:    THIS_MODULE,
+       poll:     blktap_poll,
+       ioctl:    blktap_ioctl,
+       open:     blktap_open,
+       release:  blktap_release,
+       mmap:     blktap_mmap,
 };
 
 
@@ -417,44 +409,44 @@
 
 static void fast_flush_area(int idx, int nr_pages)
 {
-    struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
-    unsigned int i, op = 0;
-    struct grant_handle_pair *handle;
-    unsigned long ptep;
-
-    for (i=0; i<nr_pages; i++)
-    {
-        handle = &pending_handle(idx, i);
-        if (!BLKTAP_INVALID_HANDLE(handle))
-        {
-
-            unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i);
-            unmap[op].dev_bus_addr = 0;
-            unmap[op].handle = handle->kernel;
-            op++;
-
-            if (create_lookup_pte_addr(blktap_vma->vm_mm,
-                                       MMAP_VADDR(user_vstart, idx, i), 
-                                       &ptep) !=0) {
-                DPRINTK("Couldn't get a pte addr!\n");
-                return;
-            }
-            unmap[op].host_addr    = ptep;
-            unmap[op].dev_bus_addr = 0;
-            unmap[op].handle       = handle->user;
-            op++;
+       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+       unsigned int i, op = 0;
+       struct grant_handle_pair *handle;
+       unsigned long ptep;
+
+       for ( i = 0; i < nr_pages; i++)
+       {
+               handle = &pending_handle(idx, i);
+               if (BLKTAP_INVALID_HANDLE(handle))
+                       continue;
+
+               unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i);
+               unmap[op].dev_bus_addr = 0;
+               unmap[op].handle = handle->kernel;
+               op++;
+
+               if (create_lookup_pte_addr(
+                       blktap_vma->vm_mm,
+                       MMAP_VADDR(user_vstart, idx, i), 
+                       &ptep) !=0) {
+                       DPRINTK("Couldn't get a pte addr!\n");
+                       return;
+               }
+               unmap[op].host_addr    = ptep;
+               unmap[op].dev_bus_addr = 0;
+               unmap[op].handle       = handle->user;
+               op++;
             
-           BLKTAP_INVALIDATE_HANDLE(handle);
-        }
-    }
-    if ( unlikely(HYPERVISOR_grant_table_op(
-        GNTTABOP_unmap_grant_ref, unmap, op)))
-        BUG();
-
-    if (blktap_vma != NULL)
-        zap_page_range(blktap_vma, 
-                       MMAP_VADDR(user_vstart, idx, 0), 
-                       nr_pages << PAGE_SHIFT, NULL);
+               BLKTAP_INVALIDATE_HANDLE(handle);
+       }
+
+       BUG_ON(HYPERVISOR_grant_table_op(
+               GNTTABOP_unmap_grant_ref, unmap, op));
+
+       if (blktap_vma != NULL)
+               zap_page_range(blktap_vma, 
+                              MMAP_VADDR(user_vstart, idx, 0), 
+                              nr_pages << PAGE_SHIFT, NULL);
 }
 
 /******************************************************************
@@ -466,34 +458,38 @@
 
 static int __on_blkdev_list(blkif_t *blkif)
 {
-    return blkif->blkdev_list.next != NULL;
+       return blkif->blkdev_list.next != NULL;
 }
 
 static void remove_from_blkdev_list(blkif_t *blkif)
 {
-    unsigned long flags;
-    if ( !__on_blkdev_list(blkif) ) return;
-    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-    if ( __on_blkdev_list(blkif) )
-    {
-        list_del(&blkif->blkdev_list);
-        blkif->blkdev_list.next = NULL;
-        blkif_put(blkif);
-    }
-    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+       unsigned long flags;
+
+       if (!__on_blkdev_list(blkif))
+               return;
+
+       spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+       if (__on_blkdev_list(blkif)) {
+               list_del(&blkif->blkdev_list);
+               blkif->blkdev_list.next = NULL;
+               blkif_put(blkif);
+       }
+       spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
 }
 
 static void add_to_blkdev_list_tail(blkif_t *blkif)
 {
-    unsigned long flags;
-    if ( __on_blkdev_list(blkif) ) return;
-    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
-    {
-        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
-        blkif_get(blkif);
-    }
-    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+       unsigned long flags;
+
+       if (__on_blkdev_list(blkif))
+               return;
+
+       spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+       if (!__on_blkdev_list(blkif) && (blkif->status == CONNECTED)) {
+               list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+               blkif_get(blkif);
+       }
+       spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
 }
 
 
@@ -505,51 +501,50 @@
 
 static int blkio_schedule(void *arg)
 {
-    DECLARE_WAITQUEUE(wq, current);
-
-    blkif_t          *blkif;
-    struct list_head *ent;
-
-    daemonize("xenblkd");
-
-    for ( ; ; )
-    {
-        /* Wait for work to do. */
-        add_wait_queue(&blkio_schedule_wait, &wq);
-        set_current_state(TASK_INTERRUPTIBLE);
-        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
-             list_empty(&blkio_schedule_list) )
-            schedule();
-        __set_current_state(TASK_RUNNING);
-        remove_wait_queue(&blkio_schedule_wait, &wq);
-
-        /* Queue up a batch of requests. */
-        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
-                !list_empty(&blkio_schedule_list) )
-        {
-            ent = blkio_schedule_list.next;
-            blkif = list_entry(ent, blkif_t, blkdev_list);
-            blkif_get(blkif);
-            remove_from_blkdev_list(blkif);
-            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
-                add_to_blkdev_list_tail(blkif);
-            blkif_put(blkif);
-        }
-    }
+       DECLARE_WAITQUEUE(wq, current);
+
+       blkif_t          *blkif;
+       struct list_head *ent;
+
+       daemonize("xenblkd");
+
+       for (;;) {
+               /* Wait for work to do. */
+               add_wait_queue(&blkio_schedule_wait, &wq);
+               set_current_state(TASK_INTERRUPTIBLE);
+               if ((NR_PENDING_REQS == MAX_PENDING_REQS) || 
+                   list_empty(&blkio_schedule_list))
+                       schedule();
+               __set_current_state(TASK_RUNNING);
+               remove_wait_queue(&blkio_schedule_wait, &wq);
+
+               /* Queue up a batch of requests. */
+               while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+                      !list_empty(&blkio_schedule_list)) {
+                       ent = blkio_schedule_list.next;
+                       blkif = list_entry(ent, blkif_t, blkdev_list);
+                       blkif_get(blkif);
+                       remove_from_blkdev_list(blkif);
+                       if (do_block_io_op(blkif, BATCH_PER_DOMAIN))
+                               add_to_blkdev_list_tail(blkif);
+                       blkif_put(blkif);
+               }
+       }
 }
 
 static void maybe_trigger_blkio_schedule(void)
 {
-    /*
-     * Needed so that two processes, who together make the following predicate
-     * true, don't both read stale values and evaluate the predicate
-     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
-     */
-    smp_mb();
-
-    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
-         !list_empty(&blkio_schedule_list) )
-        wake_up(&blkio_schedule_wait);
+       /*
+        * Needed so that two processes, who together make the following
+        * predicate true, don't both read stale values and evaluate the
+        * predicate incorrectly. Incredibly unlikely to stall the scheduler
+        * on the x86, but...
+        */
+       smp_mb();
+
+       if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+           !list_empty(&blkio_schedule_list))
+               wake_up(&blkio_schedule_wait);
 }
 
 
@@ -561,54 +556,53 @@
 
 static int blktap_read_ufe_ring(void)
 {
-    /* This is called to read responses from the UFE ring. */
-
-    RING_IDX i, j, rp;
-    blkif_response_t *resp;
-    blkif_t *blkif;
-    int pending_idx;
-    pending_req_t *pending_req;
-    unsigned long     flags;
-
-    /* if we are forwarding from UFERring to FERing */
-    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
-
-        /* for each outstanding message on the UFEring  */
-        rp = blktap_ufe_ring.sring->rsp_prod;
-        rmb();
+       /* This is called to read responses from the UFE ring. */
+
+       RING_IDX i, j, rp;
+       blkif_response_t *resp;
+       blkif_t *blkif;
+       int pending_idx;
+       pending_req_t *pending_req;
+       unsigned long     flags;
+
+       /* if we are forwarding from UFERring to FERing */
+       if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+
+               /* for each outstanding message on the UFEring  */
+               rp = blktap_ufe_ring.sring->rsp_prod;
+               rmb();
         
-        for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
-        {
-            resp = RING_GET_RESPONSE(&blktap_ufe_ring, i);
-            pending_idx = MASK_PEND_IDX(ID_TO_IDX(resp->id));
-            pending_req = &pending_reqs[pending_idx];
+               for (i = blktap_ufe_ring.rsp_cons; i != rp; i++) {
+                       resp = RING_GET_RESPONSE(&blktap_ufe_ring, i);
+                       pending_idx = MASK_PEND_IDX(ID_TO_IDX(resp->id));
+                       pending_req = &pending_reqs[pending_idx];
             
-            blkif = pending_req->blkif;
-            for (j = 0; j < pending_req->nr_pages; j++) {
-                unsigned long vaddr;
-                struct page **map = blktap_vma->vm_private_data;
-                int offset; 
-
-                vaddr  = MMAP_VADDR(user_vstart, pending_idx, j);
-                offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
-
-                //ClearPageReserved(virt_to_page(vaddr));
-                ClearPageReserved((struct page *)map[offset]);
-                map[offset] = NULL;
-            }
-
-            fast_flush_area(pending_idx, pending_req->nr_pages);
-            make_response(blkif, pending_req->id, resp->operation, 
-                          resp->status);
-            blkif_put(pending_req->blkif);
-            spin_lock_irqsave(&pend_prod_lock, flags);
-            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-            spin_unlock_irqrestore(&pend_prod_lock, flags);
-        }
-        blktap_ufe_ring.rsp_cons = i;
-        maybe_trigger_blkio_schedule();
-    }
-    return 0;
+                       blkif = pending_req->blkif;
+                       for (j = 0; j < pending_req->nr_pages; j++) {
+                               unsigned long vaddr;
+                               struct page **map = blktap_vma->vm_private_data;
+                               int offset; 
+
+                               vaddr  = MMAP_VADDR(user_vstart, pending_idx, j);
+                               offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+
+                               //ClearPageReserved(virt_to_page(vaddr));
+                               ClearPageReserved((struct page *)map[offset]);
+                               map[offset] = NULL;
+                       }
+
+                       fast_flush_area(pending_idx, pending_req->nr_pages);
+                       make_response(blkif, pending_req->id, resp->operation, 
+                                     resp->status);
+                       blkif_put(pending_req->blkif);
+                       spin_lock_irqsave(&pend_prod_lock, flags);
+                       pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+                       spin_unlock_irqrestore(&pend_prod_lock, flags);
+               }
+               blktap_ufe_ring.rsp_cons = i;
+               maybe_trigger_blkio_schedule();
+       }
+       return 0;
 }
 
 
@@ -618,10 +612,10 @@
 
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
 {
-    blkif_t *blkif = dev_id;
-    add_to_blkdev_list_tail(blkif);
-    maybe_trigger_blkio_schedule();
-    return IRQ_HANDLED;
+       blkif_t *blkif = dev_id;
+       add_to_blkdev_list_tail(blkif);
+       maybe_trigger_blkio_schedule();
+       return IRQ_HANDLED;
 }
 
 
@@ -632,199 +626,194 @@
 
 static int do_block_io_op(blkif_t *blkif, int max_to_do)
 {
-    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
-    blkif_request_t *req;
-    RING_IDX i, rp;
-    int more_to_do = 0;
+       blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+       blkif_request_t *req;
+       RING_IDX i, rp;
+       int more_to_do = 0;
     
-    rp = blk_ring->sring->req_prod;
-    rmb(); /* Ensure we see queued requests up to 'rp'. */
-
-    for ( i = blk_ring->req_cons; 
-         (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
-          i++ )
-    {
-        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
-        {
-            more_to_do = 1;
-            break;
-        }
+       rp = blk_ring->sring->req_prod;
+       rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+       for (i = blk_ring->req_cons; 
+            (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
+            i++ ) {
+               if ((max_to_do-- == 0) ||
+                   (NR_PENDING_REQS == MAX_PENDING_REQS)) {
+                       more_to_do = 1;
+                       break;
+               }
         
-        req = RING_GET_REQUEST(blk_ring, i);
-        switch ( req->operation )
-        {
-        case BLKIF_OP_READ:
-        case BLKIF_OP_WRITE:
-            dispatch_rw_block_io(blkif, req);
-            break;
-
-        default:
-            DPRINTK("error: unknown block io operation [%d]\n",
-                    req->operation);
-            make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
-            break;
-        }
-    }
-
-    blk_ring->req_cons = i;
-    blktap_kick_user();
-
-    return more_to_do;
+               req = RING_GET_REQUEST(blk_ring, i);
+               switch (req->operation) {
+               case BLKIF_OP_READ:
+               case BLKIF_OP_WRITE:
+                       dispatch_rw_block_io(blkif, req);
+                       break;
+
+               default:
+                       DPRINTK("error: unknown block io operation [%d]\n",
+                               req->operation);
+                       make_response(blkif, req->id, req->operation,
+                                     BLKIF_RSP_ERROR);
+                       break;
+               }
+       }
+
+       blk_ring->req_cons = i;
+       blktap_kick_user();
+
+       return more_to_do;
 }
 
 static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
 {
-    blkif_request_t *target;
-    int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-    pending_req_t *pending_req;
-    struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
-    int op, ret;
-    unsigned int nseg;
-
-    /* Check that number of segments is sane. */
-    nseg = req->nr_segments;
-    if ( unlikely(nseg == 0) || 
-         unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
-    {
-        DPRINTK("Bad number of segments in request (%d)\n", nseg);
-        goto bad_descriptor;
-    }
-
-    /* Make sure userspace is ready. */
-    if (!blktap_ring_ok) {
-        DPRINTK("blktap: ring not ready for requests!\n");
-        goto bad_descriptor;
-    }
+       blkif_request_t *target;
+       int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+       pending_req_t *pending_req;
+       struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+       int op, ret;
+       unsigned int nseg;
+
+       /* Check that number of segments is sane. */
+       nseg = req->nr_segments;
+       if (unlikely(nseg == 0) || 
+           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+               DPRINTK("Bad number of segments in request (%d)\n", nseg);
+               goto bad_descriptor;
+       }
+
+       /* Make sure userspace is ready. */
+       if (!blktap_ring_ok) {
+               DPRINTK("blktap: ring not ready for requests!\n");
+               goto bad_descriptor;
+       }
     
 
-    if ( RING_FULL(&blktap_ufe_ring) ) {
-        WPRINTK("blktap: fe_ring is full, can't add (very broken!).\n");
-        goto bad_descriptor;
-    }
-
-    flush_cache_all(); /* a noop on intel... */
-
-    /* Map the foreign pages directly in to the application */    
-    op = 0;
-    for (i=0; i<req->nr_segments; i++) {
-
-        unsigned long uvaddr;
-        unsigned long kvaddr;
-        unsigned long ptep;
-
-        uvaddr = MMAP_VADDR(user_vstart, pending_idx, i);
-        kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
-
-        /* Map the remote page to kernel. */
-        map[op].host_addr = kvaddr;
-        map[op].dom   = blkif->domid;
-        map[op].ref   = blkif_gref_from_fas(req->frame_and_sects[i]);
-        map[op].flags = GNTMAP_host_map;
-        /* This needs a bit more thought in terms of interposition: 
-         * If we want to be able to modify pages during write using 
-         * grant table mappings, the guest will either need to allow 
-         * it, or we'll need to incur a copy. Bit of an fbufs moment. ;) */
-        if (req->operation == BLKIF_OP_WRITE)
-            map[op].flags |= GNTMAP_readonly;
-        op++;
-
-        /* Now map it to user. */
-        ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
-        if (ret)
-        {
-            DPRINTK("Couldn't get a pte addr!\n");
-            fast_flush_area(pending_idx, req->nr_segments);
-            goto bad_descriptor;
-        }
-
-        map[op].host_addr = ptep;
-        map[op].dom       = blkif->domid;
-        map[op].ref       = blkif_gref_from_fas(req->frame_and_sects[i]);
-        map[op].flags     = GNTMAP_host_map | GNTMAP_application_map
-                            | GNTMAP_contains_pte;
-        /* Above interposition comment applies here as well. */
-        if (req->operation == BLKIF_OP_WRITE)
-            map[op].flags |= GNTMAP_readonly;
-        op++;
-    }
-
-    if ( unlikely(HYPERVISOR_grant_table_op(
-            GNTTABOP_map_grant_ref, map, op)))
-        BUG();
-
-    op = 0;
-    for (i=0; i<(req->nr_segments*2); i+=2) {
-        unsigned long uvaddr;
-        unsigned long kvaddr;
-        unsigned long offset;
-        int cancel = 0;
-
-        uvaddr = MMAP_VADDR(user_vstart, pending_idx, i/2);
-        kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i/2);
-
-        if ( unlikely(map[i].handle < 0) ) 
-        {
-            DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle);
-            ret = map[i].handle;
-            cancel = 1;
-        }
-
-        if ( unlikely(map[i+1].handle < 0) ) 
-        {
-            DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle);
-            ret = map[i+1].handle;
-            cancel = 1;
-        }
-
-        if (cancel) 
-        {
-            fast_flush_area(pending_idx, req->nr_segments);
-            goto bad_descriptor;
-        }
-
-        /* Set the necessary mappings in p2m and in the VM_FOREIGN 
-         * vm_area_struct to allow user vaddr -> struct page lookups
-         * to work.  This is needed for direct IO to foreign pages. */
-        phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] =
-            FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
-
-        offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
-        ((struct page **)blktap_vma->vm_private_data)[offset] =
-            pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
-
-        /* Save handles for unmapping later. */
-        pending_handle(pending_idx, i/2).kernel = map[i].handle;
-        pending_handle(pending_idx, i/2).user   = map[i+1].handle;
-    }
-
-    /* Mark mapped pages as reserved: */
-    for ( i = 0; i < req->nr_segments; i++ )
-    {
-        unsigned long kvaddr;
-
-        kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
-        SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
-    }
-
-    pending_req = &pending_reqs[pending_idx];
-    pending_req->blkif     = blkif;
-    pending_req->id        = req->id;
-    pending_req->operation = req->operation;
-    pending_req->status    = BLKIF_RSP_OKAY;
-    pending_req->nr_pages  = nseg;
-    req->id = MAKE_ID(blkif->domid, pending_idx);
-    //atomic_set(&pending_req->pendcnt, nbio);
-    pending_cons++;
-    blkif_get(blkif);
-
-    /* Finally, write the request message to the user ring. */
-    target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt);
-    memcpy(target, req, sizeof(*req));
-    blktap_ufe_ring.req_prod_pvt++;
-    return;
+       if (RING_FULL(&blktap_ufe_ring)) {
+               WPRINTK("blktap: fe_ring is full, can't add "
+                       "(very broken!).\n");
+               goto bad_descriptor;
+       }
+
+       flush_cache_all(); /* a noop on intel... */
+
+       /* Map the foreign pages directly in to the application */    
+       op = 0;
+       for (i = 0; i < req->nr_segments; i++) {
+
+               unsigned long uvaddr;
+               unsigned long kvaddr;
+               unsigned long ptep;
+
+               uvaddr = MMAP_VADDR(user_vstart, pending_idx, i);
+               kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
+
+               /* Map the remote page to kernel. */
+               map[op].host_addr = kvaddr;
+               map[op].dom   = blkif->domid;
+               map[op].ref   = blkif_gref_from_fas(req->frame_and_sects[i]);
+               map[op].flags = GNTMAP_host_map;
+               /* This needs a bit more thought in terms of interposition: 
+                * If we want to be able to modify pages during write using 
+                * grant table mappings, the guest will either need to allow 
+                * it, or we'll need to incur a copy. Bit of an fbufs moment. ;) */
+               if (req->operation == BLKIF_OP_WRITE)
+                       map[op].flags |= GNTMAP_readonly;
+               op++;
+
+               /* Now map it to user. */
+               ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
+               if (ret) {
+                       DPRINTK("Couldn't get a pte addr!\n");
+                       fast_flush_area(pending_idx, req->nr_segments);
+                       goto bad_descriptor;
+               }
+
+               map[op].host_addr = ptep;
+               map[op].dom       = blkif->domid;
+               map[op].ref       = blkif_gref_from_fas(req->frame_and_sects[i]);
+               map[op].flags     = GNTMAP_host_map | GNTMAP_application_map
+                       | GNTMAP_contains_pte;
+               /* Above interposition comment applies here as well. */
+               if (req->operation == BLKIF_OP_WRITE)
+                       map[op].flags |= GNTMAP_readonly;
+               op++;
+       }
+
+       BUG_ON(HYPERVISOR_grant_table_op(
+               GNTTABOP_map_grant_ref, map, op));
+
+       op = 0;
+       for (i = 0; i < (req->nr_segments*2); i += 2) {
+               unsigned long uvaddr;
+               unsigned long kvaddr;
+               unsigned long offset;
+               int cancel = 0;
+
+               uvaddr = MMAP_VADDR(user_vstart, pending_idx, i/2);
+               kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i/2);
+
+               if (unlikely(map[i].handle < 0)) {
+                       DPRINTK("Error on kernel grant mapping (%d)\n",
+                               map[i].handle);
+                       ret = map[i].handle;
+                       cancel = 1;
+               }
+
+               if (unlikely(map[i+1].handle < 0)) {
+                       DPRINTK("Error on user grant mapping (%d)\n",
+                               map[i+1].handle);
+                       ret = map[i+1].handle;
+                       cancel = 1;
+               }
+
+               if (cancel) {
+                       fast_flush_area(pending_idx, req->nr_segments);
+                       goto bad_descriptor;
+               }
+
+               /* Set the necessary mappings in p2m and in the VM_FOREIGN 
+                * vm_area_struct to allow user vaddr -> struct page lookups
+                * to work.  This is needed for direct IO to foreign pages. */
+               phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] =
+                       FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
+
+               offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
+               ((struct page **)blktap_vma->vm_private_data)[offset] =
+                       pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+
+               /* Save handles for unmapping later. */
+               pending_handle(pending_idx, i/2).kernel = map[i].handle;
+               pending_handle(pending_idx, i/2).user   = map[i+1].handle;
+       }
+
+       /* Mark mapped pages as reserved: */
+       for (i = 0; i < req->nr_segments; i++) {
+               unsigned long kvaddr;
+               kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
+               SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
+       }
+
+       pending_req = &pending_reqs[pending_idx];
+       pending_req->blkif     = blkif;
+       pending_req->id        = req->id;
+       pending_req->operation = req->operation;
+       pending_req->status    = BLKIF_RSP_OKAY;
+       pending_req->nr_pages  = nseg;
+       req->id = MAKE_ID(blkif->domid, pending_idx);
+       //atomic_set(&pending_req->pendcnt, nbio);
+       pending_cons++;
+       blkif_get(blkif);
+
+       /* Finally, write the request message to the user ring. */
+       target = RING_GET_REQUEST(&blktap_ufe_ring,
+                                 blktap_ufe_ring.req_prod_pvt);
+       memcpy(target, req, sizeof(*req));
+       blktap_ufe_ring.req_prod_pvt++;
+       return;
 
  bad_descriptor:
-    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+       make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
 } 
 
 
@@ -837,80 +826,89 @@
 static void make_response(blkif_t *blkif, unsigned long id, 
                           unsigned short op, int st)
 {
-    blkif_response_t *resp;
-    unsigned long     flags;
-    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
-
-    /* Place on the response ring for the relevant domain. */ 
-    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
-    resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
-    resp->id        = id;
-    resp->operation = op;
-    resp->status    = st;
-    wmb(); /* Ensure other side can see the response fields. */
-    blk_ring->rsp_prod_pvt++;
-    RING_PUSH_RESPONSES(blk_ring);
-    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
-
-    /* Kick the relevant domain. */
-    notify_via_evtchn(blkif->evtchn);
+       blkif_response_t *resp;
+       unsigned long     flags;
+       blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+
+       /* Place on the response ring for the relevant domain. */ 
+       spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+       resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
+       resp->id        = id;
+       resp->operation = op;
+       resp->status    = st;
+       wmb(); /* Ensure other side can see the response fields. */
+       blk_ring->rsp_prod_pvt++;
+       RING_PUSH_RESPONSES(blk_ring);
+       spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+       /* Kick the relevant domain. */
+       notify_via_evtchn(blkif->evtchn);
 }
 
 static struct miscdevice blktap_miscdev = {
-    .minor        = BLKTAP_MINOR,
-    .name         = "blktap",
-    .fops         = &blktap_fops,
-    .devfs_name   = "misc/blktap",
+       .minor        = BLKTAP_MINOR,
+       .name         = "blktap",
+       .fops         = &blktap_fops,
+       .devfs_name   = "misc/blktap",
 };
 
 void blkif_deschedule(blkif_t *blkif)
 {
-    remove_from_blkdev_list(blkif);
+       remove_from_blkdev_list(blkif);
 }
 
 static int __init blkif_init(void)
 {
-    int i, j, err;
-    struct page *page;
+       int i, j, err;
+       struct page *page;
 /*
-    if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
-         !(xen_start_info->flags & SIF_BLK_BE_DOMAIN) )
-        return 0;
+  if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
+  !(xen_start_info->flags & SIF_BLK_BE_DOMAIN) )
+  return 0;
 */
-    blkif_interface_init();
-
-    page = balloon_alloc_empty_page_range(MMAP_PAGES);
-    BUG_ON(page == NULL);
-    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
-    pending_cons = 0;
-    pending_prod = MAX_PENDING_REQS;
-    memset(pending_reqs, 0, sizeof(pending_reqs));
-    for ( i = 0; i < MAX_PENDING_REQS; i++ )
-        pending_ring[i] = i;
+       blkif_interface_init();
+
+       page = balloon_alloc_empty_page_range(MMAP_PAGES);
+       BUG_ON(page == NULL);
+       mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+       pending_cons = 0;
+       pending_prod = MAX_PENDING_REQS;
+       memset(pending_reqs, 0, sizeof(pending_reqs));
+       for ( i = 0; i < MAX_PENDING_REQS; i++ )
+               pending_ring[i] = i;
     
-    spin_lock_init(&blkio_schedule_list_lock);
-    INIT_LIST_HEAD(&blkio_schedule_list);
-
-    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
-        BUG();
-
-    blkif_xenbus_init();
-
-    for (i=0; i<MAX_PENDING_REQS ; i++)
-        for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
-            BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j));
-
-    err = misc_register(&blktap_miscdev);
-    if ( err != 0 )
-    {
-        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
-        return err;
-    }
-
-    init_waitqueue_head(&blktap_wait);
-
-    return 0;
+       spin_lock_init(&blkio_schedule_list_lock);
+       INIT_LIST_HEAD(&blkio_schedule_list);
+
+       BUG_ON(kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0);
+
+       blkif_xenbus_init();
+
+       for (i = 0; i < MAX_PENDING_REQS ; i++)
+               for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
+                       BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j));
+
+       err = misc_register(&blktap_miscdev);
+       if (err != 0) {
+               printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n",
+                      err);
+               return err;
+       }
+
+       init_waitqueue_head(&blktap_wait);
+
+       return 0;
 }
 
 __initcall(blkif_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blktap/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h  Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h  Thu Sep 22 17:42:01 2005
@@ -17,6 +17,7 @@
 #include <asm-xen/xen-public/io/blkif.h>
 #include <asm-xen/xen-public/io/ring.h>
 #include <asm-xen/gnttab.h>
+#include <asm-xen/driver_util.h>
 
 #if 0
 #define ASSERT(_p) \
@@ -32,39 +33,39 @@
 #define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
 
 struct vbd {
-    blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
-    unsigned char  readonly;    /* Non-zero -> read-only */
-    unsigned char  type;        /* VDISK_xxx */
-    u32            pdevice;     /* phys device that this vbd maps to */
-    struct block_device *bdev;
+       blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
+       unsigned char  readonly;    /* Non-zero -> read-only */
+       unsigned char  type;        /* VDISK_xxx */
+       u32            pdevice;     /* phys device that this vbd maps to */
+       struct block_device *bdev;
 }; 
 
 typedef struct blkif_st {
-    /* Unique identifier for this interface. */
-    domid_t           domid;
-    unsigned int      handle;
-    /* Physical parameters of the comms window. */
-    unsigned long     shmem_frame;
-    unsigned int      evtchn;
-    unsigned int      remote_evtchn;
-    /* Comms information. */
-    blkif_back_ring_t blk_ring;
-    /* VBDs attached to this interface. */
-    struct vbd        vbd;
-    /* Private fields. */
-    enum { DISCONNECTED, CONNECTED } status;
+       /* Unique identifier for this interface. */
+       domid_t           domid;
+       unsigned int      handle;
+       /* Physical parameters of the comms window. */
+       unsigned int      evtchn;
+       unsigned int      remote_evtchn;
+       /* Comms information. */
+       blkif_back_ring_t blk_ring;
+       struct vm_struct *blk_ring_area;
+       /* VBDs attached to this interface. */
+       struct vbd        vbd;
+       /* Private fields. */
+       enum { DISCONNECTED, CONNECTED } status;
 #ifdef CONFIG_XEN_BLKDEV_TAP_BE
-    /* Is this a blktap frontend */
-    unsigned int     is_blktap;
+       /* Is this a blktap frontend */
+       unsigned int     is_blktap;
 #endif
-    struct list_head blkdev_list;
-    spinlock_t       blk_ring_lock;
-    atomic_t         refcnt;
+       struct list_head blkdev_list;
+       spinlock_t       blk_ring_lock;
+       atomic_t         refcnt;
 
-    struct work_struct free_work;
-    u16 shmem_handle;
-    unsigned long shmem_vaddr;
-    grant_ref_t shmem_ref;
+       struct work_struct free_work;
+
+       u16              shmem_handle;
+       grant_ref_t      shmem_ref;
 } blkif_t;
 
 blkif_t *alloc_blkif(domid_t domid);
@@ -88,10 +89,10 @@
 unsigned long vbd_secsize(struct vbd *vbd);
 
 struct phys_req {
-    unsigned short       dev;
-    unsigned short       nr_sects;
-    struct block_device *bdev;
-    blkif_sector_t       sector_number;
+       unsigned short       dev;
+       unsigned short       nr_sects;
+       struct block_device *bdev;
+       blkif_sector_t       sector_number;
 };
 
 int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 
@@ -105,3 +106,13 @@
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
 
 #endif /* __BLKIF__BACKEND__COMMON_H__ */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c       Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c       Thu Sep 22 17:42:01 2005
@@ -13,129 +13,143 @@
 
 blkif_t *alloc_blkif(domid_t domid)
 {
-    blkif_t *blkif;
+       blkif_t *blkif;
 
-    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
-    if (!blkif)
-           return ERR_PTR(-ENOMEM);
+       blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+       if (!blkif)
+               return ERR_PTR(-ENOMEM);
 
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid = domid;
-    blkif->status = DISCONNECTED;
-    spin_lock_init(&blkif->blk_ring_lock);
-    atomic_set(&blkif->refcnt, 1);
+       memset(blkif, 0, sizeof(*blkif));
+       blkif->domid = domid;
+       blkif->status = DISCONNECTED;
+       spin_lock_init(&blkif->blk_ring_lock);
+       atomic_set(&blkif->refcnt, 1);
 
-    return blkif;
+       return blkif;
 }
 
-static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
-                            unsigned long shared_page)
+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
 {
-    struct gnttab_map_grant_ref op;
-    op.host_addr = localaddr;
-    op.flags = GNTMAP_host_map;
-    op.ref = shared_page;
-    op.dom = blkif->domid;
+       struct gnttab_map_grant_ref op;
 
-    BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+       op.host_addr = (unsigned long)blkif->blk_ring_area->addr;
+       op.flags     = GNTMAP_host_map;
+       op.ref       = shared_page;
+       op.dom       = blkif->domid;
 
-    if (op.handle < 0) {
-       DPRINTK(" Grant table operation failure !\n");
-       return op.handle;
-    }
+       lock_vm_area(blkif->blk_ring_area);
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
+       unlock_vm_area(blkif->blk_ring_area);
 
-    blkif->shmem_ref = shared_page;
-    blkif->shmem_handle = op.handle;
-    blkif->shmem_vaddr = localaddr;
-    return 0;
+       if (op.handle < 0) {
+               DPRINTK(" Grant table operation failure !\n");
+               return op.handle;
+       }
+
+       blkif->shmem_ref    = shared_page;
+       blkif->shmem_handle = op.handle;
+
+       return 0;
 }
 
 static void unmap_frontend_page(blkif_t *blkif)
 {
-    struct gnttab_unmap_grant_ref op;
+       struct gnttab_unmap_grant_ref op;
 
-    op.host_addr = blkif->shmem_vaddr;
-    op.handle = blkif->shmem_handle;
-    op.dev_bus_addr = 0;
-    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+       op.host_addr    = (unsigned long)blkif->blk_ring_area->addr;
+       op.handle       = blkif->shmem_handle;
+       op.dev_bus_addr = 0;
+
+       lock_vm_area(blkif->blk_ring_area);
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+       unlock_vm_area(blkif->blk_ring_area);
 }
 
 int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
 {
-    struct vm_struct *vma;
-    blkif_sring_t *sring;
-    evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
-    int err;
+       blkif_sring_t *sring;
+       evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+       int err;
 
-    BUG_ON(blkif->remote_evtchn);
+       BUG_ON(blkif->remote_evtchn);
 
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
-       return -ENOMEM;
+       if ((blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL)
+               return -ENOMEM;
 
-    err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page);
-    if (err) {
-        vfree(vma->addr);
-       return err;
-    }
+       err = map_frontend_page(blkif, shared_page);
+       if (err) {
+               free_vm_area(blkif->blk_ring_area);
+               return err;
+       }
 
-    op.u.bind_interdomain.dom1 = DOMID_SELF;
-    op.u.bind_interdomain.dom2 = blkif->domid;
-    op.u.bind_interdomain.port1 = 0;
-    op.u.bind_interdomain.port2 = evtchn;
-    err = HYPERVISOR_event_channel_op(&op);
-    if (err) {
-       unmap_frontend_page(blkif);
-       vfree(vma->addr);
-       return err;
-    }
+       op.u.bind_interdomain.dom1 = DOMID_SELF;
+       op.u.bind_interdomain.dom2 = blkif->domid;
+       op.u.bind_interdomain.port1 = 0;
+       op.u.bind_interdomain.port2 = evtchn;
+       err = HYPERVISOR_event_channel_op(&op);
+       if (err) {
+               unmap_frontend_page(blkif);
+               free_vm_area(blkif->blk_ring_area);
+               return err;
+       }
 
-    blkif->evtchn = op.u.bind_interdomain.port1;
-    blkif->remote_evtchn = evtchn;
 
-    sring = (blkif_sring_t *)vma->addr;
-    SHARED_RING_INIT(sring);
-    BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
+       blkif->evtchn = op.u.bind_interdomain.port1;
+       blkif->remote_evtchn = evtchn;
 
-    bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend",
-                             blkif);
-    blkif->status        = CONNECTED;
-    blkif->shmem_frame   = shared_page;
+       sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
+       SHARED_RING_INIT(sring);
+       BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
 
-    return 0;
+       bind_evtchn_to_irqhandler(
+               blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
+       blkif->status        = CONNECTED;
+
+       return 0;
 }
 
 static void free_blkif(void *arg)
 {
-    evtchn_op_t op = { .cmd = EVTCHNOP_close };
-    blkif_t *blkif = (blkif_t *)arg;
+       evtchn_op_t op = { .cmd = EVTCHNOP_close };
+       blkif_t *blkif = (blkif_t *)arg;
 
-    op.u.close.port = blkif->evtchn;
-    op.u.close.dom = DOMID_SELF;
-    HYPERVISOR_event_channel_op(&op);
-    op.u.close.port = blkif->remote_evtchn;
-    op.u.close.dom = blkif->domid;
-    HYPERVISOR_event_channel_op(&op);
+       op.u.close.port = blkif->evtchn;
+       op.u.close.dom = DOMID_SELF;
+       HYPERVISOR_event_channel_op(&op);
+       op.u.close.port = blkif->remote_evtchn;
+       op.u.close.dom = blkif->domid;
+       HYPERVISOR_event_channel_op(&op);
 
-    if (blkif->evtchn)
-        unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
+       if (blkif->evtchn)
+               unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
 
-    if (blkif->blk_ring.sring) {
-       unmap_frontend_page(blkif);
-       vfree(blkif->blk_ring.sring);
-       blkif->blk_ring.sring = NULL;
-    }
+       if (blkif->blk_ring.sring) {
+               unmap_frontend_page(blkif);
+               free_vm_area(blkif->blk_ring_area);
+               blkif->blk_ring.sring = NULL;
+       }
 
-    kmem_cache_free(blkif_cachep, blkif);
+       kmem_cache_free(blkif_cachep, blkif);
 }
 
 void free_blkif_callback(blkif_t *blkif)
 {
-    INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
-    schedule_work(&blkif->free_work);
+       INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
+       schedule_work(&blkif->free_work);
 }
 
 void __init blkif_interface_init(void)
 {
-    blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
-                                     0, 0, NULL, NULL);
+       blkif_cachep = kmem_cache_create(
+               "blkif_cache", sizeof(blkif_t), 0, 0, NULL, NULL);
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c  Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c  Thu Sep 22 17:42:01 2005
@@ -172,6 +172,7 @@
        be->dev = dev;
        be->backend_watch.node = dev->nodename;
        be->backend_watch.callback = backend_changed;
+       /* Registration implicitly fires backend_changed once */
        err = register_xenbus_watch(&be->backend_watch);
        if (err) {
                be->backend_watch.node = NULL;
@@ -193,8 +194,6 @@
        }
 
        dev->data = be;
-
-       backend_changed(&be->backend_watch, dev->nodename);
        return 0;
 
  free_be:
@@ -223,3 +222,13 @@
 {
        xenbus_register_backend(&blkback);
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/console/console.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c        Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c        Thu Sep 22 17:42:01 2005
@@ -75,31 +75,33 @@
 
 static int __init xencons_setup(char *str)
 {
-    char *q;
-    int n;
-
-    if ( !strncmp(str, "ttyS", 4) )
-        xc_mode = XC_SERIAL;
-    else if ( !strncmp(str, "tty", 3) )
-        xc_mode = XC_TTY;
-    else if ( !strncmp(str, "off", 3) )
-        xc_mode = XC_OFF;
-
-    switch ( xc_mode )
-    {
-    case XC_SERIAL:
-        n = simple_strtol( str+4, &q, 10 );
-        if ( q > (str + 4) ) xc_num = n;
-        break;
-    case XC_TTY:
-        n = simple_strtol( str+3, &q, 10 );
-        if ( q > (str + 3) ) xc_num = n;
-        break;
-    default:
-        break;
-    }
-
-    return 1;
+       char *q;
+       int n;
+
+       if (!strncmp(str, "ttyS", 4))
+               xc_mode = XC_SERIAL;
+       else if (!strncmp(str, "tty", 3))
+               xc_mode = XC_TTY;
+       else if (!strncmp(str, "off", 3))
+               xc_mode = XC_OFF;
+
+       switch ( xc_mode )
+       {
+       case XC_SERIAL:
+               n = simple_strtol(str+4, &q, 10);
+               if (q > (str + 4))
+                       xc_num = n;
+               break;
+       case XC_TTY:
+               n = simple_strtol(str+3, &q, 10);
+               if (q > (str + 3))
+                       xc_num = n;
+               break;
+       default:
+               break;
+       }
+
+       return 1;
 }
 __setup("xencons=", xencons_setup);
 
@@ -111,11 +113,11 @@
 
 static int __init xencons_bufsz_setup(char *str)
 {
-    unsigned int goal;
-    goal = simple_strtoul(str, NULL, 0);
-    while ( wbuf_size < goal )
-        wbuf_size <<= 1;
-    return 1;
+       unsigned int goal;
+       goal = simple_strtoul(str, NULL, 0);
+       while (wbuf_size < goal)
+               wbuf_size <<= 1;
+       return 1;
 }
 __setup("xencons_bufsz=", xencons_bufsz_setup);
 
@@ -135,57 +137,55 @@
 /******************** Kernel console driver ********************************/
 
 static void kcons_write(
-    struct console *c, const char *s, unsigned int count)
-{
-    int           i;
-    unsigned long flags;
-
-    spin_lock_irqsave(&xencons_lock, flags);
+       struct console *c, const char *s, unsigned int count)
+{
+       int           i;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xencons_lock, flags);
     
-    for ( i = 0; i < count; i++ )
-    {
-        if ( (wp - wc) >= (wbuf_size - 1) )
-            break;
-        if ( (wbuf[WBUF_MASK(wp++)] = s[i]) == '\n' )
-            wbuf[WBUF_MASK(wp++)] = '\r';
-    }
-
-    __xencons_tx_flush();
-
-    spin_unlock_irqrestore(&xencons_lock, flags);
+       for (i = 0; i < count; i++) {
+               if ((wp - wc) >= (wbuf_size - 1))
+                       break;
+               if ((wbuf[WBUF_MASK(wp++)] = s[i]) == '\n')
+                       wbuf[WBUF_MASK(wp++)] = '\r';
+       }
+
+       __xencons_tx_flush();
+
+       spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
 static void kcons_write_dom0(
-    struct console *c, const char *s, unsigned int count)
-{
-    int rc;
-
-    while ( (count > 0) &&
-            ((rc = HYPERVISOR_console_io(
-                CONSOLEIO_write, count, (char *)s)) > 0) )
-    {
-        count -= rc;
-        s += rc;
-    }
+       struct console *c, const char *s, unsigned int count)
+{
+       int rc;
+
+       while ((count > 0) &&
+              ((rc = HYPERVISOR_console_io(
+                       CONSOLEIO_write, count, (char *)s)) > 0)) {
+               count -= rc;
+               s += rc;
+       }
 }
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 static struct tty_driver *kcons_device(struct console *c, int *index)
 {
-    *index = c->index;
-    return xencons_driver;
+       *index = c->index;
+       return xencons_driver;
 }
 #else
 static kdev_t kcons_device(struct console *c)
 {
-    return MKDEV(TTY_MAJOR, (xc_mode == XC_SERIAL) ? 64 : 1);
+       return MKDEV(TTY_MAJOR, (xc_mode == XC_SERIAL) ? 64 : 1);
 }
 #endif
 
 static struct console kcons_info = {
-    .device    = kcons_device,
-    .flags     = CON_PRINTBUFFER,
-    .index     = -1,
+       .device = kcons_device,
+       .flags  = CON_PRINTBUFFER,
+       .index  = -1,
 };
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
@@ -196,44 +196,42 @@
 void xen_console_init(void)
 #endif
 {
-    if ( xen_start_info->flags & SIF_INITDOMAIN )
-    {
-        if ( xc_mode == XC_DEFAULT )
-            xc_mode = XC_SERIAL;
-        kcons_info.write = kcons_write_dom0;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-        if ( xc_mode == XC_SERIAL )
-            kcons_info.flags |= CON_ENABLED;
-#endif
-    }
-    else
-    {
-        if ( xc_mode == XC_DEFAULT )
-            xc_mode = XC_TTY;
-        kcons_info.write = kcons_write;
-    }
-
-    switch ( xc_mode )
-    {
-    case XC_SERIAL:
-        strcpy(kcons_info.name, "ttyS");
-        if ( xc_num == -1 ) xc_num = 0;
-        break;
-
-    case XC_TTY:
-        strcpy(kcons_info.name, "tty");
-        if ( xc_num == -1 ) xc_num = 1;
-        break;
-
-    default:
-        return __RETCODE;
-    }
-
-    wbuf = alloc_bootmem(wbuf_size);
-
-    register_console(&kcons_info);
-
-    return __RETCODE;
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
+               if (xc_mode == XC_DEFAULT)
+                       xc_mode = XC_SERIAL;
+               kcons_info.write = kcons_write_dom0;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+               if (xc_mode == XC_SERIAL)
+                       kcons_info.flags |= CON_ENABLED;
+#endif
+       } else {
+               if (xc_mode == XC_DEFAULT)
+                       xc_mode = XC_TTY;
+               kcons_info.write = kcons_write;
+       }
+
+       switch (xc_mode) {
+       case XC_SERIAL:
+               strcpy(kcons_info.name, "ttyS");
+               if (xc_num == -1)
+                       xc_num = 0;
+               break;
+
+       case XC_TTY:
+               strcpy(kcons_info.name, "tty");
+               if (xc_num == -1)
+                       xc_num = 1;
+               break;
+
+       default:
+               return __RETCODE;
+       }
+
+       wbuf = alloc_bootmem(wbuf_size);
+
+       register_console(&kcons_info);
+
+       return __RETCODE;
 }
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 console_initcall(xen_console_init);
@@ -246,41 +244,40 @@
 asmlinkage int xprintk(const char *fmt, ...)
 #endif
 {
-    va_list args;
-    int printk_len;
-    static char printk_buf[1024];
+       va_list args;
+       int printk_len;
+       static char printk_buf[1024];
     
-    /* Emit the output into the temporary buffer */
-    va_start(args, fmt);
-    printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args);
-    va_end(args);
-
-    /* Send the processed output directly to Xen. */
-    kcons_write_dom0(NULL, printk_buf, printk_len);
-
-    return 0;
+       /* Emit the output into the temporary buffer */
+       va_start(args, fmt);
+       printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args);
+       va_end(args);
+
+       /* Send the processed output directly to Xen. */
+       kcons_write_dom0(NULL, printk_buf, printk_len);
+
+       return 0;
 }
 
 /*** Forcibly flush console data before dying. ***/
 void xencons_force_flush(void)
 {
-    int        sz;
-
-    /* Emergency console is synchronous, so there's nothing to flush. */
-    if ( xen_start_info->flags & SIF_INITDOMAIN )
-        return;
-
-
-    /* Spin until console data is flushed through to the domain controller. */
-    while ( (wc != wp) )
-    {
-       int sent = 0;
-        if ( (sz = wp - wc) == 0 )
-            continue;
-       sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
-       if (sent > 0)
-           wc += sent;
-    }
+       int sz;
+
+       /* Emergency console is synchronous, so there's nothing to flush. */
+       if (xen_start_info->flags & SIF_INITDOMAIN)
+               return;
+
+
+       /* Spin until console data is flushed through to the daemon. */
+       while (wc != wp) {
+               int sent = 0;
+               if ((sz = wp - wc) == 0)
+                       continue;
+               sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+               if (sent > 0)
+                       wc += sent;
+       }
 }
 
 
@@ -305,362 +302,358 @@
 /* Non-privileged receive callback. */
 static void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
 {
-    int           i;
-    unsigned long flags;
-
-    spin_lock_irqsave(&xencons_lock, flags);
-    if ( xencons_tty != NULL )
-    {
-        for ( i = 0; i < len; i++ ) {
+       int           i;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       if (xencons_tty == NULL)
+               goto out;
+
+       for (i = 0; i < len; i++) {
 #ifdef CONFIG_MAGIC_SYSRQ
-            if (sysrq_enabled) {
-                if (buf[i] == '\x0f') { /* ^O */
-                    sysrq_requested = jiffies;
-                    continue; /* don't print the sysrq key */
-                } else if (sysrq_requested) {
-                    unsigned long sysrq_timeout = sysrq_requested + HZ*2;
-                    sysrq_requested = 0;
-                    /* if it's been less than a timeout, do the sysrq */
-                    if (time_before(jiffies, sysrq_timeout)) {
-                        spin_unlock_irqrestore(&xencons_lock, flags);
-                        handle_sysrq(buf[i], regs, xencons_tty);
-                        spin_lock_irqsave(&xencons_lock, flags);
-                        continue;
-                    }
-                }
-            }
-#endif
-            tty_insert_flip_char(xencons_tty, buf[i], 0);
-        }
-        tty_flip_buffer_push(xencons_tty);
-    }
-    spin_unlock_irqrestore(&xencons_lock, flags);
-
+               if (sysrq_enabled) {
+                       if (buf[i] == '\x0f') { /* ^O */
+                               sysrq_requested = jiffies;
+                               continue; /* don't print the sysrq key */
+                       } else if (sysrq_requested) {
+                               unsigned long sysrq_timeout =
+                                       sysrq_requested + HZ*2;
+                               sysrq_requested = 0;
+                               if (time_before(jiffies, sysrq_timeout)) {
+                                       spin_unlock_irqrestore(
+                                               &xencons_lock, flags);
+                                       handle_sysrq(
+                                               buf[i], regs, xencons_tty);
+                                       spin_lock_irqsave(
+                                               &xencons_lock, flags);
+                                       continue;
+                               }
+                       }
+               }
+#endif
+               tty_insert_flip_char(xencons_tty, buf[i], 0);
+       }
+       tty_flip_buffer_push(xencons_tty);
+
+ out:
+       spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
 /* Privileged and non-privileged transmit worker. */
 static void __xencons_tx_flush(void)
 {
-    int        sz, work_done = 0;
-
-    if ( xen_start_info->flags & SIF_INITDOMAIN )
-    {
-        if ( x_char )
-        {
-            kcons_write_dom0(NULL, &x_char, 1);
-            x_char = 0;
-            work_done = 1;
-        }
-
-        while ( wc != wp )
-        {
-            sz = wp - wc;
-            if ( sz > (wbuf_size - WBUF_MASK(wc)) )
-                sz = wbuf_size - WBUF_MASK(wc);
-            kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
-            wc += sz;
-            work_done = 1;
-        }
-    }
-    else
-    {
-        while ( x_char )
-        {
-           if (xencons_ring_send(&x_char, 1) == 1) {
-               x_char = 0;
-               work_done = 1;
-           }
-        }
-
-        while ( wc != wp )
-        {
-           int sent;
-            sz = wp - wc;
-           if ( sz > (wbuf_size - WBUF_MASK(wc)) )
-               sz = wbuf_size - WBUF_MASK(wc);
-           sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
-           if ( sent > 0 ) {
-               wc += sent;
-               work_done = 1;
-           }
-        }
-    }
-
-    if ( work_done && (xencons_tty != NULL) )
-    {
-        wake_up_interruptible(&xencons_tty->write_wait);
-        if ( (xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
-             (xencons_tty->ldisc.write_wakeup != NULL) )
-            (xencons_tty->ldisc.write_wakeup)(xencons_tty);
-    }
+       int sz, work_done = 0;
+
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
+               if (x_char) {
+                       kcons_write_dom0(NULL, &x_char, 1);
+                       x_char = 0;
+                       work_done = 1;
+               }
+
+               while (wc != wp) {
+                       sz = wp - wc;
+                       if (sz > (wbuf_size - WBUF_MASK(wc)))
+                               sz = wbuf_size - WBUF_MASK(wc);
+                       kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
+                       wc += sz;
+                       work_done = 1;
+               }
+       } else {
+               while (x_char) {
+                       if (xencons_ring_send(&x_char, 1) == 1) {
+                               x_char = 0;
+                               work_done = 1;
+                       }
+               }
+
+               while (wc != wp) {
+                       int sent;
+                       sz = wp - wc;
+                       if (sz > (wbuf_size - WBUF_MASK(wc)))
+                               sz = wbuf_size - WBUF_MASK(wc);
+                       sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+                       if (sent > 0) {
+                               wc += sent;
+                               work_done = 1;
+                       }
+               }
+       }
+
+       if (work_done && (xencons_tty != NULL))
+       {
+               wake_up_interruptible(&xencons_tty->write_wait);
+               if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
+                   (xencons_tty->ldisc.write_wakeup != NULL))
+                       (xencons_tty->ldisc.write_wakeup)(xencons_tty);
+       }
 }
 
 /* Privileged receive callback and transmit kicker. */
 static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
                                           struct pt_regs *regs)
 {
-    static char   rbuf[16];
-    int           i, l;
-    unsigned long flags;
-
-    spin_lock_irqsave(&xencons_lock, flags);
-
-    if ( xencons_tty != NULL )
-    {
-        /* Receive work. */
-        while ( (l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0 )
-            for ( i = 0; i < l; i++ )
-                tty_insert_flip_char(xencons_tty, rbuf[i], 0);
-        if ( xencons_tty->flip.count != 0 )
-            tty_flip_buffer_push(xencons_tty);
-    }
-
-    /* Transmit work. */
-    __xencons_tx_flush();
-
-    spin_unlock_irqrestore(&xencons_lock, flags);
-
-    return IRQ_HANDLED;
+       static char   rbuf[16];
+       int           i, l;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+
+       if (xencons_tty != NULL)
+       {
+               /* Receive work. */
+               while ((l = HYPERVISOR_console_io(
+                       CONSOLEIO_read, 16, rbuf)) > 0)
+                       for (i = 0; i < l; i++)
+                               tty_insert_flip_char(xencons_tty, rbuf[i], 0);
+               if (xencons_tty->flip.count != 0)
+                       tty_flip_buffer_push(xencons_tty);
+       }
+
+       /* Transmit work. */
+       __xencons_tx_flush();
+
+       spin_unlock_irqrestore(&xencons_lock, flags);
+
+       return IRQ_HANDLED;
 }
 
 static int xencons_write_room(struct tty_struct *tty)
 {
-    return wbuf_size - (wp - wc);
+       return wbuf_size - (wp - wc);
 }
 
 static int xencons_chars_in_buffer(struct tty_struct *tty)
 {
-    return wp - wc;
+       return wp - wc;
 }
 
 static void xencons_send_xchar(struct tty_struct *tty, char ch)
 {
-    unsigned long flags;
-
-    if ( TTY_INDEX(tty) != 0 )
-        return;
-
-    spin_lock_irqsave(&xencons_lock, flags);
-    x_char = ch;
-    __xencons_tx_flush();
-    spin_unlock_irqrestore(&xencons_lock, flags);
+       unsigned long flags;
+
+       if (TTY_INDEX(tty) != 0)
+               return;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       x_char = ch;
+       __xencons_tx_flush();
+       spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
 static void xencons_throttle(struct tty_struct *tty)
 {
-    if ( TTY_INDEX(tty) != 0 )
-        return;
-
-    if ( I_IXOFF(tty) )
-        xencons_send_xchar(tty, STOP_CHAR(tty));
+       if (TTY_INDEX(tty) != 0)
+               return;
+
+       if (I_IXOFF(tty))
+               xencons_send_xchar(tty, STOP_CHAR(tty));
 }
 
 static void xencons_unthrottle(struct tty_struct *tty)
 {
-    if ( TTY_INDEX(tty) != 0 )
-        return;
-
-    if ( I_IXOFF(tty) )
-    {
-        if ( x_char != 0 )
-            x_char = 0;
-        else
-            xencons_send_xchar(tty, START_CHAR(tty));
-    }
+       if (TTY_INDEX(tty) != 0)
+               return;
+
+       if (I_IXOFF(tty)) {
+               if (x_char != 0)
+                       x_char = 0;
+               else
+                       xencons_send_xchar(tty, START_CHAR(tty));
+       }
 }
 
 static void xencons_flush_buffer(struct tty_struct *tty)
 {
-    unsigned long flags;
-
-    if ( TTY_INDEX(tty) != 0 )
-        return;
-
-    spin_lock_irqsave(&xencons_lock, flags);
-    wc = wp = 0;
-    spin_unlock_irqrestore(&xencons_lock, flags);
+       unsigned long flags;
+
+       if (TTY_INDEX(tty) != 0)
+               return;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       wc = wp = 0;
+       spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
 static inline int __xencons_put_char(int ch)
 {
-    char _ch = (char)ch;
-    if ( (wp - wc) == wbuf_size )
-        return 0;
-    wbuf[WBUF_MASK(wp++)] = _ch;
-    return 1;
+       char _ch = (char)ch;
+       if ((wp - wc) == wbuf_size)
+               return 0;
+       wbuf[WBUF_MASK(wp++)] = _ch;
+       return 1;
 }
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 static int xencons_write(
-    struct tty_struct *tty,
-    const unsigned char *buf,
-    int count)
-{
-    int i;
-    unsigned long flags;
-
-    if ( TTY_INDEX(tty) != 0 )
-        return count;
-
-    spin_lock_irqsave(&xencons_lock, flags);
-
-    for ( i = 0; i < count; i++ )
-        if ( !__xencons_put_char(buf[i]) )
-            break;
-
-    if ( i != 0 )
-        __xencons_tx_flush();
-
-    spin_unlock_irqrestore(&xencons_lock, flags);
-
-    return i;
+       struct tty_struct *tty,
+       const unsigned char *buf,
+       int count)
+{
+       int i;
+       unsigned long flags;
+
+       if (TTY_INDEX(tty) != 0)
+               return count;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+
+       for (i = 0; i < count; i++)
+               if (!__xencons_put_char(buf[i]))
+                       break;
+
+       if (i != 0)
+               __xencons_tx_flush();
+
+       spin_unlock_irqrestore(&xencons_lock, flags);
+
+       return i;
 }
 #else
 static int xencons_write(
-    struct tty_struct *tty, 
-    int from_user,
-    const u_char *buf, 
-    int count)
-{
-    int i;
-    unsigned long flags;
-
-    if ( from_user && verify_area(VERIFY_READ, buf, count) )
-        return -EINVAL;
-
-    if ( TTY_INDEX(tty) != 0 )
-        return count;
-
-    spin_lock_irqsave(&xencons_lock, flags);
-
-    for ( i = 0; i < count; i++ )
-    {
-        char ch;
-        if ( from_user )
-            __get_user(ch, buf + i);
-        else
-            ch = buf[i];
-        if ( !__xencons_put_char(ch) )
-            break;
-    }
-
-    if ( i != 0 )
-        __xencons_tx_flush();
-
-    spin_unlock_irqrestore(&xencons_lock, flags);
-
-    return i;
+       struct tty_struct *tty, 
+       int from_user,
+       const u_char *buf, 
+       int count)
+{
+       int i;
+       unsigned long flags;
+
+       if (from_user && verify_area(VERIFY_READ, buf, count))
+               return -EINVAL;
+
+       if (TTY_INDEX(tty) != 0)
+               return count;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+
+       for (i = 0; i < count; i++) {
+               char ch;
+               if (from_user)
+                       __get_user(ch, buf + i);
+               else
+                       ch = buf[i];
+               if (!__xencons_put_char(ch))
+                       break;
+       }
+
+       if (i != 0)
+               __xencons_tx_flush();
+
+       spin_unlock_irqrestore(&xencons_lock, flags);
+
+       return i;
 }
 #endif
 
 static void xencons_put_char(struct tty_struct *tty, u_char ch)
 {
-    unsigned long flags;
-
-    if ( TTY_INDEX(tty) != 0 )
-        return;
-
-    spin_lock_irqsave(&xencons_lock, flags);
-    (void)__xencons_put_char(ch);
-    spin_unlock_irqrestore(&xencons_lock, flags);
+       unsigned long flags;
+
+       if (TTY_INDEX(tty) != 0)
+               return;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       (void)__xencons_put_char(ch);
+       spin_unlock_irqrestore(&xencons_lock, flags);
 }
 
 static void xencons_flush_chars(struct tty_struct *tty)
 {
-    unsigned long flags;
-
-    if ( TTY_INDEX(tty) != 0 )
-        return;
-
-    spin_lock_irqsave(&xencons_lock, flags);
-    __xencons_tx_flush();
-    spin_unlock_irqrestore(&xencons_lock, flags);    
+       unsigned long flags;
+
+       if (TTY_INDEX(tty) != 0)
+               return;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       __xencons_tx_flush();
+       spin_unlock_irqrestore(&xencons_lock, flags);    
 }
 
 static void xencons_wait_until_sent(struct tty_struct *tty, int timeout)
 {
-    unsigned long orig_jiffies = jiffies;
-
-    if ( TTY_INDEX(tty) != 0 )
-        return;
-
-    while ( DRV(tty->driver)->chars_in_buffer(tty) )
-    {
-        set_current_state(TASK_INTERRUPTIBLE);
-        schedule_timeout(1);
-        if ( signal_pending(current) )
-            break;
-        if ( (timeout != 0) && time_after(jiffies, orig_jiffies + timeout) )
-            break;
-    }
+       unsigned long orig_jiffies = jiffies;
+
+       if (TTY_INDEX(tty) != 0)
+               return;
+
+       while (DRV(tty->driver)->chars_in_buffer(tty))
+       {
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(1);
+               if (signal_pending(current))
+                       break;
+               if ( (timeout != 0) &&
+                    time_after(jiffies, orig_jiffies + timeout) )
+                       break;
+       }
     
-    set_current_state(TASK_RUNNING);
+       set_current_state(TASK_RUNNING);
 }
 
 static int xencons_open(struct tty_struct *tty, struct file *filp)
 {
-    unsigned long flags;
-
-    if ( TTY_INDEX(tty) != 0 )
-        return 0;
-
-    spin_lock_irqsave(&xencons_lock, flags);
-    tty->driver_data = NULL;
-    if ( xencons_tty == NULL )
-        xencons_tty = tty;
-    __xencons_tx_flush();
-    spin_unlock_irqrestore(&xencons_lock, flags);    
-
-    return 0;
+       unsigned long flags;
+
+       if (TTY_INDEX(tty) != 0)
+               return 0;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       tty->driver_data = NULL;
+       if (xencons_tty == NULL)
+               xencons_tty = tty;
+       __xencons_tx_flush();
+       spin_unlock_irqrestore(&xencons_lock, flags);    
+
+       return 0;
 }
 
 static void xencons_close(struct tty_struct *tty, struct file *filp)
 {
-    unsigned long flags;
-
-    if ( TTY_INDEX(tty) != 0 )
-        return;
-
-    if ( tty->count == 1 )
-    {
-        tty->closing = 1;
-        tty_wait_until_sent(tty, 0);
-        if ( DRV(tty->driver)->flush_buffer != NULL )
-            DRV(tty->driver)->flush_buffer(tty);
-        if ( tty->ldisc.flush_buffer != NULL )
-            tty->ldisc.flush_buffer(tty);
-        tty->closing = 0;
-        spin_lock_irqsave(&xencons_lock, flags);
-        xencons_tty = NULL;
-        spin_unlock_irqrestore(&xencons_lock, flags);    
-    }
+       unsigned long flags;
+
+       if (TTY_INDEX(tty) != 0)
+               return;
+
+       if (tty->count == 1) {
+               tty->closing = 1;
+               tty_wait_until_sent(tty, 0);
+               if (DRV(tty->driver)->flush_buffer != NULL)
+                       DRV(tty->driver)->flush_buffer(tty);
+               if (tty->ldisc.flush_buffer != NULL)
+                       tty->ldisc.flush_buffer(tty);
+               tty->closing = 0;
+               spin_lock_irqsave(&xencons_lock, flags);
+               xencons_tty = NULL;
+               spin_unlock_irqrestore(&xencons_lock, flags);    
+       }
 }
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 static struct tty_operations xencons_ops = {
-    .open = xencons_open,
-    .close = xencons_close,
-    .write = xencons_write,
-    .write_room = xencons_write_room,
-    .put_char = xencons_put_char,
-    .flush_chars = xencons_flush_chars,
-    .chars_in_buffer = xencons_chars_in_buffer,
-    .send_xchar = xencons_send_xchar,
-    .flush_buffer = xencons_flush_buffer,
-    .throttle = xencons_throttle,
-    .unthrottle = xencons_unthrottle,
-    .wait_until_sent = xencons_wait_until_sent,
+       .open = xencons_open,
+       .close = xencons_close,
+       .write = xencons_write,
+       .write_room = xencons_write_room,
+       .put_char = xencons_put_char,
+       .flush_chars = xencons_flush_chars,
+       .chars_in_buffer = xencons_chars_in_buffer,
+       .send_xchar = xencons_send_xchar,
+       .flush_buffer = xencons_flush_buffer,
+       .throttle = xencons_throttle,
+       .unthrottle = xencons_unthrottle,
+       .wait_until_sent = xencons_wait_until_sent,
 };
 
 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
 static const char *xennullcon_startup(void)
 {
-    return NULL;
+       return NULL;
 }
 
 static int xennullcon_dummy(void)
 {
-    return 0;
+       return 0;
 }
 
 #define DUMMY (void *)xennullcon_dummy
@@ -672,122 +665,128 @@
  */
 
 const struct consw xennull_con = {
-    .owner =           THIS_MODULE,
-    .con_startup =     xennullcon_startup,
-    .con_init =                DUMMY,
-    .con_deinit =      DUMMY,
-    .con_clear =       DUMMY,
-    .con_putc =                DUMMY,
-    .con_putcs =       DUMMY,
-    .con_cursor =      DUMMY,
-    .con_scroll =      DUMMY,
-    .con_bmove =       DUMMY,
-    .con_switch =      DUMMY,
-    .con_blank =       DUMMY,
-    .con_font_set =    DUMMY,
-    .con_font_get =    DUMMY,
-    .con_font_default =        DUMMY,
-    .con_font_copy =   DUMMY,
-    .con_set_palette = DUMMY,
-    .con_scrolldelta = DUMMY,
+       .owner =                THIS_MODULE,
+       .con_startup =  xennullcon_startup,
+       .con_init =             DUMMY,
+       .con_deinit =   DUMMY,
+       .con_clear =    DUMMY,
+       .con_putc =             DUMMY,
+       .con_putcs =    DUMMY,
+       .con_cursor =   DUMMY,
+       .con_scroll =   DUMMY,
+       .con_bmove =    DUMMY,
+       .con_switch =   DUMMY,
+       .con_blank =    DUMMY,
+       .con_font_set = DUMMY,
+       .con_font_get = DUMMY,
+       .con_font_default =     DUMMY,
+       .con_font_copy =        DUMMY,
+       .con_set_palette =      DUMMY,
+       .con_scrolldelta =      DUMMY,
 };
 #endif
 #endif
 
 static int __init xencons_init(void)
 {
-    int rc;
-
-    if ( xc_mode == XC_OFF )
-        return 0;
-
-    xencons_ring_init();
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-    xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ? 
-                                      1 : MAX_NR_CONSOLES);
-    if ( xencons_driver == NULL )
-        return -ENOMEM;
+       int rc;
+
+       if (xc_mode == XC_OFF)
+               return 0;
+
+       xencons_ring_init();
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+       xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ? 
+                                         1 : MAX_NR_CONSOLES);
+       if (xencons_driver == NULL)
+               return -ENOMEM;
 #else
-    memset(&xencons_driver, 0, sizeof(struct tty_driver));
-    xencons_driver.magic       = TTY_DRIVER_MAGIC;
-    xencons_driver.refcount    = &xencons_refcount;
-    xencons_driver.table       = xencons_table;
-    xencons_driver.num         = (xc_mode == XC_SERIAL) ? 1 : MAX_NR_CONSOLES;
-#endif
-
-    DRV(xencons_driver)->major           = TTY_MAJOR;
-    DRV(xencons_driver)->type            = TTY_DRIVER_TYPE_SERIAL;
-    DRV(xencons_driver)->subtype         = SERIAL_TYPE_NORMAL;
-    DRV(xencons_driver)->init_termios    = tty_std_termios;
-    DRV(xencons_driver)->flags           = 
-        TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_NO_DEVFS;
-    DRV(xencons_driver)->termios         = xencons_termios;
-    DRV(xencons_driver)->termios_locked  = xencons_termios_locked;
-
-    if ( xc_mode == XC_SERIAL )
-    {
-        DRV(xencons_driver)->name        = "ttyS";
-        DRV(xencons_driver)->minor_start = 64 + xc_num;
-        DRV(xencons_driver)->name_base   = 0 + xc_num;
-    }
-    else
-    {
-        DRV(xencons_driver)->name        = "tty";
-        DRV(xencons_driver)->minor_start = xc_num;
-        DRV(xencons_driver)->name_base   = xc_num;
-    }
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-    tty_set_operations(xencons_driver, &xencons_ops);
+       memset(&xencons_driver, 0, sizeof(struct tty_driver));
+       xencons_driver.magic       = TTY_DRIVER_MAGIC;
+       xencons_driver.refcount    = &xencons_refcount;
+       xencons_driver.table       = xencons_table;
+       xencons_driver.num         =
+               (xc_mode == XC_SERIAL) ? 1 : MAX_NR_CONSOLES;
+#endif
+
+       DRV(xencons_driver)->major           = TTY_MAJOR;
+       DRV(xencons_driver)->type            = TTY_DRIVER_TYPE_SERIAL;
+       DRV(xencons_driver)->subtype         = SERIAL_TYPE_NORMAL;
+       DRV(xencons_driver)->init_termios    = tty_std_termios;
+       DRV(xencons_driver)->flags           = 
+               TTY_DRIVER_REAL_RAW |
+               TTY_DRIVER_RESET_TERMIOS |
+               TTY_DRIVER_NO_DEVFS;
+       DRV(xencons_driver)->termios         = xencons_termios;
+       DRV(xencons_driver)->termios_locked  = xencons_termios_locked;
+
+       if (xc_mode == XC_SERIAL)
+       {
+               DRV(xencons_driver)->name        = "ttyS";
+               DRV(xencons_driver)->minor_start = 64 + xc_num;
+               DRV(xencons_driver)->name_base   = 0 + xc_num;
+       } else {
+               DRV(xencons_driver)->name        = "tty";
+               DRV(xencons_driver)->minor_start = xc_num;
+               DRV(xencons_driver)->name_base   = xc_num;
+       }
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+       tty_set_operations(xencons_driver, &xencons_ops);
 #else
-    xencons_driver.open            = xencons_open;
-    xencons_driver.close           = xencons_close;
-    xencons_driver.write           = xencons_write;
-    xencons_driver.write_room      = xencons_write_room;
-    xencons_driver.put_char        = xencons_put_char;
-    xencons_driver.flush_chars     = xencons_flush_chars;
-    xencons_driver.chars_in_buffer = xencons_chars_in_buffer;
-    xencons_driver.send_xchar      = xencons_send_xchar;
-    xencons_driver.flush_buffer    = xencons_flush_buffer;
-    xencons_driver.throttle        = xencons_throttle;
-    xencons_driver.unthrottle      = xencons_unthrottle;
-    xencons_driver.wait_until_sent = xencons_wait_until_sent;
-#endif
-
-    if ( (rc = tty_register_driver(DRV(xencons_driver))) != 0 )
-    {
-        printk("WARNING: Failed to register Xen virtual "
-               "console driver as '%s%d'\n",
-               DRV(xencons_driver)->name, DRV(xencons_driver)->name_base);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-        put_tty_driver(xencons_driver);
-        xencons_driver = NULL;
-#endif
-        return rc;
-    }
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-    tty_register_device(xencons_driver, 0, NULL);
-#endif
-
-    if ( xen_start_info->flags & SIF_INITDOMAIN )
-    {
-        xencons_priv_irq = bind_virq_to_irq(VIRQ_CONSOLE);
-        (void)request_irq(xencons_priv_irq,
-                          xencons_priv_interrupt, 0, "console", NULL);
-    }
-    else
-    {
-       
-       xencons_ring_register_receiver(xencons_rx);
-    }
-
-    printk("Xen virtual console successfully installed as %s%d\n",
-           DRV(xencons_driver)->name,
-           DRV(xencons_driver)->name_base );
+       xencons_driver.open            = xencons_open;
+       xencons_driver.close           = xencons_close;
+       xencons_driver.write           = xencons_write;
+       xencons_driver.write_room      = xencons_write_room;
+       xencons_driver.put_char        = xencons_put_char;
+       xencons_driver.flush_chars     = xencons_flush_chars;
+       xencons_driver.chars_in_buffer = xencons_chars_in_buffer;
+       xencons_driver.send_xchar      = xencons_send_xchar;
+       xencons_driver.flush_buffer    = xencons_flush_buffer;
+       xencons_driver.throttle        = xencons_throttle;
+       xencons_driver.unthrottle      = xencons_unthrottle;
+       xencons_driver.wait_until_sent = xencons_wait_until_sent;
+#endif
+
+       if ((rc = tty_register_driver(DRV(xencons_driver))) != 0) {
+               printk("WARNING: Failed to register Xen virtual "
+                      "console driver as '%s%d'\n",
+                      DRV(xencons_driver)->name, DRV(xencons_driver)->name_base);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+               put_tty_driver(xencons_driver);
+               xencons_driver = NULL;
+#endif
+               return rc;
+       }
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+       tty_register_device(xencons_driver, 0, NULL);
+#endif
+
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
+               xencons_priv_irq = bind_virq_to_irq(VIRQ_CONSOLE);
+               (void)request_irq(xencons_priv_irq,
+                                 xencons_priv_interrupt, 0, "console", NULL);
+       } else {
+               xencons_ring_register_receiver(xencons_rx);
+       }
+
+       printk("Xen virtual console successfully installed as %s%d\n",
+              DRV(xencons_driver)->name,
+              DRV(xencons_driver)->name_base );
     
-    return 0;
+       return 0;
 }
 
 module_init(xencons_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c
--- a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c   Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c   Thu Sep 22 17:42:01 2005
@@ -36,13 +36,12 @@
 
 static inline struct ring_head *outring(void)
 {
-       return machine_to_virt(xen_start_info->console_mfn << PAGE_SHIFT);
+       return mfn_to_virt(xen_start_info->console_mfn);
 }
 
 static inline struct ring_head *inring(void)
 {
-       return machine_to_virt(xen_start_info->console_mfn << PAGE_SHIFT)
-               + PAGE_SIZE/2;
+       return mfn_to_virt(xen_start_info->console_mfn) + PAGE_SIZE/2;
 }
 
 
@@ -126,3 +125,13 @@
 
        (void)xencons_ring_init();
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.h
--- a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.h   Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.h   Thu Sep 22 17:42:01 2005
@@ -3,12 +3,21 @@
 
 asmlinkage int xprintk(const char *fmt, ...);
 
-
 int xencons_ring_init(void);
 int xencons_ring_send(const char *data, unsigned len);
 
-typedef void (xencons_receiver_func)(char *buf, unsigned len, 
-                                     struct pt_regs *regs);
+typedef void (xencons_receiver_func)(
+       char *buf, unsigned len, struct pt_regs *regs);
 void xencons_ring_register_receiver(xencons_receiver_func *f);
 
 #endif /* _XENCONS_RING_H */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c  Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c  Thu Sep 22 17:42:01 2005
@@ -1,9 +1,9 @@
 /******************************************************************************
  * evtchn.c
  * 
- * Xenolinux driver for receiving and demuxing event-channel signals.
- * 
- * Copyright (c) 2004, K A Fraser
+ * Driver for receiving and demuxing event-channel signals.
+ * 
+ * Copyright (c) 2004-2005, K A Fraser
  * Multi-process extensions Copyright (c) 2004, Steven Smith
  * 
  * This file may be distributed separately from the Linux kernel, or
@@ -46,29 +46,18 @@
 #include <linux/init.h>
 #define XEN_EVTCHN_MASK_OPS
 #include <asm-xen/evtchn.h>
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#include <linux/devfs_fs_kernel.h>
-#define OLD_DEVFS
-#else
 #include <linux/gfp.h>
-#endif
-
-#ifdef OLD_DEVFS
-/* NB. This must be shared amongst drivers if more things go in /dev/xen */
-static devfs_handle_t xen_dev_dir;
-#endif
 
 struct per_user_data {
-    /* Notification ring, accessed via /dev/xen/evtchn. */
-#   define EVTCHN_RING_SIZE     2048  /* 2048 16-bit entries */
-#   define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
-    u16 *ring;
-    unsigned int ring_cons, ring_prod, ring_overflow;
-
-    /* Processes wait on this queue when ring is empty. */
-    wait_queue_head_t evtchn_wait;
-    struct fasync_struct *evtchn_async_queue;
+       /* Notification ring, accessed via /dev/xen/evtchn. */
+#define EVTCHN_RING_SIZE     2048  /* 2048 16-bit entries */
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+       u16 *ring;
+       unsigned int ring_cons, ring_prod, ring_overflow;
+
+       /* Processes wait on this queue when ring is empty. */
+       wait_queue_head_t evtchn_wait;
+       struct fasync_struct *evtchn_async_queue;
 };
 
 /* Who's bound to each port? */
@@ -77,356 +66,310 @@
 
 void evtchn_device_upcall(int port)
 {
-    struct per_user_data *u;
-
-    spin_lock(&port_user_lock);
-
-    mask_evtchn(port);
-    clear_evtchn(port);
-
-    if ( (u = port_user[port]) != NULL )
-    {
-        if ( (u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE )
-        {
-            u->ring[EVTCHN_RING_MASK(u->ring_prod)] = (u16)port;
-            if ( u->ring_cons == u->ring_prod++ )
-            {
-                wake_up_interruptible(&u->evtchn_wait);
-                kill_fasync(&u->evtchn_async_queue, SIGIO, POLL_IN);
-            }
-        }
-        else
-        {
-            u->ring_overflow = 1;
-        }
-    }
-
-    spin_unlock(&port_user_lock);
+       struct per_user_data *u;
+
+       spin_lock(&port_user_lock);
+
+       mask_evtchn(port);
+       clear_evtchn(port);
+
+       if ((u = port_user[port]) != NULL) {
+               if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
+                       u->ring[EVTCHN_RING_MASK(u->ring_prod)] = (u16)port;
+                       if (u->ring_cons == u->ring_prod++) {
+                               wake_up_interruptible(&u->evtchn_wait);
+                               kill_fasync(&u->evtchn_async_queue, SIGIO, POLL_IN);
+                       }
+               } else {
+                       u->ring_overflow = 1;
+               }
+       }
+
+       spin_unlock(&port_user_lock);
 }
 
 static ssize_t evtchn_read(struct file *file, char *buf,
                            size_t count, loff_t *ppos)
 {
-    int rc;
-    unsigned int c, p, bytes1 = 0, bytes2 = 0;
-    DECLARE_WAITQUEUE(wait, current);
-    struct per_user_data *u = file->private_data;
-
-    add_wait_queue(&u->evtchn_wait, &wait);
-
-    count &= ~1; /* even number of bytes */
-
-    if ( count == 0 )
-    {
-        rc = 0;
-        goto out;
-    }
-
-    if ( count > PAGE_SIZE )
-        count = PAGE_SIZE;
-
-    for ( ; ; )
-    {
-        set_current_state(TASK_INTERRUPTIBLE);
-
-        if ( (c = u->ring_cons) != (p = u->ring_prod) )
-            break;
-
-        if ( u->ring_overflow )
-        {
-            rc = -EFBIG;
-            goto out;
-        }
-
-        if ( file->f_flags & O_NONBLOCK )
-        {
-            rc = -EAGAIN;
-            goto out;
-        }
-
-        if ( signal_pending(current) )
-        {
-            rc = -ERESTARTSYS;
-            goto out;
-        }
-
-        schedule();
-    }
-
-    /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
-    if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 )
-    {
-        bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(u16);
-        bytes2 = EVTCHN_RING_MASK(p) * sizeof(u16);
-    }
-    else
-    {
-        bytes1 = (p - c) * sizeof(u16);
-        bytes2 = 0;
-    }
-
-    /* Truncate chunks according to caller's maximum byte count. */
-    if ( bytes1 > count )
-    {
-        bytes1 = count;
-        bytes2 = 0;
-    }
-    else if ( (bytes1 + bytes2) > count )
-    {
-        bytes2 = count - bytes1;
-    }
-
-    if ( copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
-         ((bytes2 != 0) && copy_to_user(&buf[bytes1], &u->ring[0], bytes2)) )
-    {
-        rc = -EFAULT;
-        goto out;
-    }
-
-    u->ring_cons += (bytes1 + bytes2) / sizeof(u16);
-
-    rc = bytes1 + bytes2;
+       int rc;
+       unsigned int c, p, bytes1 = 0, bytes2 = 0;
+       DECLARE_WAITQUEUE(wait, current);
+       struct per_user_data *u = file->private_data;
+
+       add_wait_queue(&u->evtchn_wait, &wait);
+
+       count &= ~1; /* even number of bytes */
+
+       if (count == 0) {
+               rc = 0;
+               goto out;
+       }
+
+       if (count > PAGE_SIZE)
+               count = PAGE_SIZE;
+
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               if ((c = u->ring_cons) != (p = u->ring_prod))
+                       break;
+
+               if (u->ring_overflow) {
+                       rc = -EFBIG;
+                       goto out;
+               }
+
+               if (file->f_flags & O_NONBLOCK) {
+                       rc = -EAGAIN;
+                       goto out;
+               }
+
+               if (signal_pending(current)) {
+                       rc = -ERESTARTSYS;
+                       goto out;
+               }
+
+               schedule();
+       }
+
+       /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+       if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
+               bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+                       sizeof(u16);
+               bytes2 = EVTCHN_RING_MASK(p) * sizeof(u16);
+       } else {
+               bytes1 = (p - c) * sizeof(u16);
+               bytes2 = 0;
+       }
+
+       /* Truncate chunks according to caller's maximum byte count. */
+       if (bytes1 > count) {
+               bytes1 = count;
+               bytes2 = 0;
+       } else if ((bytes1 + bytes2) > count) {
+               bytes2 = count - bytes1;
+       }
+
+       if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+           ((bytes2 != 0) &&
+            copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) {
+               rc = -EFAULT;
+               goto out;
+       }
+
+       u->ring_cons += (bytes1 + bytes2) / sizeof(u16);
+
+       rc = bytes1 + bytes2;
 
  out:
-    __set_current_state(TASK_RUNNING);
-    remove_wait_queue(&u->evtchn_wait, &wait);
-    return rc;
+       __set_current_state(TASK_RUNNING);
+       remove_wait_queue(&u->evtchn_wait, &wait);
+       return rc;
 }
 
 static ssize_t evtchn_write(struct file *file, const char *buf,
                             size_t count, loff_t *ppos)
 {
-    int  rc, i;
-    u16 *kbuf = (u16 *)__get_free_page(GFP_KERNEL);
-    struct per_user_data *u = file->private_data;
-
-    if ( kbuf == NULL )
-        return -ENOMEM;
-
-    count &= ~1; /* even number of bytes */
-
-    if ( count == 0 )
-    {
-        rc = 0;
-        goto out;
-    }
-
-    if ( count > PAGE_SIZE )
-        count = PAGE_SIZE;
-
-    if ( copy_from_user(kbuf, buf, count) != 0 )
-    {
-        rc = -EFAULT;
-        goto out;
-    }
-
-    spin_lock_irq(&port_user_lock);
-    for ( i = 0; i < (count/2); i++ )
-        if ( (kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u) )
-            unmask_evtchn(kbuf[i]);
-    spin_unlock_irq(&port_user_lock);
-
-    rc = count;
+       int  rc, i;
+       u16 *kbuf = (u16 *)__get_free_page(GFP_KERNEL);
+       struct per_user_data *u = file->private_data;
+
+       if (kbuf == NULL)
+               return -ENOMEM;
+
+       count &= ~1; /* even number of bytes */
+
+       if (count == 0) {
+               rc = 0;
+               goto out;
+       }
+
+       if (count > PAGE_SIZE)
+               count = PAGE_SIZE;
+
+       if (copy_from_user(kbuf, buf, count) != 0) {
+               rc = -EFAULT;
+               goto out;
+       }
+
+       spin_lock_irq(&port_user_lock);
+       for (i = 0; i < (count/2); i++)
+               if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
+                       unmask_evtchn(kbuf[i]);
+       spin_unlock_irq(&port_user_lock);
+
+       rc = count;
 
  out:
-    free_page((unsigned long)kbuf);
-    return rc;
+       free_page((unsigned long)kbuf);
+       return rc;
 }
 
 static int evtchn_ioctl(struct inode *inode, struct file *file,
                         unsigned int cmd, unsigned long arg)
 {
-    int rc = 0;
-    struct per_user_data *u = file->private_data;
-
-    spin_lock_irq(&port_user_lock);
+       int rc = 0;
+       struct per_user_data *u = file->private_data;
+
+       spin_lock_irq(&port_user_lock);
     
-    switch ( cmd )
-    {
-    case EVTCHN_RESET:
-        /* Initialise the ring to empty. Clear errors. */
-        u->ring_cons = u->ring_prod = u->ring_overflow = 0;
-        break;
-
-    case EVTCHN_BIND:
-        if ( arg >= NR_EVENT_CHANNELS )
-        {
-            rc = -EINVAL;
-        }
-        else if ( port_user[arg] != NULL )
-        {
-            rc = -EISCONN;
-        }
-        else
-        {
-            port_user[arg] = u;
-            unmask_evtchn(arg);
-        }
-        break;
-
-    case EVTCHN_UNBIND:
-        if ( arg >= NR_EVENT_CHANNELS )
-        {
-            rc = -EINVAL;
-        }
-        else if ( port_user[arg] != u )
-        {
-            rc = -ENOTCONN;
-        }
-        else
-        {
-            port_user[arg] = NULL;
-            mask_evtchn(arg);
-        }
-        break;
-
-    default:
-        rc = -ENOSYS;
-        break;
-    }
-
-    spin_unlock_irq(&port_user_lock);   
-
-    return rc;
+       switch (cmd) {
+       case EVTCHN_RESET:
+               /* Initialise the ring to empty. Clear errors. */
+               u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+               break;
+
+       case EVTCHN_BIND:
+               if (arg >= NR_EVENT_CHANNELS) {
+                       rc = -EINVAL;
+               } else if (port_user[arg] != NULL) {
+                       rc = -EISCONN;
+               } else {
+                       port_user[arg] = u;
+                       unmask_evtchn(arg);
+               }
+               break;
+
+       case EVTCHN_UNBIND:
+               if (arg >= NR_EVENT_CHANNELS) {
+                       rc = -EINVAL;
+               } else if (port_user[arg] != u) {
+                       rc = -ENOTCONN;
+               } else {
+                       port_user[arg] = NULL;
+                       mask_evtchn(arg);
+               }
+               break;
+
+       default:
+               rc = -ENOSYS;
+               break;
+       }
+
+       spin_unlock_irq(&port_user_lock);   
+
+       return rc;
 }
 
 static unsigned int evtchn_poll(struct file *file, poll_table *wait)
 {
-    unsigned int mask = POLLOUT | POLLWRNORM;
-    struct per_user_data *u = file->private_data;
-
-    poll_wait(file, &u->evtchn_wait, wait);
-    if ( u->ring_cons != u->ring_prod )
-        mask |= POLLIN | POLLRDNORM;
-    if ( u->ring_overflow )
-        mask = POLLERR;
-    return mask;
+       unsigned int mask = POLLOUT | POLLWRNORM;
+       struct per_user_data *u = file->private_data;
+
+       poll_wait(file, &u->evtchn_wait, wait);
+       if (u->ring_cons != u->ring_prod)
+               mask |= POLLIN | POLLRDNORM;
+       if (u->ring_overflow)
+               mask = POLLERR;
+       return mask;
 }
 
 static int evtchn_fasync(int fd, struct file *filp, int on)
 {
-    struct per_user_data *u = filp->private_data;
-    return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
+       struct per_user_data *u = filp->private_data;
+       return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
 }
 
 static int evtchn_open(struct inode *inode, struct file *filp)
 {
-    struct per_user_data *u;
-
-    if ( (u = kmalloc(sizeof(*u), GFP_KERNEL)) == NULL )
-        return -ENOMEM;
-
-    memset(u, 0, sizeof(*u));
-    init_waitqueue_head(&u->evtchn_wait);
-
-    if ( (u->ring = (u16 *)__get_free_page(GFP_KERNEL)) == NULL )
-    {
-        kfree(u);
-        return -ENOMEM;
-    }
-
-    filp->private_data = u;
-
-    return 0;
+       struct per_user_data *u;
+
+       if ((u = kmalloc(sizeof(*u), GFP_KERNEL)) == NULL)
+               return -ENOMEM;
+
+       memset(u, 0, sizeof(*u));
+       init_waitqueue_head(&u->evtchn_wait);
+
+       if ((u->ring = (u16 *)__get_free_page(GFP_KERNEL)) == NULL)
+       {
+               kfree(u);
+               return -ENOMEM;
+       }
+
+       filp->private_data = u;
+
+       return 0;
 }
 
 static int evtchn_release(struct inode *inode, struct file *filp)
 {
-    int i;
-    struct per_user_data *u = filp->private_data;
-
-    spin_lock_irq(&port_user_lock);
-
-    free_page((unsigned long)u->ring);
-
-    for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
-    {
-        if ( port_user[i] == u )
-        {
-            port_user[i] = NULL;
-            mask_evtchn(i);
-        }
-    }
-
-    spin_unlock_irq(&port_user_lock);
-
-    kfree(u);
-
-    return 0;
+       int i;
+       struct per_user_data *u = filp->private_data;
+
+       spin_lock_irq(&port_user_lock);
+
+       free_page((unsigned long)u->ring);
+
+       for (i = 0; i < NR_EVENT_CHANNELS; i++)
+       {
+               if (port_user[i] == u)
+               {
+                       port_user[i] = NULL;
+                       mask_evtchn(i);
+               }
+       }
+
+       spin_unlock_irq(&port_user_lock);
+
+       kfree(u);
+
+       return 0;
 }
 
 static struct file_operations evtchn_fops = {
-    .owner   = THIS_MODULE,
-    .read    = evtchn_read,
-    .write   = evtchn_write,
-    .ioctl   = evtchn_ioctl,
-    .poll    = evtchn_poll,
-    .fasync  = evtchn_fasync,
-    .open    = evtchn_open,
-    .release = evtchn_release,
+       .owner   = THIS_MODULE,
+       .read    = evtchn_read,
+       .write   = evtchn_write,
+       .ioctl   = evtchn_ioctl,
+       .poll    = evtchn_poll,
+       .fasync  = evtchn_fasync,
+       .open    = evtchn_open,
+       .release = evtchn_release,
 };
 
 static struct miscdevice evtchn_miscdev = {
-    .minor        = EVTCHN_MINOR,
-    .name         = "evtchn",
-    .fops         = &evtchn_fops,
+       .minor        = EVTCHN_MINOR,
+       .name         = "evtchn",
+       .fops         = &evtchn_fops,
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-    .devfs_name   = "misc/evtchn",
+       .devfs_name   = "misc/evtchn",
 #endif
 };
 
 static int __init evtchn_init(void)
 {
-#ifdef OLD_DEVFS
-    devfs_handle_t symlink_handle;
-    int            pos;
-    char           link_dest[64];
-#endif
-    int err;
-
-    spin_lock_init(&port_user_lock);
-    memset(port_user, 0, sizeof(port_user));
-
-    /* (DEVFS) create '/dev/misc/evtchn'. */
-    err = misc_register(&evtchn_miscdev);
-    if ( err != 0 )
-    {
-        printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
-        return err;
-    }
-
-#ifdef OLD_DEVFS
-    /* (DEVFS) create directory '/dev/xen'. */
-    xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL);
-
-    /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */
-    pos = devfs_generate_path(evtchn_miscdev.devfs_handle, 
-                              &link_dest[3], 
-                              sizeof(link_dest) - 3);
-    if ( pos >= 0 )
-        strncpy(&link_dest[pos], "../", 3);
-
-    /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */
-    (void)devfs_mk_symlink(xen_dev_dir, 
-                           "evtchn", 
-                           DEVFS_FL_DEFAULT, 
-                           &link_dest[pos],
-                           &symlink_handle, 
-                           NULL);
-
-    /* (DEVFS) automatically destroy the symlink with its destination. */
-    devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle);
-#endif
-
-    printk("Event-channel device installed.\n");
-
-    return 0;
+       int err;
+
+       spin_lock_init(&port_user_lock);
+       memset(port_user, 0, sizeof(port_user));
+
+       /* (DEVFS) create '/dev/misc/evtchn'. */
+       err = misc_register(&evtchn_miscdev);
+       if (err != 0)
+       {
+               printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+               return err;
+       }
+
+       printk("Event-channel device installed.\n");
+
+       return 0;
 }
 
 static void evtchn_cleanup(void)
 {
-    misc_deregister(&evtchn_miscdev);
+       misc_deregister(&evtchn_miscdev);
 }
 
 module_init(evtchn_init);
 module_exit(evtchn_cleanup);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Sep 22 17:42:01 2005
@@ -18,16 +18,11 @@
 #include <asm-xen/xen-public/io/netif.h>
 #include <asm/io.h>
 #include <asm/pgalloc.h>
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
 #include <asm-xen/xen-public/grant_table.h>
 #include <asm-xen/gnttab.h>
+#include <asm-xen/driver_util.h>
 
 #define GRANT_INVALID_REF (0xFFFF)
-
-#endif
-
-
 
 #if 0
 #define ASSERT(_p) \
@@ -44,74 +39,64 @@
 #define WPRINTK(fmt, args...) \
     printk(KERN_WARNING "xen_net: " fmt, ##args)
 
+typedef struct netif_st {
+       /* Unique identifier for this interface. */
+       domid_t          domid;
+       unsigned int     handle;
 
-typedef struct netif_st {
-    /* Unique identifier for this interface. */
-    domid_t          domid;
-    unsigned int     handle;
+       u8               fe_dev_addr[6];
 
-    u8               fe_dev_addr[6];
+       /* Physical parameters of the comms window. */
+       u16              tx_shmem_handle;
+       grant_ref_t      tx_shmem_ref; 
+       u16              rx_shmem_handle;
+       grant_ref_t      rx_shmem_ref; 
+       unsigned int     evtchn;
+       unsigned int     remote_evtchn;
 
-    /* Physical parameters of the comms window. */
-    unsigned long    tx_shmem_frame;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    u16              tx_shmem_handle;
-    unsigned long    tx_shmem_vaddr; 
-    grant_ref_t      tx_shmem_ref; 
-#endif
-    unsigned long    rx_shmem_frame;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    u16              rx_shmem_handle;
-    unsigned long    rx_shmem_vaddr; 
-    grant_ref_t      rx_shmem_ref; 
-#endif
-    unsigned int     evtchn;
-    unsigned int     remote_evtchn;
+       /* The shared rings and indexes. */
+       netif_tx_interface_t *tx;
+       netif_rx_interface_t *rx;
+       struct vm_struct *comms_area;
 
-    /* The shared rings and indexes. */
-    netif_tx_interface_t *tx;
-    netif_rx_interface_t *rx;
+       /* Private indexes into shared ring. */
+       NETIF_RING_IDX rx_req_cons;
+       NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+       NETIF_RING_IDX rx_resp_prod_copy;
+       NETIF_RING_IDX tx_req_cons;
+       NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
 
-    /* Private indexes into shared ring. */
-    NETIF_RING_IDX rx_req_cons;
-    NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
-#endif
-    NETIF_RING_IDX tx_req_cons;
-    NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
+       /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+       unsigned long   credit_bytes;
+       unsigned long   credit_usec;
+       unsigned long   remaining_credit;
+       struct timer_list credit_timeout;
 
-    /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
-    unsigned long   credit_bytes;
-    unsigned long   credit_usec;
-    unsigned long   remaining_credit;
-    struct timer_list credit_timeout;
+       /* Miscellaneous private stuff. */
+       enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+       int active;
+       struct list_head list;  /* scheduling list */
+       atomic_t         refcnt;
+       struct net_device *dev;
+       struct net_device_stats stats;
 
-    /* Miscellaneous private stuff. */
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    int active;
-    struct list_head list;  /* scheduling list */
-    atomic_t         refcnt;
-    struct net_device *dev;
-    struct net_device_stats stats;
-
-    struct work_struct free_work;
+       struct work_struct free_work;
 } netif_t;
 
 void netif_creditlimit(netif_t *netif);
 int  netif_disconnect(netif_t *netif);
 
 netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
-void free_netif_callback(netif_t *netif);
+void free_netif(netif_t *netif);
 int netif_map(netif_t *netif, unsigned long tx_ring_ref,
              unsigned long rx_ring_ref, unsigned int evtchn);
 
 #define netif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define netif_put(_b)                             \
-    do {                                          \
-        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            free_netif_callback(_b);              \
-    } while (0)
+#define netif_put(_b)                                          \
+       do {                                                    \
+               if ( atomic_dec_and_test(&(_b)->refcnt) )       \
+                       free_netif(_b);                         \
+       } while (0)
 
 void netif_xenbus_init(void);
 
@@ -123,3 +108,13 @@
 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
 
 #endif /* __NETIF__BACKEND__COMMON_H__ */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/netback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c      Thu Sep 22 17:42:01 2005
@@ -11,296 +11,293 @@
 
 static void __netif_up(netif_t *netif)
 {
-    struct net_device *dev = netif->dev;
-    spin_lock_bh(&dev->xmit_lock);
-    netif->active = 1;
-    spin_unlock_bh(&dev->xmit_lock);
-    (void)bind_evtchn_to_irqhandler(
-        netif->evtchn, netif_be_int, 0, dev->name, netif);
-    netif_schedule_work(netif);
+       struct net_device *dev = netif->dev;
+       spin_lock_bh(&dev->xmit_lock);
+       netif->active = 1;
+       spin_unlock_bh(&dev->xmit_lock);
+       (void)bind_evtchn_to_irqhandler(
+               netif->evtchn, netif_be_int, 0, dev->name, netif);
+       netif_schedule_work(netif);
 }
 
 static void __netif_down(netif_t *netif)
 {
-    struct net_device *dev = netif->dev;
-    spin_lock_bh(&dev->xmit_lock);
-    netif->active = 0;
-    spin_unlock_bh(&dev->xmit_lock);
-    unbind_evtchn_from_irqhandler(netif->evtchn, netif);
-    netif_deschedule_work(netif);
+       struct net_device *dev = netif->dev;
+       spin_lock_bh(&dev->xmit_lock);
+       netif->active = 0;
+       spin_unlock_bh(&dev->xmit_lock);
+       unbind_evtchn_from_irqhandler(netif->evtchn, netif);
+       netif_deschedule_work(netif);
 }
 
 static int net_open(struct net_device *dev)
 {
-    netif_t *netif = netdev_priv(dev);
-    if (netif->status == CONNECTED)
-        __netif_up(netif);
-    netif_start_queue(dev);
-    return 0;
+       netif_t *netif = netdev_priv(dev);
+       if (netif->status == CONNECTED)
+               __netif_up(netif);
+       netif_start_queue(dev);
+       return 0;
 }
 
 static int net_close(struct net_device *dev)
 {
-    netif_t *netif = netdev_priv(dev);
-    netif_stop_queue(dev);
-    if (netif->status == CONNECTED)
-        __netif_down(netif);
-    return 0;
+       netif_t *netif = netdev_priv(dev);
+       netif_stop_queue(dev);
+       if (netif->status == CONNECTED)
+               __netif_down(netif);
+       return 0;
 }
 
 netif_t *alloc_netif(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
 {
-    int err = 0, i;
-    struct net_device *dev;
-    netif_t *netif;
-    char name[IFNAMSIZ] = {};
-
-    snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
-    dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
-    if (dev == NULL) {
-        DPRINTK("Could not create netif: out of memory\n");
-        return NULL;
-    }
-
-    netif = netdev_priv(dev);
-    memset(netif, 0, sizeof(*netif));
-    netif->domid  = domid;
-    netif->handle = handle;
-    netif->status = DISCONNECTED;
-    atomic_set(&netif->refcnt, 0);
-    netif->dev = dev;
-
-    netif->credit_bytes = netif->remaining_credit = ~0UL;
-    netif->credit_usec  = 0UL;
-    init_timer(&netif->credit_timeout);
-
-    dev->hard_start_xmit = netif_be_start_xmit;
-    dev->get_stats       = netif_be_get_stats;
-    dev->open            = net_open;
-    dev->stop            = net_close;
-    dev->features        = NETIF_F_NO_CSUM;
-
-    /* Disable queuing. */
-    dev->tx_queue_len = 0;
-
-    for (i = 0; i < ETH_ALEN; i++)
-       if (be_mac[i] != 0)
-           break;
-    if (i == ETH_ALEN) {
-        /*
-         * Initialise a dummy MAC address. We choose the numerically largest
-         * non-broadcast address to prevent the address getting stolen by an
-         * Ethernet bridge for STP purposes. (FE:FF:FF:FF:FF:FF)
-         */ 
-        memset(dev->dev_addr, 0xFF, ETH_ALEN);
-        dev->dev_addr[0] &= ~0x01;
-    } else
-        memcpy(dev->dev_addr, be_mac, ETH_ALEN);
-
-    rtnl_lock();
-    err = register_netdevice(dev);
-    rtnl_unlock();
-    if (err) {
-        DPRINTK("Could not register new net device %s: err=%d\n",
-                dev->name, err);
-        free_netdev(dev);
-        return NULL;
-    }
-
-    DPRINTK("Successfully created netif\n");
-    return netif;
-}
-
-static int map_frontend_pages(netif_t *netif, unsigned long localaddr,
-                              unsigned long tx_ring_ref, 
-                              unsigned long rx_ring_ref)
-{
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    struct gnttab_map_grant_ref op;
-
-    /* Map: Use the Grant table reference */
-    op.host_addr = localaddr;
-    op.flags     = GNTMAP_host_map;
-    op.ref       = tx_ring_ref;
-    op.dom       = netif->domid;
+       int err = 0, i;
+       struct net_device *dev;
+       netif_t *netif;
+       char name[IFNAMSIZ] = {};
+
+       snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+       dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
+       if (dev == NULL) {
+               DPRINTK("Could not create netif: out of memory\n");
+               return NULL;
+       }
+
+       netif = netdev_priv(dev);
+       memset(netif, 0, sizeof(*netif));
+       netif->domid  = domid;
+       netif->handle = handle;
+       netif->status = DISCONNECTED;
+       atomic_set(&netif->refcnt, 0);
+       netif->dev = dev;
+
+       netif->credit_bytes = netif->remaining_credit = ~0UL;
+       netif->credit_usec  = 0UL;
+       init_timer(&netif->credit_timeout);
+
+       dev->hard_start_xmit = netif_be_start_xmit;
+       dev->get_stats       = netif_be_get_stats;
+       dev->open            = net_open;
+       dev->stop            = net_close;
+       dev->features        = NETIF_F_NO_CSUM;
+
+       /* Disable queuing. */
+       dev->tx_queue_len = 0;
+
+       for (i = 0; i < ETH_ALEN; i++)
+               if (be_mac[i] != 0)
+                       break;
+       if (i == ETH_ALEN) {
+               /*
+                * Initialise a dummy MAC address. We choose the numerically
+                * largest non-broadcast address to prevent the address getting
+                * stolen by an Ethernet bridge for STP purposes.
+                 * (FE:FF:FF:FF:FF:FF) 
+                */ 
+               memset(dev->dev_addr, 0xFF, ETH_ALEN);
+               dev->dev_addr[0] &= ~0x01;
+       } else
+               memcpy(dev->dev_addr, be_mac, ETH_ALEN);
+
+       rtnl_lock();
+       err = register_netdevice(dev);
+       rtnl_unlock();
+       if (err) {
+               DPRINTK("Could not register new net device %s: err=%d\n",
+                       dev->name, err);
+               free_netdev(dev);
+               return NULL;
+       }
+
+       DPRINTK("Successfully created netif\n");
+       return netif;
+}
+
+static int map_frontend_pages(
+       netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
+{
+       struct gnttab_map_grant_ref op;
+
+       op.host_addr = (unsigned long)netif->comms_area->addr;
+       op.flags     = GNTMAP_host_map;
+       op.ref       = tx_ring_ref;
+       op.dom       = netif->domid;
     
-    BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-    if (op.handle < 0) { 
-        DPRINTK(" Grant table operation failure mapping tx_ring_ref!\n");
-        return op.handle;
-    }
-
-    netif->tx_shmem_ref    = tx_ring_ref;
-    netif->tx_shmem_handle = op.handle;
-    netif->tx_shmem_vaddr  = localaddr;
-
-    /* Map: Use the Grant table reference */
-    op.host_addr = localaddr + PAGE_SIZE;
-    op.flags     = GNTMAP_host_map;
-    op.ref       = rx_ring_ref;
-    op.dom       = netif->domid;
-
-    BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-    if (op.handle < 0) { 
-        DPRINTK(" Grant table operation failure mapping rx_ring_ref!\n");
-        return op.handle;
-    }
-
-    netif->rx_shmem_ref    = rx_ring_ref;
-    netif->rx_shmem_handle = op.handle;
-    netif->rx_shmem_vaddr  = localaddr + PAGE_SIZE;
-
-#else
-    pgprot_t      prot = __pgprot(_KERNPG_TABLE);
-    int           err;
-
-    err = direct_remap_pfn_range(&init_mm, localaddr,
-                                 tx_ring_ref, PAGE_SIZE,
-                                 prot, netif->domid); 
-    
-    err |= direct_remap_pfn_range(&init_mm, localaddr + PAGE_SIZE,
-                                 rx_ring_ref, PAGE_SIZE,
-                                 prot, netif->domid);
-
-    if (err)
-       return err;
-#endif
-
-    return 0;
+       lock_vm_area(netif->comms_area);
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
+       unlock_vm_area(netif->comms_area);
+
+       if (op.handle < 0) { 
+               DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
+               return op.handle;
+       }
+
+       netif->tx_shmem_ref    = tx_ring_ref;
+       netif->tx_shmem_handle = op.handle;
+
+       op.host_addr = (unsigned long)netif->comms_area->addr + PAGE_SIZE;
+       op.flags     = GNTMAP_host_map;
+       op.ref       = rx_ring_ref;
+       op.dom       = netif->domid;
+
+       lock_vm_area(netif->comms_area);
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
+       unlock_vm_area(netif->comms_area);
+
+       if (op.handle < 0) { 
+               DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
+               return op.handle;
+       }
+
+       netif->rx_shmem_ref    = rx_ring_ref;
+       netif->rx_shmem_handle = op.handle;
+
+       return 0;
 }
 
 static void unmap_frontend_pages(netif_t *netif)
 {
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    struct gnttab_unmap_grant_ref op;
-
-    op.host_addr    = netif->tx_shmem_vaddr;
-    op.handle       = netif->tx_shmem_handle;
-    op.dev_bus_addr = 0;
-    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
-
-    op.host_addr    = netif->rx_shmem_vaddr;
-    op.handle       = netif->rx_shmem_handle;
-    op.dev_bus_addr = 0;
-    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
-#endif
-
-    return; 
+       struct gnttab_unmap_grant_ref op;
+
+       op.host_addr    = (unsigned long)netif->comms_area->addr;
+       op.handle       = netif->tx_shmem_handle;
+       op.dev_bus_addr = 0;
+
+       lock_vm_area(netif->comms_area);
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+       unlock_vm_area(netif->comms_area);
+
+       op.host_addr    = (unsigned long)netif->comms_area->addr + PAGE_SIZE;
+       op.handle       = netif->rx_shmem_handle;
+       op.dev_bus_addr = 0;
+
+       lock_vm_area(netif->comms_area);
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+       unlock_vm_area(netif->comms_area);
 }
 
 int netif_map(netif_t *netif, unsigned long tx_ring_ref,
              unsigned long rx_ring_ref, unsigned int evtchn)
 {
-    struct vm_struct *vma;
-    evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
-    int err;
-
-    vma = get_vm_area(2*PAGE_SIZE, VM_IOREMAP);
-    if (vma == NULL)
-        return -ENOMEM;
-
-    err = map_frontend_pages(netif, (unsigned long)vma->addr, tx_ring_ref,
-                             rx_ring_ref);
-    if (err) {
-        vfree(vma->addr);
-       return err;
-    }
-
-    op.u.bind_interdomain.dom1 = DOMID_SELF;
-    op.u.bind_interdomain.dom2 = netif->domid;
-    op.u.bind_interdomain.port1 = 0;
-    op.u.bind_interdomain.port2 = evtchn;
-    err = HYPERVISOR_event_channel_op(&op);
-    if (err) {
-       unmap_frontend_pages(netif);
-       vfree(vma->addr);
-       return err;
-    }
-
-    netif->evtchn = op.u.bind_interdomain.port1;
-    netif->remote_evtchn = evtchn;
-
-    netif->tx = (netif_tx_interface_t *)vma->addr;
-    netif->rx = (netif_rx_interface_t *)((char *)vma->addr + PAGE_SIZE);
-    netif->tx->resp_prod = netif->rx->resp_prod = 0;
-    netif_get(netif);
-    wmb(); /* Other CPUs see new state before interface is started. */
-
-    rtnl_lock();
-    netif->status = CONNECTED;
-    wmb();
-    if (netif_running(netif->dev))
-        __netif_up(netif);
-    rtnl_unlock();
-
-    return 0;
-}
-
-static void free_netif(void *arg)
-{
-    evtchn_op_t op = { .cmd = EVTCHNOP_close };
-    netif_t *netif = (netif_t *)arg;
-
-    /*
-     * These can't be done in netif_disconnect() because at that point there
-     * may be outstanding requests in the network stack whose asynchronous
-     * responses must still be notified to the remote driver.
-     */
-
-    op.u.close.port = netif->evtchn;
-    op.u.close.dom = DOMID_SELF;
-    HYPERVISOR_event_channel_op(&op);
-    op.u.close.port = netif->remote_evtchn;
-    op.u.close.dom = netif->domid;
-    HYPERVISOR_event_channel_op(&op);
-
-    unregister_netdev(netif->dev);
-
-    if (netif->tx) {
-       unmap_frontend_pages(netif);
-       vfree(netif->tx); /* Frees netif->rx as well. */
-    }
-
-    free_netdev(netif->dev);
-}
-
-void free_netif_callback(netif_t *netif)
-{
-    INIT_WORK(&netif->free_work, free_netif, (void *)netif);
-    schedule_work(&netif->free_work);
+       evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+       int err;
+
+       netif->comms_area = alloc_vm_area(2*PAGE_SIZE);
+       if (netif->comms_area == NULL)
+               return -ENOMEM;
+
+       err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
+       if (err) {
+               free_vm_area(netif->comms_area);
+               return err;
+       }
+
+       op.u.bind_interdomain.dom1 = DOMID_SELF;
+       op.u.bind_interdomain.dom2 = netif->domid;
+       op.u.bind_interdomain.port1 = 0;
+       op.u.bind_interdomain.port2 = evtchn;
+       err = HYPERVISOR_event_channel_op(&op);
+       if (err) {
+               unmap_frontend_pages(netif);
+               free_vm_area(netif->comms_area);
+               return err;
+       }
+
+       netif->evtchn = op.u.bind_interdomain.port1;
+       netif->remote_evtchn = evtchn;
+
+       netif->tx = (netif_tx_interface_t *)netif->comms_area->addr;
+       netif->rx = (netif_rx_interface_t *)
+               ((char *)netif->comms_area->addr + PAGE_SIZE);
+       netif->tx->resp_prod = netif->rx->resp_prod = 0;
+       netif_get(netif);
+       wmb(); /* Other CPUs see new state before interface is started. */
+
+       rtnl_lock();
+       netif->status = CONNECTED;
+       wmb();
+       if (netif_running(netif->dev))
+               __netif_up(netif);
+       rtnl_unlock();
+
+       return 0;
+}
+
+static void free_netif_callback(void *arg)
+{
+       evtchn_op_t op = { .cmd = EVTCHNOP_close };
+       netif_t *netif = (netif_t *)arg;
+
+       /*
+        * These can't be done in netif_disconnect() because at that point
+        * there may be outstanding requests in the network stack whose
+        * asynchronous responses must still be notified to the remote driver.
+        */
+
+       op.u.close.port = netif->evtchn;
+       op.u.close.dom = DOMID_SELF;
+       HYPERVISOR_event_channel_op(&op);
+       op.u.close.port = netif->remote_evtchn;
+       op.u.close.dom = netif->domid;
+       HYPERVISOR_event_channel_op(&op);
+
+       unregister_netdev(netif->dev);
+
+       if (netif->tx) {
+               unmap_frontend_pages(netif);
+               free_vm_area(netif->comms_area);
+       }
+
+       free_netdev(netif->dev);
+}
+
+void free_netif(netif_t *netif)
+{
+       INIT_WORK(&netif->free_work, free_netif_callback, (void *)netif);
+       schedule_work(&netif->free_work);
 }
 
 void netif_creditlimit(netif_t *netif)
 {
 #if 0
-    /* Set the credit limit (reset remaining credit to new limit). */
-    netif->credit_bytes = netif->remaining_credit = creditlimit->credit_bytes;
-    netif->credit_usec = creditlimit->period_usec;
-
-    if (netif->status == CONNECTED) {
-        /*
-         * Schedule work so that any packets waiting under previous credit 
-         * limit are dealt with (acts like a replenishment point).
-         */
-        netif->credit_timeout.expires = jiffies;
-        netif_schedule_work(netif);
-    }
+       /* Set the credit limit (reset remaining credit to new limit). */
+       netif->credit_bytes     = creditlimit->credit_bytes;
+       netif->remaining_credit = creditlimit->credit_bytes;
+       netif->credit_usec      = creditlimit->period_usec;
+
+       if (netif->status == CONNECTED) {
+               /*
+                * Schedule work so that any packets waiting under previous
+                * credit limit are dealt with (acts as a replenishment point).
+                */
+               netif->credit_timeout.expires = jiffies;
+               netif_schedule_work(netif);
+       }
 #endif
 }
 
 int netif_disconnect(netif_t *netif)
 {
 
-    if (netif->status == CONNECTED) {
-        rtnl_lock();
-        netif->status = DISCONNECTING;
-        wmb();
-        if (netif_running(netif->dev))
-            __netif_down(netif);
-        rtnl_unlock();
-        netif_put(netif);
-        return 0; /* Caller should not send response message. */
-    }
-
-    return 1;
-}
+       if (netif->status == CONNECTED) {
+               rtnl_lock();
+               netif->status = DISCONNECTING;
+               wmb();
+               if (netif_running(netif->dev))
+                       __netif_down(netif);
+               rtnl_unlock();
+               netif_put(netif);
+               return 0; /* Caller should not send response message. */
+       }
+
+       return 1;
+}
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Thu Sep 22 17:42:01 2005
@@ -23,7 +23,7 @@
 static int  make_rx_response(netif_t *netif, 
                              u16      id, 
                              s8       st,
-                             unsigned long addr,
+                             u16      offset,
                              u16      size,
                              u16      csum_valid);
 
@@ -41,11 +41,7 @@
 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
 
-#ifdef CONFIG_XEN_NETDEV_GRANT
-static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
-#else
-static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
-#endif
+static gnttab_transfer_t grant_rx_op[MAX_PENDING_REQS];
 static unsigned char rx_notify[NR_EVENT_CHANNELS];
 
 /* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -57,8 +53,8 @@
 #define PKT_PROT_LEN 64
 
 static struct {
-    netif_tx_request_t req;
-    netif_t *netif;
+       netif_tx_request_t req;
+       netif_t *netif;
 } pending_tx_info[MAX_PENDING_REQS];
 static u16 pending_ring[MAX_PENDING_REQS];
 typedef unsigned int PEND_RING_IDX;
@@ -72,14 +68,9 @@
 
 static struct sk_buff_head tx_queue;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT
 static u16 grant_tx_ref[MAX_PENDING_REQS];
 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
-
-#else
-static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
-#endif
 
 static struct list_head net_schedule_list;
 static spinlock_t net_schedule_list_lock;
@@ -91,49 +82,49 @@
 
 static unsigned long alloc_mfn(void)
 {
-    unsigned long mfn = 0, flags;
-    struct xen_memory_reservation reservation = {
-        .extent_start = mfn_list,
-        .nr_extents   = MAX_MFN_ALLOC,
-        .extent_order = 0,
-        .domid        = DOMID_SELF
-    };
-    spin_lock_irqsave(&mfn_lock, flags);
-    if ( unlikely(alloc_index == 0) )
-        alloc_index = HYPERVISOR_memory_op(
-            XENMEM_increase_reservation, &reservation);
-    if ( alloc_index != 0 )
-        mfn = mfn_list[--alloc_index];
-    spin_unlock_irqrestore(&mfn_lock, flags);
-    return mfn;
-}
-
-#ifndef CONFIG_XEN_NETDEV_GRANT
+       unsigned long mfn = 0, flags;
+       struct xen_memory_reservation reservation = {
+               .extent_start = mfn_list,
+               .nr_extents   = MAX_MFN_ALLOC,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+       spin_lock_irqsave(&mfn_lock, flags);
+       if ( unlikely(alloc_index == 0) )
+               alloc_index = HYPERVISOR_memory_op(
+                       XENMEM_increase_reservation, &reservation);
+       if ( alloc_index != 0 )
+               mfn = mfn_list[--alloc_index];
+       spin_unlock_irqrestore(&mfn_lock, flags);
+       return mfn;
+}
+
+#if 0
 static void free_mfn(unsigned long mfn)
 {
-    unsigned long flags;
-    struct xen_memory_reservation reservation = {
-        .extent_start = &mfn,
-        .nr_extents   = 1,
-        .extent_order = 0,
-        .domid        = DOMID_SELF
-    };
-    spin_lock_irqsave(&mfn_lock, flags);
-    if ( alloc_index != MAX_MFN_ALLOC )
-        mfn_list[alloc_index++] = mfn;
-    else if ( HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation)
-              != 1 )
-        BUG();
-    spin_unlock_irqrestore(&mfn_lock, flags);
+       unsigned long flags;
+       struct xen_memory_reservation reservation = {
+               .extent_start = &mfn,
+               .nr_extents   = 1,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+       spin_lock_irqsave(&mfn_lock, flags);
+       if ( alloc_index != MAX_MFN_ALLOC )
+               mfn_list[alloc_index++] = mfn;
+       else
+               BUG_ON(HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                           &reservation) != 1);
+       spin_unlock_irqrestore(&mfn_lock, flags);
 }
 #endif
 
 static inline void maybe_schedule_tx_action(void)
 {
-    smp_mb();
-    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
-         !list_empty(&net_schedule_list) )
-        tasklet_schedule(&net_tx_tasklet);
+       smp_mb();
+       if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+           !list_empty(&net_schedule_list))
+               tasklet_schedule(&net_tx_tasklet);
 }
 
 /*
@@ -142,77 +133,66 @@
  */
 static inline int is_xen_skb(struct sk_buff *skb)
 {
-    extern kmem_cache_t *skbuff_cachep;
-    kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
-    return (cp == skbuff_cachep);
+       extern kmem_cache_t *skbuff_cachep;
+       kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
+       return (cp == skbuff_cachep);
 }
 
 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-    netif_t *netif = netdev_priv(dev);
-
-    ASSERT(skb->dev == dev);
-
-    /* Drop the packet if the target domain has no receive buffers. */
-    if ( !netif->active || 
-         (netif->rx_req_cons == netif->rx->req_prod) ||
-         ((netif->rx_req_cons-netif->rx_resp_prod) == NETIF_RX_RING_SIZE) )
-        goto drop;
-
-    /*
-     * We do not copy the packet unless:
-     *  1. The data is shared; or
-     *  2. The data is not allocated from our special cache.
-     * NB. We also couldn't cope with fragmented packets, but we won't get
-     *     any because we not advertise the NETIF_F_SG feature.
-     */
-    if ( skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb) )
-    {
-        int hlen = skb->data - skb->head;
-        struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
-        if ( unlikely(nskb == NULL) )
-            goto drop;
-        skb_reserve(nskb, hlen);
-        __skb_put(nskb, skb->len);
-        if (skb_copy_bits(skb, -hlen, nskb->data - hlen, skb->len + hlen))
-            BUG();
-        nskb->dev = skb->dev;
-        nskb->proto_csum_valid = skb->proto_csum_valid;
-        dev_kfree_skb(skb);
-        skb = nskb;
-    }
-#ifdef CONFIG_XEN_NETDEV_GRANT
-#ifdef DEBUG_GRANT
-    printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n",
-           netif->rx->req_prod,
-           netif->rx_req_cons,
-           netif->rx->ring[
-                  MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
-           netif->rx->ring[
-                  MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
-#endif
-#endif
-    netif->rx_req_cons++;
-    netif_get(netif);
-
-    skb_queue_tail(&rx_queue, skb);
-    tasklet_schedule(&net_rx_tasklet);
-
-    return 0;
+       netif_t *netif = netdev_priv(dev);
+
+       ASSERT(skb->dev == dev);
+
+       /* Drop the packet if the target domain has no receive buffers. */
+       if (!netif->active || 
+           (netif->rx_req_cons == netif->rx->req_prod) ||
+           ((netif->rx_req_cons-netif->rx_resp_prod) == NETIF_RX_RING_SIZE))
+               goto drop;
+
+       /*
+        * We do not copy the packet unless:
+        *  1. The data is shared; or
+        *  2. The data is not allocated from our special cache.
+        * NB. We also couldn't cope with fragmented packets, but we won't get
+        *     any because we not advertise the NETIF_F_SG feature.
+        */
+       if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) {
+               int hlen = skb->data - skb->head;
+               struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
+               if ( unlikely(nskb == NULL) )
+                       goto drop;
+               skb_reserve(nskb, hlen);
+               __skb_put(nskb, skb->len);
+               BUG_ON(skb_copy_bits(skb, -hlen, nskb->data - hlen,
+                                    skb->len + hlen));
+               nskb->dev = skb->dev;
+               nskb->proto_csum_valid = skb->proto_csum_valid;
+               dev_kfree_skb(skb);
+               skb = nskb;
+       }
+
+       netif->rx_req_cons++;
+       netif_get(netif);
+
+       skb_queue_tail(&rx_queue, skb);
+       tasklet_schedule(&net_rx_tasklet);
+
+       return 0;
 
  drop:
-    netif->stats.tx_dropped++;
-    dev_kfree_skb(skb);
-    return 0;
+       netif->stats.tx_dropped++;
+       dev_kfree_skb(skb);
+       return 0;
 }
 
 #if 0
 static void xen_network_done_notify(void)
 {
-    static struct net_device *eth0_dev = NULL;
-    if ( unlikely(eth0_dev == NULL) )
-        eth0_dev = __dev_get_by_name("eth0");
-    netif_rx_schedule(eth0_dev);
+       static struct net_device *eth0_dev = NULL;
+       if (unlikely(eth0_dev == NULL))
+               eth0_dev = __dev_get_by_name("eth0");
+       netif_rx_schedule(eth0_dev);
 }
 /* 
  * Add following to poll() function in NAPI driver (Tigon3 is example):
@@ -221,776 +201,644 @@
  */
 int xen_network_done(void)
 {
-    return skb_queue_empty(&rx_queue);
+       return skb_queue_empty(&rx_queue);
 }
 #endif
 
 static void net_rx_action(unsigned long unused)
 {
-    netif_t *netif = NULL; 
-    s8 status;
-    u16 size, id, evtchn;
-    multicall_entry_t *mcl;
-    mmu_update_t *mmu;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    gnttab_donate_t *gop;
-#else
-    struct mmuext_op *mmuext;
+       netif_t *netif = NULL; 
+       s8 status;
+       u16 size, id, evtchn;
+       multicall_entry_t *mcl;
+       mmu_update_t *mmu;
+       gnttab_transfer_t *gop;
+       unsigned long vdata, old_mfn, new_mfn;
+       struct sk_buff_head rxq;
+       struct sk_buff *skb;
+       u16 notify_list[NETIF_RX_RING_SIZE];
+       int notify_nr = 0;
+
+       skb_queue_head_init(&rxq);
+
+       mcl = rx_mcl;
+       mmu = rx_mmu;
+       gop = grant_rx_op;
+
+       while ((skb = skb_dequeue(&rx_queue)) != NULL) {
+               netif   = netdev_priv(skb->dev);
+               vdata   = (unsigned long)skb->data;
+               old_mfn = virt_to_mfn(vdata);
+
+               /* Memory squeeze? Back off for an arbitrary while. */
+               if ((new_mfn = alloc_mfn()) == 0) {
+                       if ( net_ratelimit() )
+                               WPRINTK("Memory squeeze in netback driver.\n");
+                       mod_timer(&net_timer, jiffies + HZ);
+                       skb_queue_head(&rx_queue, skb);
+                       break;
+               }
+               /*
+                * Set the new P2M table entry before reassigning the old data
+                * page. Heed the comment in pgtable-2level.h:pte_page(). :-)
+                */
+               phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] =
+                       new_mfn;
+
+               MULTI_update_va_mapping(mcl, vdata,
+                                       pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
+               mcl++;
+
+               gop->mfn = old_mfn;
+               gop->domid = netif->domid;
+               gop->ref = netif->rx->ring[
+                       MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+               netif->rx_resp_prod_copy++;
+               gop++;
+
+               mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+                       MMU_MACHPHYS_UPDATE;
+               mmu->val = __pa(vdata) >> PAGE_SHIFT;  
+               mmu++;
+
+               __skb_queue_tail(&rxq, skb);
+
+               /* Filled the batch queue? */
+               if ((mcl - rx_mcl) == ARRAY_SIZE(rx_mcl))
+                       break;
+       }
+
+       if (mcl == rx_mcl)
+               return;
+
+       mcl->op = __HYPERVISOR_mmu_update;
+       mcl->args[0] = (unsigned long)rx_mmu;
+       mcl->args[1] = mmu - rx_mmu;
+       mcl->args[2] = 0;
+       mcl->args[3] = DOMID_SELF;
+       mcl++;
+
+       mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+       BUG_ON(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0);
+
+       mcl = rx_mcl;
+       if( HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, 
+                                     gop - grant_rx_op)) { 
+               /*
+                * The other side has given us a bad grant ref, or has no 
+                * headroom, or has gone away. Unfortunately the current grant
+                * table code doesn't inform us which is the case, so not much
+                * we can do. 
+                */
+               DPRINTK("net_rx: transfer to DOM%u failed; dropping (up to) "
+                       "%d packets.\n",
+                       grant_rx_op[0].domid, gop - grant_rx_op); 
+       }
+       gop = grant_rx_op;
+
+       while ((skb = __skb_dequeue(&rxq)) != NULL) {
+               netif   = netdev_priv(skb->dev);
+               size    = skb->tail - skb->data;
+
+               /* Rederive the machine addresses. */
+               new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+               old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
+               atomic_set(&(skb_shinfo(skb)->dataref), 1);
+               skb_shinfo(skb)->nr_frags = 0;
+               skb_shinfo(skb)->frag_list = NULL;
+
+               netif->stats.tx_bytes += size;
+               netif->stats.tx_packets++;
+
+               /* The update_va_mapping() must not fail. */
+               BUG_ON(mcl[0].result != 0);
+
+               /* Check the reassignment error code. */
+               status = NETIF_RSP_OKAY;
+               if(gop->status != 0) { 
+                       DPRINTK("Bad status %d from grant transfer to DOM%u\n",
+                               gop->status, netif->domid);
+                       /* XXX SMH: should free 'old_mfn' here */
+                       status = NETIF_RSP_ERROR; 
+               } 
+               evtchn = netif->evtchn;
+               id = netif->rx->ring[
+                       MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
+               if (make_rx_response(netif, id, status,
+                                    (unsigned long)skb->data & ~PAGE_MASK,
+                                    size, skb->proto_csum_valid) &&
+                   (rx_notify[evtchn] == 0)) {
+                       rx_notify[evtchn] = 1;
+                       notify_list[notify_nr++] = evtchn;
+               }
+
+               netif_put(netif);
+               dev_kfree_skb(skb);
+               mcl++;
+               gop++;
+       }
+
+       while (notify_nr != 0) {
+               evtchn = notify_list[--notify_nr];
+               rx_notify[evtchn] = 0;
+               notify_via_evtchn(evtchn);
+       }
+
+       /* More work to do? */
+       if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
+               tasklet_schedule(&net_rx_tasklet);
+#if 0
+       else
+               xen_network_done_notify();
 #endif
-    unsigned long vdata, old_mfn, new_mfn;
-    struct sk_buff_head rxq;
-    struct sk_buff *skb;
-    u16 notify_list[NETIF_RX_RING_SIZE];
-    int notify_nr = 0;
-
-    skb_queue_head_init(&rxq);
-
-    mcl = rx_mcl;
-    mmu = rx_mmu;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    gop = grant_rx_op;
-#else
-    mmuext = rx_mmuext;
-#endif
-
-    while ( (skb = skb_dequeue(&rx_queue)) != NULL )
-    {
-        netif   = netdev_priv(skb->dev);
-        vdata   = (unsigned long)skb->data;
-        old_mfn = virt_to_mfn(vdata);
-
-        /* Memory squeeze? Back off for an arbitrary while. */
-        if ( (new_mfn = alloc_mfn()) == 0 )
-        {
-            if ( net_ratelimit() )
-                WPRINTK("Memory squeeze in netback driver.\n");
-            mod_timer(&net_timer, jiffies + HZ);
-            skb_queue_head(&rx_queue, skb);
-            break;
-        }
-        /*
-         * Set the new P2M table entry before reassigning the old data page.
-         * Heed the comment in pgtable-2level.h:pte_page(). :-)
-         */
-        phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
-
-        MULTI_update_va_mapping(mcl, vdata,
-                               pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
-        mcl++;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        gop->mfn = old_mfn;
-        gop->domid = netif->domid;
-        gop->handle = netif->rx->ring[
-        MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
-        netif->rx_resp_prod_copy++;
-        gop++;
-#else
-        mcl->op = __HYPERVISOR_mmuext_op;
-        mcl->args[0] = (unsigned long)mmuext;
-        mcl->args[1] = 1;
-        mcl->args[2] = 0;
-        mcl->args[3] = netif->domid;
-        mcl++;
-
-        mmuext->cmd = MMUEXT_REASSIGN_PAGE;
-        mmuext->arg1.mfn = old_mfn;
-        mmuext++;
-#endif
-        mmu->ptr = ((unsigned long long)new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
-        mmu->val = __pa(vdata) >> PAGE_SHIFT;  
-        mmu++;
-
-        __skb_queue_tail(&rxq, skb);
-
-#ifdef DEBUG_GRANT
-        dump_packet('a', old_mfn, vdata);
-#endif
-        /* Filled the batch queue? */
-        if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
-            break;
-    }
-
-    if ( mcl == rx_mcl )
-        return;
-
-    mcl->op = __HYPERVISOR_mmu_update;
-    mcl->args[0] = (unsigned long)rx_mmu;
-    mcl->args[1] = mmu - rx_mmu;
-    mcl->args[2] = 0;
-    mcl->args[3] = DOMID_SELF;
-    mcl++;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-#else
-    mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-#endif
-    if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
-        BUG();
-
-    mcl = rx_mcl;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    if(HYPERVISOR_grant_table_op(GNTTABOP_donate, grant_rx_op, 
-                                 gop - grant_rx_op)) { 
-        /* 
-        ** The other side has given us a bad grant ref, or has no headroom, 
-        ** or has gone away. Unfortunately the current grant table code 
-        ** doesn't inform us which is the case, so not much we can do. 
-        */
-        DPRINTK("net_rx: donate to DOM%u failed; dropping (up to) %d "
-                "packets.\n", grant_rx_op[0].domid, gop - grant_rx_op); 
-    }
-    gop = grant_rx_op;
-#else
-    mmuext = rx_mmuext;
-#endif
-    while ( (skb = __skb_dequeue(&rxq)) != NULL )
-    {
-        netif   = netdev_priv(skb->dev);
-        size    = skb->tail - skb->data;
-
-        /* Rederive the machine addresses. */
-        new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
-#else
-        old_mfn = mmuext[0].arg1.mfn;
-#endif
-        atomic_set(&(skb_shinfo(skb)->dataref), 1);
-        skb_shinfo(skb)->nr_frags = 0;
-        skb_shinfo(skb)->frag_list = NULL;
-
-        netif->stats.tx_bytes += size;
-        netif->stats.tx_packets++;
-
-        /* The update_va_mapping() must not fail. */
-        BUG_ON(mcl[0].result != 0);
-
-        /* Check the reassignment error code. */
-        status = NETIF_RSP_OKAY;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        if(gop->status != 0) { 
-            DPRINTK("Bad status %d from grant donate to DOM%u\n", 
-                    gop->status, netif->domid);
-            /* XXX SMH: should free 'old_mfn' here */
-            status = NETIF_RSP_ERROR; 
-        } 
-#else
-        if ( unlikely(mcl[1].result != 0) )
-        {
-            DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
-            free_mfn(old_mfn);
-            status = NETIF_RSP_ERROR;
-        }
-#endif
-        evtchn = netif->evtchn;
-        id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
-        if ( make_rx_response(netif, id, status,
-                              (old_mfn << PAGE_SHIFT) | /* XXX */
-                              ((unsigned long)skb->data & ~PAGE_MASK),
-                              size, skb->proto_csum_valid) &&
-             (rx_notify[evtchn] == 0) )
-        {
-            rx_notify[evtchn] = 1;
-            notify_list[notify_nr++] = evtchn;
-        }
-
-        netif_put(netif);
-        dev_kfree_skb(skb);
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        mcl++;
-        gop++;
-#else
-        mcl += 2;
-        mmuext += 1;
-#endif
-    }
-
-    while ( notify_nr != 0 )
-    {
-        evtchn = notify_list[--notify_nr];
-        rx_notify[evtchn] = 0;
-        notify_via_evtchn(evtchn);
-    }
-
-  out: 
-    /* More work to do? */
-    if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) )
-        tasklet_schedule(&net_rx_tasklet);
-#if 0
-    else
-        xen_network_done_notify();
-#endif
 }
 
 static void net_alarm(unsigned long unused)
 {
-    tasklet_schedule(&net_rx_tasklet);
+       tasklet_schedule(&net_rx_tasklet);
 }
 
 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
 {
-    netif_t *netif = netdev_priv(dev);
-    return &netif->stats;
+       netif_t *netif = netdev_priv(dev);
+       return &netif->stats;
 }
 
 static int __on_net_schedule_list(netif_t *netif)
 {
-    return netif->list.next != NULL;
+       return netif->list.next != NULL;
 }
 
 static void remove_from_net_schedule_list(netif_t *netif)
 {
-    spin_lock_irq(&net_schedule_list_lock);
-    if ( likely(__on_net_schedule_list(netif)) )
-    {
-        list_del(&netif->list);
-        netif->list.next = NULL;
-        netif_put(netif);
-    }
-    spin_unlock_irq(&net_schedule_list_lock);
+       spin_lock_irq(&net_schedule_list_lock);
+       if (likely(__on_net_schedule_list(netif))) {
+               list_del(&netif->list);
+               netif->list.next = NULL;
+               netif_put(netif);
+       }
+       spin_unlock_irq(&net_schedule_list_lock);
 }
 
 static void add_to_net_schedule_list_tail(netif_t *netif)
 {
-    if ( __on_net_schedule_list(netif) )
-        return;
-
-    spin_lock_irq(&net_schedule_list_lock);
-    if ( !__on_net_schedule_list(netif) && netif->active )
-    {
-        list_add_tail(&netif->list, &net_schedule_list);
-        netif_get(netif);
-    }
-    spin_unlock_irq(&net_schedule_list_lock);
+       if (__on_net_schedule_list(netif))
+               return;
+
+       spin_lock_irq(&net_schedule_list_lock);
+       if (!__on_net_schedule_list(netif) && netif->active) {
+               list_add_tail(&netif->list, &net_schedule_list);
+               netif_get(netif);
+       }
+       spin_unlock_irq(&net_schedule_list_lock);
 }
 
 void netif_schedule_work(netif_t *netif)
 {
-    if ( (netif->tx_req_cons != netif->tx->req_prod) &&
-         ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
-    {
-        add_to_net_schedule_list_tail(netif);
-        maybe_schedule_tx_action();
-    }
+       if ((netif->tx_req_cons != netif->tx->req_prod) &&
+           ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE)) {
+               add_to_net_schedule_list_tail(netif);
+               maybe_schedule_tx_action();
+       }
 }
 
 void netif_deschedule_work(netif_t *netif)
 {
-    remove_from_net_schedule_list(netif);
+       remove_from_net_schedule_list(netif);
 }
 
 
 static void tx_credit_callback(unsigned long data)
 {
-    netif_t *netif = (netif_t *)data;
-    netif->remaining_credit = netif->credit_bytes;
-    netif_schedule_work(netif);
+       netif_t *netif = (netif_t *)data;
+       netif->remaining_credit = netif->credit_bytes;
+       netif_schedule_work(netif);
 }
 
 inline static void net_tx_action_dealloc(void)
 {
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    gnttab_unmap_grant_ref_t *gop;
-#else
-    multicall_entry_t *mcl;
-#endif
-    u16 pending_idx;
-    PEND_RING_IDX dc, dp;
-    netif_t *netif;
-
-    dc = dealloc_cons;
-    dp = dealloc_prod;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    /*
-     * Free up any grants we have finished using
-     */
-    gop = tx_unmap_ops;
-    while ( dc != dp )
-    {
-        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
-        gop->host_addr    = MMAP_VADDR(pending_idx);
-        gop->dev_bus_addr = 0;
-        gop->handle       = grant_tx_ref[pending_idx];
-        grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
-        gop++;
-    }
-    BUG_ON(HYPERVISOR_grant_table_op(
-               GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops));
-#else
-    mcl = tx_mcl;
-    while ( dc != dp )
-    {
-        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
-       MULTI_update_va_mapping(mcl, MMAP_VADDR(pending_idx),
-                               __pte(0), 0);
-        mcl++;     
-    }
-
-    mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-    if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
-        BUG();
-
-    mcl = tx_mcl;
-#endif
-    while ( dealloc_cons != dp )
-    {
-#ifndef CONFIG_XEN_NETDEV_GRANT
-        /* The update_va_mapping() must not fail. */
-        BUG_ON(mcl[0].result != 0);
-#endif
-
-        pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
-
-        netif = pending_tx_info[pending_idx].netif;
-
-        make_tx_response(netif, pending_tx_info[pending_idx].req.id, 
-                         NETIF_RSP_OKAY);
+       gnttab_unmap_grant_ref_t *gop;
+       u16 pending_idx;
+       PEND_RING_IDX dc, dp;
+       netif_t *netif;
+
+       dc = dealloc_cons;
+       dp = dealloc_prod;
+
+       /*
+        * Free up any grants we have finished using
+        */
+       gop = tx_unmap_ops;
+       while (dc != dp) {
+               pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+               gop->host_addr    = MMAP_VADDR(pending_idx);
+               gop->dev_bus_addr = 0;
+               gop->handle       = grant_tx_ref[pending_idx];
+               grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+               gop++;
+       }
+       BUG_ON(HYPERVISOR_grant_table_op(
+               GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops));
+
+       while (dealloc_cons != dp) {
+               pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
+
+               netif = pending_tx_info[pending_idx].netif;
+
+               make_tx_response(netif, pending_tx_info[pending_idx].req.id, 
+                                NETIF_RSP_OKAY);
         
-        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-
-        /*
-         * Scheduling checks must happen after the above response is posted.
-         * This avoids a possible race with a guest OS on another CPU if that
-         * guest is testing against 'resp_prod' when deciding whether to notify
-         * us when it queues additional packets.
-         */
-        mb();
-        if ( (netif->tx_req_cons != netif->tx->req_prod) &&
-             ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
-            add_to_net_schedule_list_tail(netif);
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+
+               /*
+                * Scheduling checks must happen after the above response is
+                * posted. This avoids a possible race with a guest OS on
+                * another CPU if that guest is testing against 'resp_prod'
+                * when deciding whether to notify us when it queues additional
+                 * packets.
+                */
+               mb();
+               if ((netif->tx_req_cons != netif->tx->req_prod) &&
+                   ((netif->tx_req_cons-netif->tx_resp_prod) !=
+                    NETIF_TX_RING_SIZE))
+                       add_to_net_schedule_list_tail(netif);
         
-        netif_put(netif);
-
-#ifndef CONFIG_XEN_NETDEV_GRANT
-        mcl++;
-#endif
-    }
-
+               netif_put(netif);
+       }
 }
 
 /* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
-    struct list_head *ent;
-    struct sk_buff *skb;
-    netif_t *netif;
-    netif_tx_request_t txreq;
-    u16 pending_idx;
-    NETIF_RING_IDX i;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    gnttab_map_grant_ref_t *mop;
-#else
-    multicall_entry_t *mcl;
-#endif
-    unsigned int data_len;
-
-    if ( dealloc_cons != dealloc_prod )
-        net_tx_action_dealloc();
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    mop = tx_map_ops;
-#else
-    mcl = tx_mcl;
-#endif
-    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
-            !list_empty(&net_schedule_list) )
-    {
-        /* Get a netif from the list with work to do. */
-        ent = net_schedule_list.next;
-        netif = list_entry(ent, netif_t, list);
-        netif_get(netif);
-        remove_from_net_schedule_list(netif);
-
-        /* Work to do? */
-        i = netif->tx_req_cons;
-        if ( (i == netif->tx->req_prod) ||
-             ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE) )
-        {
-            netif_put(netif);
-            continue;
-        }
-
-        rmb(); /* Ensure that we see the request before we copy it. */
-        memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req, 
-               sizeof(txreq));
-        /* Credit-based scheduling. */
-        if ( txreq.size > netif->remaining_credit )
-        {
-            unsigned long now = jiffies;
-            unsigned long next_credit = 
-                netif->credit_timeout.expires +
-                msecs_to_jiffies(netif->credit_usec / 1000);
-
-            /* Timer could already be pending in some rare cases. */
-            if ( timer_pending(&netif->credit_timeout) )
-                break;
-
-            /* Already passed the point at which we can replenish credit? */
-            if ( time_after_eq(now, next_credit) )
-            {
-                netif->credit_timeout.expires = now;
-                netif->remaining_credit = netif->credit_bytes;
-            }
-
-            /* Still too big to send right now? Then set a timer callback. */
-            if ( txreq.size > netif->remaining_credit )
-            {
-                netif->remaining_credit = 0;
-                netif->credit_timeout.expires  = next_credit;
-                netif->credit_timeout.data     = (unsigned long)netif;
-                netif->credit_timeout.function = tx_credit_callback;
-                add_timer_on(&netif->credit_timeout, smp_processor_id());
-                break;
-            }
-        }
-        netif->remaining_credit -= txreq.size;
-
-        /*
-         * Why the barrier? It ensures that the frontend sees updated req_cons
-         * before we check for more work to schedule.
-         */
-        netif->tx->req_cons = ++netif->tx_req_cons;
-        mb();
-
-        netif_schedule_work(netif);
-
-        if ( unlikely(txreq.size < ETH_HLEN) || 
-             unlikely(txreq.size > ETH_FRAME_LEN) )
-        {
-            DPRINTK("Bad packet size: %d\n", txreq.size);
-            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-            netif_put(netif);
-            continue; 
-        }
-
-        /* No crossing a page boundary as the payload mustn't fragment. */
-        if ( unlikely(((txreq.addr & ~PAGE_MASK) + txreq.size) >= PAGE_SIZE) ) 
-        {
-            DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n", 
-                    txreq.addr, txreq.size, 
-                    (txreq.addr &~PAGE_MASK) + txreq.size);
-            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-            netif_put(netif);
-            continue;
-        }
-
-        pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-
-        data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
-
-        if ( unlikely((skb = alloc_skb(data_len+16, GFP_ATOMIC)) == NULL) )
-        {
-            DPRINTK("Can't allocate a skb in start_xmit.\n");
-            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-            netif_put(netif);
-            break;
-        }
-
-        /* Packets passed to netif_rx() must have some headroom. */
-        skb_reserve(skb, 16);
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        mop->host_addr = MMAP_VADDR(pending_idx);
-        mop->dom       = netif->domid;
-        mop->ref       = txreq.addr >> PAGE_SHIFT;
-        mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
-        mop++;
-#else
-       MULTI_update_va_mapping_otherdomain(
-           mcl, MMAP_VADDR(pending_idx),
-           pfn_pte_ma(txreq.addr >> PAGE_SHIFT, PAGE_KERNEL),
-           0, netif->domid);
-
-        mcl++;
-#endif
-
-        memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
-        pending_tx_info[pending_idx].netif = netif;
-        *((u16 *)skb->data) = pending_idx;
-
-        __skb_queue_tail(&tx_queue, skb);
-
-        pending_cons++;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        if ( (mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops) )
-            break;
-#else
-        /* Filled the batch queue? */
-        if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
-            break;
-#endif
-    }
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    if ( mop == tx_map_ops )
-        return;
-
-    BUG_ON(HYPERVISOR_grant_table_op(
-        GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops));
-
-    mop = tx_map_ops;
-#else
-    if ( mcl == tx_mcl )
-        return;
-
-    BUG_ON(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0);
-
-    mcl = tx_mcl;
-#endif
-    while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
-    {
-        pending_idx = *((u16 *)skb->data);
-        netif       = pending_tx_info[pending_idx].netif;
-        memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
-
-        /* Check the remap error code. */
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        /* 
-           XXX SMH: error returns from grant operations are pretty poorly
-           specified/thought out, but the below at least conforms with 
-           what the rest of the code uses. 
-        */
-        if ( unlikely(mop->handle < 0) )
-        {
-            printk(KERN_ALERT "#### netback grant fails\n");
-            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-            netif_put(netif);
-            kfree_skb(skb);
-            mop++;
-            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-            continue;
-        }
-        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
-                             FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT);
-        grant_tx_ref[pending_idx] = mop->handle;
-#else
-        if ( unlikely(mcl[0].result != 0) )
-        {
-            DPRINTK("Bad page frame\n");
-            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
-            netif_put(netif);
-            kfree_skb(skb);
-            mcl++;
-            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-            continue;
-        }
-
-        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
-            FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
-#endif
-
-        data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
-
-        __skb_put(skb, data_len);
-        memcpy(skb->data, 
-               (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
-               data_len);
-        if ( data_len < txreq.size )
-        {
-            /* Append the packet payload as a fragment. */
-            skb_shinfo(skb)->frags[0].page        = 
-                virt_to_page(MMAP_VADDR(pending_idx));
-            skb_shinfo(skb)->frags[0].size        = txreq.size - data_len;
-            skb_shinfo(skb)->frags[0].page_offset = 
-                (txreq.addr + data_len) & ~PAGE_MASK;
-            skb_shinfo(skb)->nr_frags = 1;
-        }
-        else
-        {
-            /* Schedule a response immediately. */
-            netif_idx_release(pending_idx);
-        }
-
-        skb->data_len  = txreq.size - data_len;
-        skb->len      += skb->data_len;
-
-        skb->dev      = netif->dev;
-        skb->protocol = eth_type_trans(skb, skb->dev);
-
-        /* No checking needed on localhost, but remember the field is blank. */
-        skb->ip_summed        = CHECKSUM_UNNECESSARY;
-        skb->proto_csum_valid = 1;
-        skb->proto_csum_blank = txreq.csum_blank;
-
-        netif->stats.rx_bytes += txreq.size;
-        netif->stats.rx_packets++;
-
-        netif_rx(skb);
-        netif->dev->last_rx = jiffies;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        mop++;
-#else
-        mcl++;
-#endif
-    }
+       struct list_head *ent;
+       struct sk_buff *skb;
+       netif_t *netif;
+       netif_tx_request_t txreq;
+       u16 pending_idx;
+       NETIF_RING_IDX i;
+       gnttab_map_grant_ref_t *mop;
+       unsigned int data_len;
+
+       if (dealloc_cons != dealloc_prod)
+               net_tx_action_dealloc();
+
+       mop = tx_map_ops;
+       while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
+               !list_empty(&net_schedule_list)) {
+               /* Get a netif from the list with work to do. */
+               ent = net_schedule_list.next;
+               netif = list_entry(ent, netif_t, list);
+               netif_get(netif);
+               remove_from_net_schedule_list(netif);
+
+               /* Work to do? */
+               i = netif->tx_req_cons;
+               if ((i == netif->tx->req_prod) ||
+                   ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE)) {
+                       netif_put(netif);
+                       continue;
+               }
+
+               rmb(); /* Ensure that we see the request before we copy it. */
+               memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req, 
+                      sizeof(txreq));
+               /* Credit-based scheduling. */
+               if (txreq.size > netif->remaining_credit) {
+                       unsigned long now = jiffies;
+                       unsigned long next_credit = 
+                               netif->credit_timeout.expires +
+                               msecs_to_jiffies(netif->credit_usec / 1000);
+
+                       /* Timer could already be pending in rare cases. */
+                       if (timer_pending(&netif->credit_timeout))
+                               break;
+
+                       /* Passed the point where we can replenish credit? */
+                       if (time_after_eq(now, next_credit)) {
+                               netif->credit_timeout.expires = now;
+                               netif->remaining_credit = netif->credit_bytes;
+                       }
+
+                       /* Still too big to send right now? Set a callback. */
+                       if (txreq.size > netif->remaining_credit) {
+                               netif->remaining_credit = 0;
+                               netif->credit_timeout.expires  = 
+                                       next_credit;
+                               netif->credit_timeout.data     =
+                                       (unsigned long)netif;
+                               netif->credit_timeout.function =
+                                       tx_credit_callback;
+                               add_timer_on(&netif->credit_timeout,
+                                            smp_processor_id());
+                               break;
+                       }
+               }
+               netif->remaining_credit -= txreq.size;
+
+               /*
+                * Why the barrier? It ensures that the frontend sees updated
+                * req_cons before we check for more work to schedule.
+                */
+               netif->tx->req_cons = ++netif->tx_req_cons;
+               mb();
+
+               netif_schedule_work(netif);
+
+               if (unlikely(txreq.size < ETH_HLEN) || 
+                   unlikely(txreq.size > ETH_FRAME_LEN)) {
+                       DPRINTK("Bad packet size: %d\n", txreq.size);
+                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+                       netif_put(netif);
+                       continue; 
+               }
+
+               /* No crossing a page as the payload mustn't fragment. */
+               if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
+                       DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n", 
+                               txreq.addr, txreq.size, 
+                               (txreq.addr &~PAGE_MASK) + txreq.size);
+                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+                       netif_put(netif);
+                       continue;
+               }
+
+               pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+               data_len = (txreq.size > PKT_PROT_LEN) ?
+                       PKT_PROT_LEN : txreq.size;
+
+               skb = alloc_skb(data_len+16, GFP_ATOMIC);
+               if (unlikely(skb == NULL)) {
+                       DPRINTK("Can't allocate a skb in start_xmit.\n");
+                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+                       netif_put(netif);
+                       break;
+               }
+
+               /* Packets passed to netif_rx() must have some headroom. */
+               skb_reserve(skb, 16);
+
+               mop->host_addr = MMAP_VADDR(pending_idx);
+               mop->dom       = netif->domid;
+               mop->ref       = txreq.gref;
+               mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
+               mop++;
+
+               memcpy(&pending_tx_info[pending_idx].req,
+                      &txreq, sizeof(txreq));
+               pending_tx_info[pending_idx].netif = netif;
+               *((u16 *)skb->data) = pending_idx;
+
+               __skb_queue_tail(&tx_queue, skb);
+
+               pending_cons++;
+
+               if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+                       break;
+       }
+
+       if (mop == tx_map_ops)
+               return;
+
+       BUG_ON(HYPERVISOR_grant_table_op(
+               GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops));
+
+       mop = tx_map_ops;
+       while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+               pending_idx = *((u16 *)skb->data);
+               netif       = pending_tx_info[pending_idx].netif;
+               memcpy(&txreq, &pending_tx_info[pending_idx].req,
+                      sizeof(txreq));
+
+               /* Check the remap error code. */
+               if (unlikely(mop->handle < 0)) {
+                       printk(KERN_ALERT "#### netback grant fails\n");
+                       make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+                       netif_put(netif);
+                       kfree_skb(skb);
+                       mop++;
+                       pending_ring[MASK_PEND_IDX(pending_prod++)] =
+                               pending_idx;
+                       continue;
+               }
+               phys_to_machine_mapping[
+                       __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+                       FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT);
+               grant_tx_ref[pending_idx] = mop->handle;
+
+               data_len = (txreq.size > PKT_PROT_LEN) ?
+                       PKT_PROT_LEN : txreq.size;
+
+               __skb_put(skb, data_len);
+               memcpy(skb->data, 
+                      (void *)(MMAP_VADDR(pending_idx)|txreq.offset),
+                      data_len);
+               if (data_len < txreq.size) {
+                       /* Append the packet payload as a fragment. */
+                       skb_shinfo(skb)->frags[0].page        = 
+                               virt_to_page(MMAP_VADDR(pending_idx));
+                       skb_shinfo(skb)->frags[0].size        =
+                               txreq.size - data_len;
+                       skb_shinfo(skb)->frags[0].page_offset = 
+                               txreq.offset + data_len;
+                       skb_shinfo(skb)->nr_frags = 1;
+               } else {
+                       /* Schedule a response immediately. */
+                       netif_idx_release(pending_idx);
+               }
+
+               skb->data_len  = txreq.size - data_len;
+               skb->len      += skb->data_len;
+
+               skb->dev      = netif->dev;
+               skb->protocol = eth_type_trans(skb, skb->dev);
+
+               /*
+                 * No checking needed on localhost, but remember the field is
+                 * blank. 
+                 */
+               skb->ip_summed        = CHECKSUM_UNNECESSARY;
+               skb->proto_csum_valid = 1;
+               skb->proto_csum_blank = txreq.csum_blank;
+
+               netif->stats.rx_bytes += txreq.size;
+               netif->stats.rx_packets++;
+
+               netif_rx(skb);
+               netif->dev->last_rx = jiffies;
+
+               mop++;
+       }
 }
 
 static void netif_idx_release(u16 pending_idx)
 {
-    static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
-    unsigned long flags;
-
-    spin_lock_irqsave(&_lock, flags);
-    dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
-    spin_unlock_irqrestore(&_lock, flags);
-
-    tasklet_schedule(&net_tx_tasklet);
+       static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
+       unsigned long flags;
+
+       spin_lock_irqsave(&_lock, flags);
+       dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
+       spin_unlock_irqrestore(&_lock, flags);
+
+       tasklet_schedule(&net_tx_tasklet);
 }
 
 static void netif_page_release(struct page *page)
 {
-    u16 pending_idx = page - virt_to_page(mmap_vstart);
-
-    /* Ready for next use. */
-    set_page_count(page, 1);
-
-    netif_idx_release(pending_idx);
+       u16 pending_idx = page - virt_to_page(mmap_vstart);
+
+       /* Ready for next use. */
+       set_page_count(page, 1);
+
+       netif_idx_release(pending_idx);
 }
 
 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
 {
-    netif_t *netif = dev_id;
-    if ( tx_work_exists(netif) )
-    {
-        add_to_net_schedule_list_tail(netif);
-        maybe_schedule_tx_action();
-    }
-    return IRQ_HANDLED;
+       netif_t *netif = dev_id;
+       if (tx_work_exists(netif)) {
+               add_to_net_schedule_list_tail(netif);
+               maybe_schedule_tx_action();
+       }
+       return IRQ_HANDLED;
 }
 
 static void make_tx_response(netif_t *netif, 
                              u16      id,
                              s8       st)
 {
-    NETIF_RING_IDX i = netif->tx_resp_prod;
-    netif_tx_response_t *resp;
-
-    resp = &netif->tx->ring[MASK_NETIF_TX_IDX(i)].resp;
-    resp->id     = id;
-    resp->status = st;
-    wmb();
-    netif->tx->resp_prod = netif->tx_resp_prod = ++i;
-
-    mb(); /* Update producer before checking event threshold. */
-    if ( i == netif->tx->event )
-        notify_via_evtchn(netif->evtchn);
+       NETIF_RING_IDX i = netif->tx_resp_prod;
+       netif_tx_response_t *resp;
+
+       resp = &netif->tx->ring[MASK_NETIF_TX_IDX(i)].resp;
+       resp->id     = id;
+       resp->status = st;
+       wmb();
+       netif->tx->resp_prod = netif->tx_resp_prod = ++i;
+
+       mb(); /* Update producer before checking event threshold. */
+       if (i == netif->tx->event)
+               notify_via_evtchn(netif->evtchn);
 }
 
 static int make_rx_response(netif_t *netif, 
                             u16      id, 
                             s8       st,
-                            unsigned long addr,
+                            u16      offset,
                             u16      size,
                             u16      csum_valid)
 {
-    NETIF_RING_IDX i = netif->rx_resp_prod;
-    netif_rx_response_t *resp;
-
-    resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-    resp->addr       = addr;
-    resp->csum_valid = csum_valid;
-    resp->id         = id;
-    resp->status     = (s16)size;
-    if ( st < 0 )
-        resp->status = (s16)st;
-    wmb();
-    netif->rx->resp_prod = netif->rx_resp_prod = ++i;
-
-    mb(); /* Update producer before checking event threshold. */
-    return (i == netif->rx->event);
+       NETIF_RING_IDX i = netif->rx_resp_prod;
+       netif_rx_response_t *resp;
+
+       resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
+       resp->offset     = offset;
+       resp->csum_valid = csum_valid;
+       resp->id         = id;
+       resp->status     = (s16)size;
+       if (st < 0)
+               resp->status = (s16)st;
+       wmb();
+       netif->rx->resp_prod = netif->rx_resp_prod = ++i;
+
+       mb(); /* Update producer before checking event threshold. */
+       return (i == netif->rx->event);
 }
 
 static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
 {
-    struct list_head *ent;
-    netif_t *netif;
-    int i = 0;
-
-    printk(KERN_ALERT "netif_schedule_list:\n");
-    spin_lock_irq(&net_schedule_list_lock);
-
-    list_for_each ( ent, &net_schedule_list )
-    {
-        netif = list_entry(ent, netif_t, list);
-        printk(KERN_ALERT " %d: private(rx_req_cons=%08x rx_resp_prod=%08x\n",
-               i, netif->rx_req_cons, netif->rx_resp_prod);               
-        printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
-               netif->tx_req_cons, netif->tx_resp_prod);
-        printk(KERN_ALERT "   shared(rx_req_prod=%08x rx_resp_prod=%08x\n",
-               netif->rx->req_prod, netif->rx->resp_prod);
-        printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
-               netif->rx->event, netif->tx->req_prod);
-        printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
-               netif->tx->resp_prod, netif->tx->event);
-        i++;
-    }
-
-    spin_unlock_irq(&net_schedule_list_lock);
-    printk(KERN_ALERT " ** End of netif_schedule_list **\n");
-
-    return IRQ_HANDLED;
+       struct list_head *ent;
+       netif_t *netif;
+       int i = 0;
+
+       printk(KERN_ALERT "netif_schedule_list:\n");
+       spin_lock_irq(&net_schedule_list_lock);
+
+       list_for_each (ent, &net_schedule_list) {
+               netif = list_entry(ent, netif_t, list);
+               printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+                      "rx_resp_prod=%08x\n",
+                      i, netif->rx_req_cons, netif->rx_resp_prod);
+               printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
+                      netif->tx_req_cons, netif->tx_resp_prod);
+               printk(KERN_ALERT "   shared(rx_req_prod=%08x "
+                      "rx_resp_prod=%08x\n",
+                      netif->rx->req_prod, netif->rx->resp_prod);
+               printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
+                      netif->rx->event, netif->tx->req_prod);
+               printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
+                      netif->tx->resp_prod, netif->tx->event);
+               i++;
+       }
+
+       spin_unlock_irq(&net_schedule_list_lock);
+       printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+
+       return IRQ_HANDLED;
 }
 
 static int __init netback_init(void)
 {
-    int i;
-    struct page *page;
-
-    if ( !(xen_start_info->flags & SIF_NET_BE_DOMAIN) &&
-         !(xen_start_info->flags & SIF_INITDOMAIN) )
-        return 0;
-
-    IPRINTK("Initialising Xen netif backend.\n");
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    IPRINTK("Using grant tables.\n");
-#endif
-
-    /* We can increase reservation by this much in net_rx_action(). */
-    balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
-
-    skb_queue_head_init(&rx_queue);
-    skb_queue_head_init(&tx_queue);
-
-    init_timer(&net_timer);
-    net_timer.data = 0;
-    net_timer.function = net_alarm;
+       int i;
+       struct page *page;
+
+       if (!(xen_start_info->flags & SIF_NET_BE_DOMAIN) &&
+           !(xen_start_info->flags & SIF_INITDOMAIN))
+               return 0;
+
+       IPRINTK("Initialising Xen netif backend.\n");
+
+       /* We can increase reservation by this much in net_rx_action(). */
+       balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
+
+       skb_queue_head_init(&rx_queue);
+       skb_queue_head_init(&tx_queue);
+
+       init_timer(&net_timer);
+       net_timer.data = 0;
+       net_timer.function = net_alarm;
     
-    page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
-    BUG_ON(page == NULL);
-    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
-    for ( i = 0; i < MAX_PENDING_REQS; i++ )
-    {
-        page = virt_to_page(MMAP_VADDR(i));
-        set_page_count(page, 1);
-        SetPageForeign(page, netif_page_release);
-    }
-
-    pending_cons = 0;
-    pending_prod = MAX_PENDING_REQS;
-    for ( i = 0; i < MAX_PENDING_REQS; i++ )
-        pending_ring[i] = i;
-
-    spin_lock_init(&net_schedule_list_lock);
-    INIT_LIST_HEAD(&net_schedule_list);
-
-    netif_xenbus_init();
-
-    (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
-                      netif_be_dbg, SA_SHIRQ, 
-                      "net-be-dbg", &netif_be_dbg);
-
-    return 0;
+       page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
+       BUG_ON(page == NULL);
+       mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+       for (i = 0; i < MAX_PENDING_REQS; i++) {
+               page = virt_to_page(MMAP_VADDR(i));
+               set_page_count(page, 1);
+               SetPageForeign(page, netif_page_release);
+       }
+
+       pending_cons = 0;
+       pending_prod = MAX_PENDING_REQS;
+       for (i = 0; i < MAX_PENDING_REQS; i++)
+               pending_ring[i] = i;
+
+       spin_lock_init(&net_schedule_list_lock);
+       INIT_LIST_HEAD(&net_schedule_list);
+
+       netif_xenbus_init();
+
+       (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
+                         netif_be_dbg, SA_SHIRQ, 
+                         "net-be-dbg", &netif_be_dbg);
+
+       return 0;
 }
 
 static void netback_cleanup(void)
 {
-    BUG();
+       BUG();
 }
 
 module_init(netback_init);
 module_exit(netback_cleanup);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Sep 22 17:42:01 2005
@@ -242,6 +242,7 @@
        be->dev = dev;
        be->backend_watch.node = dev->nodename;
        be->backend_watch.callback = backend_changed;
+       /* Registration implicitly calls backend_changed. */
        err = register_xenbus_watch(&be->backend_watch);
        if (err) {
                be->backend_watch.node = NULL;
@@ -263,8 +264,6 @@
        }
 
        dev->data = be;
-
-       backend_changed(&be->backend_watch, dev->nodename);
        return 0;
 
  free_be:
@@ -294,3 +293,13 @@
 {
        xenbus_register_backend(&netback);
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Thu Sep 22 17:42:01 2005
@@ -54,43 +54,10 @@
 #include <asm-xen/balloon.h>
 #include <asm/page.h>
 #include <asm/uaccess.h>
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
 #include <asm-xen/xen-public/grant_table.h>
 #include <asm-xen/gnttab.h>
 
-static grant_ref_t gref_tx_head;
-static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1]; 
-
-static grant_ref_t gref_rx_head;
-static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
-
 #define GRANT_INVALID_REF      (0xFFFF)
-
-#ifdef GRANT_DEBUG
-static void
-dump_packet(int tag, void *addr, u32 ap)
-{
-    unsigned char *p = (unsigned char *)ap;
-    int i;
-    
-    printk(KERN_ALERT "#### rx_poll   %c %08x ", tag & 0xff, addr);
-    for (i = 0; i < 20; i++) {
-        printk("%02x", p[i]);
-    }
-    printk("\n");
-}
-
-#define GDPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
-                           __FILE__ , __LINE__ , ## _a )
-#else 
-#define dump_packet(x,y,z)  ((void)0)  
-#define GDPRINTK(_f, _a...) ((void)0)
-#endif
-
-#endif
-
-
 
 #ifndef __GFP_NOWARN
 #define __GFP_NOWARN 0
@@ -124,7 +91,6 @@
 #define NETIF_STATE_DISCONNECTED 0
 #define NETIF_STATE_CONNECTED    1
 
-
 static unsigned int netif_state = NETIF_STATE_DISCONNECTED;
 
 static void network_tx_buf_gc(struct net_device *dev);
@@ -147,45 +113,50 @@
 #define netfront_info net_private
 struct net_private
 {
-    struct list_head list;
-    struct net_device *netdev;
-
-    struct net_device_stats stats;
-    NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
-    unsigned int tx_full;
+       struct list_head list;
+       struct net_device *netdev;
+
+       struct net_device_stats stats;
+       NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
+       unsigned int tx_full;
     
-    netif_tx_interface_t *tx;
-    netif_rx_interface_t *rx;
-
-    spinlock_t   tx_lock;
-    spinlock_t   rx_lock;
-
-    unsigned int handle;
-    unsigned int evtchn;
-
-    /* What is the status of our connection to the remote backend? */
+       netif_tx_interface_t *tx;
+       netif_rx_interface_t *rx;
+
+       spinlock_t   tx_lock;
+       spinlock_t   rx_lock;
+
+       unsigned int handle;
+       unsigned int evtchn;
+
+       /* What is the status of our connection to the remote backend? */
 #define BEST_CLOSED       0
 #define BEST_DISCONNECTED 1
 #define BEST_CONNECTED    2
-    unsigned int backend_state;
-
-    /* Is this interface open or closed (down or up)? */
+       unsigned int backend_state;
+
+       /* Is this interface open or closed (down or up)? */
 #define UST_CLOSED        0
 #define UST_OPEN          1
-    unsigned int user_state;
-
-    /* Receive-ring batched refills. */
+       unsigned int user_state;
+
+       /* Receive-ring batched refills. */
 #define RX_MIN_TARGET 8
 #define RX_MAX_TARGET NETIF_RX_RING_SIZE
-    int rx_min_target, rx_max_target, rx_target;
-    struct sk_buff_head rx_batch;
-
-    /*
-     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
-     * array is an index into a chain of free entries.
-     */
-    struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
-    struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
+       int rx_min_target, rx_max_target, rx_target;
+       struct sk_buff_head rx_batch;
+
+       /*
+        * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+        * array is an index into a chain of free entries.
+        */
+       struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
+       struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
+
+       grant_ref_t gref_tx_head;
+       grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1]; 
+       grant_ref_t gref_rx_head;
+       grant_ref_t grant_rx_ref[NETIF_TX_RING_SIZE + 1]; 
 
        struct xenbus_device *xbdev;
        char *backend;
@@ -197,32 +168,32 @@
 };
 
 /* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
-#define ADD_ID_TO_FREELIST(_list, _id)             \
-    (_list)[(_id)] = (_list)[0];                   \
-    (_list)[0]     = (void *)(unsigned long)(_id);
-#define GET_ID_FROM_FREELIST(_list)                \
- ({ unsigned long _id = (unsigned long)(_list)[0]; \
-    (_list)[0]  = (_list)[_id];                    \
-    (unsigned short)_id; })
+#define ADD_ID_TO_FREELIST(_list, _id)                 \
+       (_list)[(_id)] = (_list)[0];                    \
+       (_list)[0]     = (void *)(unsigned long)(_id);
+#define GET_ID_FROM_FREELIST(_list)                            \
+       ({ unsigned long _id = (unsigned long)(_list)[0];       \
+          (_list)[0]  = (_list)[_id];                          \
+          (unsigned short)_id; })
 
 #ifdef DEBUG
 static char *be_state_name[] = {
-    [BEST_CLOSED]       = "closed",
-    [BEST_DISCONNECTED] = "disconnected",
-    [BEST_CONNECTED]    = "connected",
+       [BEST_CLOSED]       = "closed",
+       [BEST_DISCONNECTED] = "disconnected",
+       [BEST_CONNECTED]    = "connected",
 };
 #endif
 
 #ifdef DEBUG
 #define DPRINTK(fmt, args...) \
-    printk(KERN_ALERT "xen_net (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
+       printk(KERN_ALERT "xen_net (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
 #else
 #define DPRINTK(fmt, args...) ((void)0)
 #endif
 #define IPRINTK(fmt, args...) \
-    printk(KERN_INFO "xen_net: " fmt, ##args)
+       printk(KERN_INFO "xen_net: " fmt, ##args)
 #define WPRINTK(fmt, args...) \
-    printk(KERN_WARNING "xen_net: " fmt, ##args)
+       printk(KERN_WARNING "xen_net: " fmt, ##args)
 
 /** Send a packet on a net device to encourage switches to learn the
  * MAC. We send a fake ARP request.
@@ -232,628 +203,582 @@
  */
 static int send_fake_arp(struct net_device *dev)
 {
-    struct sk_buff *skb;
-    u32             src_ip, dst_ip;
-
-    dst_ip = INADDR_BROADCAST;
-    src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
-
-    /* No IP? Then nothing to do. */
-    if (src_ip == 0)
-        return 0;
-
-    skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
-                     dst_ip, dev, src_ip,
-                     /*dst_hw*/ NULL, /*src_hw*/ NULL, 
-                     /*target_hw*/ dev->dev_addr);
-    if (skb == NULL)
-        return -ENOMEM;
-
-    return dev_queue_xmit(skb);
+       struct sk_buff *skb;
+       u32             src_ip, dst_ip;
+
+       dst_ip = INADDR_BROADCAST;
+       src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
+
+       /* No IP? Then nothing to do. */
+       if (src_ip == 0)
+               return 0;
+
+       skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
+                        dst_ip, dev, src_ip,
+                        /*dst_hw*/ NULL, /*src_hw*/ NULL, 
+                        /*target_hw*/ dev->dev_addr);
+       if (skb == NULL)
+               return -ENOMEM;
+
+       return dev_queue_xmit(skb);
 }
 
 static int network_open(struct net_device *dev)
 {
-    struct net_private *np = netdev_priv(dev);
-
-    memset(&np->stats, 0, sizeof(np->stats));
-
-    np->user_state = UST_OPEN;
-
-    network_alloc_rx_buffers(dev);
-    np->rx->event = np->rx_resp_cons + 1;
-
-    netif_start_queue(dev);
-
-    return 0;
+       struct net_private *np = netdev_priv(dev);
+
+       memset(&np->stats, 0, sizeof(np->stats));
+
+       np->user_state = UST_OPEN;
+
+       network_alloc_rx_buffers(dev);
+       np->rx->event = np->rx_resp_cons + 1;
+
+       netif_start_queue(dev);
+
+       return 0;
 }
 
 static void network_tx_buf_gc(struct net_device *dev)
 {
-    NETIF_RING_IDX i, prod;
-    unsigned short id;
-    struct net_private *np = netdev_priv(dev);
-    struct sk_buff *skb;
-
-    if (np->backend_state != BEST_CONNECTED)
-        return;
-
-    do {
-        prod = np->tx->resp_prod;
-        rmb(); /* Ensure we see responses up to 'rp'. */
-
-        for (i = np->tx_resp_cons; i != prod; i++) {
-            id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
-            skb = np->tx_skbs[id];
-#ifdef CONFIG_XEN_NETDEV_GRANT
-            if (unlikely(gnttab_query_foreign_access(grant_tx_ref[id]) != 0)) {
-                /* other domain is still using this grant - shouldn't happen
-                   but if it does, we'll try to reclaim the grant later */
-                printk(KERN_ALERT "network_tx_buf_gc: warning -- grant "
-                       "still in use by backend domain.\n");
-                goto out; 
-            }
-            gnttab_end_foreign_access_ref(grant_tx_ref[id], GNTMAP_readonly);
-            gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
-            grant_tx_ref[id] = GRANT_INVALID_REF;
-#endif
-            ADD_ID_TO_FREELIST(np->tx_skbs, id);
-            dev_kfree_skb_irq(skb);
-        }
+       NETIF_RING_IDX i, prod;
+       unsigned short id;
+       struct net_private *np = netdev_priv(dev);
+       struct sk_buff *skb;
+
+       if (np->backend_state != BEST_CONNECTED)
+               return;
+
+       do {
+               prod = np->tx->resp_prod;
+               rmb(); /* Ensure we see responses up to 'rp'. */
+
+               for (i = np->tx_resp_cons; i != prod; i++) {
+                       id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
+                       skb = np->tx_skbs[id];
+                       if (unlikely(gnttab_query_foreign_access(
+                               np->grant_tx_ref[id]) != 0)) {
+                               printk(KERN_ALERT "network_tx_buf_gc: warning "
+                                      "-- grant still in use by backend "
+                                      "domain.\n");
+                               goto out; 
+                       }
+                       gnttab_end_foreign_access_ref(
+                               np->grant_tx_ref[id], GNTMAP_readonly);
+                       gnttab_release_grant_reference(
+                               &np->gref_tx_head, np->grant_tx_ref[id]);
+                       np->grant_tx_ref[id] = GRANT_INVALID_REF;
+                       ADD_ID_TO_FREELIST(np->tx_skbs, id);
+                       dev_kfree_skb_irq(skb);
+               }
         
-        np->tx_resp_cons = prod;
+               np->tx_resp_cons = prod;
         
-        /*
-         * Set a new event, then check for race with update of tx_cons. Note
-         * that it is essential to schedule a callback, no matter how few
-         * buffers are pending. Even if there is space in the transmit ring,
-         * higher layers may be blocked because too much data is outstanding:
-         * in such cases notification from Xen is likely to be the only kick
-         * that we'll get.
-         */
-        np->tx->event = 
-            prod + ((np->tx->req_prod - prod) >> 1) + 1;
-        mb();
-    } while (prod != np->tx->resp_prod);
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-  out: 
-#endif
-
-    if (np->tx_full && ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE)) {
-        np->tx_full = 0;
-        if (np->user_state == UST_OPEN)
-            netif_wake_queue(dev);
-    }
+               /*
+                * Set a new event, then check for race with update of tx_cons.
+                * Note that it is essential to schedule a callback, no matter
+                * how few buffers are pending. Even if there is space in the
+                * transmit ring, higher layers may be blocked because too much
+                * data is outstanding: in such cases notification from Xen is
+                * likely to be the only kick that we'll get.
+                */
+               np->tx->event = prod + ((np->tx->req_prod - prod) >> 1) + 1;
+               mb();
+       } while (prod != np->tx->resp_prod);
+
+ out: 
+       if (np->tx_full && ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE)) {
+               np->tx_full = 0;
+               if (np->user_state == UST_OPEN)
+                       netif_wake_queue(dev);
+       }
 }
 
 
 static void network_alloc_rx_buffers(struct net_device *dev)
 {
-    unsigned short id;
-    struct net_private *np = netdev_priv(dev);
-    struct sk_buff *skb;
-    int i, batch_target;
-    NETIF_RING_IDX req_prod = np->rx->req_prod;
-    struct xen_memory_reservation reservation;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    grant_ref_t ref;
-#endif
-
-    if (unlikely(np->backend_state != BEST_CONNECTED))
-        return;
-
-    /*
-     * Allocate skbuffs greedily, even though we batch updates to the
-     * receive ring. This creates a less bursty demand on the memory allocator,
-     * so should reduce the chance of failed allocation requests both for
-     * ourself and for other kernel subsystems.
-     */
-    batch_target = np->rx_target - (req_prod - np->rx_resp_cons);
-    for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
-        if (unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM)) == NULL))
-            break;
-        __skb_queue_tail(&np->rx_batch, skb);
-    }
-
-    /* Is the batch large enough to be worthwhile? */
-    if (i < (np->rx_target/2))
-        return;
-
-    for (i = 0; ; i++) {
-        if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
-            break;
-
-        skb->dev = dev;
-
-        id = GET_ID_FROM_FREELIST(np->rx_skbs);
-
-        np->rx_skbs[id] = skb;
+       unsigned short id;
+       struct net_private *np = netdev_priv(dev);
+       struct sk_buff *skb;
+       int i, batch_target;
+       NETIF_RING_IDX req_prod = np->rx->req_prod;
+       struct xen_memory_reservation reservation;
+       grant_ref_t ref;
+
+       if (unlikely(np->backend_state != BEST_CONNECTED))
+               return;
+
+       /*
+        * Allocate skbuffs greedily, even though we batch updates to the
+        * receive ring. This creates a less bursty demand on the memory
+        * allocator, so should reduce the chance of failed allocation requests
+        *  both for ourself and for other kernel subsystems.
+        */
+       batch_target = np->rx_target - (req_prod - np->rx_resp_cons);
+       for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
+               skb = alloc_xen_skb(dev->mtu + RX_HEADROOM);
+               if (skb == NULL)
+                       break;
+               __skb_queue_tail(&np->rx_batch, skb);
+       }
+
+       /* Is the batch large enough to be worthwhile? */
+       if (i < (np->rx_target/2))
+               return;
+
+       for (i = 0; ; i++) {
+               if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
+                       break;
+
+               skb->dev = dev;
+
+               id = GET_ID_FROM_FREELIST(np->rx_skbs);
+
+               np->rx_skbs[id] = skb;
         
-        np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-       ref = gnttab_claim_grant_reference(&gref_rx_head);
-        if (unlikely((signed short)ref < 0)) {
-            printk(KERN_ALERT "#### netfront can't claim rx reference\n");
-            BUG();
-        }
-        grant_rx_ref[id] = ref;
-        gnttab_grant_foreign_transfer_ref(ref, np->backend_id,
-                                          virt_to_mfn(skb->head));
-        np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
-#endif
-        rx_pfn_array[i] = virt_to_mfn(skb->head);
-
-       /* Remove this page from pseudo phys map before passing back to Xen. */
-       phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] 
-           = INVALID_P2M_ENTRY;
-
-       MULTI_update_va_mapping(rx_mcl+i, (unsigned long)skb->head,
-                               __pte(0), 0);
-    }
-
-    /* After all PTEs have been zapped we blow away stale TLB entries. */
-    rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-
-    /* Give away a batch of pages. */
-    rx_mcl[i].op = __HYPERVISOR_memory_op;
-    rx_mcl[i].args[0] = XENMEM_decrease_reservation;
-    rx_mcl[i].args[1] = (unsigned long)&reservation;
-
-    reservation.extent_start = rx_pfn_array;
-    reservation.nr_extents   = i;
-    reservation.extent_order = 0;
-    reservation.address_bits = 0;
-    reservation.domid        = DOMID_SELF;
-
-    /* Tell the ballon driver what is going on. */
-    balloon_update_driver_allowance(i);
-
-    /* Zap PTEs and give away pages in one big multicall. */
-    (void)HYPERVISOR_multicall(rx_mcl, i+1);
-
-    /* Check return status of HYPERVISOR_memory_op(). */
-    if (unlikely(rx_mcl[i].result != i))
-        panic("Unable to reduce memory reservation\n");
-
-    /* Above is a suitable barrier to ensure backend will see requests. */
-    np->rx->req_prod = req_prod + i;
-
-    /* Adjust our floating fill target if we risked running out of buffers. */
-    if (((req_prod - np->rx->resp_prod) < (np->rx_target / 4)) &&
-         ((np->rx_target *= 2) > np->rx_max_target))
-        np->rx_target = np->rx_max_target;
+               np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
+               ref = gnttab_claim_grant_reference(&np->gref_rx_head);
+               BUG_ON((signed short)ref < 0);
+               np->grant_rx_ref[id] = ref;
+               gnttab_grant_foreign_transfer_ref(ref, np->backend_id);
+               np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+               rx_pfn_array[i] = virt_to_mfn(skb->head);
+
+               /* Remove this page from map before passing back to Xen. */
+               phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] 
+                       = INVALID_P2M_ENTRY;
+
+               MULTI_update_va_mapping(rx_mcl+i, (unsigned long)skb->head,
+                                       __pte(0), 0);
+       }
+
+       /* After all PTEs have been zapped we blow away stale TLB entries. */
+       rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+
+       /* Give away a batch of pages. */
+       rx_mcl[i].op = __HYPERVISOR_memory_op;
+       rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+       rx_mcl[i].args[1] = (unsigned long)&reservation;
+
+       reservation.extent_start = rx_pfn_array;
+       reservation.nr_extents   = i;
+       reservation.extent_order = 0;
+       reservation.address_bits = 0;
+       reservation.domid        = DOMID_SELF;
+
+       /* Tell the ballon driver what is going on. */
+       balloon_update_driver_allowance(i);
+
+       /* Zap PTEs and give away pages in one big multicall. */
+       (void)HYPERVISOR_multicall(rx_mcl, i+1);
+
+       /* Check return status of HYPERVISOR_memory_op(). */
+       if (unlikely(rx_mcl[i].result != i))
+               panic("Unable to reduce memory reservation\n");
+
+       /* Above is a suitable barrier to ensure backend will see requests. */
+       np->rx->req_prod = req_prod + i;
+
+       /* Adjust our fill target if we risked running out of buffers. */
+       if (((req_prod - np->rx->resp_prod) < (np->rx_target / 4)) &&
+           ((np->rx_target *= 2) > np->rx_max_target))
+               np->rx_target = np->rx_max_target;
 }
 
 
 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-    unsigned short id;
-    struct net_private *np = netdev_priv(dev);
-    netif_tx_request_t *tx;
-    NETIF_RING_IDX i;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    grant_ref_t ref;
-    unsigned long mfn;
-#endif
-
-    if (unlikely(np->tx_full)) {
-        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
-        netif_stop_queue(dev);
-        goto drop;
-    }
-
-    if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
-                  PAGE_SIZE)) {
-        struct sk_buff *nskb;
-        if (unlikely((nskb = alloc_xen_skb(skb->len)) == NULL))
-            goto drop;
-        skb_put(nskb, skb->len);
-        memcpy(nskb->data, skb->data, skb->len);
-        nskb->dev = skb->dev;
-        dev_kfree_skb(skb);
-        skb = nskb;
-    }
+       unsigned short id;
+       struct net_private *np = netdev_priv(dev);
+       netif_tx_request_t *tx;
+       NETIF_RING_IDX i;
+       grant_ref_t ref;
+       unsigned long mfn;
+
+       if (unlikely(np->tx_full)) {
+               printk(KERN_ALERT "%s: full queue wasn't stopped!\n",
+                      dev->name);
+               netif_stop_queue(dev);
+               goto drop;
+       }
+
+       if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
+                    PAGE_SIZE)) {
+               struct sk_buff *nskb;
+               if (unlikely((nskb = alloc_xen_skb(skb->len)) == NULL))
+                       goto drop;
+               skb_put(nskb, skb->len);
+               memcpy(nskb->data, skb->data, skb->len);
+               nskb->dev = skb->dev;
+               dev_kfree_skb(skb);
+               skb = nskb;
+       }
     
-    spin_lock_irq(&np->tx_lock);
-
-    if (np->backend_state != BEST_CONNECTED) {
-        spin_unlock_irq(&np->tx_lock);
-        goto drop;
-    }
-
-    i = np->tx->req_prod;
-
-    id = GET_ID_FROM_FREELIST(np->tx_skbs);
-    np->tx_skbs[id] = skb;
-
-    tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
-
-    tx->id   = id;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    ref = gnttab_claim_grant_reference(&gref_tx_head);
-    if (unlikely((signed short)ref < 0)) {
-        printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
-        BUG();
-    }
-    mfn = virt_to_mfn(skb->data);
-    gnttab_grant_foreign_access_ref(ref, np->backend_id, mfn, GNTMAP_readonly);
-    tx->addr = ref << PAGE_SHIFT;
-    grant_tx_ref[id] = ref;
-#else
-    tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
-#endif
-    tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
-    tx->size = skb->len;
-    tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
-
-    wmb(); /* Ensure that backend will see the request. */
-    np->tx->req_prod = i + 1;
-
-    network_tx_buf_gc(dev);
-
-    if ((i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1)) {
-        np->tx_full = 1;
-        netif_stop_queue(dev);
-    }
-
-    spin_unlock_irq(&np->tx_lock);
-
-    np->stats.tx_bytes += skb->len;
-    np->stats.tx_packets++;
-
-    /* Only notify Xen if we really have to. */
-    mb();
-    if (np->tx->TX_TEST_IDX == i)
-        notify_via_evtchn(np->evtchn);
-
-    return 0;
+       spin_lock_irq(&np->tx_lock);
+
+       if (np->backend_state != BEST_CONNECTED) {
+               spin_unlock_irq(&np->tx_lock);
+               goto drop;
+       }
+
+       i = np->tx->req_prod;
+
+       id = GET_ID_FROM_FREELIST(np->tx_skbs);
+       np->tx_skbs[id] = skb;
+
+       tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
+
+       tx->id   = id;
+       ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+       BUG_ON((signed short)ref < 0);
+       mfn = virt_to_mfn(skb->data);
+       gnttab_grant_foreign_access_ref(
+               ref, np->backend_id, mfn, GNTMAP_readonly);
+       tx->gref = np->grant_tx_ref[id] = ref;
+       tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
+       tx->size = skb->len;
+       tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
+
+       wmb(); /* Ensure that backend will see the request. */
+       np->tx->req_prod = i + 1;
+
+       network_tx_buf_gc(dev);
+
+       if ((i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1)) {
+               np->tx_full = 1;
+               netif_stop_queue(dev);
+       }
+
+       spin_unlock_irq(&np->tx_lock);
+
+       np->stats.tx_bytes += skb->len;
+       np->stats.tx_packets++;
+
+       /* Only notify Xen if we really have to. */
+       mb();
+       if (np->tx->TX_TEST_IDX == i)
+               notify_via_evtchn(np->evtchn);
+
+       return 0;
 
  drop:
-    np->stats.tx_dropped++;
-    dev_kfree_skb(skb);
-    return 0;
+       np->stats.tx_dropped++;
+       dev_kfree_skb(skb);
+       return 0;
 }
 
 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
 {
-    struct net_device *dev = dev_id;
-    struct net_private *np = netdev_priv(dev);
-    unsigned long flags;
-
-    spin_lock_irqsave(&np->tx_lock, flags);
-    network_tx_buf_gc(dev);
-    spin_unlock_irqrestore(&np->tx_lock, flags);
-
-    if((np->rx_resp_cons != np->rx->resp_prod) && (np->user_state == UST_OPEN))
-        netif_rx_schedule(dev);
-
-    return IRQ_HANDLED;
+       struct net_device *dev = dev_id;
+       struct net_private *np = netdev_priv(dev);
+       unsigned long flags;
+
+       spin_lock_irqsave(&np->tx_lock, flags);
+       network_tx_buf_gc(dev);
+       spin_unlock_irqrestore(&np->tx_lock, flags);
+
+       if ((np->rx_resp_cons != np->rx->resp_prod) &&
+           (np->user_state == UST_OPEN))
+               netif_rx_schedule(dev);
+
+       return IRQ_HANDLED;
 }
 
 
 static int netif_poll(struct net_device *dev, int *pbudget)
 {
-    struct net_private *np = netdev_priv(dev);
-    struct sk_buff *skb, *nskb;
-    netif_rx_response_t *rx;
-    NETIF_RING_IDX i, rp;
-    mmu_update_t *mmu = rx_mmu;
-    multicall_entry_t *mcl = rx_mcl;
-    int work_done, budget, more_to_do = 1;
-    struct sk_buff_head rxq;
-    unsigned long flags;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    unsigned long mfn;
-    grant_ref_t ref;
-#endif
-
-    spin_lock(&np->rx_lock);
-
-    if (np->backend_state != BEST_CONNECTED) {
-        spin_unlock(&np->rx_lock);
-        return 0;
-    }
-
-    skb_queue_head_init(&rxq);
-
-    if ((budget = *pbudget) > dev->quota)
-        budget = dev->quota;
-    rp = np->rx->resp_prod;
-    rmb(); /* Ensure we see queued responses up to 'rp'. */
-
-    for (i = np->rx_resp_cons, work_done = 0; 
-                   (i != rp) && (work_done < budget);
-                   i++, work_done++) {
-        rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-        /*
-         * An error here is very odd. Usually indicates a backend bug,
-         * low-memory condition, or that we didn't have reservation headroom.
-         */
-        if (unlikely(rx->status <= 0)) {
-            if (net_ratelimit())
-                printk(KERN_WARNING "Bad rx buffer (memory squeeze?).\n");
-            np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
-            wmb();
-            np->rx->req_prod++;
-            work_done--;
-            continue;
-        }
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        ref = grant_rx_ref[rx->id]; 
-
-        if(ref == GRANT_INVALID_REF) { 
-            printk(KERN_WARNING "Bad rx grant reference %d from dom %d.\n",
-                   ref, np->backend_id);
-            np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
-            wmb();
-            np->rx->req_prod++;
-            work_done--;
-            continue;
-        }
-
-        grant_rx_ref[rx->id] = GRANT_INVALID_REF;
-        mfn = gnttab_end_foreign_transfer_ref(ref);
-        gnttab_release_grant_reference(&gref_rx_head, ref);
-#endif
-
-        skb = np->rx_skbs[rx->id];
-        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
-
-        /* NB. We handle skb overflow later. */
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        skb->data = skb->head + rx->addr;
-#else
-        skb->data = skb->head + (rx->addr & ~PAGE_MASK);
-#endif
-        skb->len  = rx->status;
-        skb->tail = skb->data + skb->len;
-
-        if ( rx->csum_valid )
-            skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-        np->stats.rx_packets++;
-        np->stats.rx_bytes += rx->status;
-
-        /* Remap the page. */
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        mmu->ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
-#else
-        mmu->ptr  = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
-#endif
-        mmu->val  = __pa(skb->head) >> PAGE_SHIFT;
-        mmu++;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-       MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
-                               pfn_pte_ma(mfn, PAGE_KERNEL), 0);
-#else
-       MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
-                               pfn_pte_ma(rx->addr >> PAGE_SHIFT, 
-                                           PAGE_KERNEL), 0);
-#endif
-        mcl++;
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = mfn;
-        GDPRINTK("#### rx_poll     enqueue vdata=%p mfn=%lu ref=%x\n",
-                skb->data, mfn, ref);
-#else
-        phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = 
-            rx->addr >> PAGE_SHIFT;
-#endif 
-
-
-        __skb_queue_tail(&rxq, skb);
-    }
-
-
-    /* Some pages are no longer absent... */
-    balloon_update_driver_allowance(-work_done);
-
-    /* Do all the remapping work, and M->P updates, in one big hypercall. */
-    if (likely((mcl - rx_mcl) != 0)) {
-        mcl->op = __HYPERVISOR_mmu_update;
-        mcl->args[0] = (unsigned long)rx_mmu;
-        mcl->args[1] = mmu - rx_mmu;
-        mcl->args[2] = 0;
-        mcl->args[3] = DOMID_SELF;
-        mcl++;
-        (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
-    }
-
-    while ((skb = __skb_dequeue(&rxq)) != NULL) {
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        GDPRINTK("#### rx_poll     dequeue vdata=%p mfn=%lu\n",
-                skb->data, virt_to_mfn(skb->data));
-        dump_packet('d', skb->data, (unsigned long)skb->data);
-#endif
-        /*
-         * Enough room in skbuff for the data we were passed? Also, Linux 
-         * expects at least 16 bytes headroom in each receive buffer.
-         */
-        if (unlikely(skb->tail > skb->end) || 
-                       unlikely((skb->data - skb->head) < 16)) {
-            nskb = NULL;
-
-
-            /* Only copy the packet if it fits in the current MTU. */
-            if (skb->len <= (dev->mtu + ETH_HLEN)) {
-                if ((skb->tail > skb->end) && net_ratelimit())
-                    printk(KERN_INFO "Received packet needs %zd bytes more "
-                           "headroom.\n", skb->tail - skb->end);
-
-                if ((nskb = alloc_xen_skb(skb->len + 2)) != NULL) {
-                    skb_reserve(nskb, 2);
-                    skb_put(nskb, skb->len);
-                    memcpy(nskb->data, skb->data, skb->len);
-                    nskb->dev = skb->dev;
-                }
-            }
-            else if (net_ratelimit())
-                printk(KERN_INFO "Received packet too big for MTU "
-                       "(%d > %d)\n", skb->len - ETH_HLEN, dev->mtu);
-
-            /* Reinitialise and then destroy the old skbuff. */
-            skb->len  = 0;
-            skb->tail = skb->data;
-            init_skb_shinfo(skb);
-            dev_kfree_skb(skb);
-
-            /* Switch old for new, if we copied the buffer. */
-            if ((skb = nskb) == NULL)
-                continue;
-        }
+       struct net_private *np = netdev_priv(dev);
+       struct sk_buff *skb, *nskb;
+       netif_rx_response_t *rx;
+       NETIF_RING_IDX i, rp;
+       mmu_update_t *mmu = rx_mmu;
+       multicall_entry_t *mcl = rx_mcl;
+       int work_done, budget, more_to_do = 1;
+       struct sk_buff_head rxq;
+       unsigned long flags;
+       unsigned long mfn;
+       grant_ref_t ref;
+
+       spin_lock(&np->rx_lock);
+
+       if (np->backend_state != BEST_CONNECTED) {
+               spin_unlock(&np->rx_lock);
+               return 0;
+       }
+
+       skb_queue_head_init(&rxq);
+
+       if ((budget = *pbudget) > dev->quota)
+               budget = dev->quota;
+       rp = np->rx->resp_prod;
+       rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+       for (i = np->rx_resp_cons, work_done = 0; 
+            (i != rp) && (work_done < budget);
+            i++, work_done++) {
+               rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
+               /*
+                * An error here is very odd. Usually indicates a backend bug,
+                * low-mem condition, or we didn't have reservation headroom.
+                */
+               if (unlikely(rx->status <= 0)) {
+                       if (net_ratelimit())
+                               printk(KERN_WARNING "Bad rx buffer "
+                                      "(memory squeeze?).\n");
+                       np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].
+                               req.id = rx->id;
+                       wmb();
+                       np->rx->req_prod++;
+                       work_done--;
+                       continue;
+               }
+
+               ref = np->grant_rx_ref[rx->id]; 
+
+               if(ref == GRANT_INVALID_REF) { 
+                       printk(KERN_WARNING "Bad rx grant reference %d "
+                              "from dom %d.\n",
+                              ref, np->backend_id);
+                       np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].
+                               req.id = rx->id;
+                       wmb();
+                       np->rx->req_prod++;
+                       work_done--;
+                       continue;
+               }
+
+               np->grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+               mfn = gnttab_end_foreign_transfer_ref(ref);
+               gnttab_release_grant_reference(&np->gref_rx_head, ref);
+
+               skb = np->rx_skbs[rx->id];
+               ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
+
+               /* NB. We handle skb overflow later. */
+               skb->data = skb->head + rx->offset;
+               skb->len  = rx->status;
+               skb->tail = skb->data + skb->len;
+
+               if ( rx->csum_valid )
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+               np->stats.rx_packets++;
+               np->stats.rx_bytes += rx->status;
+
+               /* Remap the page. */
+               mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+               mmu->val  = __pa(skb->head) >> PAGE_SHIFT;
+               mmu++;
+               MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
+                                       pfn_pte_ma(mfn, PAGE_KERNEL), 0);
+               mcl++;
+
+               phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = mfn;
+
+               __skb_queue_tail(&rxq, skb);
+       }
+
+       /* Some pages are no longer absent... */
+       balloon_update_driver_allowance(-work_done);
+
+       /* Do all the remapping work, and M2P updates, in one big hypercall. */
+       if (likely((mcl - rx_mcl) != 0)) {
+               mcl->op = __HYPERVISOR_mmu_update;
+               mcl->args[0] = (unsigned long)rx_mmu;
+               mcl->args[1] = mmu - rx_mmu;
+               mcl->args[2] = 0;
+               mcl->args[3] = DOMID_SELF;
+               mcl++;
+               (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
+       }
+
+       while ((skb = __skb_dequeue(&rxq)) != NULL) {
+               /*
+                * Enough room in skbuff for the data we were passed? Also,
+                * Linux expects at least 16 bytes headroom in each rx buffer.
+                */
+               if (unlikely(skb->tail > skb->end) || 
+                   unlikely((skb->data - skb->head) < 16)) {
+                       nskb = NULL;
+
+                       /* Only copy the packet if it fits in the MTU. */
+                       if (skb->len <= (dev->mtu + ETH_HLEN)) {
+                               if ((skb->tail > skb->end) && net_ratelimit())
+                                       printk(KERN_INFO "Received packet "
+                                              "needs %zd bytes more "
+                                              "headroom.\n",
+                                              skb->tail - skb->end);
+
+                               nskb = alloc_xen_skb(skb->len + 2);
+                               if (nskb != NULL) {
+                                       skb_reserve(nskb, 2);
+                                       skb_put(nskb, skb->len);
+                                       memcpy(nskb->data,
+                                              skb->data,
+                                              skb->len);
+                                       nskb->dev = skb->dev;
+                               }
+                       }
+                       else if (net_ratelimit())
+                               printk(KERN_INFO "Received packet too big for "
+                                      "MTU (%d > %d)\n",
+                                      skb->len - ETH_HLEN, dev->mtu);
+
+                       /* Reinitialise and then destroy the old skbuff. */
+                       skb->len  = 0;
+                       skb->tail = skb->data;
+                       init_skb_shinfo(skb);
+                       dev_kfree_skb(skb);
+
+                       /* Switch old for new, if we copied the buffer. */
+                       if ((skb = nskb) == NULL)
+                               continue;
+               }
         
-        /* Set the shared-info area, which is hidden behind the real data. */
-        init_skb_shinfo(skb);
-        /* Ethernet-specific work. Delayed to here as it peeks the header. */
-        skb->protocol = eth_type_trans(skb, dev);
-
-        /* Pass it up. */
-        netif_receive_skb(skb);
-        dev->last_rx = jiffies;
-    }
-
-    np->rx_resp_cons = i;
-
-    /* If we get a callback with very few responses, reduce fill target. */
-    /* NB. Note exponential increase, linear decrease. */
-    if (((np->rx->req_prod - np->rx->resp_prod) > ((3*np->rx_target) / 4)) &&
-         (--np->rx_target < np->rx_min_target))
-        np->rx_target = np->rx_min_target;
-
-    network_alloc_rx_buffers(dev);
-
-    *pbudget   -= work_done;
-    dev->quota -= work_done;
-
-    if (work_done < budget) {
-        local_irq_save(flags);
-
-        np->rx->event = i + 1;
+               /* Set the shinfo area, which is hidden behind the data. */
+               init_skb_shinfo(skb);
+               /* Ethernet work: Delayed to here as it peeks the header. */
+               skb->protocol = eth_type_trans(skb, dev);
+
+               /* Pass it up. */
+               netif_receive_skb(skb);
+               dev->last_rx = jiffies;
+       }
+
+       np->rx_resp_cons = i;
+
+       /* If we get a callback with very few responses, reduce fill target. */
+       /* NB. Note exponential increase, linear decrease. */
+       if (((np->rx->req_prod - np->rx->resp_prod) >
+            ((3*np->rx_target) / 4)) &&
+           (--np->rx_target < np->rx_min_target))
+               np->rx_target = np->rx_min_target;
+
+       network_alloc_rx_buffers(dev);
+
+       *pbudget   -= work_done;
+       dev->quota -= work_done;
+
+       if (work_done < budget) {
+               local_irq_save(flags);
+
+               np->rx->event = i + 1;
     
-        /* Deal with hypervisor racing our resetting of rx_event. */
-        mb();
-        if (np->rx->resp_prod == i) {
-            __netif_rx_complete(dev);
-            more_to_do = 0;
-        }
-
-        local_irq_restore(flags);
-    }
-
-    spin_unlock(&np->rx_lock);
-
-    return more_to_do;
+               /* Deal with hypervisor racing our resetting of rx_event. */
+               mb();
+               if (np->rx->resp_prod == i) {
+                       __netif_rx_complete(dev);
+                       more_to_do = 0;
+               }
+
+               local_irq_restore(flags);
+       }
+
+       spin_unlock(&np->rx_lock);
+
+       return more_to_do;
 }
 
 
 static int network_close(struct net_device *dev)
 {
-    struct net_private *np = netdev_priv(dev);
-    np->user_state = UST_CLOSED;
-    netif_stop_queue(np->netdev);
-    return 0;
+       struct net_private *np = netdev_priv(dev);
+       np->user_state = UST_CLOSED;
+       netif_stop_queue(np->netdev);
+       return 0;
 }
 
 
 static struct net_device_stats *network_get_stats(struct net_device *dev)
 {
-    struct net_private *np = netdev_priv(dev);
-    return &np->stats;
+       struct net_private *np = netdev_priv(dev);
+       return &np->stats;
 }
 
 static void network_connect(struct net_device *dev)
 {
-    struct net_private *np;
-    int i, requeue_idx;
-    netif_tx_request_t *tx;
-
-    np = netdev_priv(dev);
-    spin_lock_irq(&np->tx_lock);
-    spin_lock(&np->rx_lock);
-
-    /* Recovery procedure: */
-
-    /* Step 1: Reinitialise variables. */
-    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
-    np->rx->event = np->tx->event = 1;
-
-    /* Step 2: Rebuild the RX and TX ring contents.
-     * NB. We could just free the queued TX packets now but we hope
-     * that sending them out might do some good.  We have to rebuild
-     * the RX ring because some of our pages are currently flipped out
-     * so we can't just free the RX skbs.
-     * NB2. Freelist index entries are always going to be less than
-     *  __PAGE_OFFSET, whereas pointers to skbs will always be equal or
-     * greater than __PAGE_OFFSET: we use this property to distinguish
-     * them.
-     */
-
-    /* Rebuild the TX buffer freelist and the TX ring itself.
-     * NB. This reorders packets.  We could keep more private state
-     * to avoid this but maybe it doesn't matter so much given the
-     * interface has been down.
-     */
-    for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
-        if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
-            struct sk_buff *skb = np->tx_skbs[i];
-
-            tx = &np->tx->ring[requeue_idx++].req;
-
-            tx->id   = i;
-#ifdef CONFIG_XEN_NETDEV_GRANT
-            gnttab_grant_foreign_access_ref(grant_tx_ref[i], np->backend_id, 
-                                            virt_to_mfn(np->tx_skbs[i]->data),
-                                            GNTMAP_readonly); 
-            tx->addr = grant_tx_ref[i] << PAGE_SHIFT; 
-#else
-            tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
-#endif
-            tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
-            tx->size = skb->len;
-
-            np->stats.tx_bytes += skb->len;
-            np->stats.tx_packets++;
-        }
-    }
-    wmb();
-    np->tx->req_prod = requeue_idx;
-
-    /* Rebuild the RX buffer freelist and the RX ring itself. */
-    for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++) { 
-        if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET) {
-#ifdef CONFIG_XEN_NETDEV_GRANT 
-            /* Reinstate the grant ref so backend can 'donate' mfn to us. */
-            gnttab_grant_foreign_transfer_ref(grant_rx_ref[i], np->backend_id,
-                                              virt_to_mfn(np->rx_skbs[i]->head)
-                );
-            np->rx->ring[requeue_idx].req.gref = grant_rx_ref[i];
-#endif
-            np->rx->ring[requeue_idx].req.id   = i;
-            requeue_idx++; 
-        }
-    }
-
-    wmb();                
-    np->rx->req_prod = requeue_idx;
-
-    /* Step 3: All public and private state should now be sane.  Get
-     * ready to start sending and receiving packets and give the driver
-     * domain a kick because we've probably just requeued some
-     * packets.
-     */
-    np->backend_state = BEST_CONNECTED;
-    wmb();
-    notify_via_evtchn(np->evtchn);  
-    network_tx_buf_gc(dev);
-
-    if (np->user_state == UST_OPEN)
-        netif_start_queue(dev);
-
-    spin_unlock(&np->rx_lock);
-    spin_unlock_irq(&np->tx_lock);
+       struct net_private *np;
+       int i, requeue_idx;
+       netif_tx_request_t *tx;
+
+       np = netdev_priv(dev);
+       spin_lock_irq(&np->tx_lock);
+       spin_lock(&np->rx_lock);
+
+       /* Recovery procedure: */
+
+       /* Step 1: Reinitialise variables. */
+       np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
+       np->rx->event = np->tx->event = 1;
+
+       /* Step 2: Rebuild the RX and TX ring contents.
+        * NB. We could just free the queued TX packets now but we hope
+        * that sending them out might do some good.  We have to rebuild
+        * the RX ring because some of our pages are currently flipped out
+        * so we can't just free the RX skbs.
+        * NB2. Freelist index entries are always going to be less than
+        *  __PAGE_OFFSET, whereas pointers to skbs will always be equal or
+        * greater than __PAGE_OFFSET: we use this property to distinguish
+        * them.
+        */
+
+       /* Rebuild the TX buffer freelist and the TX ring itself.
+        * NB. This reorders packets.  We could keep more private state
+        * to avoid this but maybe it doesn't matter so much given the
+        * interface has been down.
+        */
+       for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
+               if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
+                       struct sk_buff *skb = np->tx_skbs[i];
+
+                       tx = &np->tx->ring[requeue_idx++].req;
+
+                       tx->id   = i;
+                       gnttab_grant_foreign_access_ref(
+                               np->grant_tx_ref[i], np->backend_id, 
+                               virt_to_mfn(np->tx_skbs[i]->data),
+                               GNTMAP_readonly); 
+                       tx->gref = np->grant_tx_ref[i];
+                       tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
+                       tx->size = skb->len;
+
+                       np->stats.tx_bytes += skb->len;
+                       np->stats.tx_packets++;
+               }
+       }
+       wmb();
+       np->tx->req_prod = requeue_idx;
+
+       /* Rebuild the RX buffer freelist and the RX ring itself. */
+       for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++) { 
+               if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET) {
+                       gnttab_grant_foreign_transfer_ref(
+                               np->grant_rx_ref[i], np->backend_id);
+                       np->rx->ring[requeue_idx].req.gref =
+                               np->grant_rx_ref[i];
+                       np->rx->ring[requeue_idx].req.id = i;
+                       requeue_idx++; 
+               }
+       }
+
+       wmb();                
+       np->rx->req_prod = requeue_idx;
+
+       /* Step 3: All public and private state should now be sane.  Get
+        * ready to start sending and receiving packets and give the driver
+        * domain a kick because we've probably just requeued some
+        * packets.
+        */
+       np->backend_state = BEST_CONNECTED;
+       wmb();
+       notify_via_evtchn(np->evtchn);  
+       network_tx_buf_gc(dev);
+
+       if (np->user_state == UST_OPEN)
+               netif_start_queue(dev);
+
+       spin_unlock(&np->rx_lock);
+       spin_unlock_irq(&np->tx_lock);
 }
 
 static void show_device(struct net_private *np)
@@ -890,6 +815,13 @@
        show_device(np);
 }
 
+static void netif_uninit(struct net_device *dev)
+{
+       struct net_private *np = netdev_priv(dev);
+       gnttab_free_grant_references(np->gref_tx_head);
+       gnttab_free_grant_references(np->gref_rx_head);
+}
+
 static struct ethtool_ops network_ethtool_ops =
 {
        .get_tx_csum = ethtool_op_get_tx_csum,
@@ -904,84 +836,99 @@
 static int create_netdev(int handle, struct xenbus_device *dev,
                         struct net_device **val)
 {
-    int i, err = 0;
-    struct net_device *netdev = NULL;
-    struct net_private *np = NULL;
-
-    if ((netdev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
-        printk(KERN_WARNING "%s> alloc_etherdev failed.\n", __FUNCTION__);
-        err = -ENOMEM;
-        goto exit;
-    }
-
-    np                = netdev_priv(netdev);
-    np->backend_state = BEST_CLOSED;
-    np->user_state    = UST_CLOSED;
-    np->handle        = handle;
-    np->xbdev         = dev;
+       int i, err = 0;
+       struct net_device *netdev = NULL;
+       struct net_private *np = NULL;
+
+       if ((netdev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
+               printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
+                      __FUNCTION__);
+               err = -ENOMEM;
+               goto exit;
+       }
+
+       np                = netdev_priv(netdev);
+       np->backend_state = BEST_CLOSED;
+       np->user_state    = UST_CLOSED;
+       np->handle        = handle;
+       np->xbdev         = dev;
     
-    spin_lock_init(&np->tx_lock);
-    spin_lock_init(&np->rx_lock);
-
-    skb_queue_head_init(&np->rx_batch);
-    np->rx_target     = RX_MIN_TARGET;
-    np->rx_min_target = RX_MIN_TARGET;
-    np->rx_max_target = RX_MAX_TARGET;
-
-    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
-    for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
-        np->tx_skbs[i] = (void *)((unsigned long) i+1);
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        grant_tx_ref[i] = GRANT_INVALID_REF;
-#endif
-    }
-
-    for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
-        np->rx_skbs[i] = (void *)((unsigned long) i+1);
-#ifdef CONFIG_XEN_NETDEV_GRANT
-        grant_rx_ref[i] = GRANT_INVALID_REF;
-#endif
-    }
-
-    netdev->open            = network_open;
-    netdev->hard_start_xmit = network_start_xmit;
-    netdev->stop            = network_close;
-    netdev->get_stats       = network_get_stats;
-    netdev->poll            = netif_poll;
-    netdev->weight          = 64;
-    netdev->features        = NETIF_F_IP_CSUM;
-
-    SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
-
-    if ((err = register_netdev(netdev)) != 0) {
-        printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err);
-        goto exit;
-    }
-
-    if ((err = xennet_proc_addif(netdev)) != 0) {
-        unregister_netdev(netdev);
-        goto exit;
-    }
-
-    np->netdev = netdev;
-
-  exit:
-    if ((err != 0) && (netdev != NULL))
-        kfree(netdev);
-    else if (val != NULL)
-        *val = netdev;
-    return err;
+       spin_lock_init(&np->tx_lock);
+       spin_lock_init(&np->rx_lock);
+
+       skb_queue_head_init(&np->rx_batch);
+       np->rx_target     = RX_MIN_TARGET;
+       np->rx_min_target = RX_MIN_TARGET;
+       np->rx_max_target = RX_MAX_TARGET;
+
+       /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
+       for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
+               np->tx_skbs[i] = (void *)((unsigned long) i+1);
+               np->grant_tx_ref[i] = GRANT_INVALID_REF;
+       }
+
+       for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
+               np->rx_skbs[i] = (void *)((unsigned long) i+1);
+               np->grant_rx_ref[i] = GRANT_INVALID_REF;
+       }
+
+       /* A grant for every tx ring slot */
+       if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+                                         &np->gref_tx_head) < 0) {
+               printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+               goto exit;
+       }
+       /* A grant for every rx ring slot */
+       if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+                                         &np->gref_rx_head) < 0) {
+               printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+               gnttab_free_grant_references(np->gref_tx_head);
+               goto exit;
+       }
+
+       netdev->open            = network_open;
+       netdev->hard_start_xmit = network_start_xmit;
+       netdev->stop            = network_close;
+       netdev->get_stats       = network_get_stats;
+       netdev->poll            = netif_poll;
+       netdev->uninit          = netif_uninit;
+       netdev->weight          = 64;
+       netdev->features        = NETIF_F_IP_CSUM;
+
+       SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
+
+       if ((err = register_netdev(netdev)) != 0) {
+               printk(KERN_WARNING "%s> register_netdev err=%d\n",
+                      __FUNCTION__, err);
+               goto exit_free_grefs;
+       }
+
+       if ((err = xennet_proc_addif(netdev)) != 0) {
+               unregister_netdev(netdev);
+               goto exit_free_grefs;
+       }
+
+       np->netdev = netdev;
+
+ exit:
+       if ((err != 0) && (netdev != NULL))
+               kfree(netdev);
+       else if (val != NULL)
+               *val = netdev;
+       return err;
+
+ exit_free_grefs:
+       gnttab_free_grant_references(np->gref_tx_head);
+       gnttab_free_grant_references(np->gref_rx_head);
+       goto exit;
 }
 
 static int destroy_netdev(struct net_device *netdev)
 {
-
 #ifdef CONFIG_PROC_FS
        xennet_proc_delif(netdev);
 #endif
-
         unregister_netdev(netdev);
-
        return 0;
 }
 
@@ -992,20 +939,20 @@
 static int 
 inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
 {
-    struct in_ifaddr  *ifa = (struct in_ifaddr *)ptr; 
-    struct net_device *dev = ifa->ifa_dev->dev;
-
-    /* UP event and is it one of our devices? */
-    if (event == NETDEV_UP && dev->open == network_open)
-        (void)send_fake_arp(dev);
+       struct in_ifaddr  *ifa = (struct in_ifaddr *)ptr; /* notifier payload */
+       struct net_device *dev = ifa->ifa_dev->dev;
+
+       /* UP event and is it one of our devices? */
+       if (event == NETDEV_UP && dev->open == network_open) /* ours iff open hook matches */
+               (void)send_fake_arp(dev); /* result deliberately ignored */
         
-    return NOTIFY_DONE;
+       return NOTIFY_DONE;
 }
 
 static struct notifier_block notifier_inetdev = {
-    .notifier_call  = inetdev_notify,
-    .next           = NULL,
-    .priority       = 0
+       .notifier_call  = inetdev_notify,
+       .next           = NULL,
+       .priority       = 0
 };
 
 static struct xenbus_device_id netfront_ids[] = {
@@ -1022,10 +969,8 @@
        evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
        int err;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT
        info->tx_ring_ref = GRANT_INVALID_REF;
        info->rx_ring_ref = GRANT_INVALID_REF;
-#endif
 
        info->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
        if (info->tx == 0) {
@@ -1043,7 +988,6 @@
        memset(info->rx, 0, PAGE_SIZE);
        info->backend_state = BEST_DISCONNECTED;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT
        err = gnttab_grant_foreign_access(info->backend_id,
                                          virt_to_mfn(info->tx), 0);
        if (err < 0) {
@@ -1059,11 +1003,6 @@
                goto out;
        }
        info->rx_ring_ref = err;
-
-#else
-       info->tx_ring_ref = virt_to_mfn(info->tx);
-       info->rx_ring_ref = virt_to_mfn(info->rx);
-#endif
 
        op.u.alloc_unbound.dom = info->backend_id;
        err = HYPERVISOR_event_channel_op(&op);
@@ -1082,7 +1021,6 @@
                free_page((unsigned long)info->rx);
        info->rx = 0;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT
        if (info->tx_ring_ref != GRANT_INVALID_REF)
                gnttab_end_foreign_access(info->tx_ring_ref, 0);
        info->tx_ring_ref = GRANT_INVALID_REF;
@@ -1090,7 +1028,6 @@
        if (info->rx_ring_ref != GRANT_INVALID_REF)
                gnttab_end_foreign_access(info->rx_ring_ref, 0);
        info->rx_ring_ref = GRANT_INVALID_REF;
-#endif
 
        return err;
 }
@@ -1104,7 +1041,6 @@
                free_page((unsigned long)info->rx);
        info->rx = 0;
 
-#ifdef CONFIG_XEN_NETDEV_GRANT
        if (info->tx_ring_ref != GRANT_INVALID_REF)
                gnttab_end_foreign_access(info->tx_ring_ref, 0);
        info->tx_ring_ref = GRANT_INVALID_REF;
@@ -1112,7 +1048,6 @@
        if (info->rx_ring_ref != GRANT_INVALID_REF)
                gnttab_end_foreign_access(info->rx_ring_ref, 0);
        info->rx_ring_ref = GRANT_INVALID_REF;
-#endif
 
        unbind_evtchn_from_irqhandler(info->evtchn, info->netdev);
        info->evtchn = 0;
@@ -1282,10 +1217,6 @@
                return err;
        }
 
-
-       /* Call once in case entries already there. */
-       watch_for_status(&info->watch, info->watch.node);
-
        return 0;
 }
 
@@ -1344,72 +1275,50 @@
 
 static int wait_for_netif(void)
 {
-    int err = 0;
-    int i;
-
-    /*
-     * We should figure out how many and which devices we need to
-     * proceed and only wait for those.  For now, continue once the
-     * first device is around.
-     */
-    for ( i=0; netif_state != NETIF_STATE_CONNECTED && (i < 10*HZ); i++ )
-    {
-        set_current_state(TASK_INTERRUPTIBLE);
-        schedule_timeout(1);
-    }
-
-    if (netif_state != NETIF_STATE_CONNECTED) {
-        WPRINTK("Timeout connecting to device!\n");
-        err = -ENOSYS;
-    }
-    return err;
+       int err = 0; /* 0 once connected, -ENOSYS if the poll loop gives up */
+       int i;
+
+       /*
+        * We should figure out how many and which devices we need to
+        * proceed and only wait for those.  For now, continue once the
+        * first device is around.
+        */
+       for ( i=0; netif_state != NETIF_STATE_CONNECTED && (i < 10*HZ); i++ )
+       {
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(1); /* sleep up to 1 jiffy per pass, at most 10*HZ passes */
+       }
+
+       if (netif_state != NETIF_STATE_CONNECTED) {
+               WPRINTK("Timeout connecting to device!\n");
+               err = -ENOSYS; /* NOTE(review): timeout is reported as -ENOSYS */
+       }
+       return err;
 }
 
 static int __init netif_init(void)
 {
-    int err = 0;
-
-    if (xen_start_info->flags & SIF_INITDOMAIN)
-        return 0;
-
-    if ((err = xennet_proc_init()) != 0)
-        return err;
-
-    IPRINTK("Initialising virtual ethernet driver.\n");
-
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    IPRINTK("Using grant tables.\n"); 
-
-    /* A grant for every tx ring slot */
-    if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
-                                      &gref_tx_head) < 0) {
-        printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
-        return 1;
-    }
-    /* A grant for every rx ring slot */
-    if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
-                                      &gref_rx_head) < 0) {
-        printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
-        return 1;
-    }
-#endif
-
-
-    (void)register_inetaddr_notifier(&notifier_inetdev);
-
-    init_net_xenbus();
-
-    wait_for_netif();
-
-    return err;
+       int err = 0;
+
+       if (xen_start_info->flags & SIF_INITDOMAIN)
+               return 0; /* no frontend setup when SIF_INITDOMAIN is set */
+
+       if ((err = xennet_proc_init()) != 0)
+               return err;
+
+       IPRINTK("Initialising virtual ethernet driver.\n");
+
+       (void)register_inetaddr_notifier(&notifier_inetdev); /* result ignored */
+
+       init_net_xenbus();
+
+       wait_for_netif(); /* return value ignored: a timeout does not fail init */
+
+       return err; /* always 0 here; only xennet_proc_init() assigns err */
 }
 
 static void netif_exit(void)
 {
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    gnttab_free_grant_references(gref_tx_head);
-    gnttab_free_grant_references(gref_rx_head);
-#endif
 }
 
 #ifdef CONFIG_PROC_FS
@@ -1419,147 +1328,159 @@
 #define TARGET_CUR 2UL
 
 static int xennet_proc_read(
-    char *page, char **start, off_t off, int count, int *eof, void *data)
-{
-    struct net_device *dev = (struct net_device *)((unsigned long)data & ~3UL);
-    struct net_private *np = netdev_priv(dev);
-    int len = 0, which_target = (long)data & 3;
+       char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+       struct net_device *dev =
+               (struct net_device *)((unsigned long)data & ~3UL); /* strip 2-bit tag */
+       struct net_private *np = netdev_priv(dev);
+       int len = 0, which_target = (long)data & 3; /* tag: TARGET_MIN/MAX/CUR */
     
-    switch (which_target)
-    {
-    case TARGET_MIN:
-        len = sprintf(page, "%d\n", np->rx_min_target);
-        break;
-    case TARGET_MAX:
-        len = sprintf(page, "%d\n", np->rx_max_target);
-        break;
-    case TARGET_CUR:
-        len = sprintf(page, "%d\n", np->rx_target);
-        break;
-    }
-
-    *eof = 1;
-    return len;
+       switch (which_target) /* print the selected rx target as "%d\n" */
+       {
+       case TARGET_MIN:
+               len = sprintf(page, "%d\n", np->rx_min_target);
+               break;
+       case TARGET_MAX:
+               len = sprintf(page, "%d\n", np->rx_max_target);
+               break;
+       case TARGET_CUR:
+               len = sprintf(page, "%d\n", np->rx_target);
+               break;
+       }
+
+       *eof = 1; /* the whole value is produced in a single call */
+       return len; /* stays 0 if the tag matched no case */
 }
 
 static int xennet_proc_write(
-    struct file *file, const char __user *buffer,
-    unsigned long count, void *data)
-{
-    struct net_device *dev = (struct net_device *)((unsigned long)data & ~3UL);
-    struct net_private *np = netdev_priv(dev);
-    int which_target = (long)data & 3;
-    char string[64];
-    long target;
-
-    if (!capable(CAP_SYS_ADMIN))
-        return -EPERM;
-
-    if (count <= 1)
-        return -EBADMSG; /* runt */
-    if (count > sizeof(string))
-        return -EFBIG;   /* too long */
-
-    if (copy_from_user(string, buffer, count))
-        return -EFAULT;
-    string[sizeof(string)-1] = '\0';
-
-    target = simple_strtol(string, NULL, 10);
-    if (target < RX_MIN_TARGET)
-        target = RX_MIN_TARGET;
-    if (target > RX_MAX_TARGET)
-        target = RX_MAX_TARGET;
-
-    spin_lock(&np->rx_lock);
-
-    switch (which_target)
-    {
-    case TARGET_MIN:
-        if (target > np->rx_max_target)
-            np->rx_max_target = target;
-        np->rx_min_target = target;
-        if (target > np->rx_target)
-            np->rx_target = target;
-        break;
-    case TARGET_MAX:
-        if (target < np->rx_min_target)
-            np->rx_min_target = target;
-        np->rx_max_target = target;
-        if (target < np->rx_target)
-            np->rx_target = target;
-        break;
-    case TARGET_CUR:
-        break;
-    }
-
-    network_alloc_rx_buffers(dev);
-
-    spin_unlock(&np->rx_lock);
-
-    return count;
+       struct file *file, const char __user *buffer,
+       unsigned long count, void *data)
+{
+       struct net_device *dev =
+               (struct net_device *)((unsigned long)data & ~3UL);
+       struct net_private *np = netdev_priv(dev);
+       int which_target = (long)data & 3; /* low 2 bits of data pick the target */
+       char string[64];
+       long target;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (count <= 1)
+               return -EBADMSG; /* runt */
+       if (count >= sizeof(string))
+               return -EFBIG;   /* too long: must leave room for the NUL */
+
+       if (copy_from_user(string, buffer, count))
+               return -EFAULT;
+       string[count] = '\0'; /* end at the user data, not at stale stack bytes */
+
+       target = simple_strtol(string, NULL, 10);
+       if (target < RX_MIN_TARGET)
+               target = RX_MIN_TARGET; /* clamp to [RX_MIN_TARGET, RX_MAX_TARGET] */
+       if (target > RX_MAX_TARGET)
+               target = RX_MAX_TARGET;
+
+       spin_lock(&np->rx_lock);
+
+       switch (which_target)
+       {
+       case TARGET_MIN: /* raising min drags max and cur up with it */
+               if (target > np->rx_max_target)
+                       np->rx_max_target = target;
+               np->rx_min_target = target;
+               if (target > np->rx_target)
+                       np->rx_target = target;
+               break;
+       case TARGET_MAX: /* lowering max drags min and cur down with it */
+               if (target < np->rx_min_target)
+                       np->rx_min_target = target;
+               np->rx_max_target = target;
+               if (target < np->rx_target)
+                       np->rx_target = target;
+               break;
+       case TARGET_CUR: /* a write to the current target changes nothing */
+               break;
+       }
+
+       network_alloc_rx_buffers(dev); /* called with rx_lock held */
+
+       spin_unlock(&np->rx_lock);
+
+       return count; /* the whole write is reported as consumed */
 }
 
 static int xennet_proc_init(void)
 {
-    if (proc_mkdir("xen/net", NULL) == NULL)
-        return -ENOMEM;
-    return 0;
+       if (proc_mkdir("xen/net", NULL) == NULL) /* parent of per-interface dirs */
+               return -ENOMEM; /* any mkdir failure is reported as -ENOMEM */
+       return 0;
 }
 
 static int xennet_proc_addif(struct net_device *dev)
 {
-    struct proc_dir_entry *dir, *min, *max, *cur;
-    char name[30];
-
-    sprintf(name, "xen/net/%s", dev->name);
-
-    dir = proc_mkdir(name, NULL);
-    if (!dir)
-        goto nomem;
-
-    min = create_proc_entry("rxbuf_min", 0644, dir);
-    max = create_proc_entry("rxbuf_max", 0644, dir);
-    cur = create_proc_entry("rxbuf_cur", 0444, dir);
-    if (!min || !max || !cur)
-        goto nomem;
-
-    min->read_proc  = xennet_proc_read;
-    min->write_proc = xennet_proc_write;
-    min->data       = (void *)((unsigned long)dev | TARGET_MIN);
-
-    max->read_proc  = xennet_proc_read;
-    max->write_proc = xennet_proc_write;
-    max->data       = (void *)((unsigned long)dev | TARGET_MAX);
-
-    cur->read_proc  = xennet_proc_read;
-    cur->write_proc = xennet_proc_write;
-    cur->data       = (void *)((unsigned long)dev | TARGET_CUR);
-
-    return 0;
+       struct proc_dir_entry *dir, *min, *max, *cur;
+       char name[30]; /* "xen/net/" (8) + dev->name + NUL: name must be <= 21 chars */
+
+       sprintf(name, "xen/net/%s", dev->name);
+
+       dir = proc_mkdir(name, NULL);
+       if (!dir)
+               goto nomem;
+
+       min = create_proc_entry("rxbuf_min", 0644, dir);
+       max = create_proc_entry("rxbuf_max", 0644, dir);
+       cur = create_proc_entry("rxbuf_cur", 0444, dir); /* mode is read-only */
+       if (!min || !max || !cur)
+               goto nomem; /* partial entries are removed on the nomem path */
+
+       min->read_proc  = xennet_proc_read;
+       min->write_proc = xennet_proc_write;
+       min->data       = (void *)((unsigned long)dev | TARGET_MIN); /* tag in low bits; assumes dev is 4-byte aligned */
+
+       max->read_proc  = xennet_proc_read;
+       max->write_proc = xennet_proc_write;
+       max->data       = (void *)((unsigned long)dev | TARGET_MAX);
+
+       cur->read_proc  = xennet_proc_read;
+       cur->write_proc = xennet_proc_write; /* handler changes no target for TARGET_CUR */
+       cur->data       = (void *)((unsigned long)dev | TARGET_CUR);
+
+       return 0;
 
  nomem:
-    xennet_proc_delif(dev);
-    return -ENOMEM;
+       xennet_proc_delif(dev);
+       return -ENOMEM;
 }
 
 static void xennet_proc_delif(struct net_device *dev)
 {
-    char name[30];
-
-    sprintf(name, "xen/net/%s/rxbuf_min", dev->name);
-    remove_proc_entry(name, NULL);
-
-    sprintf(name, "xen/net/%s/rxbuf_max", dev->name);
-    remove_proc_entry(name, NULL);
-
-    sprintf(name, "xen/net/%s/rxbuf_cur", dev->name);
-    remove_proc_entry(name, NULL);
-
-    sprintf(name, "xen/net/%s", dev->name);
-    remove_proc_entry(name, NULL);
+       char name[64]; /* "xen/net/" + name (<= 15 chars) + "/rxbuf_min" + NUL needs 34 */
+
+       snprintf(name, sizeof(name), "xen/net/%s/rxbuf_min", dev->name);
+       remove_proc_entry(name, NULL);
+
+       snprintf(name, sizeof(name), "xen/net/%s/rxbuf_max", dev->name);
+       remove_proc_entry(name, NULL);
+
+       snprintf(name, sizeof(name), "xen/net/%s/rxbuf_cur", dev->name);
+       remove_proc_entry(name, NULL);
+
+       snprintf(name, sizeof(name), "xen/net/%s", dev->name); /* directory goes last */
+       remove_proc_entry(name, NULL);
 }
 
 #endif
 
 module_init(netif_init);
 module_exit(netif_exit);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c        Thu Sep 22 17:42:01 2005
@@ -41,232 +41,253 @@
 static int privcmd_ioctl(struct inode *inode, struct file *file,
                          unsigned int cmd, unsigned long data)
 {
-    int ret = -ENOSYS;
-
-    switch ( cmd )
-    {
-    case IOCTL_PRIVCMD_HYPERCALL:
-    {
-        privcmd_hypercall_t hypercall;
+       int ret = -ENOSYS;
+
+       switch (cmd) {
+       case IOCTL_PRIVCMD_HYPERCALL: {
+               privcmd_hypercall_t hypercall;
   
-        if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) )
-            return -EFAULT;
+               if (copy_from_user(&hypercall, (void *)data,
+                                  sizeof(hypercall)))
+                       return -EFAULT;
 
 #if defined(__i386__)
-        __asm__ __volatile__ (
-            "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; "
-            "movl  4(%%eax),%%ebx ;"
-            "movl  8(%%eax),%%ecx ;"
-            "movl 12(%%eax),%%edx ;"
-            "movl 16(%%eax),%%esi ;"
-            "movl 20(%%eax),%%edi ;"
-            "movl   (%%eax),%%eax ;"
-            TRAP_INSTR "; "
-            "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
-            : "=a" (ret) : "0" (&hypercall) : "memory" );
+               __asm__ __volatile__ (
+                       "pushl %%ebx; pushl %%ecx; pushl %%edx; "
+                       "pushl %%esi; pushl %%edi; "
+                       "movl  4(%%eax),%%ebx ;"
+                       "movl  8(%%eax),%%ecx ;"
+                       "movl 12(%%eax),%%edx ;"
+                       "movl 16(%%eax),%%esi ;"
+                       "movl 20(%%eax),%%edi ;"
+                       "movl   (%%eax),%%eax ;"
+                       TRAP_INSTR "; "
+                       "popl %%edi; popl %%esi; popl %%edx; "
+                       "popl %%ecx; popl %%ebx"
+                       : "=a" (ret) : "0" (&hypercall) : "memory" );
 #elif defined (__x86_64__)
-        {
-            long ign1, ign2, ign3;
-            __asm__ __volatile__ (
-                "movq %8,%%r10; movq %9,%%r8;" TRAP_INSTR
-                : "=a" (ret), "=D" (ign1), "=S" (ign2), "=d" (ign3)
-                : "0" ((unsigned long)hypercall.op), 
-                "1" ((unsigned long)hypercall.arg[0]), 
-                "2" ((unsigned long)hypercall.arg[1]),
-                "3" ((unsigned long)hypercall.arg[2]), 
-                "g" ((unsigned long)hypercall.arg[3]),
-                "g" ((unsigned long)hypercall.arg[4])
-                : "r11","rcx","r8","r10","memory");
-        }
+               {
+                       long ign1, ign2, ign3;
+                       __asm__ __volatile__ (
+                               "movq %8,%%r10; movq %9,%%r8;" TRAP_INSTR
+                               : "=a" (ret), "=D" (ign1),
+                                 "=S" (ign2), "=d" (ign3)
+                               : "0" ((unsigned long)hypercall.op), 
+                               "1" ((unsigned long)hypercall.arg[0]), 
+                               "2" ((unsigned long)hypercall.arg[1]),
+                               "3" ((unsigned long)hypercall.arg[2]), 
+                               "g" ((unsigned long)hypercall.arg[3]),
+                               "g" ((unsigned long)hypercall.arg[4])
+                               : "r11","rcx","r8","r10","memory");
+               }
 #elif defined (__ia64__)
-       __asm__ __volatile__ (
-           ";; mov r14=%2; mov r15=%3; mov r16=%4; mov r17=%5; mov r18=%6; mov
-r2=%1; break 0x1000;; mov %0=r8 ;;"
-           : "=r" (ret)
-           : "r" (hypercall.op),
-             "r" (hypercall.arg[0]),
-             "r" (hypercall.arg[1]),
-             "r" (hypercall.arg[2]),
-             "r" (hypercall.arg[3]),
-             "r" (hypercall.arg[4])
-           : "r14","r15","r16","r17","r18","r2","r8","memory");
+               __asm__ __volatile__ (
+                       ";; mov r14=%2; mov r15=%3; "
+                       "mov r16=%4; mov r17=%5; mov r18=%6;"
+                       "mov r2=%1; break 0x1000;; mov %0=r8 ;;"
+                       : "=r" (ret)
+                       : "r" (hypercall.op),
+                       "r" (hypercall.arg[0]),
+                       "r" (hypercall.arg[1]),
+                       "r" (hypercall.arg[2]),
+                       "r" (hypercall.arg[3]),
+                       "r" (hypercall.arg[4])
+                       : "r14","r15","r16","r17","r18","r2","r8","memory");
 #endif
-    }
-    break;
+       }
+       break;
 
 #if defined(CONFIG_XEN_PRIVILEGED_GUEST)
-    case IOCTL_PRIVCMD_MMAP:
-    {
+       case IOCTL_PRIVCMD_MMAP: {
 #define PRIVCMD_MMAP_SZ 32
-        privcmd_mmap_t mmapcmd;
-        privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ], *p;
-        int i, rc;
-
-        if ( copy_from_user(&mmapcmd, (void *)data, sizeof(mmapcmd)) )
-            return -EFAULT;
-
-        p = mmapcmd.entry;
-
-        for (i=0; i<mmapcmd.num; i+=PRIVCMD_MMAP_SZ, p+=PRIVCMD_MMAP_SZ)
-        {
-            int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
-                PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
-
-
-            if ( copy_from_user(&msg, p, n*sizeof(privcmd_mmap_entry_t)) )
-                return -EFAULT;
+               privcmd_mmap_t mmapcmd;
+               privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ], *p;
+               int i, rc;
+
+               if (copy_from_user(&mmapcmd, (void *)data, sizeof(mmapcmd)))
+                       return -EFAULT;
+
+               p = mmapcmd.entry;
+
+               for (i = 0; i < mmapcmd.num;
+                    i += PRIVCMD_MMAP_SZ, p += PRIVCMD_MMAP_SZ) {
+                       int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
+                               PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
+
+                       if (copy_from_user(&msg, p,
+                                          n*sizeof(privcmd_mmap_entry_t)))
+                               return -EFAULT;
      
-            for ( j = 0; j < n; j++ )
-            {
-                struct vm_area_struct *vma = 
-                    find_vma( current->mm, msg[j].va );
-
-                if ( !vma )
-                    return -EINVAL;
-
-                if ( msg[j].va > PAGE_OFFSET )
-                    return -EINVAL;
-
-                if ( (msg[j].va + (msg[j].npages<<PAGE_SHIFT)) > vma->vm_end )
-                    return -EINVAL;
-
-                if ( (rc = direct_remap_pfn_range(vma->vm_mm, 
-                                                   msg[j].va&PAGE_MASK, 
-                                                   msg[j].mfn, 
-                                                   msg[j].npages<<PAGE_SHIFT, 
-                                                   vma->vm_page_prot,
-                                                   mmapcmd.dom)) < 0 )
-                    return rc;
-            }
-        }
-        ret = 0;
-    }
-    break;
-
-    case IOCTL_PRIVCMD_MMAPBATCH:
-    {
-        mmu_update_t u;
-        privcmd_mmapbatch_t m;
-        struct vm_area_struct *vma = NULL;
-        unsigned long *p, addr;
-        unsigned long mfn, ptep;
-        int i;
-
-        if ( copy_from_user(&m, (void *)data, sizeof(m)) )
-        { ret = -EFAULT; goto batch_err; }
-
-        vma = find_vma( current->mm, m.addr );
-
-        if ( !vma )
-        { ret = -EINVAL; goto batch_err; }
-
-        if ( m.addr > PAGE_OFFSET )
-        { ret = -EFAULT; goto batch_err; }
-
-        if ( (m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end )
-        { ret = -EFAULT; goto batch_err; }
-
-        p = m.arr;
-        addr = m.addr;
-        for ( i = 0; i < m.num; i++, addr += PAGE_SIZE, p++ )
-        {
-            if ( get_user(mfn, p) )
-                return -EFAULT;
-
-            ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
-            if (ret)
-                goto batch_err;
-
-            u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
-            u.ptr = ptep;
-
-            if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) )
-                put_user(0xF0000000 | mfn, p);
-        }
-
-        ret = 0;
-        break;
-
-    batch_err:
-        printk("batch_err ret=%d vma=%p addr=%lx num=%d arr=%p %lx-%lx\n", 
-               ret, vma, m.addr, m.num, m.arr,
-               vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
-        break;
-    }
-    break;
+                       for (j = 0; j < n; j++) {
+                               struct vm_area_struct *vma = 
+                                       find_vma( current->mm, msg[j].va );
+
+                               if (!vma)
+                                       return -EINVAL;
+
+                               if (msg[j].va > PAGE_OFFSET)
+                                       return -EINVAL;
+
+                               if ((msg[j].va + (msg[j].npages << PAGE_SHIFT))
+                                   > vma->vm_end )
+                                       return -EINVAL;
+
+                               if ((rc = direct_remap_pfn_range(
+                                       vma,
+                                       msg[j].va&PAGE_MASK, 
+                                       msg[j].mfn, 
+                                       msg[j].npages<<PAGE_SHIFT, 
+                                       vma->vm_page_prot,
+                                       mmapcmd.dom)) < 0)
+                                       return rc;
+                       }
+               }
+               ret = 0;
+       }
+       break;
+
+       case IOCTL_PRIVCMD_MMAPBATCH: {
+               mmu_update_t u;
+               privcmd_mmapbatch_t m;
+               struct vm_area_struct *vma = NULL;
+               unsigned long *p, addr;
+               unsigned long mfn, ptep;
+               int i;
+
+               if (copy_from_user(&m, (void *)data, sizeof(m))) {
+                       ret = -EFAULT;
+                       goto batch_err;
+               }
+
+               vma = find_vma( current->mm, m.addr );
+               if (!vma) {
+                       ret = -EINVAL;
+                       goto batch_err;
+               }
+
+               if (m.addr > PAGE_OFFSET) {
+                       ret = -EFAULT;
+                       goto batch_err;
+               }
+
+               if ((m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end) {
+                       ret = -EFAULT;
+                       goto batch_err;
+               }
+
+               p = m.arr;
+               addr = m.addr;
+               for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
+                       if (get_user(mfn, p))
+                               return -EFAULT;
+
+                       ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
+                       if (ret)
+                               goto batch_err;
+
+                       u.val = pte_val_ma(pfn_pte_ma(mfn, vma->vm_page_prot));
+                       u.ptr = ptep;
+
+                       if (HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0)
+                               put_user(0xF0000000 | mfn, p);
+               }
+
+               ret = 0;
+               break;
+
+       batch_err:
+               printk("batch_err ret=%d vma=%p addr=%lx "
+                      "num=%d arr=%p %lx-%lx\n", 
+                      ret, vma, m.addr, m.num, m.arr,
+                      vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
+               break;
+       }
+       break;
 #endif
 
-    case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN:
-    {
-        unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
-        pgd_t *pgd = pgd_offset_k(m2pv);
-        pud_t *pud = pud_offset(pgd, m2pv);
-        pmd_t *pmd = pmd_offset(pud, m2pv);
-        unsigned long m2p_start_mfn = (*(unsigned long *)pmd) >> PAGE_SHIFT; 
-        ret = put_user(m2p_start_mfn, (unsigned long *)data) ? -EFAULT: 0;
-    }
-    break;
-
-    case IOCTL_PRIVCMD_INITDOMAIN_STORE:
-    {
-        extern int do_xenbus_probe(void*);
-        unsigned long page;
-
-        if (xen_start_info->store_evtchn != 0) {
-            ret = xen_start_info->store_mfn;
-            break;
-        }
-
-        /* Allocate page. */
-        page = get_zeroed_page(GFP_KERNEL);
-        if (!page) {
-            ret = -ENOMEM;
-            break;
-        }
-
-        /* We don't refcnt properly, so set reserved on page.
-         * (this allocation is permanent) */
-        SetPageReserved(virt_to_page(page));
-
-        /* Initial connect. Setup channel and page. */
-        xen_start_info->store_evtchn = data;
-        xen_start_info->store_mfn = pfn_to_mfn(virt_to_phys((void *)page) >>
-                                              PAGE_SHIFT);
-        ret = xen_start_info->store_mfn;
-
-        /* We'll return then this will wait for daemon to answer */
-        kthread_run(do_xenbus_probe, NULL, "xenbus_probe");
-    }
-    break;
-
-    default:
-        ret = -EINVAL;
-        break;
-    }
-    return ret;
+       case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN: {
+               unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
+               pgd_t *pgd = pgd_offset_k(m2pv);
+               pud_t *pud = pud_offset(pgd, m2pv);
+               pmd_t *pmd = pmd_offset(pud, m2pv);
+               unsigned long m2p_start_mfn =
+                       (*(unsigned long *)pmd) >> PAGE_SHIFT; 
+               ret = put_user(m2p_start_mfn, (unsigned long *)data) ?
+                       -EFAULT: 0;
+       }
+       break;
+
+       case IOCTL_PRIVCMD_INITDOMAIN_STORE: {
+               extern int do_xenbus_probe(void*);
+               unsigned long page;
+
+               if (xen_start_info->store_evtchn != 0) {
+                       ret = xen_start_info->store_mfn;
+                       break;
+               }
+
+               /* Allocate page. */
+               page = get_zeroed_page(GFP_KERNEL);
+               if (!page) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               /* We don't refcnt properly, so set reserved on page.
+                * (this allocation is permanent) */
+               SetPageReserved(virt_to_page(page));
+
+               /* Initial connect. Setup channel and page. */
+               xen_start_info->store_evtchn = data;
+               xen_start_info->store_mfn =
+                       pfn_to_mfn(virt_to_phys((void *)page) >>
+                                  PAGE_SHIFT);
+               ret = xen_start_info->store_mfn;
+
+               /* We'll return then this will wait for daemon to answer */
+               kthread_run(do_xenbus_probe, NULL, "xenbus_probe");
+       }
+       break;
+
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
 }
 
 static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
 {
-    /* DONTCOPY is essential for Xen as copy_page_range is broken. */
-    vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
-
-    return 0;
+       /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+       vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+
+       return 0; /* only flags are set here; no pages are mapped by this hook */
 }
 
 static struct file_operations privcmd_file_ops = {
-    .ioctl = privcmd_ioctl,
-    .mmap  = privcmd_mmap,
+       .ioctl = privcmd_ioctl,
+       .mmap  = privcmd_mmap,
 };
 
 
 static int __init privcmd_init(void)
 {
-    privcmd_intf = create_xen_proc_entry("privcmd", 0400);
-    if ( privcmd_intf != NULL )
-        privcmd_intf->proc_fops = &privcmd_file_ops;
-
-    return 0;
+       privcmd_intf = create_xen_proc_entry("privcmd", 0400); /* owner read-only */
+       if (privcmd_intf != NULL)
+               privcmd_intf->proc_fops = &privcmd_file_ops;
+
+       return 0; /* reports success even if the proc entry was not created */
 }
 
 __initcall(privcmd_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/tpmback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h Thu Sep 22 17:42:01 2005
@@ -11,10 +11,10 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <asm-xen/evtchn.h>
+#include <asm-xen/driver_util.h>
 #include <asm-xen/xen-public/io/tpmif.h>
 #include <asm/io.h>
 #include <asm/pgalloc.h>
-#include <asm-xen/xen-public/io/domain_controller.h>
 
 #if 0
 #define ASSERT(_p) \
@@ -34,12 +34,12 @@
        unsigned int handle;
 
        /* Physical parameters of the comms window. */
-       unsigned long tx_shmem_frame;
        unsigned int evtchn;
        unsigned int remote_evtchn;
 
        /* The shared rings and indexes. */
        tpmif_tx_interface_t *tx;
+       struct vm_struct *tx_area;
 
        /* Miscellaneous private stuff. */
        enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
@@ -55,9 +55,7 @@
        struct work_struct work;
 
        u16 shmem_handle;
-       unsigned long shmem_vaddr;
        grant_ref_t shmem_ref;
-
 } tpmif_t;
 
 void tpmif_disconnect_complete(tpmif_t * tpmif);
@@ -86,3 +84,13 @@
 #define MMAP_VADDR(t,_req) ((t)->mmap_vstart + ((_req) * PAGE_SIZE))
 
 #endif /* __TPMIF__BACKEND__COMMON_H__ */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c      Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c      Thu Sep 22 17:42:01 2005
@@ -1,4 +1,4 @@
-/******************************************************************************
+ /*****************************************************************************
  * drivers/xen/tpmback/interface.c
  *
  * Vritual TPM interface management.
@@ -14,187 +14,192 @@
 #include "common.h"
 #include <asm-xen/balloon.h>
 
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-
 #define TPMIF_HASHSZ (2 << 5)
 #define TPMIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(TPMIF_HASHSZ-1))
 
 static kmem_cache_t *tpmif_cachep;
 int num_frontends = 0;
+
 LIST_HEAD(tpmif_list);
 
-
-tpmif_t *alloc_tpmif(domid_t domid, long int instance)
-{
-    struct page *page;
-    tpmif_t *tpmif;
-
-    tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL);
-    if (!tpmif)
-        return ERR_PTR(-ENOMEM);
-
-    memset(tpmif, 0, sizeof(*tpmif));
-    tpmif->domid        = domid;
-    tpmif->status       = DISCONNECTED;
-    tpmif->tpm_instance = instance;
-    atomic_set(&tpmif->refcnt, 1);
-
-    page = balloon_alloc_empty_page_range(TPMIF_TX_RING_SIZE);
-    BUG_ON(page == NULL);
-    tpmif->mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
-    list_add(&tpmif->tpmif_list, &tpmif_list);
-    num_frontends++;
-
-    return tpmif;
-}
-
-
-void free_tpmif(tpmif_t *tpmif)
-{
-    num_frontends--;
-    list_del(&tpmif->tpmif_list);
-    kmem_cache_free(tpmif_cachep, tpmif);
-}
-
-
-tpmif_t *tpmif_find(domid_t domid, long int instance)
-{
-    tpmif_t *tpmif;
-
-    list_for_each_entry(tpmif, &tpmif_list, tpmif_list) {
-        if (tpmif->tpm_instance == instance) {
-            if (tpmif->domid == domid) {
-                tpmif_get(tpmif);
-                return tpmif;
-           } else {
-               return NULL;
-           }
-        }
-    }
-
-    return alloc_tpmif(domid, instance);
-}
-
-
-static int map_frontend_page(tpmif_t *tpmif, unsigned long localaddr,
-                            unsigned long shared_page)
-{
-    struct gnttab_map_grant_ref op = {
-        .host_addr = localaddr,
-        .flags     = GNTMAP_host_map,
-        .ref       = shared_page,
-        .dom       = tpmif->domid,
-    };
-
-    BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-
-    if (op.handle < 0) {
-       DPRINTK(" Grant table operation failure !\n");
-       return op.handle;
-    }
-
-    tpmif->shmem_ref    = shared_page;
-    tpmif->shmem_handle = op.handle;
-    tpmif->shmem_vaddr  = localaddr;
-    return 0;
-}
-
-
-static void unmap_frontend_page(tpmif_t *tpmif)
-{
-    struct gnttab_unmap_grant_ref op;
-
-    op.host_addr = tpmif->shmem_vaddr;
-    op.handle = tpmif->shmem_handle;
-    op.dev_bus_addr = 0;
-
-    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
-}
-
-
-int tpmif_map(tpmif_t *tpmif,
-              unsigned long shared_page, unsigned int evtchn)
-{
-    struct vm_struct *vma;
-    evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
-    int err;
-
-    BUG_ON(tpmif->remote_evtchn);
-
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
-       return -ENOMEM;
-
-    err = map_frontend_page(tpmif,
-                            VMALLOC_VMADDR(vma->addr),
-                            shared_page);
-    if (err) {
-        vfree(vma->addr);
-       return err;
-    }
-
-    op.u.bind_interdomain.dom1 = DOMID_SELF;
-    op.u.bind_interdomain.dom2 = tpmif->domid;
-    op.u.bind_interdomain.port1 = 0;
-    op.u.bind_interdomain.port2 = evtchn;
-    err = HYPERVISOR_event_channel_op(&op);
-    if (err) {
-       unmap_frontend_page(tpmif);
-       vfree(vma->addr);
-       return err;
-    }
-
-    tpmif->evtchn = op.u.bind_interdomain.port1;
-    tpmif->remote_evtchn = evtchn;
-
-    tpmif->tx = (tpmif_tx_interface_t *) vma->addr;
-
-    bind_evtchn_to_irqhandler(tpmif->evtchn,
-                              tpmif_be_int,
-                              0,
-                              "tpmif-backend",
-                             tpmif);
-    tpmif->status        = CONNECTED;
-    tpmif->shmem_ref     = shared_page;
-    tpmif->active        = 1;
-
-    return 0;
-}
-
-
-static void __tpmif_disconnect_complete(void *arg)
-{
-    evtchn_op_t op = { .cmd = EVTCHNOP_close };
-    tpmif_t *tpmif = (tpmif_t *) arg;
-
-    op.u.close.port = tpmif->evtchn;
-    op.u.close.dom  = DOMID_SELF;
-    HYPERVISOR_event_channel_op(&op);
-    op.u.close.port = tpmif->remote_evtchn;
-    op.u.close.dom  = tpmif->domid;
-    HYPERVISOR_event_channel_op(&op);
-
-    if (tpmif->evtchn)
-         unbind_evtchn_from_irqhandler(tpmif->evtchn, tpmif);
-
-    if (tpmif->tx) {
-        unmap_frontend_page(tpmif);
-        vfree(tpmif->tx);
-    }
-
-    free_tpmif(tpmif);
-}
-
-
-void tpmif_disconnect_complete(tpmif_t * tpmif)
-{
-    INIT_WORK(&tpmif->work, __tpmif_disconnect_complete, (void *)tpmif);
-    schedule_work(&tpmif->work);
-}
-
-
-void __init tpmif_interface_init(void)
-{
-    tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof(tpmif_t),
-                                     0, 0, NULL, NULL);
-}
+tpmif_t *
+alloc_tpmif(domid_t domid, long int instance)
+{
+       struct page *page;
+       tpmif_t *tpmif;
+
+       tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL);
+       if (!tpmif)
+               return ERR_PTR(-ENOMEM);
+
+       memset(tpmif, 0, sizeof (*tpmif));
+       tpmif->domid = domid;
+       tpmif->status = DISCONNECTED;
+       tpmif->tpm_instance = instance;
+       atomic_set(&tpmif->refcnt, 1);
+
+       page = balloon_alloc_empty_page_range(TPMIF_TX_RING_SIZE);
+       BUG_ON(page == NULL);
+       tpmif->mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+       list_add(&tpmif->tpmif_list, &tpmif_list);
+       num_frontends++;
+
+       return tpmif;
+}
+
+void
+free_tpmif(tpmif_t * tpmif)
+{
+       num_frontends--;
+       list_del(&tpmif->tpmif_list);
+       kmem_cache_free(tpmif_cachep, tpmif);
+}
+
+tpmif_t *
+tpmif_find(domid_t domid, long int instance)
+{
+       tpmif_t *tpmif;
+
+       list_for_each_entry(tpmif, &tpmif_list, tpmif_list) {
+               if (tpmif->tpm_instance == instance) {
+                       if (tpmif->domid == domid) {
+                               tpmif_get(tpmif);
+                               return tpmif;
+                       } else {
+                               return NULL;
+                       }
+               }
+       }
+
+       return alloc_tpmif(domid, instance);
+}
+
+static int
+map_frontend_page(tpmif_t *tpmif, unsigned long shared_page)
+{
+       struct gnttab_map_grant_ref op = {
+               .host_addr = (unsigned long)tpmif->tx_area->addr,
+               .flags = GNTMAP_host_map,
+               .ref = shared_page,
+               .dom = tpmif->domid,
+       };
+
+       lock_vm_area(tpmif->tx_area);
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
+       unlock_vm_area(tpmif->tx_area);
+
+       if (op.handle < 0) {
+               DPRINTK(" Grant table operation failure !\n");
+               return op.handle;
+       }
+
+       tpmif->shmem_ref = shared_page;
+       tpmif->shmem_handle = op.handle;
+
+       return 0;
+}
+
+static void
+unmap_frontend_page(tpmif_t *tpmif)
+{
+       struct gnttab_unmap_grant_ref op;
+
+       op.host_addr    = (unsigned long)tpmif->tx_area->addr;
+       op.handle       = tpmif->shmem_handle;
+       op.dev_bus_addr = 0;
+
+       lock_vm_area(tpmif->tx_area);
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+       unlock_vm_area(tpmif->tx_area);
+}
+
+int
+tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn)
+{
+       evtchn_op_t op = {.cmd = EVTCHNOP_bind_interdomain };
+       int err;
+
+       BUG_ON(tpmif->remote_evtchn);
+
+       if ((tpmif->tx_area = alloc_vm_area(PAGE_SIZE)) == NULL)
+               return -ENOMEM;
+
+       err = map_frontend_page(tpmif, shared_page);
+       if (err) {
+               free_vm_area(tpmif->tx_area);
+               return err;
+       }
+
+       op.u.bind_interdomain.dom1 = DOMID_SELF;
+       op.u.bind_interdomain.dom2 = tpmif->domid;
+       op.u.bind_interdomain.port1 = 0;
+       op.u.bind_interdomain.port2 = evtchn;
+       err = HYPERVISOR_event_channel_op(&op);
+       if (err) {
+               unmap_frontend_page(tpmif);
+               free_vm_area(tpmif->tx_area);
+               return err;
+       }
+
+       tpmif->evtchn = op.u.bind_interdomain.port1;
+       tpmif->remote_evtchn = evtchn;
+
+       tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr;
+
+       bind_evtchn_to_irqhandler(tpmif->evtchn,
+                                 tpmif_be_int, 0, "tpmif-backend", tpmif);
+       tpmif->status = CONNECTED;
+       tpmif->shmem_ref = shared_page;
+       tpmif->active = 1;
+
+       return 0;
+}
+
+static void
+__tpmif_disconnect_complete(void *arg)
+{
+       evtchn_op_t op = {.cmd = EVTCHNOP_close };
+       tpmif_t *tpmif = (tpmif_t *) arg;
+
+       op.u.close.port = tpmif->evtchn;
+       op.u.close.dom = DOMID_SELF;
+       HYPERVISOR_event_channel_op(&op);
+       op.u.close.port = tpmif->remote_evtchn;
+       op.u.close.dom = tpmif->domid;
+       HYPERVISOR_event_channel_op(&op);
+
+       if (tpmif->evtchn)
+               unbind_evtchn_from_irqhandler(tpmif->evtchn, tpmif);
+
+       if (tpmif->tx) {
+               unmap_frontend_page(tpmif);
+               free_vm_area(tpmif->tx_area);
+       }
+
+       free_tpmif(tpmif);
+}
+
+void
+tpmif_disconnect_complete(tpmif_t * tpmif)
+{
+       INIT_WORK(&tpmif->work, __tpmif_disconnect_complete, (void *)tpmif);
+       schedule_work(&tpmif->work);
+}
+
+void __init
+tpmif_interface_init(void)
+{
+       tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t),
+                                        0, 0, NULL, NULL);
+}
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c        Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c        Thu Sep 22 17:42:01 2005
@@ -566,7 +566,7 @@
                                 * the more time we give the TPM to process the request.
                                 */
                                mod_timer(&pak->processing_timer,
-                                         jiffies + (num_frontends * 10 * HZ));
+                                         jiffies + (num_frontends * 60 * HZ));
                                dataex.copied_so_far = 0;
                        }
                }
@@ -850,7 +850,7 @@
                write_lock_irqsave(&dataex.pak_lock, flags);
                list_add_tail(&pak->next, &dataex.pending_pak);
                /* give the TPM some time to pick up the request */
-               mod_timer(&pak->processing_timer, jiffies + (10 * HZ));
+               mod_timer(&pak->processing_timer, jiffies + (30 * HZ));
                write_unlock_irqrestore(&dataex.pak_lock,
                                        flags);
 
@@ -1075,3 +1075,13 @@
 }
 
 __initcall(tpmback_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Thu Sep 22 17:42:01 2005
@@ -213,6 +213,7 @@
 
        be->dev = dev;
        be->backend_watch.node     = dev->nodename;
+       /* Implicitly calls backend_changed() once. */
        be->backend_watch.callback = backend_changed;
        be->instance = -1;
        err = register_xenbus_watch(&be->backend_watch);
@@ -236,8 +237,6 @@
        }
 
        dev->data = be;
-
-       backend_changed(&be->backend_watch, dev->nodename);
        return err;
 
 free_be:
@@ -269,3 +268,13 @@
 {
        xenbus_register_backend(&tpmback);
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c      Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c      Thu Sep 22 17:42:01 2005
@@ -46,7 +46,6 @@
 #include <asm-xen/xen-public/io/tpmif.h>
 #include <asm/uaccess.h>
 #include <asm-xen/xenbus.h>
-#include <asm-xen/xen-public/io/domain_controller.h>
 #include <asm-xen/xen-public/grant_table.h>
 
 #include "tpmfront.h"
@@ -258,18 +257,24 @@
 
        tpm_allocate_buffers(tp);
 
-       info->ring_ref = gnttab_claim_grant_reference(&gref_head);
-       ASSERT(info->ring_ref != -ENOSPC);
-       gnttab_grant_foreign_access_ref(info->ring_ref,
-                                       backend_id,
-                                       (virt_to_machine(tp->tx) >> PAGE_SHIFT),
-                                       0);
+       err = gnttab_grant_foreign_access(backend_id,
+                                         (virt_to_machine(tp->tx) >> PAGE_SHIFT),
+                                         0);
+
+       if (err == -ENOSPC) {
+               free_page((unsigned long)sring);
+               tp->tx = NULL;
+               xenbus_dev_error(dev, err, "allocating grant reference");
+               return err;
+       }
+       info->ring_ref = err;
 
        op.u.alloc_unbound.dom = backend_id;
        err = HYPERVISOR_event_channel_op(&op);
        if (err) {
+               gnttab_end_foreign_access(info->ring_ref, 0);
                free_page((unsigned long)sring);
-               tp->tx = 0;
+               tp->tx = NULL;
                xenbus_dev_error(dev, err, "allocating event channel");
                return err;
        }
@@ -283,6 +288,7 @@
        tpmif_set_connected_state(tp,0);
 
        if ( tp->tx != NULL ) {
+               gnttab_end_foreign_access(info->ring_ref, 0);
                free_page((unsigned long)tp->tx);
                tp->tx = NULL;
        }
@@ -412,7 +418,6 @@
                return err;
        }
 
-       watch_for_status(&info->watch, info->watch.node);
        return 0;
 }
 
@@ -736,3 +741,13 @@
 }
 
 __initcall(tpmif_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.h
--- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.h      Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.h      Thu Sep 22 17:42:01 2005
@@ -2,7 +2,8 @@
 #define TPM_FRONT_H
 
 
-struct tpm_private {
+struct tpm_private
+{
        tpmif_tx_interface_t *tx;
        unsigned int evtchn;
        int connected;
@@ -29,10 +30,21 @@
 };
 
 
-struct tx_buffer {
+struct tx_buffer
+{
        unsigned int size;      // available space in data
        unsigned int len;       // used space in data
        unsigned char *data;    // pointer to a page
 };
 
 #endif
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Thu Sep 22 17:42:01 2005
@@ -231,3 +231,13 @@
 
        unbind_evtchn_from_irqhandler(xen_start_info->store_evtchn, &xb_waitq);
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h    Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h    Thu Sep 22 17:42:01 2005
@@ -39,3 +39,13 @@
 extern wait_queue_head_t xb_waitq;
 
 #endif /* _XENBUS_COMMS_H */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c      Thu Sep 22 17:42:01 2005
@@ -186,3 +186,13 @@
 }
 
 __initcall(xenbus_dev_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Thu Sep 22 17:42:01 2005
@@ -687,3 +687,13 @@
 }
 
 postcore_initcall(xenbus_probe_init);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Thu Sep 22 17:42:01 2005
@@ -253,31 +253,19 @@
 EXPORT_SYMBOL(xenbus_read);
 
 /* Write the value of a single file.
- * Returns -err on failure.  createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ * Returns -err on failure.
  */
-int xenbus_write(const char *dir, const char *node,
-                const char *string, int createflags)
-{
-       const char *flags, *path;
-       struct kvec iovec[3];
+int xenbus_write(const char *dir, const char *node, const char *string)
+{
+       const char *path;
+       struct kvec iovec[2];
 
        path = join(dir, node);
-       /* Format: Flags (as string), path, data. */
-       if (createflags == 0)
-               flags = XS_WRITE_NONE;
-       else if (createflags == O_CREAT)
-               flags = XS_WRITE_CREATE;
-       else if (createflags == (O_CREAT|O_EXCL))
-               flags = XS_WRITE_CREATE_EXCL;
-       else
-               return -EINVAL;
 
        iovec[0].iov_base = (void *)path;
        iovec[0].iov_len = strlen(path) + 1;
-       iovec[1].iov_base = (void *)flags;
-       iovec[1].iov_len = strlen(flags) + 1;
-       iovec[2].iov_base = (void *)string;
-       iovec[2].iov_len = strlen(string);
+       iovec[1].iov_base = (void *)string;
+       iovec[1].iov_len = strlen(string);
 
        return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
 }
@@ -357,7 +345,7 @@
        va_end(ap);
 
        BUG_ON(ret > sizeof(printf_buffer)-1);
-       return xenbus_write(dir, node, printf_buffer, O_CREAT);
+       return xenbus_write(dir, node, printf_buffer);
 }
 EXPORT_SYMBOL(xenbus_printf);
 
@@ -377,7 +365,7 @@
 
        BUG_ON(len + ret > sizeof(printf_buffer)-1);
        dev->has_error = 1;
-       if (xenbus_write(dev->nodename, "error", printf_buffer, O_CREAT) != 0)
+       if (xenbus_write(dev->nodename, "error", printf_buffer) != 0)
                printk("xenbus: failed to write error node for %s (%s)\n",
                       dev->nodename, printf_buffer);
 }
@@ -578,3 +566,13 @@
                return PTR_ERR(watcher);
        return 0;
 }
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h      Thu Sep 22 17:42:01 2005
@@ -261,7 +261,6 @@
 
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)     (phys_to_machine(__pa(v)))
-#define machine_to_virt(m)     (__va(machine_to_phys(m)))
 #define virt_to_mfn(v)         (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
 #define mfn_to_virt(m)         (__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h   Thu Sep 22 17:42:01 2005
@@ -460,7 +460,7 @@
 #define kern_addr_valid(addr)  (1)
 #endif /* !CONFIG_DISCONTIGMEM */
 
-int direct_remap_pfn_range(struct mm_struct *mm,
+int direct_remap_pfn_range(struct vm_area_struct *vma,
                             unsigned long address, 
                             unsigned long mfn,
                             unsigned long size, 
@@ -474,10 +474,10 @@
                     unsigned long size);
 
 #define io_remap_page_range(vma,from,phys,size,prot) \
-direct_remap_pfn_range(vma->vm_mm,from,phys>>PAGE_SHIFT,size,prot,DOMID_IO)
+direct_remap_pfn_range(vma,from,(phys)>>PAGE_SHIFT,size,prot,DOMID_IO)
 
 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
-direct_remap_pfn_range(vma->vm_mm,from,pfn,size,prot,DOMID_IO)
+direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
 
 #define MK_IOSPACE_PFN(space, pfn)     (pfn)
 #define GET_IOSPACE(pfn)               0
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h    Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h    Thu Sep 22 17:42:01 2005
@@ -497,11 +497,22 @@
  * includes these barriers, for example.
  */
 
+/*
+ * Don't use smp_processor_id() in preemptible code: debug builds will barf.
+ * It's okay in these cases as we only read the upcall mask in preemptible
+ * regions, which is always safe.
+ */
+#ifdef CONFIG_SMP
+#define __this_cpu()   __smp_processor_id()
+#else
+#define __this_cpu()   0
+#endif
+
 #define __cli()                                                                \
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()];       \
        _vcpu->evtchn_upcall_mask = 1;                                  \
        preempt_enable_no_resched();                                    \
        barrier();                                                      \
@@ -512,7 +523,7 @@
        vcpu_info_t *_vcpu;                                             \
        barrier();                                                      \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()];       \
        _vcpu->evtchn_upcall_mask = 0;                                  \
        barrier(); /* unmask then check (avoid races) */                \
        if ( unlikely(_vcpu->evtchn_upcall_pending) )                   \
@@ -523,7 +534,7 @@
 #define __save_flags(x)                                                        \
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()];       \
        (x) = _vcpu->evtchn_upcall_mask;                                \
 } while (0)
 
@@ -532,7 +543,7 @@
        vcpu_info_t *_vcpu;                                             \
        barrier();                                                      \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()];       \
        if ((_vcpu->evtchn_upcall_mask = (x)) == 0) {                   \
                barrier(); /* unmask then check (avoid races) */        \
                if ( unlikely(_vcpu->evtchn_upcall_pending) )           \
@@ -548,7 +559,7 @@
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()];       \
        (x) = _vcpu->evtchn_upcall_mask;                                \
        _vcpu->evtchn_upcall_mask = 1;                                  \
        preempt_enable_no_resched();                                    \
@@ -561,14 +572,8 @@
 #define local_irq_disable()    __cli()
 #define local_irq_enable()     __sti()
 
-/* Don't use smp_processor_id: this is called in debug versions of that fn. */
-#ifdef CONFIG_SMP
-#define irqs_disabled()                        \
-    HYPERVISOR_shared_info->vcpu_data[__smp_processor_id()].evtchn_upcall_mask
-#else
-#define irqs_disabled()                        \
-    HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask
-#endif
+#define irqs_disabled()                                                        \
+       HYPERVISOR_shared_info->vcpu_data[__this_cpu()].evtchn_upcall_mask
 
 /*
  * disable hlt during certain critical i/o operations
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h     Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h     Thu Sep 22 17:42:01 2005
@@ -35,7 +35,7 @@
         * of cr3/ldt (i.e., not in __switch_to).
         */
        __asm__ __volatile__ (
-               "movl %%es,%0 ; movl %%ds,%1 ; movl %%fs,%2 ; movl %%gs,%3"
+               "mov %%es,%0 ; mov %%ds,%1 ; mov %%fs,%2 ; mov %%gs,%3"
                : "=m" (current->thread.es),
                  "=m" (current->thread.ds),
                  "=m" (current->thread.fsindex),
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h    Thu Sep 22 17:42:01 2005
@@ -239,7 +239,6 @@
 
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)     (phys_to_machine(__pa(v)))
-#define machine_to_virt(m)     (__va(machine_to_phys(m)))
 #define virt_to_mfn(v)         (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
 #define mfn_to_virt(m)         (__va(mfn_to_pfn(m) << PAGE_SHIFT))
 
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Thu Sep 22 17:42:01 2005
@@ -526,7 +526,7 @@
 
 #define DOMID_LOCAL (0xFFFFU)
 
-int direct_remap_pfn_range(struct mm_struct *mm,
+int direct_remap_pfn_range(struct vm_area_struct *vma,
                             unsigned long address,
                             unsigned long mfn,
                             unsigned long size,
@@ -542,10 +542,10 @@
                     unsigned long size);
 
 #define io_remap_page_range(vma, vaddr, paddr, size, prot)             \
-               direct_remap_pfn_range((vma)->vm_mm,vaddr,paddr>>PAGE_SHIFT,size,prot,DOMID_IO)
+               direct_remap_pfn_range(vma,vaddr,(paddr)>>PAGE_SHIFT,size,prot,DOMID_IO)
 
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)                \
-               direct_remap_pfn_range((vma)->vm_mm,vaddr,pfn,size,prot,DOMID_IO)
+               direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
 
 #define MK_IOSPACE_PFN(space, pfn)     (pfn)
 #define GET_IOSPACE(pfn)               0
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h  Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h  Thu Sep 22 17:42:01 2005
@@ -321,11 +321,22 @@
  * includes these barriers, for example.
  */
 
+/*
+ * Don't use smp_processor_id() in preemptible code: debug builds will barf.
+ * It's okay in these cases as we only read the upcall mask in preemptible
+ * regions, which is always safe.
+ */
+#ifdef CONFIG_SMP
+#define __this_cpu()   __smp_processor_id()
+#else
+#define __this_cpu()   0
+#endif
+
 #define __cli()                                                                \
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()];       \
        _vcpu->evtchn_upcall_mask = 1;                                  \
        preempt_enable_no_resched();                                    \
        barrier();                                                      \
@@ -336,7 +347,7 @@
        vcpu_info_t *_vcpu;                                             \
        barrier();                                                      \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()];       \
        _vcpu->evtchn_upcall_mask = 0;                                  \
        barrier(); /* unmask then check (avoid races) */                \
        if ( unlikely(_vcpu->evtchn_upcall_pending) )                   \
@@ -347,7 +358,7 @@
 #define __save_flags(x)                                                        \
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()];       \
        (x) = _vcpu->evtchn_upcall_mask;                                \
 } while (0)
 
@@ -356,7 +367,7 @@
        vcpu_info_t *_vcpu;                                             \
        barrier();                                                      \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()];       \
        if ((_vcpu->evtchn_upcall_mask = (x)) == 0) {                   \
                barrier(); /* unmask then check (avoid races) */        \
                if ( unlikely(_vcpu->evtchn_upcall_pending) )           \
@@ -372,7 +383,7 @@
 do {                                                                   \
        vcpu_info_t *_vcpu;                                             \
        preempt_disable();                                              \
-       _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \
+       _vcpu = &HYPERVISOR_shared_info->vcpu_data[__this_cpu()];       \
        (x) = _vcpu->evtchn_upcall_mask;                                \
        _vcpu->evtchn_upcall_mask = 1;                                  \
        preempt_enable_no_resched();                                    \
@@ -387,14 +398,8 @@
 #define local_irq_disable()    __cli()
 #define local_irq_enable()     __sti()
 
-/* Don't use smp_processor_id: this is called in debug versions of that fn. */
-#ifdef CONFIG_SMP
-#define irqs_disabled()                        \
-    HYPERVISOR_shared_info->vcpu_data[__smp_processor_id()].evtchn_upcall_mask
-#else
-#define irqs_disabled()                        \
-    HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask
-#endif
+#define irqs_disabled()                                                        \
+       HYPERVISOR_shared_info->vcpu_data[__this_cpu()].evtchn_upcall_mask
 
 /*
  * disable hlt during certain critical i/o operations
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/include/asm-xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/asm-xen/gnttab.h     Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/gnttab.h     Thu Sep 22 17:42:01 2005
@@ -37,7 +37,7 @@
 void gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly);
 void gnttab_end_foreign_access(grant_ref_t ref, int readonly);
 
-int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
+int gnttab_grant_foreign_transfer(domid_t domid);
 
 unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
@@ -64,8 +64,7 @@
 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
                                     unsigned long frame, int readonly);
 
-void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
-                                      unsigned long pfn);
+void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid);
 
 #ifdef __ia64__
 #define gnttab_map_vaddr(map) __va(map.dev_bus_addr)
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Thu Sep 22 17:42:01 2005
@@ -83,8 +83,7 @@
 
 char **xenbus_directory(const char *dir, const char *node, unsigned int *num);
 void *xenbus_read(const char *dir, const char *node, unsigned int *len);
-int xenbus_write(const char *dir, const char *node,
-                const char *string, int createflags);
+int xenbus_write(const char *dir, const char *node, const char *string);
 int xenbus_mkdir(const char *dir, const char *node);
 int xenbus_exists(const char *dir, const char *node);
 int xenbus_rm(const char *dir, const char *node);
diff -r 97dbd9524a7e -r 06d84bf87159 tools/blktap/xenbus.c
--- a/tools/blktap/xenbus.c     Thu Sep 22 17:34:14 2005
+++ b/tools/blktap/xenbus.c     Thu Sep 22 17:42:01 2005
@@ -92,7 +92,7 @@
         if ((path == NULL) || (buf == NULL))
             return 0;
 
-        ret = xs_write(h, path, buf, strlen(buf)+1, O_CREAT);
+        ret = xs_write(h, path, buf, strlen(buf)+1);
 
         free(buf);
         free(path);
diff -r 97dbd9524a7e -r 06d84bf87159 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Thu Sep 22 17:34:14 2005
+++ b/tools/console/daemon/io.c Thu Sep 22 17:42:01 2005
@@ -165,7 +165,7 @@
                success = asprintf(&path, "%s/tty", dom->conspath) != -1;
                if (!success)
                        goto out;
-               success = xs_write(xs, path, slave, strlen(slave), O_CREAT);
+               success = xs_write(xs, path, slave, strlen(slave));
                free(path);
                if (!success)
                        goto out;
diff -r 97dbd9524a7e -r 06d84bf87159 tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure
--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure   Thu Sep 22 17:34:14 2005
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure   Thu Sep 22 17:42:01 2005
@@ -3475,7 +3475,7 @@
 
 
 GDBSERVER_DEPFILES="$srv_regobj $srv_tgtobj $srv_thread_depfiles"
-GDBSERVER_LIBS="$srv_libs -L../../../../../libxc/ -lxc"
+GDBSERVER_LIBS="$srv_libs -L../../../../../libxc/ -lxenctrl"
 
 
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in
--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in        Thu Sep 22 17:34:14 2005
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in        Thu Sep 22 17:42:01 2005
@@ -107,7 +107,7 @@
 
 
 GDBSERVER_DEPFILES="$srv_regobj $srv_tgtobj $srv_thread_depfiles"
-GDBSERVER_LIBS="$srv_libs -L../../../../../libxc/ -lxc"
+GDBSERVER_LIBS="$srv_libs -L../../../../../libxc/ -lxenctrl"
 
 AC_SUBST(GDBSERVER_DEPFILES)
 AC_SUBST(GDBSERVER_LIBS)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c
--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c     Thu Sep 22 17:34:14 2005
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c     Thu Sep 22 17:42:01 2005
@@ -37,9 +37,10 @@
 #include <errno.h>
 #include <xenctrl.h>
 #define TRACE_ENTER /* printf("enter %s\n", __FUNCTION__) */
-long (*myptrace)(enum __ptrace_request, pid_t, long, long);
-int (*myxcwait)(int domain, int *status, int options) ;
-
+
+long (*myptrace)(int xc_handle, enum __ptrace_request, u32, long, long);
+int (*myxcwait)(int xc_handle, int domain, int *status, int options) ;
+static int xc_handle;
 
 #define DOMFLAGS_DYING     (1<<0) /* Domain is scheduled to die.             */
 #define DOMFLAGS_SHUTDOWN  (1<<2) /* The guest OS has shut down.             */
@@ -47,11 +48,7 @@
 #define DOMFLAGS_BLOCKED   (1<<4) /* Currently blocked pending an event.     */
 #define DOMFLAGS_RUNNING   (1<<5) /* Domain is currently running.            */
 
-
-
 struct inferior_list all_processes;
-
-
 static int current_domain;
 static int expect_signal = 0;
 static int signal_to_send = 0; 
@@ -150,7 +147,7 @@
 {
     struct process_info *new_process;
     current_domain = domain;
-    if (myptrace (PTRACE_ATTACH, domain, 0, 0) != 0) {
+    if (myptrace (xc_handle, PTRACE_ATTACH, domain, 0, 0) != 0) {
        fprintf (stderr, "Cannot attach to domain %d: %s (%d)\n", domain,
                 strerror (errno), errno);
        fflush (stderr);
@@ -173,8 +170,7 @@
 {
   struct thread_info *thread = (struct thread_info *) entry;
   struct process_info *process = get_thread_process (thread);
-  myptrace (PTRACE_KILL, pid_of (process), 0, 0);
-
+  myptrace (xc_handle, PTRACE_KILL, pid_of (process), 0, 0);
 }
 
 static void
@@ -190,7 +186,7 @@
   struct thread_info *thread = (struct thread_info *) entry;
   struct process_info *process = get_thread_process (thread);
 
-  myptrace (PTRACE_DETACH, pid_of (process), 0, 0);
+  myptrace (xc_handle, PTRACE_DETACH, pid_of (process), 0, 0);
 }
 
 
@@ -216,7 +212,7 @@
 linux_wait (char *status)
 {
   int w;
-  if (myxcwait(current_domain, &w, 0))
+  if (myxcwait(xc_handle, current_domain, &w, 0))
       return -1;
   
   if (w & (DOMFLAGS_SHUTDOWN|DOMFLAGS_DYING)) {
@@ -241,7 +237,7 @@
   expect_signal = resume_info->sig;
   for_each_inferior(&all_threads, regcache_invalidate_one);
 
-  myptrace (step ? PTRACE_SINGLESTEP : PTRACE_CONT, current_domain, 0, 0);
+  myptrace (xc_handle, step ? PTRACE_SINGLESTEP : PTRACE_CONT, current_domain, 0, 0);
 
 }
 
@@ -265,7 +261,7 @@
        }
 
       buf = malloc (regset->size);
-      res = myptrace (regset->get_request, inferior_pid, 0, (PTRACE_XFER_TYPE)buf);
+      res = myptrace (xc_handle, regset->get_request, inferior_pid, 0, (PTRACE_XFER_TYPE)buf);
       if (res < 0)
        {
          if (errno == EIO)
@@ -317,7 +313,7 @@
 
       buf = malloc (regset->size);
       regset->fill_function (buf);
-      res = myptrace (regset->set_request, inferior_pid, 0, (PTRACE_XFER_TYPE)buf);
+      res = myptrace (xc_handle, regset->set_request, inferior_pid, 0, (PTRACE_XFER_TYPE)buf);
       if (res < 0)
        {
          if (errno == EIO)
@@ -395,7 +391,7 @@
   for (i = 0; i < count; i++, addr += sizeof (PTRACE_XFER_TYPE))
     {
       errno = 0;
-      buffer[i] = myptrace (PTRACE_PEEKTEXT, inferior_pid, (PTRACE_ARG3_TYPE) addr, 0);
+      buffer[i] = myptrace (xc_handle, PTRACE_PEEKTEXT, inferior_pid, (PTRACE_ARG3_TYPE) addr, 0);
       if (errno)
        return errno;
     }
@@ -428,13 +424,13 @@
 
   /* Fill start and end extra bytes of buffer with existing memory data.  */
 
-  buffer[0] = myptrace (PTRACE_PEEKTEXT, inferior_pid,
+  buffer[0] = myptrace (xc_handle, PTRACE_PEEKTEXT, inferior_pid,
                      (PTRACE_ARG3_TYPE) addr, 0);
 
   if (count > 1)
     {
       buffer[count - 1]
-       = myptrace (PTRACE_PEEKTEXT, inferior_pid,
+       = myptrace (xc_handle, PTRACE_PEEKTEXT, inferior_pid,
                  (PTRACE_ARG3_TYPE) (addr + (count - 1)
                                      * sizeof (PTRACE_XFER_TYPE)),
                  0);
@@ -448,7 +444,7 @@
   for (i = 0; i < count; i++, addr += sizeof (PTRACE_XFER_TYPE))
     {
       errno = 0;
-      myptrace (PTRACE_POKETEXT, inferior_pid, (PTRACE_ARG3_TYPE) addr, buffer[i]);
+      myptrace (xc_handle, PTRACE_POKETEXT, inferior_pid, (PTRACE_ARG3_TYPE) addr, buffer[i]);
       if (errno)
        return errno;
     }
@@ -539,7 +535,7 @@
 void
 initialize_low (void)
 {
-
+  xc_handle = xc_interface_open();
   set_target_ops (&linux_xen_target_ops);
   set_breakpoint_data (the_low_target.breakpoint,
                       the_low_target.breakpoint_len);
diff -r 97dbd9524a7e -r 06d84bf87159 tools/debugger/gdb/gdbbuild
--- a/tools/debugger/gdb/gdbbuild       Thu Sep 22 17:34:14 2005
+++ b/tools/debugger/gdb/gdbbuild       Thu Sep 22 17:42:01 2005
@@ -1,20 +1,18 @@
 #!/bin/sh
 
-XENROOT=`hg root`
-export XENROOT
-
-cd $XENROOT/tools/debugger/gdb
-rm -rf gdb-6.2.1  gdb-6.2.1-linux-i386-xen
-# FIXME:cw this should be smarter
-wget -c ftp://ftp.gnu.org/gnu/gdb/gdb-6.2.1.tar.bz2
+rm -rf gdb-6.2.1 gdb-6.2.1-linux-i386-xen
+# Download the tarball only if it is not already here ('-e' is the POSIX file-exists test).
+[ -e gdb-6.2.1.tar.bz2 ] || wget -c ftp://ftp.gnu.org/gnu/gdb/gdb-6.2.1.tar.bz2
 tar xjf gdb-6.2.1.tar.bz2
 
-cd $XENROOT/tools/debugger/gdb/gdb-6.2.1-xen-sparse
+cd gdb-6.2.1-xen-sparse
 ./mkbuildtree ../gdb-6.2.1
 
-mkdir $XENROOT/tools/debugger/gdb/gdb-6.2.1-linux-i386-xen
-cd $XENROOT/tools/debugger/gdb/gdb-6.2.1-linux-i386-xen
+cd ..
+mkdir gdb-6.2.1-linux-i386-xen
+cd gdb-6.2.1-linux-i386-xen
 ../gdb-6.2.1/configure
+
 # some people don't have gmake
 if which gmake ; then
     gmake -j4
diff -r 97dbd9524a7e -r 06d84bf87159 tools/examples/network-bridge
--- a/tools/examples/network-bridge     Thu Sep 22 17:34:14 2005
+++ b/tools/examples/network-bridge     Thu Sep 22 17:42:01 2005
@@ -1,4 +1,4 @@
-#!/bin/sh -x
+#!/bin/sh
 #============================================================================
 # Default Xen network start/stop script.
 # Xend calls a network script when it starts.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp    Thu Sep 22 17:34:14 2005
+++ b/tools/examples/xend-config.sxp    Thu Sep 22 17:42:01 2005
@@ -49,6 +49,6 @@
 # If dom0-min-mem=0, dom0 will never balloon out.
 (dom0-min-mem 0)
 
-# In SMP system, dom0 will use only CPUs in range [1,dom0-cpus]
+# In SMP system, dom0 will use dom0-cpus # of CPUS
 # If dom0-cpus = 0, dom0 will take all cpus available
 (dom0-cpus 0)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/examples/xmexample.vmx
--- a/tools/examples/xmexample.vmx      Thu Sep 22 17:34:14 2005
+++ b/tools/examples/xmexample.vmx      Thu Sep 22 17:42:01 2005
@@ -25,6 +25,10 @@
 
 # A name for your domain. All domains must have different names.
 name = "ExampleVMXDomain"
+
+#-----------------------------------------------------------------------------
+# the number of cpus guest platform has, default=1
+vcpus=1
 
 # Which CPU to start domain on? 
 #cpu = -1   # leave to Xen to pick
diff -r 97dbd9524a7e -r 06d84bf87159 tools/firmware/acpi/acpi_madt.c
--- a/tools/firmware/acpi/acpi_madt.c   Thu Sep 22 17:34:14 2005
+++ b/tools/firmware/acpi/acpi_madt.c   Thu Sep 22 17:42:01 2005
@@ -37,44 +37,7 @@
                                ACPI_LOCAL_APIC_ADDRESS,
                                ACPI_MULTIPLE_APIC_FLAGS,
                },
-               //
-               // LOCAL APIC Entries for 4 processors.
-               //
-               {
-                               {
-                                               ACPI_PROCESSOR_LOCAL_APIC,
-                                               sizeof (ACPI_LOCAL_APIC_STRUCTURE),
-                                               0x00,
-                                               0x00,
-                                               0x00000001,
-                               },
-
-                               {
-                                               ACPI_PROCESSOR_LOCAL_APIC,
-                                               sizeof (ACPI_LOCAL_APIC_STRUCTURE),
-                                               0x01,
-                                               0x00,
-                                               0x00000000
-                               },
-
-                               {
-                                               ACPI_PROCESSOR_LOCAL_APIC,
-                                               sizeof (ACPI_LOCAL_APIC_STRUCTURE),
-                                               0x02,
-                                               0x00,
-                                               0x00000000
-                               },
-
-                               {
-                                               ACPI_PROCESSOR_LOCAL_APIC,
-                                               sizeof (ACPI_LOCAL_APIC_STRUCTURE),
-                                               0x03,
-                                               0x00,
-                                               0x00000000
-                               }
-               }
-               ,
-
+       
                //
                // IO APIC
                // 
@@ -87,5 +50,19 @@
                                                ACPI_IO_APIC_ADDRESS_1,
                                                0x0000
                                }
+               },
+
+               //
+               // LOCAL APIC Entries for up to 32 processors.
+               //
+               {
+                               {
+                                               ACPI_PROCESSOR_LOCAL_APIC,
+                                               sizeof (ACPI_LOCAL_APIC_STRUCTURE),
+                                               0x00,
+                                               0x00,
+                                               0x00000001,
+                               }
+
                }
 };
diff -r 97dbd9524a7e -r 06d84bf87159 tools/firmware/acpi/acpi_madt.h
--- a/tools/firmware/acpi/acpi_madt.h   Thu Sep 22 17:34:14 2005
+++ b/tools/firmware/acpi/acpi_madt.h   Thu Sep 22 17:42:01 2005
@@ -35,9 +35,9 @@
 //
 #pragma pack (1)
 typedef struct {
-  ACPI_2_0_MADT                                Header;
-  ACPI_LOCAL_APIC_STRUCTURE     LocalApic[4];
-  ACPI_IO_APIC_STRUCTURE        IoApic[1];
+       ACPI_2_0_MADT                   Header;
+       ACPI_IO_APIC_STRUCTURE          IoApic[1];
+       ACPI_LOCAL_APIC_STRUCTURE       LocalApic[32];
 } ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE;
 #pragma pack ()
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/firmware/vmxassist/Makefile
--- a/tools/firmware/vmxassist/Makefile Thu Sep 22 17:34:14 2005
+++ b/tools/firmware/vmxassist/Makefile Thu Sep 22 17:42:01 2005
@@ -41,9 +41,9 @@
 
 all: vmxloader
 
-vmxloader: roms.h vmxloader.c acpi.h
-       ${CC} ${CFLAGS} ${DEFINES} -c vmxloader.c
-       $(CC) -o vmxloader.tmp -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,0x100000 vmxloader.o
+vmxloader: roms.h vmxloader.c acpi.h acpi_madt.c
+       ${CC} ${CFLAGS} ${DEFINES} -c vmxloader.c -c acpi_madt.c
+       $(CC) -o vmxloader.tmp -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,0x100000 vmxloader.o acpi_madt.o
        objcopy --change-addresses=0xC0000000 vmxloader.tmp vmxloader
        rm -f vmxloader.tmp
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/firmware/vmxassist/vmxloader.c
--- a/tools/firmware/vmxassist/vmxloader.c      Thu Sep 22 17:34:14 2005
+++ b/tools/firmware/vmxassist/vmxloader.c      Thu Sep 22 17:42:01 2005
@@ -27,6 +27,7 @@
 #ifdef _ACPI_
 #include "acpi.h"
 #include "../acpi/acpi2_0.h"  // for ACPI_PHYSICAL_ADDRESS
+int acpi_madt_update(unsigned char* acpi_start);
 #endif
 
 
@@ -110,7 +111,10 @@
        }
 #ifdef _ACPI_
        puts("Loading ACPI ...\n");
-       if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000 ){
+
+       acpi_madt_update(acpi);
+
+       if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
                /* make sure acpi table does not overlap rombios
                 * currently acpi less than 8K will be OK.
                 */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Thu Sep 22 17:34:14 2005
+++ b/tools/ioemu/vl.c  Thu Sep 22 17:42:01 2005
@@ -126,6 +126,7 @@
 int vm_running;
 int audio_enabled = 0;
 int nic_pcnet = 1;
+int vcpus = 1;
 int sb16_enabled = 1;
 int adlib_enabled = 1;
 int gus_enabled = 1;
@@ -2105,6 +2106,7 @@
           "-snapshot       write to temporary files instead of disk image files\n"
            "-m megs         set virtual RAM size to megs MB [default=%d]\n"
            "-nographic      disable graphical output and redirect serial I/Os to console\n"
+           "-vcpus          set CPU number of guest platform\n"
 #ifdef CONFIG_VNC
           "-vnc port             use vnc instead of sdl\n"
           "-vncport port         use a different port\n"
@@ -2235,6 +2237,7 @@
     QEMU_OPTION_hdachs,
     QEMU_OPTION_L,
     QEMU_OPTION_no_code_copy,
+    QEMU_OPTION_vcpus,
     QEMU_OPTION_pci,
     QEMU_OPTION_nic_pcnet,
     QEMU_OPTION_isa,
@@ -2307,6 +2310,7 @@
     { "hdachs", HAS_ARG, QEMU_OPTION_hdachs },
     { "L", HAS_ARG, QEMU_OPTION_L },
     { "no-code-copy", 0, QEMU_OPTION_no_code_copy },
+    { "vcpus", 1, QEMU_OPTION_vcpus },
 #ifdef TARGET_PPC
     { "prep", 0, QEMU_OPTION_prep },
     { "g", 1, QEMU_OPTION_g },
@@ -2646,6 +2650,10 @@
             case QEMU_OPTION_S:
                 start_emulation = 0;
                 break;
+            case QEMU_OPTION_vcpus:
+                vcpus = atoi(optarg);
+                fprintf(logfile, "qemu: the number of cpus is %d\n", vcpus);
+                break; /* don't fall through into the -pci case and set pci_enabled */
             case QEMU_OPTION_pci:
                 pci_enabled = 1;
                 break;
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/Makefile      Thu Sep 22 17:42:01 2005
@@ -26,19 +26,21 @@
 BUILD_SRCS += xc_linux_build.c
 BUILD_SRCS += xc_load_bin.c
 BUILD_SRCS += xc_load_elf.c
-BUILD_SRCS += xg_private.c
 
 ifeq ($(XEN_TARGET_ARCH),ia64)
 BUILD_SRCS += xc_ia64_stubs.c
 else
+ifeq ($(XEN_TARGET_ARCH),x86_32)
 SRCS       += xc_ptrace.c
 SRCS       += xc_ptrace_core.c
-
-BUILD_SRCS := xc_load_aout9.c
+endif
+BUILD_SRCS += xc_load_aout9.c
 BUILD_SRCS += xc_linux_restore.c
 BUILD_SRCS += xc_linux_save.c
 BUILD_SRCS += xc_vmx_build.c
 endif
+
+BUILD_SRCS += xg_private.c
 
 CFLAGS   += -Wall
 CFLAGS   += -Werror
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_core.c     Thu Sep 22 17:42:01 2005
@@ -11,10 +11,10 @@
 
 static int
 copy_from_domain_page(int xc_handle,
-                     u32 domid,
-                     unsigned long *page_array,
-                     unsigned long src_pfn,
-                     void *dst_page)
+                      u32 domid,
+                      unsigned long *page_array,
+                      unsigned long src_pfn,
+                      void *dst_page)
 {
     void *vaddr = xc_map_foreign_range(
         xc_handle, domid, PAGE_SIZE, PROT_READ, page_array[src_pfn]);
@@ -27,90 +27,100 @@
 
 int 
 xc_domain_dumpcore(int xc_handle,
-                  u32 domid,
-                  const char *corename)
+                   u32 domid,
+                   const char *corename)
 {
-       unsigned long nr_pages;
-       unsigned long *page_array;
-       xc_dominfo_t info;
-       int i, j, vcpu_map_size, dump_fd;
-       char *dump_mem, *dump_mem_start = NULL;
-       struct xc_core_header header;
-       vcpu_guest_context_t     ctxt[MAX_VIRT_CPUS];
+    unsigned long nr_pages;
+    unsigned long *page_array;
+    xc_dominfo_t info;
+    int i, j, vcpu_map_size, dump_fd;
+    char *dump_mem, *dump_mem_start = NULL;
+    struct xc_core_header header;
+    vcpu_guest_context_t     ctxt[MAX_VIRT_CPUS];
 
-       
-       if ((dump_fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0) {
-               PERROR("Could not open corefile %s: %s", corename, strerror(errno));
-               goto error_out;
-       }
-       
-       if ((dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL) {
-               PERROR("Could not allocate dump_mem");
-               goto error_out;
-       }
-       
-       if (xc_domain_getinfo(xc_handle, domid, 1, &info) != 1) {
-               PERROR("Could not get info for domain");
-               goto error_out;
-       }
-       
-       vcpu_map_size =  sizeof(info.vcpu_to_cpu) / sizeof(info.vcpu_to_cpu[0]);
+ 
+    if ((dump_fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0) {
+        PERROR("Could not open corefile %s: %s", corename, strerror(errno));
+        goto error_out;
+    }
+ 
+    if ((dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL) {
+        PERROR("Could not allocate dump_mem");
+        goto error_out;
+    }
+ 
+    if (xc_domain_getinfo(xc_handle, domid, 1, &info) != 1) {
+        PERROR("Could not get info for domain");
+        goto error_out;
+    }
+ 
+    vcpu_map_size =  sizeof(info.vcpu_to_cpu) / sizeof(info.vcpu_to_cpu[0]);
 
-       for (i = 0, j = 0; i < vcpu_map_size; i++) {
-               if (info.vcpu_to_cpu[i] == -1) {
-                       continue;
-               }
-               if (xc_domain_get_vcpu_context(xc_handle, domid, i, &ctxt[j])) {
-                       PERROR("Could not get all vcpu contexts for domain");
-                       goto error_out;
-               }
-               j++;
-       }
-       
-       nr_pages = info.nr_pages;
+    for (i = 0, j = 0; i < vcpu_map_size; i++) {
+        if (info.vcpu_to_cpu[i] == -1) {
+            continue;
+        }
+        if (xc_domain_get_vcpu_context(xc_handle, domid, i, &ctxt[j])) {
+            PERROR("Could not get all vcpu contexts for domain");
+            goto error_out;
+        }
+        j++;
+    }
+ 
+    nr_pages = info.nr_pages;
 
-       header.xch_magic = 0xF00FEBED; 
-       header.xch_nr_vcpus = info.vcpus;
-       header.xch_nr_pages = nr_pages;
-       header.xch_ctxt_offset = sizeof(struct xc_core_header);
-       header.xch_index_offset = sizeof(struct xc_core_header) +
-           sizeof(vcpu_guest_context_t)*info.vcpus;
-       header.xch_pages_offset = round_pgup(sizeof(struct xc_core_header) +
-           (sizeof(vcpu_guest_context_t) * info.vcpus) + 
-           (nr_pages * sizeof(unsigned long)));
+    header.xch_magic = 0xF00FEBED; 
+    header.xch_nr_vcpus = info.vcpus;
+    header.xch_nr_pages = nr_pages;
+    header.xch_ctxt_offset = sizeof(struct xc_core_header);
+    header.xch_index_offset = sizeof(struct xc_core_header) +
+        sizeof(vcpu_guest_context_t)*info.vcpus;
+    header.xch_pages_offset = round_pgup(sizeof(struct xc_core_header) +
+                                         (sizeof(vcpu_guest_context_t) * info.vcpus) +
+                                         (nr_pages * sizeof(unsigned long)));
 
-       write(dump_fd, &header, sizeof(struct xc_core_header));
-       write(dump_fd, &ctxt, sizeof(ctxt[0]) * info.vcpus);
+    write(dump_fd, &header, sizeof(struct xc_core_header));
+    write(dump_fd, &ctxt, sizeof(ctxt[0]) * info.vcpus);
 
-       if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
-           printf("Could not allocate memory\n");
-           goto error_out;
-       }
-       if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) {
-           printf("Could not get the page frame list\n");
-           goto error_out;
-       }
-       write(dump_fd, page_array, nr_pages * sizeof(unsigned long));
-       lseek(dump_fd, header.xch_pages_offset, SEEK_SET);
-       for (dump_mem = dump_mem_start, i = 0; i < nr_pages; i++) {
-               copy_from_domain_page(xc_handle, domid, page_array, i, dump_mem);
-               dump_mem += PAGE_SIZE;
-               if (((i + 1) % DUMP_INCREMENT == 0) || (i + 1) == nr_pages) {
-                       if (write(dump_fd, dump_mem_start, dump_mem - dump_mem_start) <
-                           dump_mem - dump_mem_start) {
-                               PERROR("Partial write, file system full?");
-                               goto error_out;
-                       }
-                       dump_mem = dump_mem_start;
-               }
-       }
+    if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
+        printf("Could not allocate memory\n");
+        goto error_out;
+    }
+    if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) {
+        printf("Could not get the page frame list\n");
+        goto error_out;
+    }
+    write(dump_fd, page_array, nr_pages * sizeof(unsigned long));
+    lseek(dump_fd, header.xch_pages_offset, SEEK_SET);
+    for (dump_mem = dump_mem_start, i = 0; i < nr_pages; i++) {
+        copy_from_domain_page(xc_handle, domid, page_array, i, dump_mem);
+        dump_mem += PAGE_SIZE;
+        if (((i + 1) % DUMP_INCREMENT == 0) || (i + 1) == nr_pages) {
+            if (write(dump_fd, dump_mem_start, dump_mem - dump_mem_start) < 
+                dump_mem - dump_mem_start) {
+                PERROR("Partial write, file system full?");
+                goto error_out;
+            }
+            dump_mem = dump_mem_start;
+        }
+    }
 
-       close(dump_fd);
-       free(dump_mem_start);
-       return 0;
+    close(dump_fd);
+    free(dump_mem_start);
+    return 0;
  error_out:
-       if (dump_fd != -1)
-               close(dump_fd);
-       free(dump_mem_start);
-       return -1;
+    if (dump_fd != -1)
+        close(dump_fd);
+    free(dump_mem_start);
+    return -1;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_domain.c   Thu Sep 22 17:42:01 2005
@@ -265,7 +265,7 @@
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
                                           unsigned int address_bits,
-                                         unsigned long *extent_start)
+                                          unsigned long *extent_start)
 {
     int err;
     struct xen_memory_reservation reservation = {
@@ -296,7 +296,7 @@
                                           u32 domid, 
                                           unsigned long nr_extents,
                                           unsigned int extent_order,
-                                         unsigned long *extent_start)
+                                          unsigned long *extent_start)
 {
     int err;
     struct xen_memory_reservation reservation = {
@@ -328,3 +328,13 @@
 
     return err;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c       Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_ia64_stubs.c       Thu Sep 22 17:42:01 2005
@@ -9,8 +9,8 @@
 }
 
 int xc_linux_restore(int xc_handle, int io_fd, u32 dom, unsigned long nr_pfns,
-                    unsigned int store_evtchn, unsigned long *store_mfn,
-                    unsigned int console_evtchn, unsigned long *console_mfn)
+                     unsigned int store_evtchn, unsigned long *store_mfn,
+                     unsigned int console_evtchn, unsigned long *console_mfn)
 {
     PERROR("xc_linux_restore not implemented\n");
     return -1;
@@ -44,3 +44,12 @@
     return -1;
 }
 
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_linux_build.c      Thu Sep 22 17:42:01 2005
@@ -12,7 +12,6 @@
 #if defined(__x86_64__) || defined(__ia64__)
 #define ELFSIZE 64
 #endif
-
 
 #include "xc_elf.h"
 #include "xc_aout9.h"
@@ -33,6 +32,13 @@
 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #endif
 
+#ifdef __ia64__
+#define already_built(ctxt) (0)
+#define get_tot_pages xc_get_max_pages
+#else
+#define already_built(ctxt) ((ctxt)->ctrlreg[3] != 0)
+#define get_tot_pages xc_get_tot_pages
+#endif
 
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
@@ -47,7 +53,7 @@
 {
     if ( probe_elf(image, image_size, load_funcs) &&
          probe_bin(image, image_size, load_funcs) &&
-        probe_aout9(image, image_size, load_funcs) )
+         probe_aout9(image, image_size, load_funcs) )
     {
         ERROR( "Unrecognized image format" );
         return -EINVAL;
@@ -56,27 +62,27 @@
     return 0;
 }
 
-#define alloc_pt(ltab, vltab) \
-        ltab = (unsigned long long)(page_array[ppt_alloc++]) << PAGE_SHIFT; \
-        if (vltab != NULL) { \
-            munmap(vltab, PAGE_SIZE); \
-        } \
-        if ((vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, \
-                          PROT_READ|PROT_WRITE, \
-                          ltab >> PAGE_SHIFT)) == NULL) { \
-            goto error_out; \
-        } \
-        memset(vltab, 0, PAGE_SIZE);
+#define alloc_pt(ltab, vltab)                                           \
+do {                                                                    \
+    ltab = (u64)page_array[ppt_alloc++] << PAGE_SHIFT;                  \
+    if ( vltab != NULL )                                                \
+        munmap(vltab, PAGE_SIZE);                                       \
+    if ( (vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,       \
+                                       PROT_READ|PROT_WRITE,            \
+                                       ltab >> PAGE_SHIFT)) == NULL )   \
+        goto error_out;                                                 \
+    memset(vltab, 0, PAGE_SIZE);                                        \
+} while ( 0 )
 
 #if defined(__i386__)
 
 static int setup_pg_tables(int xc_handle, u32 dom,
-                          vcpu_guest_context_t *ctxt,
-                          unsigned long dsi_v_start,
-                          unsigned long v_end,
-                          unsigned long *page_array,
-                          unsigned long vpt_start,
-                          unsigned long vpt_end)
+                           vcpu_guest_context_t *ctxt,
+                           unsigned long dsi_v_start,
+                           unsigned long v_end,
+                           unsigned long *page_array,
+                           unsigned long vpt_start,
+                           unsigned long vpt_end)
 {
     l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
     l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -90,11 +96,11 @@
     vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
     ctxt->ctrlreg[3] = l2tab;
 
-    for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++ )
+    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++ )
     {    
         if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
         {
-           alloc_pt(l1tab, vl1tab);
+            alloc_pt(l1tab, vl1tab);
             vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
             *vl2e++ = l1tab | L2_PROT;
         }
@@ -111,79 +117,67 @@
 
  error_out:
     if (vl1tab)
-       munmap(vl1tab, PAGE_SIZE);
+        munmap(vl1tab, PAGE_SIZE);
     if (vl2tab)
-       munmap(vl2tab, PAGE_SIZE);
+        munmap(vl2tab, PAGE_SIZE);
     return -1;
 }
 
 static int setup_pg_tables_pae(int xc_handle, u32 dom,
-                              vcpu_guest_context_t *ctxt,
-                              unsigned long dsi_v_start,
-                              unsigned long v_end,
-                              unsigned long *page_array,
-                              unsigned long vpt_start,
-                              unsigned long vpt_end)
+                               vcpu_guest_context_t *ctxt,
+                               unsigned long dsi_v_start,
+                               unsigned long v_end,
+                               unsigned long *page_array,
+                               unsigned long vpt_start,
+                               unsigned long vpt_end)
 {
-    l1_pgentry_64_t *vl1tab=NULL, *vl1e=NULL;
-    l2_pgentry_64_t *vl2tab=NULL, *vl2e=NULL;
-    l3_pgentry_64_t *vl3tab=NULL, *vl3e=NULL;
-    unsigned long long l1tab = 0;
-    unsigned long long l2tab = 0;
-    unsigned long long l3tab = 0;
-    unsigned long ppt_alloc;
-    unsigned long count;
+    l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL;
+    l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL;
+    l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL;
+    u64 l1tab, l2tab, l3tab;
+    unsigned long ppt_alloc, count, nmfn;
 
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
 
     if ( page_array[ppt_alloc] > 0xfffff )
     {
-       unsigned long nmfn;
-       nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
-       if ( nmfn == 0 )
-       {
-           fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
-           goto error_out;
-       }
-       page_array[ppt_alloc] = nmfn;
+        nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]);
+        if ( nmfn == 0 )
+        {
+            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
+            goto error_out;
+        }
+        page_array[ppt_alloc] = nmfn;
     }
 
     alloc_pt(l3tab, vl3tab);
     vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
     ctxt->ctrlreg[3] = l3tab;
 
-    if(l3tab>0xfffff000ULL)
-    {
-        fprintf(stderr,"L3TAB = %llx above 4GB!\n",l3tab);
-        goto error_out;
-    }
- 
-    for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
+    for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++)
     {
         if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
         {
+            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
+            {
+                alloc_pt(l2tab, vl2tab);
+                vl2e = &vl2tab[l2_table_offset_pae(
+                    dsi_v_start + (count << PAGE_SHIFT))];
+                *vl3e++ = l2tab | L3_PROT;
+            }
+
             alloc_pt(l1tab, vl1tab);
-            
-                if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
-                {
-                    alloc_pt(l2tab, vl2tab);
-                    vl2e = &vl2tab[l2_table_offset_pae(dsi_v_start + (count<<PAGE_SHIFT))];
-                    *vl3e = l2tab | L3_PROT;
-                    vl3e++;
-                }
-            vl1e = &vl1tab[l1_table_offset_pae(dsi_v_start + (count<<PAGE_SHIFT))];
-            *vl2e = l1tab | L2_PROT;
-            vl2e++;
+            vl1e = &vl1tab[l1_table_offset_pae(
+                dsi_v_start + (count << PAGE_SHIFT))];
+            *vl2e++ = l1tab | L2_PROT;
         }
         
-        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+        *vl1e = ((u64)page_array[count] << PAGE_SHIFT) | L1_PROT;
         if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
-            (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) ) 
-        {
-           *vl1e &= ~_PAGE_RW;
-        }
-       vl1e++;
+             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) ) 
+            *vl1e &= ~_PAGE_RW;
+        vl1e++;
     }
      
     munmap(vl1tab, PAGE_SIZE);
@@ -193,11 +187,11 @@
 
  error_out:
     if (vl1tab)
-       munmap(vl1tab, PAGE_SIZE);
+        munmap(vl1tab, PAGE_SIZE);
     if (vl2tab)
-       munmap(vl2tab, PAGE_SIZE);
+        munmap(vl2tab, PAGE_SIZE);
     if (vl3tab)
-       munmap(vl3tab, PAGE_SIZE);
+        munmap(vl3tab, PAGE_SIZE);
     return -1;
 }
 
@@ -206,12 +200,12 @@
 #if defined(__x86_64__)
 
 static int setup_pg_tables_64(int xc_handle, u32 dom,
-                             vcpu_guest_context_t *ctxt,
-                             unsigned long dsi_v_start,
-                             unsigned long v_end,
-                             unsigned long *page_array,
-                             unsigned long vpt_start,
-                             unsigned long vpt_end)
+                              vcpu_guest_context_t *ctxt,
+                              unsigned long dsi_v_start,
+                              unsigned long v_end,
+                              unsigned long *page_array,
+                              unsigned long vpt_start,
+                              unsigned long vpt_end)
 {
     l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
     l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -236,20 +230,20 @@
         {
             alloc_pt(l1tab, vl1tab);
             
-                if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
+            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
+            {
+                alloc_pt(l2tab, vl2tab);
+                if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
                 {
-                    alloc_pt(l2tab, vl2tab);
-                    if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
-                    {
-                        alloc_pt(l3tab, vl3tab);
-                        vl3e = &vl3tab[l3_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
-                        *vl4e = l3tab | L4_PROT;
-                        vl4e++;
-                    }
-                    vl2e = &vl2tab[l2_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
-                    *vl3e = l2tab | L3_PROT;
-                    vl3e++;
+                    alloc_pt(l3tab, vl3tab);
+                    vl3e = &vl3tab[l3_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
+                    *vl4e = l3tab | L4_PROT;
+                    vl4e++;
                 }
+                vl2e = &vl2tab[l2_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
+                *vl3e = l2tab | L3_PROT;
+                vl3e++;
+            }
             vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
             *vl2e = l1tab | L2_PROT;
             vl2e++;
@@ -257,11 +251,11 @@
         
         *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
         if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
-            (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) ) 
-        {
-                *vl1e &= ~_PAGE_RW;
-        }
-            vl1e++;
+             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) ) 
+        {
+            *vl1e &= ~_PAGE_RW;
+        }
+        vl1e++;
     }
      
     munmap(vl1tab, PAGE_SIZE);
@@ -272,13 +266,13 @@
 
  error_out:
     if (vl1tab)
-       munmap(vl1tab, PAGE_SIZE);
+        munmap(vl1tab, PAGE_SIZE);
     if (vl2tab)
-       munmap(vl2tab, PAGE_SIZE);
+        munmap(vl2tab, PAGE_SIZE);
     if (vl3tab)
-       munmap(vl3tab, PAGE_SIZE);
+        munmap(vl3tab, PAGE_SIZE);
     if (vl4tab)
-       munmap(vl4tab, PAGE_SIZE);
+        munmap(vl4tab, PAGE_SIZE);
     return -1;
 }
 #endif
@@ -286,18 +280,18 @@
 #ifdef __ia64__
 #include <asm/fpu.h> /* for FPSR_DEFAULT */
 static int setup_guest(int xc_handle,
-                         u32 dom,
-                         char *image, unsigned long image_size,
-                         gzFile initrd_gfd, unsigned long initrd_len,
-                         unsigned long nr_pages,
-                         unsigned long *pvsi, unsigned long *pvke,
-                         unsigned long *pvss, vcpu_guest_context_t *ctxt,
-                         const char *cmdline,
-                         unsigned long shared_info_frame,
-                         unsigned long flags,
-                         unsigned int vcpus,
-                         unsigned int store_evtchn, unsigned long *store_mfn,
-                        unsigned int console_evtchn, unsigned long *console_mfn)
+                       u32 dom,
+                       char *image, unsigned long image_size,
+                       gzFile initrd_gfd, unsigned long initrd_len,
+                       unsigned long nr_pages,
+                       unsigned long *pvsi, unsigned long *pvke,
+                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
+                       const char *cmdline,
+                       unsigned long shared_info_frame,
+                       unsigned long flags,
+                       unsigned int vcpus,
+                       unsigned int store_evtchn, unsigned long *store_mfn,
+                       unsigned int console_evtchn, unsigned long *console_mfn)
 {
     unsigned long *page_array = NULL;
     struct load_funcs load_funcs;
@@ -339,19 +333,20 @@
     *pvke = dsi.v_kernentry;
 
     /* Now need to retrieve machine pfn for system pages:
-     *         start_info/store/console
+     *  start_info/store/console
      */
     pgnr = 3;
-    if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, nr_pages - 3, pgnr) != pgnr)
-    {
-       PERROR("Could not get page frame for xenstore");
-       goto error_out;
+    if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array,
+                              nr_pages - 3, pgnr) != pgnr )
+    {
+        PERROR("Could not get page frame for xenstore");
+        goto error_out;
     }
 
     *store_mfn = page_array[1];
     *console_mfn = page_array[2];
     printf("store_mfn: 0x%lx, console_mfn: 0x%lx\n",
-       (u64)store_mfn, (u64)console_mfn);
+           (u64)store_mfn, (u64)console_mfn);
 
     start_info = xc_map_foreign_range(
         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]);
@@ -382,8 +377,8 @@
                        unsigned long shared_info_frame,
                        unsigned long flags,
                        unsigned int vcpus,
-                      unsigned int store_evtchn, unsigned long *store_mfn,
-                      unsigned int console_evtchn, unsigned long *console_mfn)
+                       unsigned int store_evtchn, unsigned long *store_mfn,
+                       unsigned int console_evtchn, unsigned long *console_mfn)
 {
     unsigned long *page_array = NULL;
     unsigned long count, i;
@@ -458,26 +453,26 @@
         if ( (v_end - vstack_end) < (512UL << 10) )
             v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
 #if defined(__i386__)
-       if (dsi.pae_kernel) {
-           /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
-           if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >> 
-                  L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
-               break;
-       } else {
-           if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> 
-                  L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
-               break;
-       }
+        if (dsi.pae_kernel) {
+            /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
+            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >> 
+                   L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
+                break;
+        } else {
+            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >> 
+                   L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
+                break;
+        }
 #endif
 #if defined(__x86_64__)
 #define NR(_l,_h,_s) \
     (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
     ((_l) & ~((1UL<<(_s))-1))) >> (_s))
-    if ( (1 + /* # L4 */
-        NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
-        NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
-        NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
-        <= nr_pt_pages )
+        if ( (1 + /* # L4 */
+              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
+              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
+              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
+             <= nr_pt_pages )
             break;
 #endif
     }
@@ -541,7 +536,7 @@
                 goto error_out;
             }
             xc_copy_to_domain_page(xc_handle, dom,
-                                page_array[i>>PAGE_SHIFT], page);
+                                   page_array[i>>PAGE_SHIFT], page);
         }
     }
 
@@ -551,22 +546,22 @@
     /* setup page tables */
 #if defined(__i386__)
     if (dsi.pae_kernel)
-       rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
-                                dsi.v_start, v_end,
-                                page_array, vpt_start, vpt_end);
+        rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
+                                 dsi.v_start, v_end,
+                                 page_array, vpt_start, vpt_end);
     else {
-       rc = setup_pg_tables(xc_handle, dom, ctxt,
-                            dsi.v_start, v_end,
-                            page_array, vpt_start, vpt_end);
+        rc = setup_pg_tables(xc_handle, dom, ctxt,
+                             dsi.v_start, v_end,
+                             page_array, vpt_start, vpt_end);
     }
 #endif
 #if defined(__x86_64__)
     rc = setup_pg_tables_64(xc_handle, dom, ctxt,
-                           dsi.v_start, v_end,
-                           page_array, vpt_start, vpt_end);
+                            dsi.v_start, v_end,
+                            page_array, vpt_start, vpt_end);
 #endif
     if (0 != rc)
-       goto error_out;
+        goto error_out;
 
     /* Write the phys->machine and machine->phys table entries. */
     physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
@@ -576,11 +571,13 @@
 
     for ( count = 0; count < nr_pages; count++ )
     {
-        if ( xc_add_mmu_update(xc_handle, mmu,
-                              ((unsigned long long)page_array[count] << PAGE_SHIFT) | 
-                              MMU_MACHPHYS_UPDATE, count) )
-        {
-            fprintf(stderr,"m2p update failure p=%lx m=%lx\n",count,page_array[count] ); 
+        if ( xc_add_mmu_update(
+            xc_handle, mmu,
+            ((u64)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
+            count) )
+        {
+            fprintf(stderr,"m2p update failure p=%lx m=%lx\n",
+                    count, page_array[count]); 
             munmap(physmap, PAGE_SIZE);
             goto error_out;
         }
@@ -601,13 +598,13 @@
      * correct protection for the page
      */
     if (dsi.pae_kernel) {
-       if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
-                      ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
-           goto error_out;
+        if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
+                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+            goto error_out;
     } else {
-       if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
-                      ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
-           goto error_out;
+        if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
+                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+            goto error_out;
     }
 #endif
 
@@ -616,8 +613,8 @@
      * Pin down l4tab addr as page dir page - causes hypervisor to  provide
      * correct protection for the page
      */
-     if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
-                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
+    if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
+                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
         goto error_out;
 #endif
 
@@ -703,12 +700,7 @@
     unsigned long image_size, initrd_size=0;
     unsigned long vstartinfo_start, vkern_entry, vstack_start;
 
-#ifdef __ia64__
-    /* Current xen/ia64 allocates domU pages on demand */
-    if ( (nr_pages = xc_get_max_pages(xc_handle, domid)) < 0 )
-#else
-    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
-#endif
+    if ( (nr_pages = get_tot_pages(xc_handle, domid)) < 0 )
     {
         PERROR("Could not find total pages for domain");
         goto error_out;
@@ -755,12 +747,7 @@
         goto error_out;
     }
 
-    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
-#ifdef __ia64__
-       0 )
-#else
-         (ctxt->ctrlreg[3] != 0) )
-#endif
+    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) || already_built(ctxt) )
     {
         ERROR("Domain is already constructed");
         goto error_out;
@@ -773,7 +760,7 @@
                      op.u.getdomaininfo.shared_info_frame,
                      flags, vcpus,
                      store_evtchn, store_mfn,
-                    console_evtchn, console_mfn) < 0 )
+                     console_evtchn, console_mfn) < 0 )
     {
         ERROR("Error constructing guest OS");
         goto error_out;
@@ -789,12 +776,13 @@
     /* based on new_thread in xen/arch/ia64/domain.c */
     ctxt->flags = 0;
     ctxt->shared.flags = flags;
-    ctxt->shared.start_info_pfn = nr_pages - 3; // metaphysical
+    ctxt->shared.start_info_pfn = nr_pages - 3; /* metaphysical */
     ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
     ctxt->regs.cr_iip = vkern_entry;
     ctxt->regs.cr_ifs = 1UL << 63;
     ctxt->regs.ar_fpsr = FPSR_DEFAULT;
-    /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */
+    /* currently done by hypervisor, should move here */
+    /* ctxt->regs.r28 = dom_fw_setup(); */
     ctxt->vcpu.privregs = 0;
     ctxt->sys_pgnr = nr_pages - 3;
     i = 0; /* silence unused variable warning */
@@ -875,3 +863,13 @@
 
     return -1;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c       Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_linux_save.c       Thu Sep 22 17:42:01 2005
@@ -17,7 +17,6 @@
 #define BATCH_SIZE 1024   /* 1024 pages (4MB) at a time */
 
 #define MAX_MBIT_RATE 500
-
 
 /*
 ** Default values for important tuning parameters. Can override by passing
@@ -29,12 +28,9 @@
 #define DEF_MAX_ITERS   29   /* limit us to 30 times round loop */ 
 #define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns */
 
-
-
 /* Flags to control behaviour of xc_linux_save */
 #define XCFLAGS_LIVE      1
 #define XCFLAGS_DEBUG     2
-
 
 #define DEBUG 0
 
@@ -115,8 +111,8 @@
     int i, count = 0;
     unsigned long *p = (unsigned long *)addr;
     /* We know that the array is padded to unsigned long. */
-    for(i=0;i<nr/(sizeof(unsigned long)*8);i++,p++)
-        count += hweight32( *p );
+    for( i = 0; i < (nr / (sizeof(unsigned long)*8)); i++, p++ )
+        count += hweight32(*p);
     return count;
 }
 
@@ -201,42 +197,50 @@
     struct timespec delay;
     long long delta;
 
-    if (START_MBIT_RATE == 0)
-       return write(io_fd, buf, n);
+    if ( START_MBIT_RATE == 0 )
+        return write(io_fd, buf, n);
     
     budget -= n;
-    if (budget < 0) {
-       if (MBIT_RATE != ombit_rate) {
-           BURST_TIME_US = RATE_TO_BTU / MBIT_RATE;
-           ombit_rate = MBIT_RATE;
-           DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
-                   MBIT_RATE, BURST_BUDGET, BURST_TIME_US);
-       }
-       if (last_put.tv_sec == 0) {
-           budget += BURST_BUDGET;
-           gettimeofday(&last_put, NULL);
-       } else {
-           while (budget < 0) {
-               gettimeofday(&now, NULL);
-               delta = tv_delta(&now, &last_put);
-               while (delta > BURST_TIME_US) {
-                   budget += BURST_BUDGET;
-                   last_put.tv_usec += BURST_TIME_US;
-                   if (last_put.tv_usec > 1000000) {
-                       last_put.tv_usec -= 1000000;
-                       last_put.tv_sec++;
-                   }
-                   delta -= BURST_TIME_US;
-               }
-               if (budget > 0)
-                   break;
-               delay.tv_sec = 0;
-               delay.tv_nsec = 1000 * (BURST_TIME_US - delta);
-               while (delay.tv_nsec > 0)
-                   if (nanosleep(&delay, &delay) == 0)
-                       break;
-           }
-       }
+    if ( budget < 0 )
+    {
+        if ( MBIT_RATE != ombit_rate )
+        {
+            BURST_TIME_US = RATE_TO_BTU / MBIT_RATE;
+            ombit_rate = MBIT_RATE;
+            DPRINTF("rate limit: %d mbit/s burst budget %d slot time %d\n",
+                    MBIT_RATE, BURST_BUDGET, BURST_TIME_US);
+        }
+        if ( last_put.tv_sec == 0 )
+        {
+            budget += BURST_BUDGET;
+            gettimeofday(&last_put, NULL);
+        }
+        else
+        {
+            while ( budget < 0 )
+            {
+                gettimeofday(&now, NULL);
+                delta = tv_delta(&now, &last_put);
+                while ( delta > BURST_TIME_US )
+                {
+                    budget += BURST_BUDGET;
+                    last_put.tv_usec += BURST_TIME_US;
+                    if ( last_put.tv_usec > 1000000 )
+                    {
+                        last_put.tv_usec -= 1000000;
+                        last_put.tv_sec++;
+                    }
+                    delta -= BURST_TIME_US;
+                }
+                if ( budget > 0 )
+                    break;
+                delay.tv_sec = 0;
+                delay.tv_nsec = 1000 * (BURST_TIME_US - delta);
+                while ( delay.tv_nsec > 0 )
+                    if ( nanosleep(&delay, &delay) == 0 )
+                        break;
+            }
+        }
     }
     return write(io_fd, buf, n);
 }
@@ -271,20 +275,21 @@
 
     if ( print )
         fprintf(stderr,
-               "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
-               "dirtied %dMb/s %" PRId32 " pages\n",
-               wall_delta, 
-               (int)((d0_cpu_delta*100)/wall_delta),
-               (int)((d1_cpu_delta*100)/wall_delta),
-               (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
-               (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
-               stats->dirty_count);
-
-    if (((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate) {
-       mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
-           + 50;
-       if (mbit_rate > MAX_MBIT_RATE)
-           mbit_rate = MAX_MBIT_RATE;
+                "delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
+                "dirtied %dMb/s %" PRId32 " pages\n",
+                wall_delta, 
+                (int)((d0_cpu_delta*100)/wall_delta),
+                (int)((d1_cpu_delta*100)/wall_delta),
+                (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
+                (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
+                stats->dirty_count);
+
+    if ( ((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate )
+    {
+        mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
+            + 50;
+        if (mbit_rate > MAX_MBIT_RATE)
+            mbit_rate = MAX_MBIT_RATE;
     }
 
     d0_cpu_last  = d0_cpu_now;
@@ -303,7 +308,7 @@
 
     start = llgettimeofday();
 
-    for (j = 0; j < runs; j++)
+    for ( j = 0; j < runs; j++ )
     {
         int i;
 
@@ -320,10 +325,10 @@
                                NULL, 0, &stats);
 
             fprintf(stderr, "now= %lld faults= %" PRId32 " dirty= %" PRId32
-                   " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n", 
-                   ((now-start)+500)/1000, 
-                   stats.fault_count, stats.dirty_count,
-                   stats.dirty_net_count, stats.dirty_block_count);
+                    " dirty_net= %" PRId32 " dirty_block= %" PRId32"\n", 
+                    ((now-start)+500)/1000, 
+                    stats.fault_count, stats.dirty_count,
+                    stats.dirty_net_count, stats.dirty_block_count);
         }
     }
 
@@ -331,7 +336,7 @@
 }
 
 
-static int suspend_and_state(int xc_handle, int io_fd, int dom,              
+static int suspend_and_state(int xc_handle, int io_fd, int dom,       
                              xc_dominfo_t *info,
                              vcpu_guest_context_t *ctxt)
 {
@@ -340,51 +345,53 @@
 
     printf("suspend\n");
     fflush(stdout);
-    if (fgets(ans, sizeof(ans), stdin) == NULL) {
+    if ( fgets(ans, sizeof(ans), stdin) == NULL )
+    {
         ERR("failed reading suspend reply");
         return -1;
     }
-    if (strncmp(ans, "done\n", 5)) {
+    if ( strncmp(ans, "done\n", 5) )
+    {
         ERR("suspend reply incorrect: %s", ans);
         return -1;
     }
 
-retry:
+ retry:
 
     if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1)
     {
-       ERR("Could not get domain info");
-       return -1;
+        ERR("Could not get domain info");
+        return -1;
     }
 
     if ( xc_domain_get_vcpu_context(xc_handle, dom, 0 /* XXX */, 
-                                   ctxt) )
+                                    ctxt) )
     {
         ERR("Could not get vcpu context");
     }
 
     if ( info->shutdown && info->shutdown_reason == SHUTDOWN_suspend )
     {
-       return 0; // success
+        return 0; // success
     }
 
     if ( info->paused )
     {
-       // try unpausing domain, wait, and retest       
-       xc_domain_unpause( xc_handle, dom );
-
-       ERR("Domain was paused. Wait and re-test.");
-       usleep(10000);  // 10ms
-
-       goto retry;
+        // try unpausing domain, wait, and retest 
+        xc_domain_unpause( xc_handle, dom );
+
+        ERR("Domain was paused. Wait and re-test.");
+        usleep(10000);  // 10ms
+
+        goto retry;
     }
 
 
     if( ++i < 100 )
     {
-       ERR("Retry suspend domain.");
-       usleep(10000);  // 10ms 
-       goto retry;
+        ERR("Retry suspend domain.");
+        usleep(10000);  // 10ms 
+        goto retry;
     }
 
     ERR("Unable to suspend domain.");
@@ -454,26 +461,26 @@
 
 
     /* If no explicit control parameters given, use defaults */
-    if(!max_iters) 
+    if( !max_iters ) 
         max_iters = DEF_MAX_ITERS; 
-    if(!max_factor) 
+    if( !max_factor ) 
         max_factor = DEF_MAX_FACTOR; 
 
 
     DPRINTF("xc_linux_save start DOM%u live=%s\n", dom, live?"true":"false"); 
 
-    if (mlock(&ctxt, sizeof(ctxt))) {
+    if ( mlock(&ctxt, sizeof(ctxt)) ) 
+    {
         ERR("Unable to mlock ctxt");
         return 1;
     }
     
-    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1)
+    if ( xc_domain_getinfo(xc_handle, dom, 1, &info) != 1 )
     {
         ERR("Could not get domain info");
         goto out;
     }
-    if ( xc_domain_get_vcpu_context( xc_handle, dom, /* FIXME */ 0, 
-                                &ctxt) )
+    if ( xc_domain_get_vcpu_context(xc_handle, dom, /* FIXME */ 0, &ctxt) )
     {
         ERR("Could not get vcpu context");
         goto out;
@@ -481,7 +488,8 @@
     shared_info_frame = info.shared_info_frame;
 
     /* A cheesy test to see whether the domain contains valid state. */
-    if ( ctxt.ctrlreg[3] == 0 ){
+    if ( ctxt.ctrlreg[3] == 0 )
+    {
         ERR("Domain is not in a valid Linux guest OS state");
         goto out;
     }
@@ -496,18 +504,17 @@
     }
 
     /* Map the shared info frame */
-    live_shinfo = xc_map_foreign_range(xc_handle, dom,
-                                        PAGE_SIZE, PROT_READ,
-                                        shared_info_frame);
-
-    if (!live_shinfo){
+    live_shinfo = xc_map_foreign_range(
+        xc_handle, dom, PAGE_SIZE, PROT_READ, shared_info_frame);
+    if ( !live_shinfo )
+    {
         ERR("Couldn't map live_shinfo");
         goto out;
     }
 
-    live_pfn_to_mfn_frame_list_list = xc_map_foreign_range(xc_handle, dom,
-                                        PAGE_SIZE, PROT_READ,
-                                        live_shinfo->arch.pfn_to_mfn_frame_list_list);
+    live_pfn_to_mfn_frame_list_list = xc_map_foreign_range(
+        xc_handle, dom,
+        PAGE_SIZE, PROT_READ, live_shinfo->arch.pfn_to_mfn_frame_list_list);
 
     if (!live_pfn_to_mfn_frame_list_list){
         ERR("Couldn't map pfn_to_mfn_frame_list_list");
@@ -515,12 +522,13 @@
     }
 
     live_pfn_to_mfn_frame_list = 
-       xc_map_foreign_batch(xc_handle, dom, 
-                            PROT_READ,
-                            live_pfn_to_mfn_frame_list_list,
-                            (nr_pfns+(1024*1024)-1)/(1024*1024) );
-
-    if (!live_pfn_to_mfn_frame_list){
+        xc_map_foreign_batch(xc_handle, dom, 
+                             PROT_READ,
+                             live_pfn_to_mfn_frame_list_list,
+                             (nr_pfns+(1024*1024)-1)/(1024*1024) );
+
+    if ( !live_pfn_to_mfn_frame_list)
+    {
         ERR("Couldn't map pfn_to_mfn_frame_list");
         goto out;
     }
@@ -535,7 +543,8 @@
                                                  PROT_READ,
                                                  live_pfn_to_mfn_frame_list,
                                                  (nr_pfns+1023)/1024 );  
-    if( !live_pfn_to_mfn_table ){
+    if ( !live_pfn_to_mfn_table )
+    {
         ERR("Couldn't map pfn_to_mfn table");
         goto out;
     }
@@ -544,15 +553,17 @@
     mfn_to_pfn_table_start_mfn = xc_get_m2p_start_mfn( xc_handle );
 
     live_mfn_to_pfn_table = 
-       xc_map_foreign_range(xc_handle, DOMID_XEN, 
-                             PAGE_SIZE*1024, PROT_READ, 
-                             mfn_to_pfn_table_start_mfn );
+        xc_map_foreign_range(xc_handle, DOMID_XEN, 
+                             PAGE_SIZE*1024, PROT_READ, 
+                             mfn_to_pfn_table_start_mfn );
 
     /* Canonicalise the pfn-to-mfn table frame-number list. */
     memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
 
-    for ( i = 0; i < nr_pfns; i += 1024 ){
-        if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ){
+    for ( i = 0; i < nr_pfns; i += 1024 )
+    {
+        if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
+        {
             ERR("Frame# in pfn-to-mfn frame list is not in pseudophys");
             goto out;
         }
@@ -561,40 +572,44 @@
 
     /* Domain is still running at this point */
 
-    if( live )
+    if ( live )
     {
         if ( xc_shadow_control( xc_handle, dom, 
                                 DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
-                                NULL, 0, NULL ) < 0 ) {
+                                NULL, 0, NULL ) < 0 )
+        {
             ERR("Couldn't enable shadow mode");
             goto out;
         }
 
         last_iter = 0;
-    } else{
-       /* This is a non-live suspend. Issue the call back to get the
-        domain suspended */
+    } 
+    else
+    {
+        /* This is a non-live suspend. Issue the call back to get the
+           domain suspended */
 
         last_iter = 1;
 
-       if ( suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt) )
-       {
-           ERR("Domain appears not to have suspended");
-           goto out;
-       }
+        if ( suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt) )
+        {
+            ERR("Domain appears not to have suspended");
+            goto out;
+        }
 
     }
     sent_last_iter = 1<<20; /* 4GB of pages */
 
     /* calculate the power of 2 order of nr_pfns, e.g.
        15->4 16->4 17->5 */
-    for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ );
+    for ( i = nr_pfns-1, order_nr = 0; i ; i >>= 1, order_nr++ )
+        continue;
 
     /* Setup to_send bitmap */
     {
-       /* size these for a maximal 4GB domain, to make interaction
-          with balloon driver easier. It's only user space memory,
-          ater all... (3x 128KB) */
+        /* size these for a maximal 4GB domain, to make interaction
+           with balloon driver easier. It's only user space memory,
+           ater all... (3x 128KB) */
 
         int sz = ( 1<<20 ) / 8;
  
@@ -602,21 +617,24 @@
         to_fix  = calloc( 1, sz );
         to_skip = malloc( sz );
 
-        if (!to_send || !to_fix || !to_skip){
+        if ( !to_send || !to_fix || !to_skip )
+        {
             ERR("Couldn't allocate to_send array");
             goto out;
         }
 
-        memset( to_send, 0xff, sz );
-
-        if ( mlock( to_send, sz ) ){
+        memset(to_send, 0xff, sz);
+
+        if ( mlock(to_send, sz) )
+        {
             ERR("Unable to mlock to_send");
             return 1;
         }
 
         /* (to fix is local only) */
 
-        if ( mlock( to_skip, sz ) ){
+        if ( mlock(to_skip, sz) )
+        {
             ERR("Unable to mlock to_skip");
             return 1;
         }
@@ -629,12 +647,14 @@
     pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
     pfn_batch = calloc(BATCH_SIZE, sizeof(unsigned long));
 
-    if ( (pfn_type == NULL) || (pfn_batch == NULL) ){
+    if ( (pfn_type == NULL) || (pfn_batch == NULL) )
+    {
         errno = ENOMEM;
         goto out;
     }
 
-    if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ){
+    if ( mlock(pfn_type, BATCH_SIZE * sizeof(unsigned long)) )
+    {
         ERR("Unable to mlock");
         goto out;
     }
@@ -645,31 +665,34 @@
      */
 #if DEBUG
     {
-       int err=0;
-       for ( i = 0; i < nr_pfns; i++ )
-       {
-           mfn = live_pfn_to_mfn_table[i];
-           
-           if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0xffffffffUL) )
-           {
-               fprintf(stderr, "i=0x%x mfn=%lx live_mfn_to_pfn_table=%lx\n",
-                       i,mfn,live_mfn_to_pfn_table[mfn]);
-               err++;
-           }
-       }
-       fprintf(stderr, "Had %d unexplained entries in p2m table\n",err);
+        int err=0;
+        for ( i = 0; i < nr_pfns; i++ )
+        {
+            mfn = live_pfn_to_mfn_table[i];
+     
+            if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0xffffffffUL) )
+            {
+                fprintf(stderr, "i=0x%x mfn=%lx live_mfn_to_pfn_table=%lx\n",
+                        i,mfn,live_mfn_to_pfn_table[mfn]);
+                err++;
+            }
+        }
+        fprintf(stderr, "Had %d unexplained entries in p2m table\n",err);
     }
 #endif
 
 
     /* Start writing out the saved-domain record. */
 
-    if (write(io_fd, &nr_pfns, sizeof(unsigned long)) !=
-       sizeof(unsigned long)) {
-       ERR("write: nr_pfns");
-       goto out;
-    }
-    if (write(io_fd, pfn_to_mfn_frame_list, PAGE_SIZE) != PAGE_SIZE) {
+    if ( write(io_fd, &nr_pfns, sizeof(unsigned long)) !=
+         sizeof(unsigned long) )
+    {
+        ERR("write: nr_pfns");
+        goto out;
+    }
+
+    if ( write(io_fd, pfn_to_mfn_frame_list, PAGE_SIZE) != PAGE_SIZE )
+    {
         ERR("write: pfn_to_mfn_frame_list");
         goto out;
     }
@@ -678,7 +701,8 @@
 
     /* Now write out each data page, canonicalising page tables as we go... */
     
-    while(1){
+    for ( ; ; )
+    {
         unsigned int prev_pc, sent_this_iter, N, batch;
 
         iter++;
@@ -689,10 +713,12 @@
 
         DPRINTF("Saving memory pages: iter %d   0%%", iter);
 
-        while( N < nr_pfns ){
+        while ( N < nr_pfns )
+        {
             unsigned int this_pc = (N * 100) / nr_pfns;
 
-            if ( (this_pc - prev_pc) >= 5 ){
+            if ( (this_pc - prev_pc) >= 5 )
+            {
                 DPRINTF("\b\b\b\b%3d%%", this_pc);
                 prev_pc = this_pc;
             }
@@ -701,10 +727,10 @@
                but this is fast enough for the moment. */
 
             if ( !last_iter && 
-                xc_shadow_control(xc_handle, dom, 
+                 xc_shadow_control(xc_handle, dom, 
                                    DOM0_SHADOW_CONTROL_OP_PEEK,
                                    to_skip, nr_pfns, NULL) != nr_pfns )
-           {
+            {
                 ERR("Error peeking shadow bitmap");
                 goto out;
             }
@@ -748,7 +774,7 @@
                 pfn_type[batch] = live_pfn_to_mfn_table[n];
 
                 if( ! is_mapped(pfn_type[batch]) )
-               {
+                {
                     /* not currently in pusedo-physical map -- set bit
                        in to_fix that we must send this page in last_iter
                        unless its sent sooner anyhow */
@@ -756,7 +782,7 @@
                     set_bit( n, to_fix );
                     if( iter>1 )
                         DPRINTF("netbuf race: iter %d, pfn %x. mfn %lx\n",
-                               iter,n,pfn_type[batch]);
+                                iter,n,pfn_type[batch]);
                     continue;
                 }
 
@@ -790,8 +816,10 @@
                 goto out;
             }
      
-            for ( j = 0; j < batch; j++ ){
-                if ( (pfn_type[j] & LTAB_MASK) == XTAB ){
+            for ( j = 0; j < batch; j++ )
+            {
+                if ( (pfn_type[j] & LTAB_MASK) == XTAB )
+                {
                     DPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
                     continue;
                 }
@@ -809,21 +837,25 @@
                 pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j];
             }
 
-            if (write(io_fd, &batch, sizeof(int)) != sizeof(int)) {
+            if ( write(io_fd, &batch, sizeof(int)) != sizeof(int) )
+            {
                 ERR("Error when writing to state file (2)");
                 goto out;
             }
 
-            if (write(io_fd, pfn_type, sizeof(unsigned long)*j) !=
-               sizeof(unsigned long)*j) {
+            if ( write(io_fd, pfn_type, sizeof(unsigned long)*j) !=
+                 (sizeof(unsigned long) * j) )
+            {
                 ERR("Error when writing to state file (3)");
                 goto out;
             }
      
             /* entering this loop, pfn_type is now in pfns (Not mfns) */
-            for( j = 0; j < batch; j++ ){
+            for ( j = 0; j < batch; j++ )
+            {
                 /* write out pages in batch */
-                if( (pfn_type[j] & LTAB_MASK) == XTAB){
+                if ( (pfn_type[j] & LTAB_MASK) == XTAB )
+                {
                     DPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
                     continue;
                 }
@@ -836,7 +868,8 @@
                           k < (((pfn_type[j] & LTABTYPE_MASK) == L2TAB) ? 
                                (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) :
                                1024); 
-                          k++ ){
+                          k++ )
+                    {
                         unsigned long pfn;
 
                         if ( !(page[k] & _PAGE_PRESENT) )
@@ -849,13 +882,13 @@
                         {
                             /* I don't think this should ever happen */
                             fprintf(stderr, "FNI %d : [%08lx,%d] pte=%08lx, "
-                                   "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
-                                   j, pfn_type[j], k,
-                                   page[k], mfn, live_mfn_to_pfn_table[mfn],
-                                   (live_mfn_to_pfn_table[mfn]<nr_pfns)? 
-                                   live_pfn_to_mfn_table[
-                                       live_mfn_to_pfn_table[mfn]] : 
-                                   0xdeadbeef);
+                                    "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
+                                    j, pfn_type[j], k,
+                                    page[k], mfn, live_mfn_to_pfn_table[mfn],
+                                    (live_mfn_to_pfn_table[mfn]<nr_pfns)? 
+                                    live_pfn_to_mfn_table[
+                                        live_mfn_to_pfn_table[mfn]] : 
+                                    0xdeadbeef);
 
                             pfn = 0; /* be suspicious */
                         }
@@ -865,12 +898,12 @@
    
 #if 0
                         fprintf(stderr,
-                               "L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
-                               "xpfn=%d\n",
-                               pfn_type[j]>>28,
-                               j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
+                                "L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
+                                "xpfn=%d\n",
+                                pfn_type[j]>>28,
+                                j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
 #endif     
-                       
+   
                     } /* end of page table rewrite for loop */
       
                     if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) {
@@ -880,8 +913,9 @@
       
                 }  /* end of it's a PT page */ else {  /* normal page */
 
-                    if (ratewrite(io_fd, region_base + (PAGE_SIZE*j), 
-                                 PAGE_SIZE) != PAGE_SIZE) {
+                    if ( ratewrite(io_fd, region_base + (PAGE_SIZE*j), 
+                                   PAGE_SIZE) != PAGE_SIZE )
+                    {
                         ERR("Error when writing to state file (5)");
                         goto out;
                     }
@@ -899,13 +933,13 @@
         total_sent += sent_this_iter;
 
         DPRINTF("\r %d: sent %d, skipped %d, ", 
-                       iter, sent_this_iter, skip_this_iter );
+                iter, sent_this_iter, skip_this_iter );
 
         if ( last_iter ) {
             print_stats( xc_handle, dom, sent_this_iter, &stats, 1);
 
             DPRINTF("Total pages sent= %d (%.2fx)\n", 
-                           total_sent, ((float)total_sent)/nr_pfns );
+                    total_sent, ((float)total_sent)/nr_pfns );
             DPRINTF("(of which %d were fixups)\n", needed_to_fix  );
         }       
 
@@ -930,7 +964,7 @@
         {
             if ( 
                 ( ( sent_this_iter > sent_last_iter ) &&
-                 (mbit_rate == MAX_MBIT_RATE ) ) ||
+                  (mbit_rate == MAX_MBIT_RATE ) ) ||
                 (iter >= max_iters) || 
                 (sent_this_iter+skip_this_iter < 50) || 
                 (total_sent > nr_pfns*max_factor) )
@@ -938,15 +972,15 @@
                 DPRINTF("Start last iteration\n");
                 last_iter = 1;
 
-               if ( suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt) )
-               {
-                   ERR("Domain appears not to have suspended");
-                   goto out;
-               }
-
-               DPRINTF("SUSPEND shinfo %08lx eip %08u esi %08u\n",
-                       info.shared_info_frame,
-                       ctxt.user_regs.eip, ctxt.user_regs.esi);
+                if ( suspend_and_state( xc_handle, io_fd, dom, &info, &ctxt) )
+                {
+                    ERR("Domain appears not to have suspended");
+                    goto out;
+                }
+
+                DPRINTF("SUSPEND shinfo %08lx eip %08u esi %08u\n",
+                        info.shared_info_frame,
+                        ctxt.user_regs.eip, ctxt.user_regs.esi);
             } 
 
             if ( xc_shadow_control( xc_handle, dom, 
@@ -972,86 +1006,92 @@
     rc = 0;
     
     /* Zero terminate */
-    if (write(io_fd, &rc, sizeof(int)) != sizeof(int)) {
+    if ( write(io_fd, &rc, sizeof(int)) != sizeof(int) )
+    {
         ERR("Error when writing to state file (6)");
         goto out;
     }
 
     /* Send through a list of all the PFNs that were not in map at the close */
     {
-       unsigned int i,j;
-       unsigned int pfntab[1024];
-
-       for ( i = 0, j = 0; i < nr_pfns; i++ )
-       {
-           if ( ! is_mapped(live_pfn_to_mfn_table[i]) )
-               j++;
-       }
-
-       if (write(io_fd, &j, sizeof(unsigned int)) != sizeof(unsigned int)) {
-           ERR("Error when writing to state file (6a)");
-           goto out;
-       }       
-
-       for ( i = 0, j = 0; i < nr_pfns; )
-       {
-           if ( ! is_mapped(live_pfn_to_mfn_table[i]) )
-           {
-               pfntab[j++] = i;
-           }
-           i++;
-           if ( j == 1024 || i == nr_pfns )
-           {
-               if (write(io_fd, &pfntab, sizeof(unsigned long)*j) !=
-                   sizeof(unsigned long)*j) {
-                   ERR("Error when writing to state file (6b)");
-                   goto out;
-               }       
-               j = 0;
-           }
-       }
+        unsigned int i,j;
+        unsigned int pfntab[1024];
+
+        for ( i = 0, j = 0; i < nr_pfns; i++ )
+            if ( !is_mapped(live_pfn_to_mfn_table[i]) )
+                j++;
+
+        if ( write(io_fd, &j, sizeof(unsigned int)) != sizeof(unsigned int) )
+        {
+            ERR("Error when writing to state file (6a)");
+            goto out;
+        } 
+
+        for ( i = 0, j = 0; i < nr_pfns; )
+        {
+            if ( !is_mapped(live_pfn_to_mfn_table[i]) )
+            {
+                pfntab[j++] = i;
+            }
+            i++;
+            if ( j == 1024 || i == nr_pfns )
+            {
+                if ( write(io_fd, &pfntab, sizeof(unsigned long)*j) !=
+                     (sizeof(unsigned long) * j) )
+                {
+                    ERR("Error when writing to state file (6b)");
+                    goto out;
+                } 
+                j = 0;
+            }
+        }
     }
 
     /* Canonicalise the suspend-record frame number. */
-    if ( !translate_mfn_to_pfn(&ctxt.user_regs.esi) ){
+    if ( !translate_mfn_to_pfn(&ctxt.user_regs.esi) )
+    {
         ERR("Suspend record is not in range of pseudophys map");
         goto out;
     }
 
     /* Canonicalise each GDT frame number. */
-    for ( i = 0; i < ctxt.gdt_ents; i += 512 ) {
-        if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) {
+    for ( i = 0; i < ctxt.gdt_ents; i += 512 )
+    {
+        if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) 
+        {
             ERR("GDT frame is not in range of pseudophys map");
             goto out;
         }
     }
 
     /* Canonicalise the page table base pointer. */
-    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.ctrlreg[3] >> PAGE_SHIFT) ) {
+    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.ctrlreg[3] >> PAGE_SHIFT) )
+    {
         ERR("PT base is not in range of pseudophys map");
         goto out;
     }
     ctxt.ctrlreg[3] = live_mfn_to_pfn_table[ctxt.ctrlreg[3] >> PAGE_SHIFT] <<
         PAGE_SHIFT;
 
-    if (write(io_fd, &ctxt, sizeof(ctxt)) != sizeof(ctxt) ||
-       write(io_fd, live_shinfo, PAGE_SIZE) != PAGE_SIZE) {
+    if ( write(io_fd, &ctxt, sizeof(ctxt)) != sizeof(ctxt) ||
+         write(io_fd, live_shinfo, PAGE_SIZE) != PAGE_SIZE)
+    {
         ERR("Error when writing to state file (1)");
         goto out;
     }
 
  out:
 
-    if(live_shinfo)
+    if ( live_shinfo )
         munmap(live_shinfo, PAGE_SIZE);
 
-    if(live_pfn_to_mfn_frame_list) 
+    if ( live_pfn_to_mfn_frame_list ) 
         munmap(live_pfn_to_mfn_frame_list, PAGE_SIZE);
 
-    if(live_pfn_to_mfn_table) 
+    if ( live_pfn_to_mfn_table ) 
         munmap(live_pfn_to_mfn_table, nr_pfns*4);
 
-    if(live_mfn_to_pfn_table) 
+    if ( live_mfn_to_pfn_table ) 
         munmap(live_mfn_to_pfn_table, PAGE_SIZE*1024);
 
     free(pfn_type);
@@ -1063,3 +1103,13 @@
     DPRINTF("Save exit rc=%d\n",rc);
     return !!rc;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_load_aout9.c
--- a/tools/libxc/xc_load_aout9.c       Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_load_aout9.c       Thu Sep 22 17:42:01 2005
@@ -64,11 +64,11 @@
     dstart = round_pgup(start + ehdr.text);
     end = dstart + ehdr.data + ehdr.bss;
 
-    dsi->v_start       = KZERO;
-    dsi->v_kernstart   = start;
-    dsi->v_kernend     = end;
-    dsi->v_kernentry   = ehdr.entry;
-    dsi->v_end         = end;
+    dsi->v_start     = KZERO;
+    dsi->v_kernstart = start;
+    dsi->v_kernend   = end;
+    dsi->v_kernentry = ehdr.entry;
+    dsi->v_end       = end;
 
     /* XXX load symbols */
 
@@ -168,3 +168,12 @@
     return ehdr;
 }
 
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_load_bin.c
--- a/tools/libxc/xc_load_bin.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_load_bin.c Thu Sep 22 17:42:01 2005
@@ -109,8 +109,8 @@
     unsigned long *parray, struct domain_setup_info *dsi);
 
 int probe_bin(char *image,
-             unsigned long image_size,
-             struct load_funcs *load_funcs)
+              unsigned long image_size,
+              struct load_funcs *load_funcs)
 {
     if ( NULL == findtable(image, image_size) )
     {
@@ -297,3 +297,13 @@
 
     return 0;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_load_elf.c Thu Sep 22 17:42:01 2005
@@ -30,8 +30,8 @@
     struct domain_setup_info *dsi);
 
 int probe_elf(char *image,
-             unsigned long image_size,
-             struct load_funcs *load_funcs)
+              unsigned long image_size,
+              struct load_funcs *load_funcs)
 {
     Elf_Ehdr *ehdr = (Elf_Ehdr *)image;
 
@@ -116,7 +116,7 @@
             return -EINVAL;
         }
         if ( (strstr(guestinfo, "PAE=yes") != NULL) )
-           dsi->pae_kernel = 1;
+            dsi->pae_kernel = 1;
 
         break;
     }
@@ -313,3 +313,13 @@
 
     return 0;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_misc.c     Thu Sep 22 17:42:01 2005
@@ -133,5 +133,15 @@
 
 long xc_init_store(int xc_handle, int remote_port)
 {
-       return ioctl(xc_handle, IOCTL_PRIVCMD_INITDOMAIN_STORE, remote_port);
+    return ioctl(xc_handle, IOCTL_PRIVCMD_INITDOMAIN_STORE, remote_port);
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_private.c  Thu Sep 22 17:42:01 2005
@@ -15,7 +15,7 @@
     void *addr;
     addr = mmap(NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0);
     if ( addr == MAP_FAILED )
-       return NULL;
+        return NULL;
 
     ioctlx.num=num;
     ioctlx.dom=dom;
@@ -24,10 +24,10 @@
     if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) < 0 )
     {
         int saved_errno = errno;
-       perror("XXXXXXXX");
-       (void)munmap(addr, num*PAGE_SIZE);
+        perror("XXXXXXXX");
+        (void)munmap(addr, num*PAGE_SIZE);
         errno = saved_errno;
-       return NULL;
+        return NULL;
     }
     return addr;
 
@@ -36,15 +36,15 @@
 /*******************/
 
 void *xc_map_foreign_range(int xc_handle, u32 dom,
-                            int size, int prot,
-                            unsigned long mfn )
+                           int size, int prot,
+                           unsigned long mfn )
 {
     privcmd_mmap_t ioctlx; 
     privcmd_mmap_entry_t entry; 
     void *addr;
     addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
     if ( addr == MAP_FAILED )
-       return NULL;
+        return NULL;
 
     ioctlx.num=1;
     ioctlx.dom=dom;
@@ -55,9 +55,9 @@
     if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) < 0 )
     {
         int saved_errno = errno;
-       (void)munmap(addr, size);
+        (void)munmap(addr, size);
         errno = saved_errno;
-       return NULL;
+        return NULL;
     }
     return addr;
 }
@@ -66,7 +66,7 @@
 
 /* NB: arr must be mlock'ed */
 int xc_get_pfn_type_batch(int xc_handle, 
-                         u32 dom, int num, unsigned long *arr)
+                          u32 dom, int num, unsigned long *arr)
 {
     dom0_op_t op;
     op.cmd = DOM0_GETPAGEFRAMEINFO2;
@@ -116,8 +116,8 @@
 
     if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
     {
-       fprintf(stderr, "Dom_mmuext operation failed (rc=%ld errno=%d)-- need to"
-                    " rebuild the user-space tool set?\n",ret,errno);
+        fprintf(stderr, "Dom_mmuext operation failed (rc=%ld errno=%d)-- need to"
+                " rebuild the user-space tool set?\n",ret,errno);
     }
 
     safe_munlock(op, nr_ops*sizeof(*op));
@@ -172,7 +172,7 @@
 }
 
 int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, 
-                     unsigned long long ptr, unsigned long long val)
+                      unsigned long long ptr, unsigned long long val)
 {
     mmu->updates[mmu->idx].ptr = ptr;
     mmu->updates[mmu->idx].val = val;
@@ -229,7 +229,7 @@
 
     if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
     {
-       fprintf(stderr, "hypercall failed (rc=%ld errno=%d)-- need to"
+        fprintf(stderr, "hypercall failed (rc=%ld errno=%d)-- need to"
                 " rebuild the user-space tool set?\n",ret,errno);
     }
 
@@ -275,16 +275,16 @@
 
     if ( ioctl( xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN, &mfn ) < 0 )
     {
-       perror("xc_get_m2p_start_mfn:");
-       return 0;
+        perror("xc_get_m2p_start_mfn:");
+        return 0;
     }
     return mfn;
 }
 
 int xc_get_pfn_list(int xc_handle,
-                u32 domid, 
-                unsigned long *pfn_buf, 
-                unsigned long max_pfns)
+                    u32 domid, 
+                    unsigned long *pfn_buf, 
+                    unsigned long max_pfns)
 {
     dom0_op_t op;
     int ret;
@@ -306,16 +306,16 @@
 
 #if 0
 #ifdef DEBUG
-       DPRINTF(("Ret for xc_get_pfn_list is %d\n", ret));
-       if (ret >= 0) {
-               int i, j;
-               for (i = 0; i < op.u.getmemlist.num_pfns; i += 16) {
-                       fprintf(stderr, "0x%x: ", i);
-                       for (j = 0; j < 16; j++)
-                               fprintf(stderr, "0x%lx ", pfn_buf[i + j]);
-                       fprintf(stderr, "\n");
-               }
-       }
+    DPRINTF(("Ret for xc_get_pfn_list is %d\n", ret));
+    if (ret >= 0) {
+        int i, j;
+        for (i = 0; i < op.u.getmemlist.num_pfns; i += 16) {
+            fprintf(stderr, "0x%x: ", i);
+            for (j = 0; j < 16; j++)
+                fprintf(stderr, "0x%lx ", pfn_buf[i + j]);
+            fprintf(stderr, "\n");
+        }
+    }
 #endif
 #endif
 
@@ -324,10 +324,10 @@
 
 #ifdef __ia64__
 int xc_ia64_get_pfn_list(int xc_handle,
-                u32 domid, 
-                unsigned long *pfn_buf, 
-                unsigned int start_page,
-                unsigned int nr_pages)
+                         u32 domid, 
+                         unsigned long *pfn_buf, 
+                         unsigned int start_page,
+                         unsigned int nr_pages)
 {
     dom0_op_t op;
     int ret;
@@ -372,9 +372,9 @@
 }
 
 int xc_copy_to_domain_page(int xc_handle,
-                                   u32 domid,
-                                   unsigned long dst_pfn, 
-                                   void *src_page)
+                           u32 domid,
+                           unsigned long dst_pfn, 
+                           void *src_page)
 {
     void *vaddr = xc_map_foreign_range(
         xc_handle, domid, PAGE_SIZE, PROT_WRITE, dst_pfn);
@@ -465,18 +465,28 @@
     unsigned long new_mfn;
 
     if ( xc_domain_memory_decrease_reservation( 
-       xc_handle, domid, 1, 0, &mfn) != 0 )
-    {
-       fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
-       return 0;
+        xc_handle, domid, 1, 0, &mfn) != 0 )
+    {
+        fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn);
+        return 0;
     }
 
     if ( xc_domain_memory_increase_reservation(
         xc_handle, domid, 1, 0, 32, &new_mfn) != 0 )
     {
-       fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
-       return 0;
+        fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn);
+        return 0;
     }
 
     return new_mfn;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_ptrace.c   Thu Sep 22 17:42:01 2005
@@ -1,25 +1,15 @@
 #include <sys/ptrace.h>
 #include <sys/wait.h>
 #include "xc_private.h"
+#include "xg_private.h"
 #include <time.h>
 
 #define X86_CR0_PE              0x00000001 /* Enable Protected Mode    (RW) */
 #define X86_CR0_PG              0x80000000 /* Paging                   (RW) */
-
-#define BSD_PAGE_MASK  (PAGE_SIZE-1)
-#define        PG_FRAME        (~((unsigned long)BSD_PAGE_MASK)
+#define BSD_PAGE_MASK (PAGE_SIZE-1)
 #define PDRSHIFT        22
-#define        PSL_T           0x00000100      /* trace enable bit */
-
+#define PSL_T  0x00000100 /* trace enable bit */
 #define VCPU            0               /* XXX */
-
-/*
- * long  
- * ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
- */
-
-
-int waitdomain(int domain, int *status, int options);
 
 char * ptrace_names[] = {
     "PTRACE_TRACEME",
@@ -69,67 +59,64 @@
     int  xss;    /* 64 */
 };
 
-#define FETCH_REGS(cpu) \
-    if (!regs_valid[cpu]) \
-    {                \
-       int retval = xc_domain_get_vcpu_context(xc_handle, domid, cpu, &ctxt[cpu]); \
-       if (retval) \
-           goto error_out; \
-       cr3[cpu] = ctxt[cpu].ctrlreg[3]; /* physical address */ \
-       regs_valid[cpu] = 1; \
-    } \
+#define FETCH_REGS(cpu)                                         \
+    if (!regs_valid[cpu])                                       \
+    {                                                           \
+        int retval = xc_domain_get_vcpu_context(                \
+            xc_handle, domid, cpu, &ctxt[cpu]);                 \
+        if (retval)                                             \
+            goto error_out;                                     \
+        cr3[cpu] = ctxt[cpu].ctrlreg[3]; /* physical address */ \
+        regs_valid[cpu] = 1;                                    \
+    }
 
 #define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define SET_PT_REGS(pt, xc) \
-{ \
-    pt.ebx = xc.ebx; \
-    pt.ecx = xc.ecx; \
-    pt.edx = xc.edx; \
-    pt.esi = xc.esi; \
-    pt.edi = xc.edi; \
-    pt.ebp = xc.ebp; \
-    pt.eax = xc.eax; \
-    pt.eip = xc.eip; \
-    pt.xcs = xc.cs; \
-    pt.eflags = xc.eflags; \
-    pt.esp = xc.esp; \
-    pt.xss = xc.ss; \
-    pt.xes = xc.es; \
-    pt.xds = xc.ds; \
-    pt.xfs = xc.fs; \
-    pt.xgs = xc.gs; \
-}
-
-#define SET_XC_REGS(pt, xc) \
-{ \
-    xc.ebx = pt->ebx; \
-    xc.ecx = pt->ecx; \
-    xc.edx = pt->edx; \
-    xc.esi = pt->esi; \
-    xc.edi = pt->edi; \
-    xc.ebp = pt->ebp; \
-    xc.eax = pt->eax; \
-    xc.eip = pt->eip; \
-    xc.cs = pt->xcs; \
-    xc.eflags = pt->eflags; \
-    xc.esp = pt->esp; \
-    xc.ss = pt->xss; \
-    xc.es = pt->xes; \
-    xc.ds = pt->xds; \
-    xc.fs = pt->xfs; \
-    xc.gs = pt->xgs; \
-}
-
+#define SET_PT_REGS(pt, xc)                     \
+{                                               \
+    pt.ebx = xc.ebx;                            \
+    pt.ecx = xc.ecx;                            \
+    pt.edx = xc.edx;                            \
+    pt.esi = xc.esi;                            \
+    pt.edi = xc.edi;                            \
+    pt.ebp = xc.ebp;                            \
+    pt.eax = xc.eax;                            \
+    pt.eip = xc.eip;                            \
+    pt.xcs = xc.cs;                             \
+    pt.eflags = xc.eflags;                      \
+    pt.esp = xc.esp;                            \
+    pt.xss = xc.ss;                             \
+    pt.xes = xc.es;                             \
+    pt.xds = xc.ds;                             \
+    pt.xfs = xc.fs;                             \
+    pt.xgs = xc.gs;                             \
+}
+
+#define SET_XC_REGS(pt, xc)                     \
+{                                               \
+    xc.ebx = pt->ebx;                           \
+    xc.ecx = pt->ecx;                           \
+    xc.edx = pt->edx;                           \
+    xc.esi = pt->esi;                           \
+    xc.edi = pt->edi;                           \
+    xc.ebp = pt->ebp;                           \
+    xc.eax = pt->eax;                           \
+    xc.eip = pt->eip;                           \
+    xc.cs = pt->xcs;                            \
+    xc.eflags = pt->eflags;                     \
+    xc.esp = pt->esp;                           \
+    xc.ss = pt->xss;                            \
+    xc.es = pt->xes;                            \
+    xc.ds = pt->xds;                            \
+    xc.fs = pt->xfs;                            \
+    xc.gs = pt->xgs;                            \
+}
 
 #define vtopdi(va) ((va) >> PDRSHIFT)
 #define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
 
 /* XXX application state */
-
-
-static int                      xc_handle;
-static long                    nr_pages = 0;
-unsigned long                  *page_array = NULL;
+static long   nr_pages = 0;
+unsigned long   *page_array = NULL;
 static int                      regs_valid[MAX_VIRT_CPUS];
 static unsigned long            cr3[MAX_VIRT_CPUS];
 static vcpu_guest_context_t ctxt[MAX_VIRT_CPUS];
@@ -137,14 +124,60 @@
 static inline int paging_enabled(vcpu_guest_context_t *v)
 {
     unsigned long cr0 = v->ctrlreg[0];
-
     return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
 }
 
 /* --------------------- */
 
 static void *
-map_domain_va(unsigned long domid, int cpu, void * guest_va, int perm)
+map_domain_va_pae(
+    int xc_handle,
+    unsigned long domid,
+    int cpu,
+    void *guest_va,
+    int perm)
+{
+    unsigned long l2p, l1p, p, va = (unsigned long)guest_va;
+    u64 *l3, *l2, *l1;
+    static void *v;
+
+    FETCH_REGS(cpu);
+
+    l3 = xc_map_foreign_range(
+        xc_handle, domid, PAGE_SIZE, PROT_READ, cr3[cpu] >> PAGE_SHIFT);
+    if ( l3 == NULL )
+        goto error_out;
+
+    l2p = l3[l3_table_offset_pae(va)] >> PAGE_SHIFT;
+    l2 = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_READ, l2p);
+    if ( l2 == NULL )
+        goto error_out;
+
+    l1p = l2[l2_table_offset_pae(va)] >> PAGE_SHIFT;
+    l1 = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, perm, l1p);
+    if ( l1 == NULL )
+        goto error_out;
+
+    p = l1[l1_table_offset_pae(va)] >> PAGE_SHIFT;
+    if ( v != NULL )
+        munmap(v, PAGE_SIZE);
+    v = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, perm, p);
+    if ( v == NULL )
+        goto error_out;
+
+    return (void *)((unsigned long)v | (va & (PAGE_SIZE - 1)));
+
+ error_out:
+    return NULL;
+}
+
+static void *
+map_domain_va(
+    int xc_handle,
+    unsigned long domid,
+    int cpu,
+    void *guest_va,
+    int perm)
 {
     unsigned long pde, page;
     unsigned long va = (unsigned long)guest_va;
@@ -155,69 +188,88 @@
     static unsigned long  pde_phys[MAX_VIRT_CPUS];
     static unsigned long *pde_virt[MAX_VIRT_CPUS];
     static unsigned long  page_phys[MAX_VIRT_CPUS];
-    static unsigned long *page_virt[MAX_VIRT_CPUS];
-    
+    static unsigned long *page_virt[MAX_VIRT_CPUS];    
     static int            prev_perm[MAX_VIRT_CPUS];
-
-    if (nr_pages != npgs) {
-       if (nr_pages > 0)
-           free(page_array);
-       nr_pages = npgs;
-       if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
-           printf("Could not allocate memory\n");
-           goto error_out;
-       }
-
-       if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) {
-               printf("Could not get the page frame list\n");
-               goto error_out;
-       }
+    static enum { MODE_UNKNOWN, MODE_32, MODE_PAE } mode;
+
+    if ( mode == MODE_UNKNOWN )
+    {
+        xen_capabilities_info_t caps;
+        (void)xc_version(xc_handle, XENVER_capabilities, caps);
+        mode = MODE_32;
+        if ( strstr(caps, "_x86_32p") )
+            mode = MODE_PAE;
+    }
+
+    if ( mode == MODE_PAE )
+        return map_domain_va_pae(xc_handle, domid, cpu, guest_va, perm);
+
+    if ( nr_pages != npgs )
+    {
+        if ( nr_pages > 0 )
+            free(page_array);
+        nr_pages = npgs;
+        if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+        {
+            printf("Could not allocate memory\n");
+            goto error_out;
+        }
+        if ( xc_get_pfn_list(xc_handle, domid,
+                             page_array, nr_pages) != nr_pages )
+        {
+            printf("Could not get the page frame list\n");
+            goto error_out;
+        }
     }
 
     FETCH_REGS(cpu);
 
-    if (cr3[cpu] != cr3_phys[cpu]) 
-    {
-       cr3_phys[cpu] = cr3[cpu];
-       if (cr3_virt[cpu])
-           munmap(cr3_virt[cpu], PAGE_SIZE);
-       if ((cr3_virt[cpu] = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                            PROT_READ,
-                                            cr3_phys[cpu] >> PAGE_SHIFT)) == NULL)
-           goto error_out;
+    if ( cr3[cpu] != cr3_phys[cpu] )
+    {
+        cr3_phys[cpu] = cr3[cpu];
+        if ( cr3_virt[cpu] )
+            munmap(cr3_virt[cpu], PAGE_SIZE);
+        cr3_virt[cpu] = xc_map_foreign_range(
+            xc_handle, domid, PAGE_SIZE, PROT_READ,
+            cr3_phys[cpu] >> PAGE_SHIFT);
+        if ( cr3_virt[cpu] == NULL )
+            goto error_out;
+    }
+    if ( (pde = cr3_virt[cpu][vtopdi(va)]) == 0 )
+        goto error_out;
+    if ( (ctxt[cpu].flags & VGCF_VMX_GUEST) && paging_enabled(&ctxt[cpu]) )
+        pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
+    if ( pde != pde_phys[cpu] )
+    {
+        pde_phys[cpu] = pde;
+        if ( pde_virt[cpu] )
+            munmap(pde_virt[cpu], PAGE_SIZE);
+        pde_virt[cpu] = xc_map_foreign_range(
+            xc_handle, domid, PAGE_SIZE, PROT_READ,
+            pde_phys[cpu] >> PAGE_SHIFT);
+        if ( pde_virt[cpu] == NULL )
+            goto error_out;
+    }
+    if ( (page = pde_virt[cpu][vtopti(va)]) == 0 )
+        goto error_out;
+    if ( (ctxt[cpu].flags & VGCF_VMX_GUEST) && paging_enabled(&ctxt[cpu]) )
+        page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
+    if ( (page != page_phys[cpu]) || (perm != prev_perm[cpu]) )
+    {
+        page_phys[cpu] = page;
+        if ( page_virt[cpu] )
+            munmap(page_virt[cpu], PAGE_SIZE);
+        page_virt[cpu] = xc_map_foreign_range(
+            xc_handle, domid, PAGE_SIZE, perm,
+            page_phys[cpu] >> PAGE_SHIFT);
+        if ( page_virt[cpu] == NULL )
+        {
+            page_phys[cpu] = 0;
+            goto error_out;
+        }
+        prev_perm[cpu] = perm;
     } 
-    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
-       goto error_out;
-    if ((ctxt[cpu].flags & VGCF_VMX_GUEST) && paging_enabled(&ctxt[cpu]))
-        pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
-    if (pde != pde_phys[cpu]) 
-    {
-       pde_phys[cpu] = pde;
-       if (pde_virt[cpu])
-           munmap(pde_virt[cpu], PAGE_SIZE);
-       if ((pde_virt[cpu] = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                            PROT_READ,
-                                            pde_phys[cpu] >> PAGE_SHIFT)) == NULL)
-           goto error_out;
-    }
-    if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
-       goto error_out;
-    if (ctxt[cpu].flags & VGCF_VMX_GUEST && paging_enabled(&ctxt[cpu]))
-        page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
-    if (page != page_phys[cpu] || perm != prev_perm[cpu]) 
-    {
-       page_phys[cpu] = page;
-       if (page_virt[cpu])
-           munmap(page_virt[cpu], PAGE_SIZE);
-       if ((page_virt[cpu] = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                             perm,
-                                             page_phys[cpu] >> PAGE_SHIFT)) == NULL) {
-           printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, vtopti(va));
-           page_phys[cpu] = 0;
-           goto error_out;
-       }
-       prev_perm[cpu] = perm;
-    }  
+
     return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
 
  error_out:
@@ -225,7 +277,11 @@
 }
 
 int 
-xc_waitdomain(int domain, int *status, int options)
+xc_waitdomain(
+    int xc_handle,
+    int domain,
+    int *status,
+    int options)
 {
     dom0_op_t op;
     int retval;
@@ -233,38 +289,39 @@
     ts.tv_sec = 0;
     ts.tv_nsec = 10*1000*1000;
 
-    if (!xc_handle)
-       if ((xc_handle = xc_interface_open()) < 0) 
-       {
-           printf("xc_interface_open failed\n");
-           return -1;
-       }
     op.cmd = DOM0_GETDOMAININFO;
     op.u.getdomaininfo.domain = domain;
+
  retry:
-
     retval = do_dom0_op(xc_handle, &op);
-    if (retval || op.u.getdomaininfo.domain != domain) {
-       printf("getdomaininfo failed\n");
-       goto done;
+    if ( retval || (op.u.getdomaininfo.domain != domain) )
+    {
+        printf("getdomaininfo failed\n");
+        goto done;
     }
     *status = op.u.getdomaininfo.flags;
     
-    if (options & WNOHANG)
-       goto done;
-       
-
-    if (!(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED)) {       
-       nanosleep(&ts,NULL);
-       goto retry;
-    }
+    if ( options & WNOHANG )
+        goto done;
+
+    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) )
+    {
+        nanosleep(&ts,NULL);
+        goto retry;
+    }
+
  done:
     return retval;
 
 }
 
 long
-xc_ptrace(enum __ptrace_request request, u32 domid, long eaddr, long edata)
+xc_ptrace(
+    int xc_handle,
+    enum __ptrace_request request,
+    u32 domid,
+    long eaddr,
+    long edata)
 {
     dom0_op_t       op;
     int             status = 0;
@@ -277,108 +334,124 @@
 
     op.interface_version = DOM0_INTERFACE_VERSION;
     
-    if (!xc_handle)
-       if ((xc_handle = xc_interface_open()) < 0)
-           return -1;
-#if 0
-    printf("%20s %d, %p, %p \n", ptrace_names[request], domid, addr, data);
-#endif
-    switch (request) { 
+    switch ( request )
+    { 
     case PTRACE_PEEKTEXT:
     case PTRACE_PEEKDATA:
-       if ((guest_va = (unsigned long *)map_domain_va(domid, cpu, addr, PROT_READ)) == NULL) {
-           status = EFAULT;
-           goto error_out;
-       }
-
-       retval = *guest_va;
-       break;
+        guest_va = (unsigned long *)map_domain_va(
+            xc_handle, domid, cpu, addr, PROT_READ);
+        if ( guest_va == NULL )
+        {
+            status = EFAULT;
+            goto error_out;
+        }
+        retval = *guest_va;
+        break;
+
     case PTRACE_POKETEXT:
     case PTRACE_POKEDATA:
-       if ((guest_va = (unsigned long *)map_domain_va(domid, cpu, addr, PROT_READ|PROT_WRITE)) == NULL) {
-           status = EFAULT;
-           goto error_out;
-       }
-
-       *guest_va = (unsigned long)data;
-       break;
+        guest_va = (unsigned long *)map_domain_va(
+            xc_handle, domid, cpu, addr, PROT_READ|PROT_WRITE);
+        if ( guest_va == NULL )
+        {
+            status = EFAULT;
+            goto error_out;
+        }
+        *guest_va = (unsigned long)data;
+        break;
+
     case PTRACE_GETREGS:
     case PTRACE_GETFPREGS:
     case PTRACE_GETFPXREGS:
-       FETCH_REGS(cpu);
-
-       if (request == PTRACE_GETREGS) {
-               SET_PT_REGS(pt, ctxt[cpu].user_regs); 
-               memcpy(data, &pt, sizeof(struct gdb_regs));
-       } else if (request == PTRACE_GETFPREGS)
-           memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
-       else /*if (request == PTRACE_GETFPXREGS)*/
-           memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
-       break;
+        FETCH_REGS(cpu);
+        if ( request == PTRACE_GETREGS )
+        {
+            SET_PT_REGS(pt, ctxt[cpu].user_regs); 
+            memcpy(data, &pt, sizeof(struct gdb_regs));
+        }
+        else if (request == PTRACE_GETFPREGS)
+        {
+            memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+        }
+        else /*if (request == PTRACE_GETFPXREGS)*/
+        {
+            memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+        }
+        break;
+
     case PTRACE_SETREGS:
-       op.cmd = DOM0_SETDOMAININFO;
-       SET_XC_REGS(((struct gdb_regs *)data), ctxt[VCPU].user_regs);
-       op.u.setdomaininfo.domain = domid;
-       /* XXX need to understand multiple vcpus */
-       op.u.setdomaininfo.vcpu = cpu;
-       op.u.setdomaininfo.ctxt = &ctxt[cpu];
-       retval = do_dom0_op(xc_handle, &op);
-       if (retval)
-           goto error_out;
-
-       break;
+        op.cmd = DOM0_SETDOMAININFO;
+        SET_XC_REGS(((struct gdb_regs *)data), ctxt[VCPU].user_regs);
+        op.u.setdomaininfo.domain = domid;
+        /* XXX need to understand multiple vcpus */
+        op.u.setdomaininfo.vcpu = cpu;
+        op.u.setdomaininfo.ctxt = &ctxt[cpu];
+        retval = do_dom0_op(xc_handle, &op);
+        if (retval)
+            goto error_out;
+        break;
+
     case PTRACE_ATTACH:
-       op.cmd = DOM0_GETDOMAININFO;
-       op.u.getdomaininfo.domain = domid;
-       retval = do_dom0_op(xc_handle, &op);
-       if (retval || op.u.getdomaininfo.domain != domid) {
-           perror("dom0 op failed");
-           goto error_out;
-       }
-       if (op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) {
-           printf("domain currently paused\n");
-           goto error_out;
-       }
-       printf("domain not currently paused\n");
-       op.cmd = DOM0_PAUSEDOMAIN;
-       op.u.pausedomain.domain = domid;
-       retval = do_dom0_op(xc_handle, &op);
-       break;
+        op.cmd = DOM0_GETDOMAININFO;
+        op.u.getdomaininfo.domain = domid;
+        retval = do_dom0_op(xc_handle, &op);
+        if ( retval || (op.u.getdomaininfo.domain != domid) )
+        {
+            perror("dom0 op failed");
+            goto error_out;
+        }
+        if ( op.u.getdomaininfo.flags & DOMFLAGS_PAUSED )
+        {
+            printf("domain currently paused\n");
+            goto error_out;
+        }
+        printf("domain not currently paused\n");
+        op.cmd = DOM0_PAUSEDOMAIN;
+        op.u.pausedomain.domain = domid;
+        retval = do_dom0_op(xc_handle, &op);
+        break;
+
     case PTRACE_SINGLESTEP:
-       ctxt[VCPU].user_regs.eflags |= PSL_T;
-       op.cmd = DOM0_SETDOMAININFO;
-       op.u.setdomaininfo.domain = domid;
-       op.u.setdomaininfo.vcpu = 0;
-       op.u.setdomaininfo.ctxt = &ctxt[cpu];
-       retval = do_dom0_op(xc_handle, &op);    
-       if (retval) {
-           perror("dom0 op failed");
-           goto error_out;
-       }
-       /* FALLTHROUGH */
+        ctxt[VCPU].user_regs.eflags |= PSL_T;
+        op.cmd = DOM0_SETDOMAININFO;
+        op.u.setdomaininfo.domain = domid;
+        op.u.setdomaininfo.vcpu = 0;
+        op.u.setdomaininfo.ctxt = &ctxt[cpu];
+        retval = do_dom0_op(xc_handle, &op); 
+        if ( retval )
+        {
+            perror("dom0 op failed");
+            goto error_out;
+        }
+        /* FALLTHROUGH */
+
     case PTRACE_CONT:
     case PTRACE_DETACH:
-       if (request != PTRACE_SINGLESTEP) {
-           FETCH_REGS(cpu);
-           /* Clear trace flag */
-           if (ctxt[cpu].user_regs.eflags & PSL_T) {
-               ctxt[cpu].user_regs.eflags &= ~PSL_T;
-               op.cmd = DOM0_SETDOMAININFO;
-               op.u.setdomaininfo.domain = domid;
-               op.u.setdomaininfo.vcpu = cpu;
-               op.u.setdomaininfo.ctxt = &ctxt[cpu];
-               retval = do_dom0_op(xc_handle, &op);    
-               if (retval) {
-                   perror("dom0 op failed");
-                   goto error_out;
-               }
-           }
-       }
-       regs_valid[cpu] = 0;
-       op.cmd = DOM0_UNPAUSEDOMAIN;
-       op.u.unpausedomain.domain = domid > 0 ? domid : -domid;
-       retval = do_dom0_op(xc_handle, &op);
-       break;
+        if ( request != PTRACE_SINGLESTEP )
+        {
+            FETCH_REGS(cpu);
+            /* Clear trace flag */
+            if ( ctxt[cpu].user_regs.eflags & PSL_T )
+            {
+                ctxt[cpu].user_regs.eflags &= ~PSL_T;
+                op.cmd = DOM0_SETDOMAININFO;
+                op.u.setdomaininfo.domain = domid;
+                op.u.setdomaininfo.vcpu = cpu;
+                op.u.setdomaininfo.ctxt = &ctxt[cpu];
+                retval = do_dom0_op(xc_handle, &op); 
+                if ( retval )
+                {
+                    perror("dom0 op failed");
+                    goto error_out;
+                }
+            }
+        }
+        regs_valid[cpu] = 0;
+        op.cmd = DOM0_UNPAUSEDOMAIN;
+        op.u.unpausedomain.domain = domid > 0 ? domid : -domid;
+        retval = do_dom0_op(xc_handle, &op);
+        break;
+
     case PTRACE_SETFPREGS:
     case PTRACE_SETFPXREGS:
     case PTRACE_PEEKUSER:
@@ -386,20 +459,33 @@
     case PTRACE_SYSCALL:
     case PTRACE_KILL:
 #ifdef DEBUG
-       printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
+        printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
 #endif
-       /* XXX not yet supported */
-       status = ENOSYS;
-       break;
+        /* XXX not yet supported */
+        status = ENOSYS;
+        break;
+
     case PTRACE_TRACEME:
-       printf("PTRACE_TRACEME is an invalid request under Xen\n");
-       status = EINVAL;
+        printf("PTRACE_TRACEME is an invalid request under Xen\n");
+        status = EINVAL;
     }
     
-    if (status) {
-       errno = status;
-       retval = -1;
-    }
+    if ( status )
+    {
+        errno = status;
+        retval = -1;
+    }
+
  error_out:
     return retval;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_ptrace_core.c      Thu Sep 22 17:42:01 2005
@@ -3,19 +3,14 @@
 #include "xc_private.h"
 #include <time.h>
 
-
-#define BSD_PAGE_MASK  (PAGE_SIZE-1)
-#define        PG_FRAME        (~((unsigned long)BSD_PAGE_MASK)
+#define BSD_PAGE_MASK (PAGE_SIZE-1)
 #define PDRSHIFT        22
-#define        PSL_T           0x00000100      /* trace enable bit */
-
 #define VCPU            0               /* XXX */
 
 /*
  * long  
  * ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
  */
-
 
 struct gdb_regs {
     long ebx; /* 0 */
@@ -38,44 +33,44 @@
 };
 
 #define printval(x) printf("%s = %lx\n", #x, (long)x);
-#define SET_PT_REGS(pt, xc) \
-{ \
-    pt.ebx = xc.ebx; \
-    pt.ecx = xc.ecx; \
-    pt.edx = xc.edx; \
-    pt.esi = xc.esi; \
-    pt.edi = xc.edi; \
-    pt.ebp = xc.ebp; \
-    pt.eax = xc.eax; \
-    pt.eip = xc.eip; \
-    pt.xcs = xc.cs; \
-    pt.eflags = xc.eflags; \
-    pt.esp = xc.esp; \
-    pt.xss = xc.ss; \
-    pt.xes = xc.es; \
-    pt.xds = xc.ds; \
-    pt.xfs = xc.fs; \
-    pt.xgs = xc.gs; \
-}
-
-#define SET_XC_REGS(pt, xc) \
-{ \
-    xc.ebx = pt->ebx; \
-    xc.ecx = pt->ecx; \
-    xc.edx = pt->edx; \
-    xc.esi = pt->esi; \
-    xc.edi = pt->edi; \
-    xc.ebp = pt->ebp; \
-    xc.eax = pt->eax; \
-    xc.eip = pt->eip; \
-    xc.cs = pt->xcs; \
-    xc.eflags = pt->eflags; \
-    xc.esp = pt->esp; \
-    xc.ss = pt->xss; \
-    xc.es = pt->xes; \
-    xc.ds = pt->xds; \
-    xc.fs = pt->xfs; \
-    xc.gs = pt->xgs; \
+#define SET_PT_REGS(pt, xc)                     \
+{                                               \
+    pt.ebx = xc.ebx;                            \
+    pt.ecx = xc.ecx;                            \
+    pt.edx = xc.edx;                            \
+    pt.esi = xc.esi;                            \
+    pt.edi = xc.edi;                            \
+    pt.ebp = xc.ebp;                            \
+    pt.eax = xc.eax;                            \
+    pt.eip = xc.eip;                            \
+    pt.xcs = xc.cs;                             \
+    pt.eflags = xc.eflags;                      \
+    pt.esp = xc.esp;                            \
+    pt.xss = xc.ss;                             \
+    pt.xes = xc.es;                             \
+    pt.xds = xc.ds;                             \
+    pt.xfs = xc.fs;                             \
+    pt.xgs = xc.gs;                             \
+}
+
+#define SET_XC_REGS(pt, xc)                     \
+{                                               \
+    xc.ebx = pt->ebx;                           \
+    xc.ecx = pt->ecx;                           \
+    xc.edx = pt->edx;                           \
+    xc.esi = pt->esi;                           \
+    xc.edi = pt->edi;                           \
+    xc.ebp = pt->ebp;                           \
+    xc.eax = pt->eax;                           \
+    xc.eip = pt->eip;                           \
+    xc.cs = pt->xcs;                            \
+    xc.eflags = pt->eflags;                     \
+    xc.esp = pt->esp;                           \
+    xc.ss = pt->xss;                            \
+    xc.es = pt->xes;                            \
+    xc.ds = pt->xds;                            \
+    xc.fs = pt->xfs;                            \
+    xc.gs = pt->xgs;                            \
 }
 
 
@@ -84,10 +79,9 @@
 
 /* XXX application state */
 
-
-static long                    nr_pages = 0;
-static unsigned long           *p2m_array = NULL;
-static unsigned long           *m2p_array = NULL;
+static long   nr_pages = 0;
+static unsigned long  *p2m_array = NULL;
+static unsigned long  *m2p_array = NULL;
 static unsigned long            pages_offset;
 static unsigned long            cr3[MAX_VIRT_CPUS];
 static vcpu_guest_context_t     ctxt[MAX_VIRT_CPUS];
@@ -117,54 +111,54 @@
 
     if (cr3[cpu] != cr3_phys[cpu]) 
     {
-       cr3_phys[cpu] = cr3[cpu];
-       if (cr3_virt[cpu])
-           munmap(cr3_virt[cpu], PAGE_SIZE);
-       v = mmap(
+        cr3_phys[cpu] = cr3[cpu];
+        if (cr3_virt[cpu])
+            munmap(cr3_virt[cpu], PAGE_SIZE);
+        v = mmap(
             NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
             map_mtop_offset(cr3_phys[cpu]));
         if (v == MAP_FAILED)
-       {
-           perror("mmap failed");
-           goto error_out;
-       }
+        {
+            perror("mmap failed");
+            goto error_out;
+        }
         cr3_virt[cpu] = v;
     } 
     if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
-       goto error_out;
+        goto error_out;
     if (ctxt[cpu].flags & VGCF_VMX_GUEST)
-       pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
+        pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
     if (pde != pde_phys[cpu]) 
     {
-       pde_phys[cpu] = pde;
-       if (pde_virt[cpu])
-           munmap(pde_virt[cpu], PAGE_SIZE);
-       v = mmap(
+        pde_phys[cpu] = pde;
+        if (pde_virt[cpu])
+            munmap(pde_virt[cpu], PAGE_SIZE);
+        v = mmap(
             NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
             map_mtop_offset(pde_phys[cpu]));
         if (v == MAP_FAILED)
-           goto error_out;
+            goto error_out;
         pde_virt[cpu] = v;
     }
     if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
-       goto error_out;
+        goto error_out;
     if (ctxt[cpu].flags & VGCF_VMX_GUEST)
-       page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
+        page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
     if (page != page_phys[cpu]) 
     {
-       page_phys[cpu] = page;
-       if (page_virt[cpu])
-           munmap(page_virt[cpu], PAGE_SIZE);
-       v = mmap(
+        page_phys[cpu] = page;
+        if (page_virt[cpu])
+            munmap(page_virt[cpu], PAGE_SIZE);
+        v = mmap(
             NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd,
             map_mtop_offset(page_phys[cpu]));
         if (v == MAP_FAILED) {
-           printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, vtopti(va));
-           page_phys[cpu] = 0;
-           goto error_out;
-       }
+            printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, vtopti(va));
+            page_phys[cpu] = 0;
+            goto error_out;
+        }
         page_virt[cpu] = v;
-    }  
+    } 
     return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK));
 
  error_out:
@@ -172,7 +166,11 @@
 }
 
 int 
-xc_waitdomain_core(int domfd, int *status, int options)
+xc_waitdomain_core(
+    int xc_handle,
+    int domfd,
+    int *status,
+    int options)
 {
     int retval = -1;
     int nr_vcpus;
@@ -181,37 +179,37 @@
 
     if (nr_pages == 0) {
 
-       if (read(domfd, &header, sizeof(header)) != sizeof(header))
-           return -1;
-
-       nr_pages = header.xch_nr_pages;
-       nr_vcpus = header.xch_nr_vcpus;
-       pages_offset = header.xch_pages_offset;
-
-       if (read(domfd, ctxt, sizeof(vcpu_guest_context_t)*nr_vcpus) != 
-           sizeof(vcpu_guest_context_t)*nr_vcpus)
-           return -1;
-
-       for (i = 0; i < nr_vcpus; i++) {
-           cr3[i] = ctxt[i].ctrlreg[3];
-       }
-       if ((p2m_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
-           printf("Could not allocate p2m_array\n");
-           goto error_out;
-       }
-       if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) != 
-           sizeof(unsigned long)*nr_pages)
-           return -1;
-
-       if ((m2p_array = malloc((1<<20) * sizeof(unsigned long))) == NULL) {
-           printf("Could not allocate m2p array\n");
-           goto error_out;
-       }
-       bzero(m2p_array, sizeof(unsigned long)* 1 << 20);
-
-       for (i = 0; i < nr_pages; i++) {
-           m2p_array[p2m_array[i]] = i;
-       }
+        if (read(domfd, &header, sizeof(header)) != sizeof(header))
+            return -1;
+
+        nr_pages = header.xch_nr_pages;
+        nr_vcpus = header.xch_nr_vcpus;
+        pages_offset = header.xch_pages_offset;
+
+        if (read(domfd, ctxt, sizeof(vcpu_guest_context_t)*nr_vcpus) != 
+            sizeof(vcpu_guest_context_t)*nr_vcpus)
+            return -1;
+
+        for (i = 0; i < nr_vcpus; i++) {
+            cr3[i] = ctxt[i].ctrlreg[3];
+        }
+        if ((p2m_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
+            printf("Could not allocate p2m_array\n");
+            goto error_out;
+        }
+        if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) != 
+            sizeof(unsigned long)*nr_pages)
+            return -1;
+
+        if ((m2p_array = malloc((1<<20) * sizeof(unsigned long))) == NULL) {
+            printf("Could not allocate m2p array\n");
+            goto error_out;
+        }
+        bzero(m2p_array, sizeof(unsigned long)* 1 << 20);
+
+        for (i = 0; i < nr_pages; i++) {
+            m2p_array[p2m_array[i]] = i;
+        }
 
     }
     retval = 0;
@@ -221,7 +219,12 @@
 }
 
 long
-xc_ptrace_core(enum __ptrace_request request, u32 domfd, long eaddr, long edata)
+xc_ptrace_core(
+    int xc_handle,
+    enum __ptrace_request request,
+    u32 domfd,
+    long eaddr,
+    long edata)
 {
     int             status = 0;
     struct gdb_regs pt;
@@ -234,38 +237,38 @@
 #if 0
     printf("%20s %d, %p, %p \n", ptrace_names[request], domid, addr, data);
 #endif
-    switch (request) { 
+    switch (request) { 
     case PTRACE_PEEKTEXT:
     case PTRACE_PEEKDATA:
-       if ((guest_va = (unsigned long *)map_domain_va(domfd, cpu, addr)) == NULL) {
-           status = EFAULT;
-           goto error_out;
-       }
-
-       retval = *guest_va;
-       break;
+        if ((guest_va = (unsigned long *)map_domain_va(domfd, cpu, addr)) == NULL) {
+            status = EFAULT;
+            goto error_out;
+        }
+
+        retval = *guest_va;
+        break;
     case PTRACE_POKETEXT:
     case PTRACE_POKEDATA:
-       if ((guest_va = (unsigned long *)map_domain_va(domfd, cpu, addr)) == NULL) {
-           status = EFAULT;
-           goto error_out;
-       }
-       *guest_va = (unsigned long)data;
-       break;
+        if ((guest_va = (unsigned long *)map_domain_va(domfd, cpu, addr)) == NULL) {
+            status = EFAULT;
+            goto error_out;
+        }
+        *guest_va = (unsigned long)data;
+        break;
     case PTRACE_GETREGS:
     case PTRACE_GETFPREGS:
     case PTRACE_GETFPXREGS:
-       if (request == PTRACE_GETREGS) {
-               SET_PT_REGS(pt, ctxt[cpu].user_regs); 
-               memcpy(data, &pt, sizeof(struct gdb_regs));
-       } else if (request == PTRACE_GETFPREGS)
-           memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
-       else /*if (request == PTRACE_GETFPXREGS)*/
-           memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
-       break;
+        if (request == PTRACE_GETREGS) {
+            SET_PT_REGS(pt, ctxt[cpu].user_regs); 
+            memcpy(data, &pt, sizeof(struct gdb_regs));
+        } else if (request == PTRACE_GETFPREGS)
+            memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+        else /*if (request == PTRACE_GETFPXREGS)*/
+            memcpy(data, &ctxt[cpu].fpu_ctxt, sizeof(ctxt[cpu].fpu_ctxt));
+        break;
     case PTRACE_ATTACH:
-       retval = 0;
-       break;
+        retval = 0;
+        break;
     case PTRACE_SETREGS:
     case PTRACE_SINGLESTEP:
     case PTRACE_CONT:
@@ -277,19 +280,29 @@
     case PTRACE_SYSCALL:
     case PTRACE_KILL:
 #ifdef DEBUG
-       printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
+        printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
 #endif
-       status = ENOSYS;
-       break;
+        status = ENOSYS;
+        break;
     case PTRACE_TRACEME:
-       printf("PTRACE_TRACEME is an invalid request under Xen\n");
-       status = EINVAL;
+        printf("PTRACE_TRACEME is an invalid request under Xen\n");
+        status = EINVAL;
     }
     
     if (status) {
-       errno = status;
-       retval = -1;
+        errno = status;
+        retval = -1;
     }
  error_out:
     return retval;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xc_vmx_build.c
--- a/tools/libxc/xc_vmx_build.c        Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xc_vmx_build.c        Thu Sep 22 17:42:01 2005
@@ -107,11 +107,38 @@
     mem_mapp->nr_map = nr_map;
 }
 
+/*
+ * Use E820 reserved memory 0x9F800 to pass number of vcpus to vmxloader
+ * vmxloader will use it to config ACPI MADT table
+ */
+#define VCPU_MAGIC 0x76637075 /* "vcpu" */
+static int 
+set_nr_vcpus(int xc_handle, u32 dom, unsigned long *pfn_list, 
+             struct domain_setup_info *dsi, unsigned long vcpus)
+{
+    char          *va_map;
+    unsigned long *va_vcpus;
+    
+    va_map = xc_map_foreign_range(
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+        pfn_list[(0x9F000 - dsi->v_start) >> PAGE_SHIFT]);    
+    if ( va_map == NULL )
+        return -1;
+    
+    va_vcpus = (unsigned long *)(va_map + 0x800);
+    *va_vcpus++ = VCPU_MAGIC;
+    *va_vcpus++ = vcpus;
+
+    munmap(va_map, PAGE_SIZE);
+
+    return 0;
+}
+
 #ifdef __i386__
 static int zap_mmio_range(int xc_handle, u32 dom,
-                            l2_pgentry_32_t *vl2tab,
-                            unsigned long mmio_range_start,
-                            unsigned long mmio_range_size)
+                          l2_pgentry_32_t *vl2tab,
+                          unsigned long mmio_range_start,
+                          unsigned long mmio_range_size)
 {
     unsigned long mmio_addr;
     unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
@@ -123,12 +150,14 @@
         vl2e = vl2tab[l2_table_offset(mmio_addr)];
         if (vl2e == 0)
             continue;
-        vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
-       if (vl1tab == 0) {
-           PERROR("Failed zap MMIO range");
-           return -1;
-       }
+        vl1tab = xc_map_foreign_range(
+            xc_handle, dom, PAGE_SIZE,
+            PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
+        if ( vl1tab == 0 )
+        {
+            PERROR("Failed zap MMIO range");
+            return -1;
+        }
         vl1tab[l1_table_offset(mmio_addr)] = 0;
         munmap(vl1tab, PAGE_SIZE);
     }
@@ -136,114 +165,118 @@
 }
 
 static int zap_mmio_ranges(int xc_handle, u32 dom,
-                            unsigned long l2tab,
-                            struct mem_map *mem_mapp)
+                           unsigned long l2tab,
+                           struct mem_map *mem_mapp)
 {
     int i;
     l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                PROT_READ|PROT_WRITE,
-                                                l2tab >> PAGE_SHIFT);
-    if (vl2tab == 0)
-       return -1;
-    for (i = 0; i < mem_mapp->nr_map; i++) {
-        if ((mem_mapp->map[i].type == E820_IO)
-          && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
-            if (zap_mmio_range(xc_handle, dom, vl2tab,
-                       mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
-               return -1;
-    }
+                                                   PROT_READ|PROT_WRITE,
+                                                   l2tab >> PAGE_SHIFT);
+    if ( vl2tab == 0 )
+        return -1;
+
+    for ( i = 0; i < mem_mapp->nr_map; i++ )
+    {
+        if ( (mem_mapp->map[i].type == E820_IO) &&
+             (mem_mapp->map[i].caching_attr == MEMMAP_UC) &&
+             (zap_mmio_range(xc_handle, dom, vl2tab,
+                             mem_mapp->map[i].addr,
+                             mem_mapp->map[i].size) == -1) )
+            return -1;
+    }
+
     munmap(vl2tab, PAGE_SIZE);
     return 0;
 }
 #else
 static int zap_mmio_range(int xc_handle, u32 dom,
-                           l3_pgentry_t *vl3tab,
-                           unsigned long mmio_range_start,
-                           unsigned long mmio_range_size)
-{
-   unsigned long mmio_addr;
-   unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
-   unsigned long vl2e = 0;
-   unsigned long vl3e;
-   l1_pgentry_t *vl1tab;
-   l2_pgentry_t *vl2tab;
+                          l3_pgentry_t *vl3tab,
+                          unsigned long mmio_range_start,
+                          unsigned long mmio_range_size)
+{
+    unsigned long mmio_addr;
+    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
+    unsigned long vl2e = 0;
+    unsigned long vl3e;
+    l1_pgentry_t *vl1tab;
+    l2_pgentry_t *vl2tab;
  
-   mmio_addr = mmio_range_start & PAGE_MASK;
-   for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
-   {
-       vl3e = vl3tab[l3_table_offset(mmio_addr)];
-       if ( vl3e == 0 )
-           continue;
-
-       vl2tab = xc_map_foreign_range(
-           xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
-       if ( vl2tab == NULL )
-       {
-           PERROR("Failed zap MMIO range");
-           return -1;
-       }
-
-       vl2e = vl2tab[l2_table_offset(mmio_addr)];
-       if ( vl2e == 0 )
-       {
-           munmap(vl2tab, PAGE_SIZE);
-           continue;
-       }
-
-       vl1tab = xc_map_foreign_range(
-           xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
-       if ( vl1tab == NULL )
-       {
-           PERROR("Failed zap MMIO range");
-           munmap(vl2tab, PAGE_SIZE);
-           return -1;
-       }
-
-       vl1tab[l1_table_offset(mmio_addr)] = 0;
-       munmap(vl2tab, PAGE_SIZE);
-       munmap(vl1tab, PAGE_SIZE);
-   }
-   return 0;
+    mmio_addr = mmio_range_start & PAGE_MASK;
+    for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
+    {
+        vl3e = vl3tab[l3_table_offset(mmio_addr)];
+        if ( vl3e == 0 )
+            continue;
+
+        vl2tab = xc_map_foreign_range(
+            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
+        if ( vl2tab == NULL )
+        {
+            PERROR("Failed zap MMIO range");
+            return -1;
+        }
+
+        vl2e = vl2tab[l2_table_offset(mmio_addr)];
+        if ( vl2e == 0 )
+        {
+            munmap(vl2tab, PAGE_SIZE);
+            continue;
+        }
+
+        vl1tab = xc_map_foreign_range(
+            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
+        if ( vl1tab == NULL )
+        {
+            PERROR("Failed zap MMIO range");
+            munmap(vl2tab, PAGE_SIZE);
+            return -1;
+        }
+
+        vl1tab[l1_table_offset(mmio_addr)] = 0;
+        munmap(vl2tab, PAGE_SIZE);
+        munmap(vl1tab, PAGE_SIZE);
+    }
+    return 0;
 }
 
 static int zap_mmio_ranges(int xc_handle, u32 dom,
                            unsigned long l3tab,
                            struct mem_map *mem_mapp)
 {
-   int i;
-   l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                               PROT_READ|PROT_WRITE,
-                                               l3tab >> PAGE_SHIFT);
-   if (vl3tab == 0)
-       return -1;
-   for (i = 0; i < mem_mapp->nr_map; i++) {
-       if ((mem_mapp->map[i].type == E820_IO)
-         && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
-           if (zap_mmio_range(xc_handle, dom, vl3tab,
-                       mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
-               return -1;
-   }
-   munmap(vl3tab, PAGE_SIZE);
-   return 0;
+    int i;
+    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                l3tab >> PAGE_SHIFT);
+    if (vl3tab == 0)
+        return -1;
+    for (i = 0; i < mem_mapp->nr_map; i++) {
+        if ((mem_mapp->map[i].type == E820_IO)
+            && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
+            if (zap_mmio_range(xc_handle, dom, vl3tab,
+                               mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
+                return -1;
+    }
+    munmap(vl3tab, PAGE_SIZE);
+    return 0;
 }
 
 #endif
 
 static int setup_guest(int xc_handle,
-                         u32 dom, int memsize,
-                         char *image, unsigned long image_size,
-                         gzFile initrd_gfd, unsigned long initrd_len,
-                         unsigned long nr_pages,
-                         vcpu_guest_context_t *ctxt,
-                         const char *cmdline,
-                         unsigned long shared_info_frame,
-                         unsigned int control_evtchn,
-                         unsigned long flags,
-                         unsigned int vcpus,
-                         unsigned int store_evtchn,
-                         unsigned long *store_mfn,
-                         struct mem_map *mem_mapp
-                         )
+                       u32 dom, int memsize,
+                       char *image, unsigned long image_size,
+                       gzFile initrd_gfd, unsigned long initrd_len,
+                       unsigned long nr_pages,
+                       vcpu_guest_context_t *ctxt,
+                       const char *cmdline,
+                       unsigned long shared_info_frame,
+                       unsigned int control_evtchn,
+                       unsigned long flags,
+                       unsigned int vcpus,
+                       unsigned int store_evtchn,
+                       unsigned long *store_mfn,
+                       struct mem_map *mem_mapp
+    )
 {
     l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
     l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -303,7 +336,8 @@
 
     /* memsize is in megabytes */
     v_end              = memsize << 20;
-    vinitrd_end        = v_end - PAGE_SIZE; /* leaving the top 4k untouched for IO requests page use */
+    /* leaving the top 4k untouched for IO requests page use */
+    vinitrd_end        = v_end - PAGE_SIZE;
     vinitrd_start      = vinitrd_end - initrd_len;
     vinitrd_start      = vinitrd_start & (~(PAGE_SIZE - 1));
 
@@ -369,16 +403,28 @@
                 goto error_out;
             }
             xc_copy_to_domain_page(xc_handle, dom,
-                                page_array[i>>PAGE_SHIFT], page);
+                                   page_array[i>>PAGE_SHIFT], page);
         }
     }
 
     if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
         goto error_out;
 
+    /* First allocate page for page dir or pdpt */
+    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
+    if ( page_array[ppt_alloc] > 0xfffff )
+    {
+        unsigned long nmfn;
+        nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
+        if ( nmfn == 0 )
+        {
+            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
+            goto error_out;
+        }
+        page_array[ppt_alloc] = nmfn;
+    }
+
 #ifdef __i386__
-    /* First allocate page for page dir. */
-    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
     l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
     ctxt->ctrlreg[3] = l2tab;
 
@@ -414,8 +460,6 @@
     munmap(vl1tab, PAGE_SIZE);
     munmap(vl2tab, PAGE_SIZE);
 #else
-    /* First allocate pdpt */
-    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
     /* here l3tab means pdpt, only 4 entry is used */
     l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
     ctxt->ctrlreg[3] = l3tab;
@@ -438,8 +482,8 @@
                 munmap(vl2tab, PAGE_SIZE);
 
             if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                      PROT_READ|PROT_WRITE,
-                      l2tab >> PAGE_SHIFT)) == NULL )
+                                                PROT_READ|PROT_WRITE,
+                                                l2tab >> PAGE_SHIFT)) == NULL )
                 goto error_out;
 
             memset(vl2tab, 0, PAGE_SIZE);
@@ -452,8 +496,8 @@
             if ( vl1tab != NULL )
                 munmap(vl1tab, PAGE_SIZE);
             if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                      PROT_READ|PROT_WRITE,
-                      l1tab >> PAGE_SHIFT)) == NULL )
+                                                PROT_READ|PROT_WRITE,
+                                                l1tab >> PAGE_SHIFT)) == NULL )
             {
                 munmap(vl2tab, PAGE_SIZE);
                 goto error_out;
@@ -475,15 +519,16 @@
     for ( count = 0; count < nr_pages; count++ )
     {
         if ( xc_add_mmu_update(xc_handle, mmu,
-                              (page_array[count] << PAGE_SHIFT) | 
-                              MMU_MACHPHYS_UPDATE, count) )
-           goto error_out;
-    }
-    
+                               (page_array[count] << PAGE_SHIFT) | 
+                               MMU_MACHPHYS_UPDATE, count) )
+            goto error_out;
+    }
+
+    set_nr_vcpus(xc_handle, dom, page_array, &dsi, vcpus);
 
     if ((boot_paramsp = xc_map_foreign_range(
-               xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-               page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+        page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
         goto error_out;
 
     memset(boot_paramsp, 0, sizeof(*boot_paramsp));
@@ -548,9 +593,9 @@
 #if defined (__i386__)
     if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
 #else
-    if (zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1)
+        if (zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1)
 #endif
-       goto error_out;
+            goto error_out;
     boot_paramsp->e820_map_nr = mem_mapp->nr_map;
     for (i=0; i<mem_mapp->nr_map; i++) {
         boot_paramsp->e820_map[i].addr = mem_mapp->map[i].addr; 
@@ -562,9 +607,9 @@
     munmap(boot_paramsp, PAGE_SIZE); 
 
     if ((boot_gdtp = xc_map_foreign_range(
-               xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-               page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
-       goto error_out;
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+        page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
+        goto error_out;
     memset(boot_gdtp, 0, PAGE_SIZE);
     boot_gdtp[12*4 + 0] = boot_gdtp[13*4 + 0] = 0xffff; /* limit */
     boot_gdtp[12*4 + 1] = boot_gdtp[13*4 + 1] = 0x0000; /* base */
@@ -574,20 +619,24 @@
 
     /* shared_info page starts its life empty. */
     if ((shared_info = xc_map_foreign_range(
-               xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-               shared_info_frame)) == 0)
-       goto error_out;
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+        shared_info_frame)) == 0)
+        goto error_out;
     memset(shared_info, 0, sizeof(shared_info_t));
     /* Mask all upcalls... */
     for ( i = 0; i < MAX_VIRT_CPUS; i++ )
         shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+    shared_info->n_vcpu = vcpus;
+    printf(" VCPUS:         %d\n", shared_info->n_vcpu);
+
     munmap(shared_info, PAGE_SIZE);
 
     /* Populate the event channel port in the shared page */
     if ((sp = (shared_iopage_t *) xc_map_foreign_range(
-               xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-               page_array[shared_page_frame])) == 0)
-       goto error_out;
+        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+        page_array[shared_page_frame])) == 0)
+        goto error_out;
     memset(sp, 0, PAGE_SIZE);
     sp->sp_global.eport = control_evtchn;
     munmap(sp, PAGE_SIZE);
@@ -612,7 +661,7 @@
     ctxt->user_regs.edx = vboot_gdt_start;
     ctxt->user_regs.eax = 0x800;
     ctxt->user_regs.esp = vboot_gdt_end;
-    ctxt->user_regs.ebx = 0;   /* startup_32 expects this to be 0 to signal boot cpu */
+    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
     ctxt->user_regs.ecx = mem_mapp->nr_map;
     ctxt->user_regs.esi = vboot_params_start;
     ctxt->user_regs.edi = vboot_params_start + 0x2d0;
@@ -636,9 +685,9 @@
 
 #ifdef __i386__
     __asm__ __volatile__ ("pushl %%ebx; cpuid; popl %%ebx" 
-                         : "=a" (eax), "=c" (ecx) 
-                         : "0" (1) 
-                         : "dx");
+                          : "=a" (eax), "=c" (ecx) 
+                          : "0" (1) 
+                          : "dx");
 #elif defined __x86_64__
     __asm__ __volatile__ ("pushq %%rbx; cpuid; popq %%rbx"
                           : "=a" (eax), "=c" (ecx)
@@ -653,17 +702,17 @@
 }
 
 int xc_vmx_build(int xc_handle,
-                   u32 domid,
-                   int memsize,
-                   const char *image_name,
-                   struct mem_map *mem_mapp,
-                   const char *ramdisk_name,
-                   const char *cmdline,
-                   unsigned int control_evtchn,
-                   unsigned long flags,
-                   unsigned int vcpus,
-                   unsigned int store_evtchn,
-                   unsigned long *store_mfn)
+                 u32 domid,
+                 int memsize,
+                 const char *image_name,
+                 struct mem_map *mem_mapp,
+                 const char *ramdisk_name,
+                 const char *cmdline,
+                 unsigned int control_evtchn,
+                 unsigned long flags,
+                 unsigned int vcpus,
+                 unsigned int store_evtchn,
+                 unsigned long *store_mfn)
 {
     dom0_op_t launch_op, op;
     int initrd_fd = -1;
@@ -735,11 +784,11 @@
     }
 
     if ( setup_guest(xc_handle, domid, memsize, image, image_size, 
-                       initrd_gfd, initrd_size, nr_pages, 
-                       ctxt, cmdline,
-                       op.u.getdomaininfo.shared_info_frame,
-                       control_evtchn, flags, vcpus, store_evtchn, store_mfn,
-                       mem_mapp) < 0 )
+                     initrd_gfd, initrd_size, nr_pages, 
+                     ctxt, cmdline,
+                     op.u.getdomaininfo.shared_info_frame,
+                     control_evtchn, flags, vcpus, store_evtchn, store_mfn,
+                     mem_mapp) < 0 )
     {
         ERROR("Error constructing guest OS");
         goto error_out;
@@ -770,8 +819,8 @@
 
     /* Ring 1 stack is the initial stack. */
 /*
-    ctxt->kernel_ss = FLAT_KERNEL_DS;
-    ctxt->kernel_sp = vstartinfo_start;
+  ctxt->kernel_ss = FLAT_KERNEL_DS;
+  ctxt->kernel_sp = vstartinfo_start;
 */
     /* No debugging. */
     memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
@@ -851,7 +900,7 @@
         return -EINVAL;
     }
     shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff + 
-                        (ehdr->e_shstrndx*ehdr->e_shentsize));
+                          (ehdr->e_shstrndx*ehdr->e_shentsize));
     shstrtab = elfbase + shdr->sh_offset;
     
     for ( h = 0; h < ehdr->e_phnum; h++ ) 
@@ -906,9 +955,9 @@
         {
             pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
             if ((va = xc_map_foreign_range(
-                       xch, dom, PAGE_SIZE, PROT_WRITE,
-                       parray[pa>>PAGE_SHIFT])) == 0)
-               return -1;
+                xch, dom, PAGE_SIZE, PROT_WRITE,
+                parray[pa>>PAGE_SHIFT])) == 0)
+                return -1;
             chunksz = phdr->p_filesz - done;
             if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                 chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
@@ -921,9 +970,9 @@
         {
             pa = (phdr->p_paddr + done) - dsi->v_start - LINUX_PAGE_OFFSET;
             if ((va = xc_map_foreign_range(
-                       xch, dom, PAGE_SIZE, PROT_WRITE,
-                       parray[pa>>PAGE_SHIFT])) == 0)
-               return -1;
+                xch, dom, PAGE_SIZE, PROT_WRITE,
+                parray[pa>>PAGE_SHIFT])) == 0)
+                return -1;
             chunksz = phdr->p_memsz - done;
             if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
                 chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
@@ -934,3 +983,13 @@
 
     return 0;
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Thu Sep 22 17:34:14 2005
+++ b/tools/libxc/xenctrl.h     Thu Sep 22 17:42:01 2005
@@ -101,23 +101,31 @@
 } xc_core_header_t;
 
 
-long xc_ptrace(enum __ptrace_request request, 
-               u32  domid,
-               long addr, 
-               long data);
-
-long xc_ptrace_core(enum __ptrace_request request, 
-                    u32 domid, 
-                    long addr, 
-                    long data);
-
-int xc_waitdomain(int domain, 
-                  int *status, 
-                  int options);
-
-int xc_waitdomain_core(int domain, 
-                       int *status, 
-                       int options);
+long xc_ptrace(
+    int xc_handle,
+    enum __ptrace_request request, 
+    u32  domid,
+    long addr, 
+    long data);
+
+long xc_ptrace_core(
+    int xc_handle,
+    enum __ptrace_request request, 
+    u32 domid, 
+    long addr, 
+    long data);
+
+int xc_waitdomain(
+    int xc_handle,
+    int domain, 
+    int *status, 
+    int options);
+
+int xc_waitdomain_core(
+    int xc_handle,
+    int domain, 
+    int *status, 
+    int options);
 
 /*
  * DOMAIN MANAGEMENT FUNCTIONS
diff -r 97dbd9524a7e -r 06d84bf87159 tools/misc/xend
--- a/tools/misc/xend   Thu Sep 22 17:34:14 2005
+++ b/tools/misc/xend   Thu Sep 22 17:42:01 2005
@@ -86,9 +86,6 @@
     daemon = SrvDaemon.instance()
     if not sys.argv[1:]:
         print 'usage: %s {start|stop|restart}' % sys.argv[0]
-    elif os.fork():
-        pid, status = os.wait()
-        return status >> 8
     elif sys.argv[1] == 'start':
         start_xenstored()
         start_consoled()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/pylintrc
--- a/tools/python/pylintrc     Thu Sep 22 17:34:14 2005
+++ b/tools/python/pylintrc     Thu Sep 22 17:42:01 2005
@@ -74,7 +74,7 @@
 init-import=no
 
 # List of variable names used for dummy variables (i.e. not used).
-dummy-variables=_,dummy
+dummy-variables=_,_1,_2,_3,_4,_5,dummy
 
 
 
@@ -131,7 +131,7 @@
 bad-names=foo,bar,baz,toto,tutu,tata
 
 # List of builtins function names that should not be used, separated by a comma
-bad-functions=map,filter,apply,input
+bad-functions=apply,input
 
 
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/lowlevel/xc/xc.c Thu Sep 22 17:42:01 2005
@@ -220,7 +220,13 @@
         return PyErr_NoMemory();
 
     nr_doms = xc_domain_getinfo(xc->xc_handle, first_dom, max_doms, info);
-    
+
+    if (nr_doms < 0)
+    {
+        free(info);
+        return PyErr_SetFromErrno(xc_error);
+    }
+
     list = PyList_New(nr_doms);
     for ( i = 0 ; i < nr_doms; i++ )
     {
@@ -844,7 +850,7 @@
     XcObject *xc = (XcObject *)self;
 
     u32 dom;
-    unsigned long maxmem_kb;
+    unsigned int maxmem_kb;
 
     static char *kwd_list[] = { "dom", "maxmem_kb", NULL };
 
@@ -1175,7 +1181,7 @@
       METH_VARARGS | METH_KEYWORDS, "\n"
       "Set a domain's memory limit\n"
       " dom [int]: Identifier of domain.\n"
-      " maxmem_kb [long]: .\n"
+      " maxmem_kb [int]: .\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
     { "domain_memory_increase_reservation", 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/lowlevel/xs/xs.c Thu Sep 22 17:42:01 2005
@@ -116,8 +116,6 @@
        "Write data to a path.\n"                               \
        " path   [string] : xenstore path to write to\n."       \
        " data   [string] : data to write.\n"                   \
-       " create [int]    : create flag, default 0.\n"          \
-       " excl   [int]    : exclusive flag, default 0.\n"       \
        "\n"                                                    \
        "Returns None on success.\n"                            \
        "Raises RuntimeError on error.\n"                       \
@@ -125,30 +123,23 @@
 
 static PyObject *xspy_write(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *kwd_spec[] = { "path", "data", "create", "excl", NULL };
-    static char *arg_spec = "ss#|ii";
+    static char *kwd_spec[] = { "path", "data", NULL };
+    static char *arg_spec = "ss#";
     char *path = NULL;
     char *data = NULL;
     int data_n = 0;
-    int create = 0;
-    int excl = 0;
-
-    struct xs_handle *xh = xshandle(self);
-    PyObject *val = NULL;
-    int flags = 0;
+
+    struct xs_handle *xh = xshandle(self);
+    PyObject *val = NULL;
     int xsval = 0;
 
     if (!xh)
         goto exit;
     if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
-                                     &path, &data, &data_n, &create, &excl))
-        goto exit;
-    if (create)
-        flags |= O_CREAT;
-    if (excl)
-        flags |= O_EXCL;
-    Py_BEGIN_ALLOW_THREADS
-    xsval = xs_write(xh, path, data, data_n, flags);
+                                     &path, &data, &data_n))
+        goto exit;
+    Py_BEGIN_ALLOW_THREADS
+    xsval = xs_write(xh, path, data, data_n);
     Py_END_ALLOW_THREADS
     if (!xsval) {
         PyErr_SetFromErrno(PyExc_RuntimeError);
@@ -808,6 +799,48 @@
     }
     Py_INCREF(Py_None);
     val = Py_None;
+ exit:
+    return val;
+}
+
+#define xspy_get_domain_path_doc "\n"                  \
+       "Return store path of domain.\n"                \
+       " domid [int]: domain id\n"                     \
+       "\n"                                            \
+       "Returns: [string] domain store path.\n"        \
+       "         None if domid doesn't exist.\n"       \
+       "Raises RuntimeError on error.\n"               \
+       "\n"
+
+static PyObject *xspy_get_domain_path(PyObject *self, PyObject *args,
+                                     PyObject *kwds)
+{
+    static char *kwd_spec[] = { "domid", NULL };
+    static char *arg_spec = "i"; /* a single required int: domid */
+    int domid = 0;
+
+    struct xs_handle *xh = xshandle(self);
+    char *xsval = NULL;
+    PyObject *val = NULL; /* stays NULL on every failure path below */
+
+    if (!xh)
+        goto exit;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec,
+                                     &domid))
+        goto exit;
+    Py_BEGIN_ALLOW_THREADS /* release the interpreter lock around the store call */
+    xsval = xs_get_domain_path(xh, domid);
+    Py_END_ALLOW_THREADS
+    if (!xsval) {
+        if (errno == ENOENT) { /* no such domain: return None instead of raising */
+            Py_INCREF(Py_None);
+            val = Py_None;
+        } else
+            PyErr_SetFromErrno(PyExc_RuntimeError); /* any other errno becomes RuntimeError */
+        goto exit;
+    }
+    val = PyString_FromString(xsval); /* copies the path into a new Python string */
+    free(xsval); /* the C buffer is no longer needed once copied */
 exit:
     return val;
 }
@@ -858,6 +891,7 @@
      XSPY_METH(release_domain),
      XSPY_METH(close),
      XSPY_METH(shutdown),
+     XSPY_METH(get_domain_path),
      XSPY_METH(fileno),
      { /* Terminator. */ },
 };
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/sv/Main.py
--- a/tools/python/xen/sv/Main.py       Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/sv/Main.py       Thu Sep 22 17:42:01 2005
@@ -1,5 +1,4 @@
 
-from xen.sv.HTMLBase import HTMLBase
 from xen.sv.NodeInfo import NodeInfo
 from xen.sv.DomInfo  import DomInfo
 from xen.sv.CreateDomain import CreateDomain
@@ -33,15 +32,8 @@
             result.append( (key, self.fieldStorage.getlist( key ) ) )
         return result
                                                                                
                                                                             
-class TwistedAdapter:
-    def __init__( self, req ):
-        self.args = Args( req )
-        self.uri = req.unparsed_uri
-        self.url = req.uri
-        self.write = req.write
-
 # This is the Main class
-# It peices together all the modules
+# It pieces together all the modules
 
 class Main:
     def __init__( self ):
@@ -61,7 +53,7 @@
             self.init_modules( request )
             self.init_done = True
             
-        for moduleName, module in self.modules.iteritems():
+        for _, module in self.modules.iteritems():
             module.write_MENU( request )
             request.write( "\n" )
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/sv/Wizard.py
--- a/tools/python/xen/sv/Wizard.py     Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/sv/Wizard.py     Thu Sep 22 17:42:01 2005
@@ -47,7 +47,7 @@
     def __init__( self, urlWriter, title, location ):
         HTMLBase.__init__( self )
         self.urlWriter = urlWriter
-        self.feilds = []
+        self.fields = []
         self.title = title
         self.location = location
         self.passback = None
@@ -86,9 +86,9 @@
         
         request.write( "<table width='100%' cellpadding='0' cellspacing='1' border='0'>" )
         
-       for (feild, control) in self.feilds:
-            control.write_Control( request, previous_values.get( feild ) )
-            if previous_values.get( feild ) is not None and not control.validate( previous_values.get( feild ) ):
+       for (field, control) in self.fields:
+            control.write_Control( request, previous_values.get( field ) )
+            if previous_values.get( field ) is not None and not control.validate( previous_values.get( field ) ):
                control.write_Help( request )
             
         request.write( "</table>" )
@@ -97,7 +97,7 @@
         #request.write( "<input type='hidden' name='visited-sheet%s' value='True'></p>" % self.location )
                 
     def addControl( self, control ):
-       self.feilds.append( [ control.getName(), control ] )
+       self.fields.append( [ control.getName(), control ] )
         
     def validate( self, request ):
     
@@ -108,10 +108,10 @@
         previous_values = ssxp2hash( string2sxp( self.passback ) ) #get the map for quick reference
        if DEBUG: print previous_values
       
-       for (feild, control) in self.feilds:
-            if not control.validate( previous_values.get( feild ) ):
+       for (field, control) in self.fields:
+            if not control.validate( previous_values.get( field ) ):
                 check = False
-                if DEBUG: print "> %s = %s" % (feild, previous_values.get( feild ))
+                if DEBUG: print "> %s = %s" % (field, previous_values.get( field ))
 
         return check
         
@@ -143,7 +143,7 @@
         
 class InputControl( SheetControl ):
 
-    def __init__( self, name, defaultValue, humanText,  reg_exp = ".*", help_text = "You must enter the appropriate details in this feild." ):
+    def __init__( self, name, defaultValue, humanText,  reg_exp = ".*", help_text = "You must enter the appropriate details in this field." ):
         SheetControl.__init__( self, reg_exp )
         self.setName( name )
         
@@ -206,7 +206,7 @@
         
 class FileControl( InputControl ):
 
-    def __init__( self, name, defaultValue, humanText,  reg_exp = ".*", help_text = "You must enter the appropriate details in this feild." ):
+    def __init__( self, name, defaultValue, humanText,  reg_exp = ".*", help_text = "You must enter the appropriate details in this field." ):
        InputControl.__init__( self, name, defaultValue, humanText )
         
     def validate( self, persistedValue ):
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/util/process.py
--- a/tools/python/xen/util/process.py  Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/util/process.py  Thu Sep 22 17:42:01 2005
@@ -24,6 +24,8 @@
         r = p.poll()
         for (fd, event) in r:
             if event == select.POLLHUP:
+                cout.close()
+                cerr.close()
                 return stdout
             if fd == cout.fileno():
                 stdout = stdout + cout.readline()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/web/httpserver.py
--- a/tools/python/xen/web/httpserver.py        Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/web/httpserver.py        Thu Sep 22 17:42:01 2005
@@ -273,6 +273,9 @@
         self.interface = interface
         self.port = port
         self.root = root
+        # ready indicates when we are ready to begin accept connections
+        # it should be set after a successful bind
+        self.ready = False
 
     def getRoot(self):
         return self.root
@@ -283,6 +286,7 @@
     def run(self):
         self.bind()
         self.listen()
+        self.ready = True
         self.requestLoop()
 
     def stop(self):
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/web/tcp.py
--- a/tools/python/xen/web/tcp.py       Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/web/tcp.py       Thu Sep 22 17:42:01 2005
@@ -18,6 +18,8 @@
 import sys
 import socket
 import types
+import time
+import errno
 
 from connection import *
 from protocol import *
@@ -35,9 +37,20 @@
     def createSocket(self):
         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-        addr = (self.interface, self.port)
-        sock.bind(addr)
-        return sock
+
+        # SO_REUSEADDR does not always ensure that we do not get an address
+        # in use error when restarted quickly
+        # we implement a timeout to try and avoid failing unnecessarily
+        timeout = time.time() + 30
+        while True:
+            try:
+                sock.bind((self.interface, self.port))
+                return sock
+            except socket.error, (_errno, strerrno):
+                if _errno == errno.EADDRINUSE and time.time() < timeout:
+                    time.sleep(0.5)
+                else:
+                    raise
 
     def acceptConnection(self, sock, protocol, addr):
         return TCPServerConnection(sock, protocol, addr, self)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/Args.py
--- a/tools/python/xen/xend/Args.py     Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/Args.py     Thu Sep 22 17:42:01 2005
@@ -32,12 +32,12 @@
         self.arg_dict = {}
         self.key_ord = []
         self.key_dict = {}
-        for (name, type) in paramspec:
+        for (name, typ) in paramspec:
                 self.arg_ord.append(name)
-                self.arg_dict[name] = type
-        for (name, type) in keyspec:
+                self.arg_dict[name] = typ
+        for (name, typ) in keyspec:
                 self.key_ord.append(name)
-                self.key_dict[name] = type
+                self.key_dict[name] = typ
 
     def get_args(self, d, xargs=None):
         args = {}
@@ -56,12 +56,12 @@
     def split_args(self, d, args, keys):
         for (k, v) in d.items():
             if k in self.arg_dict:
-                type = self.arg_dict[k]
-                val = self.coerce(type, v)
+                typ = self.arg_dict[k]
+                val = self.coerce(typ, v)
                 args[k] = val
             elif k in self.key_dict:
-                type = self.key_dict[k]
-                val = self.coerce(type, v)
+                typ = self.key_dict[k]
+                val = self.coerce(typ, v)
                 keys[k] = val
             else:
                 raise ArgError('Invalid parameter: %s' % k)
@@ -85,20 +85,20 @@
             d[k] = val
         return self.get_args(d, xargs=xargs)
 
-    def coerce(self, type, v):
+    def coerce(self, typ, v):
         try:
-            if type == 'int':
+            if typ == 'int':
                 val = int(v)
-            elif type == 'long':
+            elif typ == 'long':
                 val = long(v)
-            elif type == 'str':
+            elif typ == 'str':
                 val = str(v)
-            elif type == 'sxpr':
+            elif typ == 'sxpr':
                 val = self.sxpr(v)
-            elif type == 'bool':
+            elif typ == 'bool':
                 val = self.bool(v)
             else:
-                raise ArgError('invalid type:' + str(type))
+                raise ArgError('invalid type:' + str(typ))
             return val
         except ArgError:
             raise
@@ -142,7 +142,9 @@
     Used on the client.
     """
 
-    def __init__(self, fn, paramspec, keyspec={}):
+    def __init__(self, fn, paramspec, keyspec = None):
+        if keyspec == None:
+            keyspec = {}
         Args.__init__(self, paramspec, keyspec)
         self.fn = fn
 
@@ -154,7 +156,9 @@
     Used in the HTTP server.
     """
 
-    def __init__(self, fn, paramspec, keyspec={}):
+    def __init__(self, fn, paramspec, keyspec = None):
+        if keyspec == None:
+            keyspec = {}
         Args.__init__(self, paramspec, keyspec)
         self.fn = fn
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/EventServer.py
--- a/tools/python/xen/xend/EventServer.py      Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/EventServer.py      Thu Sep 22 17:42:01 2005
@@ -145,7 +145,7 @@
             self.lock.release()
             
         if async:
-            scheduler.now(self.call_handlers, [event, val])
+            scheduler.now(self.call_handlers, event, val)
         else:
             self.call_handlers(event, val)
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/PrettyPrint.py
--- a/tools/python/xen/xend/PrettyPrint.py      Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/PrettyPrint.py      Thu Sep 22 17:42:01 2005
@@ -13,6 +13,7 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
 #============================================================================
 
 """General pretty-printer, including support for SXP.
@@ -34,11 +35,11 @@
     def get_width(self):
         return self.width
 
-    def output(self, out):
+    def output(self, _):
         print '***PrettyItem>output>', self
         pass
 
-    def prettyprint(self, out, width):
+    def prettyprint(self, _, width):
         print '***PrettyItem>prettyprint>', self
         return width
 
@@ -51,7 +52,7 @@
     def output(self, out):
         out.write(self.value)
 
-    def prettyprint(self, line):
+    def prettyprint(self, line, _):
         line.output(self)
 
     def show(self, out):
@@ -62,7 +63,7 @@
     def output(self, out):
         out.write(' ' * self.width)
 
-    def prettyprint(self, line):
+    def prettyprint(self, line, _):
         line.output(self)
 
     def show(self, out):
@@ -79,7 +80,7 @@
     def output(self, out):
         out.write(' ' * self.width)
 
-    def prettyprint(self, line):
+    def prettyprint(self, line, _):
         if line.breaks(self.space):
             self.active = 1
             line.newline(self.indent)
@@ -88,26 +89,20 @@
 
     def show(self, out):
         print >> out, ("(break (width %d) (indent %d) (space %d) (active %d))"
-                       % (self.width, self.indent, self.space, self.lspace, self.active))
+                       % (self.width, self.indent, self.space, self.active))
 
 class PrettyNewline(PrettySpace):
-
-    def __init__(self, indent):
-        PrettySpace.__init__(self, indent)
 
     def insert(self, block):
         block.newline()
         block.addtoline(self)
 
-    def output(self, out):
-        out.write(' ' * self.width)
-
-    def prettyprint(self, line):
+    def prettyprint(self, line, _):
         line.newline(0)
         line.output(self)
 
     def show(self, out):
-        print >> out, ("(nl (indent %d))" % self.indent)
+        print >> out, ("(nl (width %d))" % self.width)
 
 class PrettyLine(PrettyItem):
     def __init__(self):
@@ -132,7 +127,7 @@
             lastbreak.space = (width - lastwidth)
         self.width = width
  
-    def prettyprint(self, line):
+    def prettyprint(self, line, _):
         for x in self.content:
             x.prettyprint(line)
 
@@ -145,7 +140,8 @@
 class PrettyBlock(PrettyItem):
 
     def __init__(self, all=0, parent=None):
-        self.width = 0
+        PrettyItem.__init__(self, 0)
+
         self.lines = []
         self.parent = parent
         self.indent = 0
@@ -163,7 +159,7 @@
             if self.width < l.width:
                 self.width = l.width
 
-    def breaks(self, n):
+    def breaks(self, _):
         return self.all and self.broken
 
     def newline(self):
@@ -172,7 +168,7 @@
     def addtoline(self, x):
         self.lines[-1].write(x)
 
-    def prettyprint(self, line):
+    def prettyprint(self, line, _):
         self.indent = line.used
         line.block = self
         if not line.fits(self.width):
@@ -191,6 +187,7 @@
 class Line:
 
     def __init__(self, out, width):
+        self.block = None
         self.out = out
         self.width = width
         self.used = 0
@@ -255,8 +252,7 @@
         self.block = self.block.parent
 
     def prettyprint(self, out=sys.stdout):
-        line = Line(out, self.width)
-        self.top.prettyprint(line)
+        self.top.prettyprint(Line(out, self.width))
 
 class SXPPrettyPrinter(PrettyPrinter):
     """An SXP prettyprinter.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/Vifctl.py
--- a/tools/python/xen/xend/Vifctl.py   Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/Vifctl.py   Thu Sep 22 17:42:01 2005
@@ -13,13 +13,13 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
 #============================================================================
 
 """Xend interface to networking control scripts.
 """
 import os
 import os.path
-import sys
 import xen.util.process
 
 from xen.xend import XendRoot
@@ -71,7 +71,7 @@
         vif = vif_old
     return vif
 
-def vifctl(op, vif=None, script=None, domain=None, mac=None, bridge=None, ipaddr=[]):
+def vifctl(op, vif=None, script=None, domain=None, mac=None, bridge=None, ipaddr=None):
     """Call a vif control script.
     Xend calls this when bringing vifs up or down.
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendBootloader.py
--- a/tools/python/xen/xend/XendBootloader.py   Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendBootloader.py   Thu Sep 22 17:42:01 2005
@@ -12,7 +12,7 @@
 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 #
 
-import os, sys, select, errno
+import os, select, errno
 import sxp
 
 from XendLogging import log
@@ -72,7 +72,7 @@
         if len(s) == 0:
             break
         
-    (pid, status) = os.waitpid(child, 0)
+    os.waitpid(child, 0)
     os.close(r)
     os.unlink(BL_FIFO)
 
@@ -89,6 +89,4 @@
     if vcpus and sxp.child_value(config_image, "vcpus") is None:
         config_image.append(['vcpus', vcpus])
 
-    config = ['image', config_image]
-    return config
-
+    return config_image
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py   Thu Sep 22 17:42:01 2005
@@ -4,7 +4,6 @@
 # Public License.  See the file "COPYING" in the main directory of
 # this archive for more details.
 
-import errno
 import os
 import re
 import select
@@ -12,7 +11,7 @@
 from string import join
 from struct import pack, unpack, calcsize
 from xen.util.xpopen import xPopen3
-import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+import xen.lowlevel.xc
 from xen.xend.xenstore.xsutil import IntroduceDomain
 
 from XendError import XendError
@@ -24,6 +23,10 @@
 
 sizeof_int = calcsize("i")
 sizeof_unsigned_long = calcsize("L")
+
+
+xc = xen.lowlevel.xc.new()
+
 
 def write_exact(fd, buf, errmsg):
     if os.write(fd, buf) != len(buf):
@@ -83,7 +86,7 @@
     if child.wait() != 0:
         raise XendError("xc_save failed: %s" % lasterr)
 
-    dominfo.setStoreChannel(None)
+    dominfo.closeStoreChannel()
     xd.domain_destroy(dominfo.domid)
     return None
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendClient.py
--- a/tools/python/xen/xend/XendClient.py       Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendClient.py       Thu Sep 22 17:42:01 2005
@@ -33,8 +33,6 @@
                          UnixXendClientProtocol, \
                          XendError
 
-DEBUG = 0
-
 def fileof(val):
     """Converter for passing configs or other 'large' data.
     Handles lists, files directly.
@@ -385,7 +383,6 @@
 python XendClient.py domain 0
     (domain (id 0) (name Domain-0) (memory 128))
     """
-    global DEBUG
     from getopt import getopt
     short_options = 'x:au:d'
     long_options = ['xend=', 'unix=', 'debug']
@@ -397,8 +394,6 @@
             srv = v
         elif k in ['-u', '--unix']:
             unix = int(v)
-        elif k in ['-d', '--debug']:
-            DEBUG = 1
     if len(args):
         fn = args[0]
         args = args[1:]
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendDmesg.py
--- a/tools/python/xen/xend/XendDmesg.py        Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendDmesg.py        Thu Sep 22 17:42:01 2005
@@ -18,7 +18,6 @@
 """Get dmesg output for this node.
 """
 
-import os
 import xen.lowlevel.xc
 
 class XendDmesg:
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendDomain.py       Thu Sep 22 17:42:01 2005
@@ -14,40 +14,52 @@
 #============================================================================
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
 # Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
+# Copyright (C) 2005 XenSource Ltd
 #============================================================================
 
 """Handler for domain operations.
  Nothing here is persistent (across reboots).
  Needs to be persistent for one uptime.
 """
-import errno
 import os
-import sys
-import time
-import traceback
-
-import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+
+import xen.lowlevel.xc
 
 from xen.xend import sxp
-from xen.xend import XendRoot; xroot = XendRoot.instance()
+from xen.xend import XendRoot
 from xen.xend import XendCheckpoint
 from xen.xend.XendDomainInfo import XendDomainInfo, shutdown_reason
-from xen.xend import EventServer; eserver = EventServer.instance()
+from xen.xend import EventServer
 from xen.xend.XendError import XendError
 from xen.xend.XendLogging import log
 from xen.xend import scheduler
 from xen.xend.server import relocate
 from xen.xend.uuid import getUuid
 from xen.xend.xenstore import XenNode, DBMap
+from xen.xend.xenstore.xstransact import xstransact
+from xen.xend.xenstore.xsutil import GetDomainPath
+
+
+xc = xen.lowlevel.xc.new()
+xroot = XendRoot.instance()
+eserver = EventServer.instance()
+
 
 __all__ = [ "XendDomain" ]
 
 SHUTDOWN_TIMEOUT = 30
+PRIV_DOMAIN      =  0
+
+def is_dead(dom):
+    return dom['crashed'] or dom['shutdown'] or (
+        dom['dying'] and not(dom['running'] or dom['paused'] or
+                             dom['blocked']))
+
 
 class XendDomainDict(dict):
     def get_by_name(self, name):
         try:
-            return filter(lambda d: d.name == name, self.values())[0]
+            return filter(lambda d: d.getName() == name, self.values())[0]
         except IndexError, err:
             return None
 
@@ -65,9 +77,12 @@
         # So we stuff the XendDomain instance (self) into xroot's components.
         xroot.add_component("xen.xend.XendDomain", self)
         self.domains = XendDomainDict()
-        self.dbmap = DBMap(db=XenNode("/domain"))
+        self.domroot = "/domain"
+        self.vmroot = "/domain"
+        self.dbmap = DBMap(db=XenNode(self.vmroot))
         self.watchReleaseDomain()
         self.initial_refresh()
+        self.dom0_setup()
 
     def list(self):
         """Get list of domain objects.
@@ -83,7 +98,7 @@
         @return: domain objects
         """
         doms = self.list()
-        doms.sort(lambda x, y: cmp(x.name, y.name))
+        doms.sort(lambda x, y: cmp(x.getName(), y.getName()))
         return doms
 
     def list_names(self):
@@ -92,10 +107,12 @@
         @return: domain names
         """
         doms = self.list_sorted()
-        return map(lambda x: x.name, doms)
+        return map(lambda x: x.getName(), doms)
 
     def onReleaseDomain(self):
-        self.refresh(cleanup=True)
+        self.reap()
+        self.refresh()
+        self.domain_restarts()
 
     def watchReleaseDomain(self):
         from xen.xend.xenstore.xswatch import xswatch
@@ -123,70 +140,58 @@
         else:
             dominfo = dominfo[0]
         return dominfo
-            
+
     def initial_refresh(self):
         """Refresh initial domain info from db.
         """
         doms = self.xen_domains()
-        self.dbmap.readDB()
-        for domdb in self.dbmap.values():
-            if not domdb.has_key("xend"):
-                continue
-            db = domdb.addChild("xend")
+        self.dbmap.readDB()             # XXX only needed for "xend"
+        for dom in doms.values():
+            domid = dom['dom']
+            dompath = GetDomainPath(domid)
+            if not dompath:
+                continue
+            vmpath = xstransact.Read(dompath, "vm")
+            if not vmpath:
+                continue
+            uuid = xstransact.Read(vmpath, "uuid")
+            if not uuid:
+                continue
+            log.info("recreating domain %d, uuid %s" % (domid, uuid))
+            dompath = "/".join(dompath.split("/")[0:-1])
             try:
-                domid = int(domdb["domid"].getData())
-            except:
-                domid = None
-            # XXX if domid in self.domains, then something went wrong
-            if (domid is None) or (domid in self.domains):
-                domdb.delete()
-            elif domid in doms:
-                try:
-                    self._new_domain(domdb["uuid"].getData(), domid, db,
-                                     doms[domid]) 
-                except Exception, ex:
-                    log.exception("Error recreating domain info: id=%d", domid)
-                    self._delete_domain(domid)
-            else:
-                self._delete_domain(domid)
-        self.refresh(cleanup=True)
-
-        dom0 = self.domain_lookup(0)
+                dominfo = XendDomainInfo.recreate(uuid, dompath, domid, dom)
+            except Exception, ex:
+                log.exception("Error recreating domain info: id=%d", domid)
+                continue
+            self._add_domain(dominfo)
+        self.reap()
+        self.refresh()
+        self.domain_restarts()
+
+    def dom0_setup(self):
+        dom0 = self.domain_lookup(PRIV_DOMAIN)
         if not dom0:
-            dom0 = self.domain_unknown(0)
+            dom0 = self.dom0_unknown()
         dom0.dom0_init_store()    
+        dom0.dom0_enforce_vcpus()
 
     def close(self):
         pass
 
-    def _new_domain(self, uuid, domid, db, info):
-        """Create a domain entry from saved info.
-
-        @param db:   saved info from the db
-        @param info: domain info from xen
-        @return: domain
-        """
-        dominfo = XendDomainInfo.recreate(uuid, domid, db, info)
-        self.domains[dominfo.domid] = dominfo
-        return dominfo
-
     def _add_domain(self, info, notify=True):
         """Add a domain entry to the tables.
 
         @param info:   domain info object
         @param notify: send a domain created event if true
         """
-        # Remove entries under the wrong id.
-        for i, d in self.domains.items():
-            if i != d.domid:
-                del self.domains[i]
-                self.dbmap.delete(d.uuid)
-        if info.domid in self.domains:
+        if info.getDomid() in self.domains:
             notify = False
-        self.domains[info.domid] = info
-        info.exportToDB(save=True)
+        self.domains[info.getDomid()] = info
+        info.exportToDB()
         if notify:
-            eserver.inject('xend.domain.create', [info.name, info.domid])
+            eserver.inject('xend.domain.create', [info.getName(),
+                                                  info.getDomid()])
 
     def _delete_domain(self, id, notify=True):
         """Remove a domain from the tables.
@@ -194,18 +199,14 @@
         @param id:     domain id
         @param notify: send a domain died event if true
         """
-        try:
-            if self.xen_domain(id):
-                return
-        except:
-            pass
         info = self.domains.get(id)
         if info:
             del self.domains[id]
             info.cleanup()
             info.delete()
             if notify:
-                eserver.inject('xend.domain.died', [info.name, info.domid])
+                eserver.inject('xend.domain.died', [info.getName(),
+                                                    info.getDomid()])
         # XXX this should not be needed
         for domdb in self.dbmap.values():
             if not domdb.has_key("xend"):
@@ -222,61 +223,40 @@
         """Look for domains that have crashed or stopped.
         Tidy them up.
         """
-        casualties = []
         doms = self.xen_domains()
         for d in doms.values():
-            dead = 0
-            dead = dead or (d['crashed'] or d['shutdown'])
-            dead = dead or (d['dying'] and
-                            not(d['running'] or d['paused'] or d['blocked']))
-            if dead:
-                casualties.append(d)
-        for d in casualties:
-            id = d['dom']
-            dominfo = self.domains.get(id)
-            name = (dominfo and dominfo.name) or '??'
-            if dominfo and dominfo.is_terminated():
-                continue
-            log.debug('XendDomain>reap> domain died name=%s id=%d', name, id)
+            if not is_dead(d):
+                continue
+            domid = d['dom']
+            dominfo = self.domains.get(domid)
+            if not dominfo or dominfo.is_terminated():
+                continue
+            log.debug('domain died name=%s domid=%d', dominfo.getName(), domid)
+            if d['crashed'] and xroot.get_enable_dump():
+                self.domain_dumpcore(domid)
             if d['shutdown']:
                 reason = shutdown_reason(d['shutdown_reason'])
-                log.debug('XendDomain>reap> shutdown name=%s id=%d reason=%s', name, id, reason)
-                if reason in ['suspend']:
-                    if dominfo and dominfo.is_terminated():
-                        log.debug('XendDomain>reap> Suspended domain died id=%d', id)
-                    else:
-                        eserver.inject('xend.domain.suspended', [name, id])
-                        if dominfo:
-                            dominfo.state_set("suspended")
-                        continue
+                log.debug('shutdown name=%s id=%d reason=%s',
+                          dominfo.getName(), domid, reason)
+                if reason == 'suspend':
+                    dominfo.state_set("suspended")
+                    continue
                 if reason in ['poweroff', 'reboot']:
-                    eserver.inject('xend.domain.exit', [name, id, reason])
-                    self.domain_restart_schedule(id, reason)
-            else:
-               if xroot.get_enable_dump():
-                   self.domain_dumpcore(id)
-               eserver.inject('xend.domain.exit', [name, id, 'crash']) 
-            self.final_domain_destroy(id)
-
-    def refresh(self, cleanup=False):
+                    self.domain_restart_schedule(domid, reason)
+            dominfo.destroy()
+
+    def refresh(self):
         """Refresh domain list from Xen.
         """
-        if cleanup:
-            self.reap()
         doms = self.xen_domains()
         # Remove entries for domains that no longer exist.
         # Update entries for existing domains.
-        do_domain_restarts = False
         for d in self.domains.values():
-            info = doms.get(d.domid)
+            info = doms.get(d.getDomid())
             if info:
                 d.update(info)
-            elif d.restart_pending():
-                do_domain_restarts = True
-            else:
-                self._delete_domain(d.domid)
-        if cleanup and do_domain_restarts:
-            scheduler.now(self.domain_restarts)
+            elif not d.restart_pending():
+                self._delete_domain(d.getDomid())
 
     def update_domain(self, id):
         """Update information for a single domain.
@@ -297,7 +277,8 @@
         @param config: configuration
         @return: domain
         """
-        dominfo = XendDomainInfo.create(self.dbmap, config)
+        dominfo = XendDomainInfo.create(self.dbmap.getPath(), config)
+        self._add_domain(dominfo)
         return dominfo
 
     def domain_restart(self, dominfo):
@@ -305,31 +286,39 @@
 
         @param dominfo: domain object
         """
-        log.info("Restarting domain: name=%s id=%s", dominfo.name, dominfo.domid)
+        log.info("Restarting domain: name=%s id=%s", dominfo.getName(),
+                 dominfo.getDomid())
         eserver.inject("xend.domain.restart",
-                       [dominfo.name, dominfo.domid, "begin"])
+                       [dominfo.getName(), dominfo.getDomid(), "begin"])
         try:
             dominfo.restart()
-            log.info('Restarted domain name=%s id=%s', dominfo.name, dominfo.domid)
+            log.info('Restarted domain name=%s id=%s', dominfo.getName(),
+                     dominfo.getDomid())
             eserver.inject("xend.domain.restart",
-                           [dominfo.name, dominfo.domid, "success"])
-            self.domain_unpause(dominfo.domid)
+                           [dominfo.getName(), dominfo.getDomid(),
+                            "success"])
+            self.domain_unpause(dominfo.getDomid())
         except Exception, ex:
             log.exception("Exception restarting domain: name=%s id=%s",
-                          dominfo.name, dominfo.domid)
+                          dominfo.getName(), dominfo.getDomid())
             eserver.inject("xend.domain.restart",
-                           [dominfo.name, dominfo.domid, "fail"])
+                           [dominfo.getName(), dominfo.getDomid(), "fail"])
         return dominfo
 
-    def domain_configure(self, vmconfig):
+    def domain_configure(self, config):
         """Configure an existing domain. This is intended for internal
         use by domain restore and migrate.
 
         @param vmconfig: vm configuration
         """
-        config = sxp.child_value(vmconfig, 'config')
-        dominfo = XendDomainInfo.restore(self.dbmap, config)
-        return dominfo
+        # We accept our configuration specified as ['config' [...]], which
+        # some tools or configuration files may be using.  For save-restore,
+        # we use the value of XendDomainInfo.sxpr() directly, which has no
+        # such item.
+        nested = sxp.child_value(config, 'config')
+        if nested:
+            config = nested
+        return XendDomainInfo.restore(self.dbmap.getPath(), config)
 
     def domain_restore(self, src, progress=False):
         """Restore a domain from file.
@@ -340,7 +329,9 @@
 
         try:
             fd = os.open(src, os.O_RDONLY)
-            return XendCheckpoint.restore(self, fd)
+            dominfo = XendCheckpoint.restore(self, fd)
+            self._add_domain(dominfo)
+            return dominfo
         except OSError, ex:
             raise XendError("can't read guest state file %s: %s" %
                             (src, ex[1]))
@@ -354,22 +345,32 @@
         self.update_domain(id)
         return self.domains.get(id)
 
-    def domain_unknown(self, id):
-        try:
-            info = self.xen_domain(id)
-            if info:
-                uuid = getUuid()
-                log.info(
-                    "Creating entry for unknown domain: id=%d uuid=%s",
-                    id, uuid)
-                db = self.dbmap.addChild("%s/xend" % uuid)
-                dominfo = XendDomainInfo.recreate(uuid, id, db, info)
-                self._add_domain(dominfo)
-                return dominfo
-        except Exception, ex:
-            raise
-            log.exception("Error creating domain info: id=%d", id)
-        return None
+    def dom0_unknown(self):
+        dom0 = PRIV_DOMAIN
+        uuid = None
+        info = self.xen_domain(dom0)
+        dompath = GetDomainPath(dom0)
+        if dompath:
+            vmpath = xstransact.Read(dompath, "vm")
+            if vmpath:
+                uuid = xstransact.Read(vmpath, "uuid")
+            if not uuid:
+                uuid = dompath.split("/")[-1]
+            dompath = "/".join(dompath.split("/")[0:-1])
+        if not uuid:
+            uuid = getUuid()
+            dompath = self.domroot
+        log.info("Creating entry for unknown xend domain: id=%d uuid=%s",
+                 dom0, uuid)
+        try:
+            dominfo = XendDomainInfo.recreate(uuid, dompath, dom0, info)
+            self._add_domain(dominfo)
+            return dominfo
+        except Exception, exn:
+            log.exception(exn)
+            raise XendError("Error recreating xend domain info: id=%d: %s" %
+                            (dom0, str(exn)))
+
         
     def domain_lookup(self, id):
         return self.domains.get(id)
@@ -390,9 +391,10 @@
         @param id: domain id
         """
         dominfo = self.domain_lookup(id)
-        eserver.inject('xend.domain.unpause', [dominfo.name, dominfo.domid])
-        try:
-            return xc.domain_unpause(dom=dominfo.domid)
+        eserver.inject('xend.domain.unpause', [dominfo.getName(),
+                                               dominfo.getDomid()])
+        try:
+            return xc.domain_unpause(dom=dominfo.getDomid())
         except Exception, ex:
             raise XendError(str(ex))
     
@@ -402,9 +404,10 @@
         @param id: domain id
         """
         dominfo = self.domain_lookup(id)
-        eserver.inject('xend.domain.pause', [dominfo.name, dominfo.domid])
-        try:
-            return xc.domain_pause(dom=dominfo.domid)
+        eserver.inject('xend.domain.pause', [dominfo.getName(),
+                                             dominfo.getDomid()])
+        try:
+            return xc.domain_pause(dom=dominfo.getDomid())
         except Exception, ex:
             raise XendError(str(ex))
     
@@ -420,8 +423,9 @@
         @param reason: shutdown type: poweroff, reboot, suspend, halt
         """
         dominfo = self.domain_lookup(id)
-        self.domain_restart_schedule(dominfo.domid, reason, force=True)
-        eserver.inject('xend.domain.shutdown', [dominfo.name, dominfo.domid, reason])
+        self.domain_restart_schedule(dominfo.getDomid(), reason, force=True)
+        eserver.inject('xend.domain.shutdown', [dominfo.getName(),
+                                                dominfo.getDomid(), reason])
         if reason == 'halt':
             reason = 'poweroff'
         val = dominfo.shutdown(reason)
@@ -445,13 +449,13 @@
             if not dominfo.shutdown_pending:
                 # domain doesn't need shutdown
                 continue
-            id = dominfo.domid
+            id = dominfo.getDomid()
             left = dominfo.shutdown_time_left(SHUTDOWN_TIMEOUT)
             if left <= 0:
                 # Shutdown expired - destroy domain.
                 try:
                     log.info("Domain shutdown timeout expired: name=%s id=%s",
-                             dominfo.name, id)
+                             dominfo.getName(), id)
                     self.domain_destroy(id, reason=
                                         dominfo.shutdown_pending['reason'])
                 except Exception:
@@ -476,15 +480,16 @@
         restart = (force and reason == 'reboot') or 
dominfo.restart_needed(reason)
         if restart:
             log.info('Scheduling restart for domain: name=%s id=%s',
-                     dominfo.name, dominfo.domid)
+                     dominfo.getName(), dominfo.getDomid())
             eserver.inject("xend.domain.restart",
-                           [dominfo.name, dominfo.domid, "schedule"])
+                           [dominfo.getName(), dominfo.getDomid(),
+                            "schedule"])
             dominfo.restarting()
         else:
             log.info('Cancelling restart for domain: name=%s id=%s',
-                     dominfo.name, dominfo.domid)
+                     dominfo.getName(), dominfo.getDomid())
             eserver.inject("xend.domain.restart",
-                           [dominfo.name, dominfo.domid, "cancel"])
+                           [dominfo.getName(), dominfo.getDomid(), "cancel"])
             dominfo.restart_cancel()
 
     def domain_restarts(self):
@@ -494,45 +499,36 @@
         for dominfo in self.domains.values():
             if not dominfo.restart_pending():
                 continue
-            print 'domain_restarts>', dominfo.name, dominfo.domid
-            info = doms.get(dominfo.domid)
+            info = doms.get(dominfo.getDomid())
             if info:
                 # Don't execute restart for domains still running.
-                print 'domain_restarts> still runnning: ', dominfo.name
                 continue
             # Remove it from the restarts.
-            print 'domain_restarts> restarting: ', dominfo.name
+            log.info('restarting: %s' % dominfo.getName())
             self.domain_restart(dominfo)
 
-    def final_domain_destroy(self, id):
-        """Final destruction of a domain..
-
-        @param id: domain id
-        """
-        try:
-            dominfo = self.domain_lookup(id)
-            log.info('Destroying domain: name=%s', dominfo.name)
-            eserver.inject('xend.domain.destroy', [dominfo.name, dominfo.domid])
+    def domain_destroy(self, domid, reason='halt'):
+        """Terminate domain immediately.
+        - halt:   cancel any restart for the domain
+        - reboot  schedule a restart for the domain
+
+        @param domid: domain id
+        """
+
+        if domid == PRIV_DOMAIN:
+            raise XendError("Cannot destroy privileged domain %i" % domid)
+        
+        self.domain_restart_schedule(domid, reason, force=True)
+        dominfo = self.domain_lookup(domid)
+        if dominfo:
             val = dominfo.destroy()
-        except:
-            #todo
+        else:
             try:
-                val = xc.domain_destroy(dom=id)
+                val = xc.domain_destroy(dom=domid)
             except Exception, ex:
                 raise XendError(str(ex))
         return val       
 
-    def domain_destroy(self, id, reason='halt'):
-        """Terminate domain immediately.
-        - halt:   cancel any restart for the domain
-        - reboot  schedule a restart for the domain
-
-        @param id: domain id
-        """
-        self.domain_restart_schedule(id, reason, force=True)
-        val = self.final_domain_destroy(id)
-        return val
-
     def domain_migrate(self, id, dst, live=False, resource=0):
         """Start domain migration.
 
@@ -547,13 +543,14 @@
 
         # temporarily rename domain for localhost migration
         if dst == "localhost":
-            dominfo.name = "tmp-" + dominfo.name
+            dominfo.setName("tmp-" + dominfo.getName())
 
         try:
             XendCheckpoint.save(self, sock.fileno(), dominfo, live)
         except:
             if dst == "localhost":
-                dominfo.name = string.replace(dominfo.name, "tmp-", "", 1)
+                dominfo.setName(
+                    string.replace(dominfo.getName(), "tmp-", "", 1))
             raise
         
         return None
@@ -587,7 +584,7 @@
         """
         dominfo = self.domain_lookup(id)
         try:
-            return xc.domain_pincpu(dominfo.domid, vcpu, cpumap)
+            return xc.domain_pincpu(dominfo.getDomid(), vcpu, cpumap)
         except Exception, ex:
             raise XendError(str(ex))
 
@@ -596,8 +593,10 @@
         """
         dominfo = self.domain_lookup(id)
         try:
-            return xc.bvtsched_domain_set(dom=dominfo.domid, mcuadv=mcuadv,
-                                          warpback=warpback, warpvalue=warpvalue, 
+            return xc.bvtsched_domain_set(dom=dominfo.getDomid(),
+                                          mcuadv=mcuadv,
+                                          warpback=warpback,
+                                          warpvalue=warpvalue, 
                                           warpl=warpl, warpu=warpu)
         except Exception, ex:
             raise XendError(str(ex))
@@ -607,7 +606,7 @@
         """
         dominfo = self.domain_lookup(id)
         try:
-            return xc.bvtsched_domain_get(dominfo.domid)
+            return xc.bvtsched_domain_get(dominfo.getDomid())
         except Exception, ex:
             raise XendError(str(ex))
     
@@ -617,7 +616,8 @@
         """
         dominfo = self.domain_lookup(id)
         try:
-            return xc.sedf_domain_set(dominfo.domid, period, slice, latency, extratime, weight)
+            return xc.sedf_domain_set(dominfo.getDomid(), period, slice,
+                                      latency, extratime, weight)
         except Exception, ex:
             raise XendError(str(ex))
 
@@ -626,7 +626,7 @@
         """
         dominfo = self.domain_lookup(id)
         try:
-            return xc.sedf_domain_get(dominfo.domid)
+            return xc.sedf_domain_get(dominfo.getDomid())
         except Exception, ex:
             raise XendError(str(ex))
 
@@ -674,9 +674,8 @@
         @param type: device type
         """
         dominfo = self.domain_lookup(id)
-        val = dominfo.device_delete(type, devid)
-        dominfo.exportToDB()
-        return val
+        return dominfo.destroyDevice(type, devid)
+
 
     def domain_devtype_ls(self, id, type):
         """Get list of device sxprs for a domain.
@@ -716,7 +715,7 @@
         """
         dominfo = self.domain_lookup(id)
         try:
-            return xc.shadow_control(dominfo.domid, op)
+            return xc.shadow_control(dominfo.getDomid(), op)
         except Exception, ex:
             raise XendError(str(ex))
 
@@ -730,7 +729,8 @@
         dominfo = self.domain_lookup(id)
         maxmem = int(mem) * 1024
         try:
-            return xc.domain_setmaxmem(dominfo.domid, maxmem_kb = maxmem)
+            return xc.domain_setmaxmem(dominfo.getDomid(),
+                                       maxmem_kb = maxmem)
         except Exception, ex:
             raise XendError(str(ex))
 
@@ -742,7 +742,7 @@
         @return: 0 on success, -1 on error
         """
         dominfo = self.domain_lookup(id)
-        return dominfo.setMemoryTarget(mem * (1 << 20))
+        return dominfo.setMemoryTarget(mem << 10)
 
     def domain_vcpu_hotplug(self, id, vcpu, state):
         """Enable or disable VCPU vcpu in DOM id
@@ -762,12 +762,13 @@
         @param id: domain
         """
         dominfo = self.domain_lookup(id)
-        corefile = "/var/xen/dump/%s.%s.core"% (dominfo.name, dominfo.domid)
-        try:
-            xc.domain_dumpcore(dom=dominfo.domid, corefile=corefile)
+        corefile = "/var/xen/dump/%s.%s.core" % (dominfo.getName(),
+                                                 dominfo.getDomid())
+        try:
+            xc.domain_dumpcore(dom=dominfo.getDomid(), corefile=corefile)
         except Exception, ex:
             log.warning("Dumpcore failed, id=%s name=%s: %s",
-                        dominfo.domid, dominfo.name, ex)
+                        dominfo.getDomid(), dominfo.getName(), ex)
         
 def instance():
     """Singleton constructor. Use this instead of the class constructor.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py   Thu Sep 22 17:42:01 2005
@@ -1,4 +1,4 @@
-#============================================================================
+#===========================================================================
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of version 2.1 of the GNU Lesser General Public
 # License as published by the Free Software Foundation.
@@ -13,6 +13,7 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
 #============================================================================
 
 """Representation of a single domain.
@@ -23,31 +24,23 @@
 
 """
 
-import string, re
-import os
+import string
 import time
 import threading
 import errno
 
-import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
-from xen.util.ip import check_subnet, get_current_ipgw
+import xen.lowlevel.xc
 from xen.util.blkif import blkdev_uname_to_file
 
-from xen.xend.server import controller
-from xen.xend.server import SrvDaemon; xend = SrvDaemon.instance()
 from xen.xend.server.channel import EventChannel
-from xen.util.blkif import blkdev_name_to_number, expand_dev_name
 
 from xen.xend import sxp
-from xen.xend import Blkctl
-from xen.xend.PrettyPrint import prettyprintstring
 from xen.xend.XendBootloader import bootloader
 from xen.xend.XendLogging import log
 from xen.xend.XendError import XendError, VmError
 from xen.xend.XendRoot import get_component
 
 from xen.xend.uuid import getUuid
-from xen.xend.xenstore import DBVar, XenNode, DBMap
 from xen.xend.xenstore.xstransact import xstransact
 from xen.xend.xenstore.xsutil import IntroduceDomain
 
@@ -88,6 +81,18 @@
 STATE_VM_TERMINATED = "terminated"
 STATE_VM_SUSPENDED  = "suspended"
 
+"""Flag for a block device backend domain."""
+SIF_BLK_BE_DOMAIN = (1<<4)
+
+"""Flag for a net device backend domain."""
+SIF_NET_BE_DOMAIN = (1<<5)
+
+"""Flag for a TPM device backend domain."""
+SIF_TPM_BE_DOMAIN = (1<<7)
+
+
+xc = xen.lowlevel.xc.new()
+
 
 def domain_exists(name):
     # See comment in XendDomain constructor.
@@ -110,9 +115,13 @@
     @param dom: domain id
     @return: info or None
     """
-    domlist = xc.domain_getinfo(dom, 1)
-    if domlist and dom == domlist[0]['dom']:
-        return domlist[0]
+    try:
+        domlist = xc.domain_getinfo(dom, 1)
+        if domlist and dom == domlist[0]['dom']:
+            return domlist[0]
+    except Exception, err:
+        # ignore missing domain
+        log.exception("domain_getinfo(%d) failed, ignoring", dom)
     return None
 
 class XendDomainInfo:
@@ -122,149 +131,288 @@
     """
     MINIMUM_RESTART_TIME = 20
 
-    def create(cls, parentdb, config):
+
+    def create(cls, dompath, config):
         """Create a VM from a configuration.
 
-        @param parentdb:  parent db
+        @param dompath:   The path to all domain information
         @param config    configuration
         @raise: VmError for invalid configuration
         """
-        uuid = getUuid()
-        db = parentdb.addChild("%s/xend" % uuid)
-        path = parentdb.getPath()
-        vm = cls(uuid, path, db)
-        vm.construct(config)
-        vm.saveToDB(sync=True)
-
+
+        log.debug("XendDomainInfo.create(%s, ...)", dompath)
+        
+        vm = cls(getUuid(), dompath, cls.parseConfig(config))
+        vm.construct()
         return vm
 
     create = classmethod(create)
 
-    def recreate(cls, uuid, domid, db, info):
+
+    def recreate(cls, uuid, dompath, domid, info):
         """Create the VM object for an existing domain.
 
-        @param db:        domain db
+        @param dompath:   The path to all domain information
         @param info:      domain info from xc
         """
-        path = "/".join(db.getPath().split("/")[0:-2])
-        vm = cls(uuid, path, db)
-        vm.setDomid(domid)
-        vm.name, vm.start_time = vm.gatherVm(("name", str),
-                                             ("start-time", float))
-        try:
-            db.readDB()
-        except: pass
-        vm.importFromDB()
-        config = vm.config
-        log.debug('info=' + str(info))
-        log.debug('config=' + prettyprintstring(config))
-
-        vm.memory = info['mem_kb'] / 1024
-        vm.target = info['mem_kb'] * 1024
-
-        if config:
-            try:
-                vm.recreate = True
-                vm.construct(config)
-            finally:
-                vm.recreate = False
-        else:
-            vm.setName("Domain-%d" % domid)
-
-        vm.exportToDB(save=True)
-        return vm
+
+        log.debug("XendDomainInfo.recreate(%s, %s, %s, %s)", uuid, dompath,
+                  domid, info)
+
+        return cls(uuid, dompath, info, domid, True)
 
     recreate = classmethod(recreate)
 
-    def restore(cls, parentdb, config, uuid=None):
+
+    def restore(cls, dompath, config, uuid = None):
         """Create a domain and a VM object to do a restore.
 
-        @param parentdb:  parent db
+        @param dompath:   The path to all domain information
         @param config:    domain configuration
         @param uuid:      uuid to use
         """
+        
+        log.debug("XendDomainInfo.restore(%s, %s, %s)", dompath, config, uuid)
+
         if not uuid:
             uuid = getUuid()
-        db = parentdb.addChild("%s/xend" % uuid)
-        path = parentdb.getPath()
-        vm = cls(uuid, path, db)
-        ssidref = int(sxp.child_value(config, 'ssidref'))
-        log.debug('restoring with ssidref='+str(ssidref))
-        id = xc.domain_create(ssidref = ssidref)
-        vm.setDomid(id)
+
+        try:
+            ssidref = int(sxp.child_value(config, 'ssidref'))
+        except TypeError, exn:
+            raise VmError('Invalid ssidref in config: %s' % exn)
+
+        log.debug('restoring with ssidref = %d' % ssidref)
+
+        vm = cls(uuid, dompath, cls.parseConfig(config),
+                 xc.domain_create(ssidref = ssidref))
         vm.clear_shutdown()
+        vm.create_channel()
+        vm.configure()
+        vm.exportToDB()
+        return vm
+
+    restore = classmethod(restore)
+
+
+    def parseConfig(cls, config):
+        def get_cfg(name, conv = None):
+            val = sxp.child_value(config, name)
+
+            if conv and not val is None:
+                try:
+                    return conv(val)
+                except TypeError, exn:
+                    raise VmError(
+                        'Invalid setting %s = %s in configuration: %s' %
+                        (name, val, str(exn)))
+            else:
+                return val
+
+
+        log.debug("parseConfig: config is %s" % str(config))
+
+        result = {}
+        imagecfg = "()"
+
+        result['name']         = get_cfg('name')
+        result['ssidref']      = get_cfg('ssidref',    int)
+        result['memory']       = get_cfg('memory',     int)
+        result['mem_kb']       = get_cfg('mem_kb',     int)
+        result['maxmem']       = get_cfg('maxmem',     int)
+        result['maxmem_kb']    = get_cfg('maxmem_kb',  int)
+        result['cpu']          = get_cfg('cpu',        int)
+        result['cpu_weight']   = get_cfg('cpu_weight', float)
+        result['bootloader']   = get_cfg('bootloader')
+        result['restart_mode'] = get_cfg('restart')
+
         try:
-            vm.restore = True
-            vm.construct(config)
-        finally:
-            vm.restore = False
-        vm.exportToDB(save=True, sync=True)
-        return vm
-
-    restore = classmethod(restore)
-
-    __exports__ = [
-        DBVar('config',        ty='sxpr'),
-        DBVar('state',         ty='str'),
-        DBVar('restart_mode',  ty='str'),
-        DBVar('restart_state', ty='str'),
-        DBVar('restart_time',  ty='float'),
-        DBVar('restart_count', ty='int'),
-        DBVar('device_model_pid', ty='int'),
-        ]
+            imagecfg = get_cfg('image')
+
+            if imagecfg:
+                result['image'] = imagecfg
+                result['vcpus'] = int(sxp.child_value(imagecfg, 'vcpus',
+                                                      1))
+            else:
+                result['vcpus'] = 1
+        except TypeError, exn:
+            raise VmError(
+                'Invalid configuration setting: vcpus = %s: %s' %
+                (sxp.child_value(imagecfg, 'vcpus', 1),
+                 str(exn)))
+
+        result['backend'] = []
+        for c in sxp.children(config, 'backend'):
+            result['backend'].append(sxp.name(sxp.child0(c)))
+
+        result['device'] = []
+        for d in sxp.children(config, 'device'):
+            c = sxp.child0(d)
+            result['device'].append((sxp.name(c), c))
+
+        log.debug("parseConfig: result is %s" % str(result))
+        return result
+
+
+    parseConfig = classmethod(parseConfig)
+
     
-    def __init__(self, uuid, path, db):
+    def __init__(self, uuid, parentpath, info, domid = None, augment = False):
+
         self.uuid = uuid
-        self.path = path + "/" + uuid
-
-        self.db = db
-
-        self.recreate = 0
-        self.restore = 0
-        
-        self.config = None
-        self.domid = None
-        self.cpu_weight = 1
-        self.start_time = None
-        self.name = None
-        self.memory = None
-        self.ssidref = None
+        self.info = info
+
+        self.path = parentpath + "/" + uuid
+
+        if domid:
+            self.domid = domid
+        elif 'dom' in info:
+            self.domid = int(info['dom'])
+        else:
+            self.domid = None
+
+        if augment:
+            self.augmentInfo()
+
+        self.validateInfo()
+
         self.image = None
-
-        self.target = None
 
         self.store_channel = None
         self.store_mfn = None
         self.console_channel = None
         self.console_mfn = None
-        self.controllers = {}
-        
-        self.info = None
-        self.blkif_backend = False
-        self.netif_backend = False
-        self.netif_idx = 0
-        self.tpmif_backend = False
         
         #todo: state: running, suspended
         self.state = STATE_VM_OK
         self.state_updated = threading.Condition()
         self.shutdown_pending = None
 
-        #todo: set to migrate info if migrating
-        self.migrate = None
-        
-        self.restart_mode = RESTART_ONREBOOT
         self.restart_state = None
         self.restart_time = None
         self.restart_count = 0
         
-        self.vcpus = 1
-        self.bootloader = None
-        self.device_model_pid = 0
-
         self.writeVm("uuid", self.uuid)
         self.storeDom("vm", self.path)
 
+
+    def augmentInfo(self):
+        def useIfNeeded(name, val):
+            if not self.infoIsSet(name) and val is not None:
+                self.info[name] = val
+
+        params = (("name", str),
+                  ("start-time", float))
+
+        from_store = self.gatherVm(*params)
+
+        map(lambda x, y: useIfNeeded(x[0], y), params, from_store)
+
+
+    def validateInfo(self):
+        """Validate and normalise the info block.  This has either been parsed
+        by parseConfig, or received from xc through recreate.
+        """
+        def defaultInfo(name, val):
+            if not self.infoIsSet(name):
+                self.info[name] = val()
+
+        try:
+            defaultInfo('name',         lambda: "Domain-%d" % self.domid)
+            defaultInfo('restart_mode', lambda: RESTART_ONREBOOT)
+            defaultInfo('cpu_weight',   lambda: 1.0)
+            defaultInfo('bootloader',   lambda: None)
+            defaultInfo('backend',      lambda: [])
+            defaultInfo('device',       lambda: [])
+
+            self.check_name(self.info['name'])
+
+            # Internally, we keep only maxmem_KiB, and not maxmem or maxmem_kb
+            # (which come from outside, and are in MiB and KiB respectively).
+            # This means that any maxmem or maxmem_kb settings here have come
+            # from outside, and maxmem_KiB must be updated to reflect them.
+            # If we have both maxmem and maxmem_kb and these are not
+            # consistent, then this is an error, as we've no way to tell which
+            # one takes precedence.
+
+            # Exactly the same thing applies to memory_KiB, memory, and
+            # mem_kb.
+
+            def discard_negatives(name):
+                if self.infoIsSet(name) and self.info[name] <= 0:
+                    del self.info[name]
+
+            def valid_KiB_(mb_name, kb_name):
+                discard_negatives(kb_name)
+                discard_negatives(mb_name)
+                
+                if self.infoIsSet(kb_name):
+                    if self.infoIsSet(mb_name):
+                        mb = self.info[mb_name]
+                        kb = self.info[kb_name]
+                        if mb * 1024 == kb:
+                            return kb
+                        else:
+                            raise VmError(
+                                'Inconsistent %s / %s settings: %s / %s' %
+                                (mb_name, kb_name, mb, kb))
+                    else:
+                        return self.info[kb_name]
+                elif self.infoIsSet(mb_name):
+                    return self.info[mb_name] * 1024
+                else:
+                    return None
+
+            def valid_KiB(mb_name, kb_name):
+                result = valid_KiB_(mb_name, kb_name)
+                if result <= 0:
+                    raise VmError('Invalid %s / %s: %s' %
+                                  (mb_name, kb_name, result))
+                else:
+                    return result
+
+            def delIf(name):
+                if name in self.info:
+                    del self.info[name]
+
+            self.info['memory_KiB'] = valid_KiB('memory', 'mem_kb')
+            delIf('memory')
+            delIf('mem_kb')
+            self.info['maxmem_KiB'] = valid_KiB_('maxmem', 'maxmem_kb')
+            delIf('maxmem')
+            delIf('maxmem_kb')
+
+            if not self.info['maxmem_KiB']:
+                self.info['maxmem_KiB'] = 1 << 30
+
+            if self.info['maxmem_KiB'] > self.info['memory_KiB']:
+                self.info['maxmem_KiB'] = self.info['memory_KiB']
+
+            # Validate the given backend names.
+            for s in self.info['backend']:
+                if s not in backendFlags:
+                    raise VmError('Invalid backend type: %s' % s)
+
+            for (n, c) in self.info['device']:
+                if not n or not c or n not in controllerClasses:
+                    raise VmError('invalid device (%s, %s)' %
+                                  (str(n), str(c)))
+
+            if self.info['restart_mode'] not in restart_modes:
+                raise VmError('invalid restart mode: ' +
+                              str(self.info['restart_mode']))
+
+            if 'cpumap' not in self.info:
+                if [self.info['vcpus'] == 1]:
+                    self.info['cpumap'] = [1];
+                else:
+                    raise VmError('Cannot create CPU map')
+
+        except KeyError, exn:
+            log.exception(exn)
+            raise VmError('Unspecified domain detail: %s' % str(exn))
+
+
     def readVm(self, *args):
         return xstransact.Read(self.path, *args)
 
@@ -295,20 +443,28 @@
     def storeDom(self, *args):
         return xstransact.Store(self.path, *args)
 
-    def setDB(self, db):
-        self.db = db
-
-    def saveToDB(self, save=False, sync=False):
-        self.db.saveDB(save=save, sync=sync)
-
-    def exportToDB(self, save=False, sync=False):
-        if self.image:
-            self.image.exportToDB(save=save, sync=sync)
-        self.db.exportToDB(self, fields=self.__exports__, save=save, sync=sync)
-
-    def importFromDB(self):
-        self.db.importFromDB(self, fields=self.__exports__)
-        self.store_channel = self.eventChannel("store/port")
+
+    def exportToDB(self):
+        to_store = {
+            'domid':              str(self.domid),
+            'uuid':               self.uuid,
+
+            'restart_time':       str(self.restart_time),
+
+            'xend/state':         self.state,
+            'xend/restart_count': str(self.restart_count),
+            'xend/restart_mode':  str(self.info['restart_mode']),
+
+            'memory/target':      str(self.info['memory_KiB'])
+            }
+
+        for (k, v) in self.info.items():
+            to_store[k] = str(v)
+
+        log.debug("Storing %s" % str(to_store))
+
+        self.writeVm(to_store)
+
 
     def setDomid(self, domid):
         """Set the domain id.
@@ -318,40 +474,87 @@
         self.domid = domid
         self.storeDom("domid", self.domid)
 
-    def getDomain(self):
+    def getDomid(self):
         return self.domid
 
     def setName(self, name):
-        self.name = name
+        self.check_name(name)
+        self.info['name'] = name
         self.storeVm("name", name)
 
     def getName(self):
-        return self.name
+        return self.info['name']
+
+    def getPath(self):
+        return self.path
+
+    def getUuid(self):
+        return self.uuid
+
+    def getVCpuCount(self):
+        return self.info['vcpus']
+
+    def getSsidref(self):
+        return self.info['ssidref']
+
+    def getMemoryTarget(self):
+        """Get this domain's target memory size, in KiB."""
+        return self.info['memory_KiB']
 
     def setStoreRef(self, ref):
         self.store_mfn = ref
         self.storeDom("store/ring-ref", ref)
 
-    def setStoreChannel(self, channel):
-        if self.store_channel and self.store_channel != channel:
-            self.store_channel.close()
-        self.store_channel = channel
-        self.storeDom("store/port", channel.port1)
+
+    def getBackendFlags(self):
+        return reduce(lambda x, y: x | backendFlags[y],
+                      self.info['backend'], 0)
+
+
+    def closeStoreChannel(self):
+        """Close the store channel, if any.  Nothrow guarantee."""
+        
+        try:
+            if self.store_channel:
+                try:
+                    self.store_channel.close()
+                    self.removeDom("store/port")
+                finally:
+                    self.store_channel = None
+        except Exception, exn:
+            log.exception(exn)
+
 
     def setConsoleRef(self, ref):
         self.console_mfn = ref
         self.storeDom("console/ring-ref", ref)
 
+
     def setMemoryTarget(self, target):
-        self.memory_target = target
+        """Set the memory target of this domain.
+        @param target In KiB.
+        """
+        self.info['memory_KiB'] = target
         self.storeDom("memory/target", target)
 
-    def update(self, info=None):
-        """Update with  info from xc.domain_getinfo().
-        """
-        self.info = info or dom_get(self.domid)
-        self.memory = self.info['mem_kb'] / 1024
-        self.ssidref = self.info['ssidref']
+
+    def update(self, info = None):
+        """Update with info from xc.domain_getinfo().
+        """
+
+        log.debug("XendDomainInfo.update(%s) on domain %d", info, self.domid)
+
+        if not info:
+            info = dom_get(self.domid)
+            if not info:
+                return
+            
+        self.info.update(info)
+        self.validateInfo()
+
+        log.debug("XendDomainInfo.update done on domain %d: %s", self.domid,
+                  self.info)
+
 
     def state_set(self, state):
         self.state_updated.acquire()
@@ -359,7 +562,7 @@
             self.state = state
             self.state_updated.notifyAll()
         self.state_updated.release()
-        self.saveToDB()
+        self.exportToDB()
 
     def state_wait(self, state):
         self.state_updated.acquire()
@@ -370,190 +573,83 @@
     def __str__(self):
         s = "<domain"
         s += " id=" + str(self.domid)
-        s += " name=" + self.name
-        s += " memory=" + str(self.memory)
-        s += " ssidref=" + str(self.ssidref)
+        s += " name=" + self.info['name']
+        s += " memory=" + str(self.info['memory_KiB'] / 1024)
+        s += " ssidref=" + str(self.info['ssidref'])
         s += ">"
         return s
 
     __repr__ = __str__
 
-    def getDeviceController(self, type, error=True):
-        ctrl = self.controllers.get(type)
-        if not ctrl and error:
-            raise XendError("invalid device type:" + type)
-        return ctrl
-    
-    def findDeviceController(self, type):
-        return (self.getDeviceController(type, error=False)
-                or self.createDeviceController(type))
-
-    def createDeviceController(self, type):
-        ctrl = controller.createDevController(type, self, recreate=self.recreate)
-        self.controllers[type] = ctrl
-        return ctrl
-
-    def createDevice(self, type, devconfig, change=False):
-        if self.recreate:
-            return
-        if type == 'vbd':
-            typedev = sxp.child_value(devconfig, 'dev')
-            if re.match('^ioemu:', typedev):
-               return;
-
-            backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
-
-            devnum = blkdev_name_to_number(sxp.child_value(devconfig, 'dev'))
-
-            backpath = "%s/backend/%s/%s/%d" % (backdom.path, type,
-                                                self.uuid, devnum)
-            frontpath = "%s/device/%s/%d" % (self.path, type, devnum)
-
-            front = { 'backend' : backpath,
-                      'backend-id' : "%i" % backdom.domid,
-                      'virtual-device' : "%i" % devnum }
-            xstransact.Write(frontpath, front)
-
-            (type, params) = string.split(sxp.child_value(devconfig,
-                                                          'uname'), ':', 1)
-            back = { 'type' : type,
-                     'params' : params,
-                     'frontend' : frontpath,
-                     'frontend-id' : "%i" % self.domid }
-            xstransact.Write(backpath, back)
-
-            return
-
-        if type == 'vif':
-            from xen.xend import XendRoot
-            xroot = XendRoot.instance()
-
-            def _get_config_ipaddr(config):
-                val = []
-                for ipaddr in sxp.children(config, elt='ip'):
-                    val.append(sxp.child0(ipaddr))
-                return val
-
-            backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
-
-            devnum = self.netif_idx
-            self.netif_idx += 1
-
-            script = sxp.child_value(devconfig, 'script',
-                                     xroot.get_vif_script())
-            script = os.path.join(xroot.network_script_dir, script)
-            bridge = sxp.child_value(devconfig, 'bridge',
-                                     xroot.get_vif_bridge())
-            mac = sxp.child_value(devconfig, 'mac')
-            ipaddr = _get_config_ipaddr(devconfig)
-
-            backpath = "%s/backend/%s/%s/%d" % (backdom.path, type,
-                                                self.uuid, devnum)
-            frontpath = "%s/device/%s/%d" % (self.path, type, devnum)
-
-            front = { 'backend' : backpath,
-                      'backend-id' : "%i" % backdom.domid,
-                      'handle' : "%i" % devnum,
-                      'mac' : mac }
-            xstransact.Write(frontpath, front)
-
-            back = { 'script' : script,
-                     'domain' : self.name,
-                     'mac' : mac,
-                     'bridge' : bridge,
-                     'frontend' : frontpath,
-                     'frontend-id' : "%i" % self.domid,
-                     'handle' : "%i" % devnum }
-            if ipaddr:
-                back['ip'] = ' '.join(ipaddr)
-            xstransact.Write(backpath, back)
-
-            return
-        
-        if type == 'vtpm':
-            backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0'))
-
-            devnum = int(sxp.child_value(devconfig, 'instance', '0'))
-            log.error("The domain has a TPM with instance %d." % devnum)
-
-            backpath = "%s/backend/%s/%s/%d" % (backdom.path, type,
-                                                self.uuid, devnum)
-            frontpath = "%s/device/%s/%d" % (self.path, type, devnum)
-
-            front = { 'backend' : backpath,
-                      'backend-id' : "%i" % backdom.domid,
-                      'handle' : "%i" % devnum }
-            xstransact.Write(frontpath, front)
-
-            back = { 'instance' : "%i" % devnum,
-                     'frontend' : frontpath,
-                     'frontend-id' : "%i" % self.domid }
-            xstransact.Write(backpath, back)
-
-            return
-
-        ctrl = self.findDeviceController(type)
-        return ctrl.createDevice(devconfig, recreate=self.recreate,
-                                 change=change)
-
-    def configureDevice(self, type, id, devconfig):
-        ctrl = self.getDeviceController(type)
-        return ctrl.configureDevice(id, devconfig)
-
-    def destroyDevice(self, type, id, change=False, reboot=False):
-        ctrl = self.getDeviceController(type)
-        return ctrl.destroyDevice(id, change=change, reboot=reboot)
-
-    def deleteDevice(self, type, id):
-        ctrl = self.getDeviceController(type)
-        return ctrl.deleteDevice(id)
-
-    def getDevice(self, type, id, error=True):
-        ctrl = self.getDeviceController(type)
-        return ctrl.getDevice(id, error=error)
-        
-    def getDeviceIds(self, type):
-        ctrl = self.getDeviceController(type)
-        return ctrl.getDeviceIds()
-    
-    def getDeviceSxprs(self, type):
-        ctrl = self.getDeviceController(type)
-        return ctrl.getDeviceSxprs()
+
+    def getDeviceController(self, name):
+        if name not in controllerClasses:
+            raise XendError("unknown device type: " + str(name))
+
+        return controllerClasses[name](self)
+
+
+    def createDevice(self, deviceClass, devconfig):
+        return self.getDeviceController(deviceClass).createDevice(devconfig)
+
+
+    def configureDevice(self, deviceClass, devid, devconfig):
+        return self.getDeviceController(deviceClass).configureDevice(
+            devid, devconfig)
+
+
+    def destroyDevice(self, deviceClass, devid):
+        return self.getDeviceController(deviceClass).destroyDevice(devid)
+
 
     def sxpr(self):
         sxpr = ['domain',
                 ['domid', self.domid],
-                ['name', self.name],
-                ['memory', self.memory],
-                ['ssidref', self.ssidref],
-                ['target', self.target] ]
+                ['name', self.info['name']],
+                ['memory', self.info['memory_KiB'] / 1024],
+                ['ssidref', self.info['ssidref']]]
         if self.uuid:
             sxpr.append(['uuid', self.uuid])
         if self.info:
-            sxpr.append(['maxmem', self.info['maxmem_kb']/1024 ])
-            run   = (self.info['running']  and 'r') or '-'
-            block = (self.info['blocked']  and 'b') or '-'
-            pause = (self.info['paused']   and 'p') or '-'
-            shut  = (self.info['shutdown'] and 's') or '-'
-            crash = (self.info['crashed']  and 'c') or '-'
-            state = run + block + pause + shut + crash
+            sxpr.append(['maxmem', self.info['maxmem_KiB'] / 1024])
+
+            if self.infoIsSet('device'):
+                for (n, c) in self.info['device']:
+                    sxpr.append(['device', c])
+
+            def stateChar(name):
+                if name in self.info:
+                    if self.info[name]:
+                        return name[0]
+                    else:
+                        return '-'
+                else:
+                    return '?'
+
+            state = reduce(
+                lambda x, y: x + y,
+                map(stateChar,
+                    ['running', 'blocked', 'paused', 'shutdown', 'crashed']))
+
             sxpr.append(['state', state])
-            if self.info['shutdown']:
+            if self.infoIsSet('shutdown'):
                 reason = shutdown_reason(self.info['shutdown_reason'])
                 sxpr.append(['shutdown_reason', reason])
-            sxpr.append(['cpu', self.info['vcpu_to_cpu'][0]])
-            sxpr.append(['cpu_time', self.info['cpu_time']/1e9])    
+            if self.infoIsSet('cpu_time'):
+                sxpr.append(['cpu_time', self.info['cpu_time']/1e9])    
             sxpr.append(['vcpus', self.info['vcpus']])
             sxpr.append(['cpumap', self.info['cpumap']])
-            # build a string, using '|' to seperate items, show only up
-            # to number of vcpus in domain, and trim the trailing '|'
-            sxpr.append(['vcpu_to_cpu', ''.join(map(lambda x: str(x)+'|',
-                        self.info['vcpu_to_cpu'][0:self.info['vcpus']]))[:-1]])
+            if self.infoIsSet('vcpu_to_cpu'):
+                sxpr.append(['cpu', self.info['vcpu_to_cpu'][0]])
+                # build a string, using '|' to separate items, show only up
+                # to number of vcpus in domain, and trim the trailing '|'
+                sxpr.append(['vcpu_to_cpu', ''.join(map(lambda x: str(x)+'|',
+                            self.info['vcpu_to_cpu'][0:self.info['vcpus']]))[:-1]])
             
-        if self.start_time:
-            up_time =  time.time() - self.start_time  
+        if self.infoIsSet('start_time'):
+            up_time =  time.time() - self.info['start_time']
             sxpr.append(['up_time', str(up_time) ])
-            sxpr.append(['start_time', str(self.start_time) ])
+            sxpr.append(['start_time', str(self.info['start_time']) ])
 
         if self.store_channel:
             sxpr.append(self.store_channel.sxpr())
@@ -563,36 +659,12 @@
             sxpr.append(['console_channel', self.console_channel.sxpr()])
         if self.console_mfn:
             sxpr.append(['console_mfn', self.console_mfn])
-# already in (devices)
-#        console = self.getConsole()
-#        if console:
-#            sxpr.append(console.sxpr())
-
         if self.restart_count:
             sxpr.append(['restart_count', self.restart_count])
         if self.restart_state:
             sxpr.append(['restart_state', self.restart_state])
         if self.restart_time:
             sxpr.append(['restart_time', str(self.restart_time)])
-
-        devs = self.sxpr_devices()
-        if devs:
-            sxpr.append(devs)
-        if self.config:
-            sxpr.append(['config', self.config])
-        if self.device_model_pid:
-            sxpr.append(['device_model_pid',self.device_model_pid])
-        return sxpr
-
-    def sxpr_devices(self):
-        sxpr = []
-        for ty in self.controllers.keys():
-            devs = self.getDeviceSxprs(ty)
-            sxpr += devs
-        if sxpr:
-            sxpr.insert(0, 'devices')
-        else:
-            sxpr = None
         return sxpr
 
     def check_name(self, name):
@@ -601,9 +673,8 @@
         The same name cannot be used for more than one vm at the same time.
 
         @param name: name
-        @raise: VMerror if invalid
-        """
-        if self.recreate: return
+        @raise: VmError if invalid
+        """
         if name is None or name == '':
             raise VmError('missing vm name')
         for c in name:
@@ -619,33 +690,35 @@
             return
         if dominfo.is_terminated():
             return
-        if not self.domid or (dominfo.domid != self.domid):
-            raise VmError('vm name clash: ' + name)
-        
-    def construct(self, config):
+        if self.domid is None:
+            raise VmError("VM name '%s' already in use by domain %d" %
+                          (name, dominfo.domid))
+        if dominfo.domid != self.domid:
+            raise VmError("VM name '%s' is used in both domains %d and %d" %
+                          (name, self.domid, dominfo.domid))
+
+
+    def construct(self):
         """Construct the vm instance from its configuration.
 
         @param config: configuration
         @raise: VmError on error
         """
         # todo - add support for scheduling params?
-        self.config = config
         try:
-            # Initial domain create.
-            self.setName(sxp.child_value(config, 'name'))
-            self.check_name(self.name)
-            self.init_image()
-            self.configure_cpus(config)
-            self.init_domain()
-            self.register_domain()
-            self.configure_bootloader()
+            if 'image' not in self.info:
+                raise VmError('Missing image in configuration')
+
+            self.image = ImageHandler.create(self,
+                                             self.info['image'],
+                                             self.info['device'])
+
+            self.initDomain()
 
             # Create domain devices.
-            self.configure_backends()
-            self.configure_restart()
             self.construct_image()
             self.configure()
-            self.exportToDB(save=True)
+            self.exportToDB()
         except Exception, ex:
             # Catch errors, cleanup and re-raise.
             print 'Domain construction error:', ex
@@ -654,45 +727,43 @@
             self.destroy()
             raise
 
-    def register_domain(self):
-        xd = get_component('xen.xend.XendDomain')
-        xd._add_domain(self)
-        self.exportToDB(save=True)
-
-    def configure_cpus(self, config):
-        try:
-            self.cpu_weight = float(sxp.child_value(config, 'cpu_weight', '1'))
-        except:
-            raise VmError('invalid cpu weight')
-        self.memory = int(sxp.child_value(config, 'memory'))
-        if self.memory is None:
-            raise VmError('missing memory size')
-        self.setMemoryTarget(self.memory * (1 << 20))
-        self.ssidref = int(sxp.child_value(config, 'ssidref'))
-        cpu = sxp.child_value(config, 'cpu')
-        if self.recreate and self.domid and cpu is not None and int(cpu) >= 0:
-            xc.domain_pincpu(self.domid, 0, 1<<int(cpu))
-        try:
-            image = sxp.child_value(self.config, 'image')
-            vcpus = sxp.child_value(image, 'vcpus')
-            if vcpus:
-                self.vcpus = int(vcpus)
-        except:
-            raise VmError('invalid vcpus value')
+
+    def initDomain(self):
+        log.debug('XendDomainInfo.initDomain: %s %s %s %s)',
+                  str(self.domid),
+                  str(self.info['memory_KiB']),
+                  str(self.info['ssidref']),
+                  str(self.info['cpu_weight']))
+
+        self.domid = xc.domain_create(dom = self.domid or 0,
+                                      ssidref = self.info['ssidref'])
+        if self.domid <= 0:
+            raise VmError('Creating domain failed: name=%s' %
+                          self.info['name'])
+
+        if self.info['bootloader']:
+            self.image.handleBootloading()
+
+        xc.domain_setcpuweight(self.domid, self.info['cpu_weight'])
+        m = self.image.getDomainMemory(self.info['memory_KiB'])
+        xc.domain_setmaxmem(self.domid, m)
+        xc.domain_memory_increase_reservation(self.domid, m, 0, 0)
+
+        cpu = self.info['cpu']
+        if cpu is not None and cpu != -1:
+            xc.domain_pincpu(self.domid, 0, 1 << cpu)
+
+        self.info['start_time'] = time.time()
+
+        log.debug('init_domain> Created domain=%d name=%s memory=%d',
+                  self.domid, self.info['name'], self.info['memory_KiB'])
+
 
     def configure_vcpus(self, vcpus):
         d = {}
         for v in range(0, vcpus):
             d["cpu/%d/availability" % v] = "online"
         self.writeVm(d)
-
-    def init_image(self):
-        """Create boot image handler for the domain.
-        """
-        image = sxp.child_value(self.config, 'image')
-        if image is None:
-            raise VmError('missing image')
-        self.image = ImageHandler.create(self, image)
 
     def construct_image(self):
         """Construct the boot image for the domain.
@@ -704,23 +775,17 @@
             IntroduceDomain(self.domid, self.store_mfn,
                             self.store_channel.port1, self.path)
         # get the configured value of vcpus and update store
-        self.configure_vcpus(self.vcpus)
+        self.configure_vcpus(self.info['vcpus'])
+
 
     def delete(self):
         """Delete the vm's db.
         """
-        if dom_get(self.domid):
-            return
-        self.domid = None
-        self.saveToDB(sync=True)
         try:
-            # Todo: eventually will have to wait for devices to signal
-            # destruction before can delete the db.
-            if self.db:
-                self.db.delete()
+            xstransact.Remove(self.path, 'domid')
         except Exception, ex:
             log.warning("error in domain db delete: %s", ex)
-            pass
+
 
     def destroy_domain(self):
         """Destroy the vm's domain.
@@ -732,17 +797,16 @@
         try:
             xc.domain_destroy(dom=self.domid)
         except Exception, err:
-            log.exception("Domain destroy failed: %s", self.name)
+            log.exception("Domain destroy failed: %s", self.info['name'])
 
     def cleanup(self):
         """Cleanup vm resources: release devices.
         """
         self.state = STATE_VM_TERMINATED
         self.release_devices()
-        if self.store_channel:
-            self.setStoreChannel(None)
+        self.closeStoreChannel()
         if self.console_channel:
-            # notify processes using this cosole?
+            # notify processes using this console?
             try:
                 self.console_channel.close()
                 self.console_channel = None
@@ -750,18 +814,20 @@
                 pass
         if self.image:
             try:
-                self.device_model_pid = 0
                 self.image.destroy()
                 self.image = None
             except:
                 pass
 
     def destroy(self):
-        """Clenup vm and destroy domain.
-        """
+        """Cleanup vm and destroy domain.
+        """
+
+        log.debug("XendDomainInfo.destroy")
+
         self.destroy_domain()
         self.cleanup()
-        self.saveToDB()
+        self.exportToDB()
         return 0
 
     def is_terminated(self):
@@ -772,43 +838,21 @@
     def release_devices(self):
         """Release all vm devices.
         """
-        reboot = self.restart_pending()
-        for ctrl in self.controllers.values():
-            if ctrl.isDestroyed(): continue
-            ctrl.destroyController(reboot=reboot)
+
         t = xstransact("%s/device" % self.path)
-        for d in t.list("vbd"):
-            t.remove(d)
-        for d in t.list("vif"):
-            t.remove(d)
-        for d in t.list("vtpm"):
-            t.remove(d)
+
+        for n in controllerClasses.keys():
+            for d in t.list(n):
+                try:
+                    t.remove(d)
+                except ex:
+                    # Log and swallow any exceptions in removal -- there's
+                    # nothing more we can do.
+                    log.exception(
+                        "Device release failed: %s; %s; %s; %s" %
+                        (self.info['name'], n, d, str(ex)))
         t.commit()
 
-    def show(self):
-        """Print virtual machine info.
-        """
-        print "[VM dom=%d name=%s memory=%d ssidref=%d" % (self.domid, self.name, self.memory, self.ssidref)
-        print "image:"
-        sxp.show(self.image)
-        print "]"
-
-    def init_domain(self):
-        """Initialize the domain memory.
-        """
-        if self.recreate:
-            return
-        if self.start_time is None:
-            self.start_time = time.time()
-            self.storeVm(("start-time", self.start_time))
-        try:
-            cpu = int(sxp.child_value(self.config, 'cpu', '-1'))
-        except:
-            raise VmError('invalid cpu')
-        id = self.image.initDomain(self.domid, self.memory, self.ssidref, cpu, self.cpu_weight)
-        log.debug('init_domain> Created domain=%d name=%s memory=%d',
-                  id, self.name, self.memory)
-        self.setDomid(id)
 
     def eventChannel(self, path=None):
         """Create an event channel to the domain.
@@ -833,17 +877,8 @@
         self.console_channel = self.eventChannel("console/port")
 
     def create_configured_devices(self):
-        devices = sxp.children(self.config, 'device')
-        for d in devices:
-            dev_config = sxp.child0(d)
-            if dev_config is None:
-                raise VmError('invalid device')
-            dev_type = sxp.name(dev_config)
-
-            if not controller.isDevControllerClass(dev_type):
-                raise VmError('unknown device type: ' + dev_type)
-            
-            self.createDevice(dev_type, dev_config)
+        for (n, c) in self.info['device']:
+            self.createDevice(n, c)
 
 
     def create_devices(self):
@@ -851,13 +886,10 @@
 
         @raise: VmError for invalid devices
         """
-        if self.rebooting():
-            for ctrl in self.controllers.values():
-                ctrl.initController(reboot=True)
-        else:
+        if not self.rebooting():
             self.create_configured_devices()
-        if not self.device_model_pid:
-            self.device_model_pid = self.image.createDeviceModel()
+        if self.image:
+            self.image.createDeviceModel()
 
     def device_create(self, dev_config):
         """Create a new device.
@@ -865,60 +897,19 @@
         @param dev_config: device configuration
         """
         dev_type = sxp.name(dev_config)
-        dev = self.createDevice(dev_type, dev_config, change=True)
-        self.config.append(['device', dev.getConfig()])
-        return dev.sxpr()
-
-    def device_configure(self, dev_config, id):
+        devid = self.createDevice(dev_type, dev_config)
+#        self.config.append(['device', dev.getConfig()])
+        return self.getDeviceController(dev_type).sxpr(devid)
+
+
+    def device_configure(self, dev_config, devid):
         """Configure an existing device.
-
         @param dev_config: device configuration
-        @param id:         device id
-        """
-        type = sxp.name(dev_config)
-        dev = self.getDevice(type, id)
-        old_config = dev.getConfig()
-        new_config = dev.configure(dev_config, change=True)
-        # Patch new config into vm config.
-        new_full_config = ['device', new_config]
-        old_full_config = ['device', old_config]
-        old_index = self.config.index(old_full_config)
-        self.config[old_index] = new_full_config
-        return new_config
-
-    def device_refresh(self, type, id):
-        """Refresh a device.
-
-        @param type: device type
-        @param id:   device id
-        """
-        dev = self.getDevice(type, id)
-        dev.refresh()
-        
-    def device_delete(self, type, id):
-        """Destroy and remove a device.
-
-        @param type: device type
-        @param id:   device id
-        """
-        dev = self.getDevice(type, id)
-        dev_config = dev.getConfig()
-        if dev_config:
-            self.config.remove(['device', dev_config])
-        self.deleteDevice(type, dev.getId())
-
-    def configure_bootloader(self):
-        """Configure boot loader.
-        """
-        self.bootloader = sxp.child_value(self.config, "bootloader")
-
-    def configure_restart(self):
-        """Configure the vm restart mode.
-        """
-        r = sxp.child_value(self.config, 'restart', RESTART_ONREBOOT)
-        if r not in restart_modes:
-            raise VmError('invalid restart mode: ' + str(r))
-        self.restart_mode = r;
+        @param devid:      device id
+        """
+        deviceClass = sxp.name(dev_config)
+        self.configureDevice(deviceClass, devid, dev_config)
+
 
     def restart_needed(self, reason):
         """Determine if the vm needs to be restarted when shutdown
@@ -927,11 +918,11 @@
         @param reason: shutdown reason
         @return True if needs restart, False otherwise
         """
-        if self.restart_mode == RESTART_NEVER:
+        if self.info['restart_mode'] == RESTART_NEVER:
             return False
-        if self.restart_mode == RESTART_ALWAYS:
+        if self.info['restart_mode'] == RESTART_ALWAYS:
             return True
-        if self.restart_mode == RESTART_ONREBOOT:
+        if self.info['restart_mode'] == RESTART_ONREBOOT:
             return reason == 'reboot'
         return False
 
@@ -963,7 +954,7 @@
             tdelta = tnow - self.restart_time
             if tdelta < self.MINIMUM_RESTART_TIME:
                 self.restart_cancel()
-                msg = 'VM %s restarting too fast' % self.name
+                msg = 'VM %s restarting too fast' % self.info['name']
                 log.error(msg)
                 raise VmError(msg)
         self.restart_time = tnow
@@ -981,14 +972,15 @@
             self.restart_check()
             self.exportToDB()
             self.restart_state = STATE_RESTART_BOOTING
-            if self.bootloader:
-                self.config = self.bootloader_config()
-            self.construct(self.config)
-            self.saveToDB()
+            self.configure_bootloader()
+            self.construct()
+            self.exportToDB()
         finally:
             self.restart_state = None
 
-    def bootloader_config(self):
+    def configure_bootloader(self):
+        if not self.info['bootloader']:
+            return
         # if we're restarting with a bootloader, we need to run it
         # FIXME: this assumes the disk is the first device and
         # that we're booting from the first disk
@@ -998,72 +990,30 @@
         if dev:
             disk = sxp.child_value(dev, "uname")
             fn = blkdev_uname_to_file(disk)
-            blcfg = bootloader(self.bootloader, fn, 1, self.vcpus)
+            blcfg = bootloader(self.info['bootloader'], fn, 1, self.info['vcpus'])
         if blcfg is None:
             msg = "Had a bootloader specified, but can't find disk"
             log.error(msg)
             raise VmError(msg)
-        config = sxp.merge(['vm', blcfg ], self.config)
-        return config
-
-    def configure_backends(self):
-        """Set configuration flags if the vm is a backend for netif or blkif.
-        Configure the backends to use for vbd and vif if specified.
-        """
-        for c in sxp.children(self.config, 'backend'):
-            v = sxp.child0(c)
-            name = sxp.name(v)
-            if name == 'blkif':
-                self.blkif_backend = True
-            elif name == 'netif':
-                self.netif_backend = True
-            elif name == 'usbif':
-                self.usbif_backend = True
-            elif name == 'tpmif':
-                self.tpmif_backend = True
-            else:
-                raise VmError('invalid backend type:' + str(name))
+        self.config = sxp.merge(['vm', ['image', blcfg]], self.config)
+
 
     def configure(self):
         """Configure a vm.
 
         """
-        self.configure_fields()
+        self.configure_maxmem()
         self.create_devices()
-        self.create_blkif()
-
-    def create_blkif(self):
-        """Create the block device interface (blkif) for the vm.
-        The vm needs a blkif even if it doesn't have any disks
-        at creation time, for example when it uses NFS root.
-
-        """
-        return
-        blkif = self.getDeviceController("vbd", error=False)
-        if not blkif:
-            blkif = self.createDeviceController("vbd")
-            backend = blkif.getBackend(0)
-            backend.connect(recreate=self.recreate)
-
-    def configure_fields(self):
-        """Process the vm configuration fields using the registered handlers.
-        """
-        index = {}
-        for field in sxp.children(self.config):
-            field_name = sxp.name(field)
-            field_index = index.get(field_name, 0)
-            field_handler = config_handlers.get(field_name)
-            # Ignore unknown fields. Warn?
-            if field_handler:
-                v = field_handler(self, self.config, field, field_index)
-            else:
-                log.warning("Unknown config field %s", field_name)
-            index[field_name] = field_index + 1
+
+
+    def configure_maxmem(self):
+        xc.domain_setmaxmem(self.domid, maxmem_kb = self.info['maxmem_KiB'])
+
 
     def vcpu_hotplug(self, vcpu, state):
         """Disable or enable VCPU in domain.
         """
-        if vcpu > self.vcpus:
+        if vcpu > self.info['vcpus']:
             log.error("Invalid VCPU %d" % vcpu)
             return
         if int(state) == 0:
@@ -1109,26 +1059,29 @@
             # get run-time value of vcpus and update store
             self.configure_vcpus(dom_get(self.domid)['vcpus'])
 
-
-def vm_field_ignore(_, _1, _2, _3):
-    """Dummy config field handler used for fields with built-in handling.
-    Matches the signature required by config_handlers.
-    """
-    pass
-
-
-def vm_field_maxmem(vm, _1, val, _2):
-    """Config field handler to configure vm memory limit.  Matches the
-    signature required by config_handlers.
-    """
-    maxmem = sxp.child0(val)
-    if maxmem is None:
-        maxmem = vm.memory
-    try:
-        maxmem = int(maxmem)
-    except:
-        raise VmError("invalid maxmem: " + str(maxmem))
-    xc.domain_setmaxmem(vm.domid, maxmem_kb = maxmem * 1024)
+    def dom0_enforce_vcpus(self):
+        dom = 0
+        # get max number of vcpus to use for dom0 from config
+        from xen.xend import XendRoot
+        xroot = XendRoot.instance()
+        target = int(xroot.get_dom0_vcpus())
+        log.debug("number of vcpus to use is %d" % (target))
+   
+        # target = 0 means use all processors
+        if target > 0:
+            # count the number of online vcpus (cpu values in v2c map >= 0)
+            vcpu_to_cpu = dom_get(dom)['vcpu_to_cpu']
+            vcpus_online = len(filter(lambda x: x >= 0, vcpu_to_cpu))
+            log.debug("found %d vcpus online" % (vcpus_online))
+
+            # disable any extra vcpus that are online over the requested target
+            for vcpu in range(target, vcpus_online):
+                log.info("enforcement is disabling DOM%d VCPU%d" % (dom, vcpu))
+                self.vcpu_hotplug(vcpu, 0)
+
+
+    def infoIsSet(self, name):
+        return name in self.info and self.info[name] is not None
 
 
 #============================================================================
@@ -1144,37 +1097,32 @@
 addImageHandlerClass(VmxImageHandler)
 
 
-"""Table of handlers for field configuration.
-
-field_name[String]: fn(vm, config, field, index) -> value(ignored)
-"""
-config_handlers = {
-    
-    # Ignore the fields we already handle.
-    
-    'name':       vm_field_ignore,
-    'memory':     vm_field_ignore,
-    'ssidref':    vm_field_ignore,
-    'cpu':        vm_field_ignore,
-    'cpu_weight': vm_field_ignore,
-    'restart':    vm_field_ignore,
-    'image':      vm_field_ignore,
-    'device':     vm_field_ignore,
-    'backend':    vm_field_ignore,
-    'vcpus':      vm_field_ignore,
-    'bootloader': vm_field_ignore,
-    
-    # Register other config handlers.
-    'maxmem':     vm_field_maxmem
-    }
-
-
 #============================================================================
 # Register device controllers and their device config types.
 
+"""A map from device-class names to the subclass of DevController that
+implements the device control specific to that device-class."""
+controllerClasses = {}
+
+
+"""A map of backend names and the corresponding flag."""
+backendFlags = {}
+
+
+def addControllerClass(device_class, backend_name, backend_flag, cls):
+    """Register a subclass of DevController to handle the named device-class.
+
+    @param backend_flag One of the SIF_XYZ_BE_DOMAIN constants, or None if
+    no flag is to be set.
+    """
+    cls.deviceClass = device_class
+    backendFlags[backend_name] = backend_flag
+    controllerClasses[device_class] = cls
+
+
 from xen.xend.server import blkif, netif, tpmif, pciif, usbif
-controller.addDevControllerClass("vbd",  blkif.BlkifController)
-controller.addDevControllerClass("vif",  netif.NetifController)
-controller.addDevControllerClass("vtpm", tpmif.TPMifController)
-controller.addDevControllerClass("pci",  pciif.PciController)
-controller.addDevControllerClass("usb",  usbif.UsbifController)
+addControllerClass('vbd',  'blkif', SIF_BLK_BE_DOMAIN, blkif.BlkifController)
+addControllerClass('vif',  'netif', SIF_NET_BE_DOMAIN, netif.NetifController)
+addControllerClass('vtpm', 'tpmif', SIF_TPM_BE_DOMAIN, tpmif.TPMifController)
+addControllerClass('pci',  'pciif', None,              pciif.PciController)
+addControllerClass('usb',  'usbif', None,              usbif.UsbifController)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendLogging.py
--- a/tools/python/xen/xend/XendLogging.py      Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendLogging.py      Thu Sep 22 17:42:01 2005
@@ -50,9 +50,6 @@
         self.getLogger().setLevel(level)
         self.level = level
 
-    def getLevel(self, level):
-        return logging.getLevelName(self.level)
-
     def getLogger(self):
         return logging.getLogger("xend")
 
@@ -65,8 +62,7 @@
                                            backupCount=self.backupCount)
         self.logfilename = filename
         self.logfile.setFormatter(Formatter(self.logFileFormat, self.dateFormat))
-        log = self.getLogger()
-        log.addHandler(self.logfile)
+        self.getLogger().addHandler(self.logfile)
 
     def getLogFile(self):
         return self.logfile
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendNode.py Thu Sep 22 17:42:01 2005
@@ -36,7 +36,7 @@
     def reboot(self):
         return 0
 
-    def notify(self, uri):
+    def notify(self, _):
         return 0
     
     def cpu_bvt_slice_set(self, ctx_allow):
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendProtocol.py
--- a/tools/python/xen/xend/XendProtocol.py     Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendProtocol.py     Thu Sep 22 17:42:01 2005
@@ -22,7 +22,7 @@
 from encode import *
 import sxp
 
-from xen.xend import XendRoot; xroot = XendRoot.instance()
+from xen.xend import XendRoot
 
 DEBUG = 0
 
@@ -30,6 +30,10 @@
 HTTP_CREATED                         = 201
 HTTP_ACCEPTED                        = 202
 HTTP_NO_CONTENT                      = 204
+
+
+xroot = XendRoot.instance()
+
 
 class XendError(RuntimeError):
     """Error class for 'expected errors' when talking to xend.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/XendRoot.py
--- a/tools/python/xen/xend/XendRoot.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/XendRoot.py Thu Sep 22 17:42:01 2005
@@ -87,7 +87,7 @@
 
     dom0_min_mem_default = '0'
 
-    dom0_cpus_default = '0'
+    dom0_vcpus_default = '0'
 
     components = {}
 
@@ -332,8 +332,8 @@
     def get_dom0_min_mem(self):
         return self.get_config_int('dom0-min-mem', self.dom0_min_mem_default)
 
-    def get_dom0_cpus(self):
-        return self.get_config_int('dom0-cpus', self.dom0_cpus_default)
+    def get_dom0_vcpus(self):
+        return self.get_config_int('dom0-cpus', self.dom0_vcpus_default)
 
 def instance():
     """Get an instance of XendRoot.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/encode.py
--- a/tools/python/xen/xend/encode.py   Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/encode.py   Thu Sep 22 17:42:01 2005
@@ -26,7 +26,6 @@
 from StringIO import StringIO
 
 import urllib
-import httplib
 import random
 import md5
 
@@ -104,7 +103,7 @@
     val = ({}, None)
     if d is None: return val
     multipart = 0
-    for (k, v) in data_values(d):
+    for (_, v) in data_values(d):
         if encode_isfile(v):
             multipart = 1
             break
@@ -156,7 +155,7 @@
 def mime_boundary():
     random.seed()
     m = md5.new()
-    for i in range(0, 10):
+    for _ in range(0, 10):
         c = chr(random.randint(1, 255))
         m.update(c)
     b = m.hexdigest()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/image.py    Thu Sep 22 17:42:01 2005
@@ -13,34 +13,29 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
 #============================================================================
+
 
 import os, string
 import re
 
-import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+import xen.lowlevel.xc
 from xen.xend import sxp
 from xen.xend.XendError import VmError
 from xen.xend.XendLogging import log
-from xen.xend.xenstore import DBVar
-from xen.xend.xenstore.xstransact import xstransact
 
 from xen.xend.server import channel
 
-"""Flag for a block device backend domain."""
-SIF_BLK_BE_DOMAIN = (1<<4)
-
-"""Flag for a net device backend domain."""
-SIF_NET_BE_DOMAIN = (1<<5)
-
-"""Flag for a TPM device backend domain."""
-SIF_TPM_BE_DOMAIN = (1<<7)
+
+xc = xen.lowlevel.xc.new()
+
+
+MAX_GUEST_CMDLINE = 1024
 
 class ImageHandler:
     """Abstract base class for image handlers.
 
-    initDomain() is called to initialise the domain memory.
-    
     createImage() is called to configure and build the domain from its
     kernel image and ramdisk etc.
 
@@ -88,49 +83,57 @@
 
     findImageHandlerClass = classmethod(findImageHandlerClass)
 
-    def create(cls, vm, image):
+    def create(cls, vm, imageConfig, deviceConfig):
         """Create an image handler for a vm.
 
-        @param vm vm
-        @param image image config
         @return ImageHandler instance
         """
-        imageClass = cls.findImageHandlerClass(image)
-        return imageClass(vm, image)
+        imageClass = cls.findImageHandlerClass(imageConfig)
+        return imageClass(vm, imageConfig, deviceConfig)
 
     create = classmethod(create)
 
     #======================================================================
     # Instance vars and methods.
 
-    db = None
     ostype = None
 
-    config = None
     kernel = None
     ramdisk = None
     cmdline = None
+
     flags = 0
 
-    __exports__ = [
-        DBVar('ostype',  ty='str'),
-        DBVar('config',  ty='sxpr'),
-        DBVar('kernel',  ty='str'),
-        DBVar('ramdisk', ty='str'),
-        DBVar('cmdline', ty='str'),
-        DBVar('flags',   ty='int'),
-        ]
-
-    def __init__(self, vm, config):
+    def __init__(self, vm, imageConfig, deviceConfig):
         self.vm = vm
-        self.db = vm.db.addChild('/image')
-        self.config = config
-
-    def exportToDB(self, save=False, sync=False):
-        self.db.exportToDB(self, fields=self.__exports__, save=save, sync=sync)
-
-    def importFromDB(self):
-        self.db.importFromDB(self, fields=self.__exports__)
+        self.configure(imageConfig, deviceConfig)
+
+    def configure(self, imageConfig, _):
+        """Config actions common to all unix-like domains."""
+
+        self.kernel = sxp.child_value(imageConfig, "kernel")
+        self.cmdline = ""
+        ip = sxp.child_value(imageConfig, "ip", None)
+        if ip:
+            self.cmdline += " ip=" + ip
+        root = sxp.child_value(imageConfig, "root")
+        if root:
+            self.cmdline += " root=" + root
+        args = sxp.child_value(imageConfig, "args")
+        if args:
+            self.cmdline += " " + args
+        self.ramdisk = sxp.child_value(imageConfig, "ramdisk", '')
+        
+        self.vm.storeVm(("image/ostype", self.ostype),
+                        ("image/kernel", self.kernel),
+                        ("image/cmdline", self.cmdline),
+                        ("image/ramdisk", self.ramdisk))
+
+
+    def handleBootloading():
+        self.unlink(self.kernel)
+        self.unlink(self.ramdisk)
+
 
     def unlink(self, f):
         if not f: return
@@ -139,94 +142,39 @@
         except OSError, ex:
             log.warning("error removing bootloader file '%s': %s", f, ex)
 
-    def initDomain(self, dom, memory, ssidref, cpu, cpu_weight):
-        """Initial domain create.
-
-        @return domain id
-        """
-
-        mem_kb = self.getDomainMemory(memory)
-        if not self.vm.restore:
-            dom = xc.domain_create(dom = dom or 0, ssidref = ssidref)
-            # if bootloader, unlink here. But should go after buildDomain() ?
-            if self.vm.bootloader:
-                self.unlink(self.kernel)
-                self.unlink(self.ramdisk)
-            if dom <= 0:
-                raise VmError('Creating domain failed: name=%s' % self.vm.name)
-        log.debug("initDomain: cpu=%d mem_kb=%d ssidref=%d dom=%d", cpu, mem_kb, ssidref, dom)
-        xc.domain_setcpuweight(dom, cpu_weight)
-        xc.domain_setmaxmem(dom, mem_kb)
-
-        try:
-            # Give the domain some memory below 4GB
-            lmem_kb = 0
-            if lmem_kb > 0:
-                xc.domain_memory_increase_reservation(dom, min(lmem_kb,mem_kb), 0, 32)
-            if mem_kb > lmem_kb:
-                xc.domain_memory_increase_reservation(dom, mem_kb-lmem_kb, 0, 0)
-        except:
-            xc.domain_destroy(dom)
-            raise
-
-        if cpu != -1:
-            xc.domain_pincpu(dom, 0, 1<<int(cpu))
-        return dom
 
     def createImage(self):
         """Entry point to create domain memory image.
         Override in subclass  if needed.
         """
-        self.configure()
         self.createDomain()
 
-    def configure(self):
-        """Config actions common to all unix-like domains."""
-        self.kernel = sxp.child_value(self.config, "kernel")
-        self.cmdline = ""
-        ip = sxp.child_value(self.config, "ip", None)
-        if ip:
-            self.cmdline += " ip=" + ip
-        root = sxp.child_value(self.config, "root")
-        if root:
-            self.cmdline += " root=" + root
-        args = sxp.child_value(self.config, "args")
-        if args:
-            self.cmdline += " " + args
-        self.ramdisk = sxp.child_value(self.config, "ramdisk", '')
-        
     def createDomain(self):
         """Build the domain boot image.
         """
         # Set params and call buildDomain().
-        self.flags = 0
-        if self.vm.netif_backend: self.flags |= SIF_NET_BE_DOMAIN
-        if self.vm.blkif_backend: self.flags |= SIF_BLK_BE_DOMAIN
-        if self.vm.tpmif_backend: self.flags |= SIF_TPM_BE_DOMAIN
-
-        if self.vm.recreate or self.vm.restore:
-            return
+        self.flags = self.vm.getBackendFlags()
+
         if not os.path.isfile(self.kernel):
             raise VmError('Kernel image does not exist: %s' % self.kernel)
         if self.ramdisk and not os.path.isfile(self.ramdisk):
             raise VmError('Kernel ramdisk does not exist: %s' % self.ramdisk)
-        if len(self.cmdline) >= 256:
-            log.warning('kernel cmdline too long, domain %d', self.vm.getDomain())
+        if len(self.cmdline) >= MAX_GUEST_CMDLINE:
+            log.warning('kernel cmdline too long, domain %d',
+                        self.vm.getDomid())
         
         log.info("buildDomain os=%s dom=%d vcpus=%d", self.ostype,
-                 self.vm.getDomain(), self.vm.vcpus)
+                 self.vm.getDomid(), self.vm.getVCpuCount())
         err = self.buildDomain()
         if err != 0:
             raise VmError('Building domain failed: ostype=%s dom=%d err=%d'
-                          % (self.ostype, self.vm.getDomain(), err))
-
-    def getDomainMemory(self, mem_mb):
-        """Memory (in KB) the domain will need for mem_mb (in MB)."""
-        if os.uname()[4] == 'ia64':
-           """Append extra system pages, like xenstore and console"""
-           return (mem_mb * 1024 + 3 * 16)
-       else:
-            return mem_mb * 1024
+                          % (self.ostype, self.vm.getDomid(), err))
+
+    def getDomainMemory(self, mem):
+        """@return The memory required, in KiB, by the domain to store the
+        given amount, also in KiB.  This is normally just mem, but VMX domains
+        have overheads to account for."""
+        return mem
 
     def buildDomain(self):
         """Build the domain. Define in subclass."""
@@ -262,23 +210,23 @@
         else:
             console_evtchn = 0
 
-        log.debug("dom            = %d", self.vm.getDomain())
+        log.debug("dom            = %d", self.vm.getDomid())
         log.debug("image          = %s", self.kernel)
         log.debug("store_evtchn   = %d", store_evtchn)
         log.debug("console_evtchn = %d", console_evtchn)
         log.debug("cmdline        = %s", self.cmdline)
         log.debug("ramdisk        = %s", self.ramdisk)
         log.debug("flags          = %d", self.flags)
-        log.debug("vcpus          = %d", self.vm.vcpus)
-
-        ret = xc.linux_build(dom            = self.vm.getDomain(),
+        log.debug("vcpus          = %d", self.vm.getVCpuCount())
+
+        ret = xc.linux_build(dom            = self.vm.getDomid(),
                              image          = self.kernel,
                              store_evtchn   = store_evtchn,
                              console_evtchn = console_evtchn,
                              cmdline        = self.cmdline,
                              ramdisk        = self.ramdisk,
                              flags          = self.flags,
-                             vcpus          = self.vm.vcpus)
+                             vcpus          = self.vm.getVCpuCount())
         if isinstance(ret, dict):
             self.set_vminfo(ret)
             return 0
@@ -286,49 +234,72 @@
 
 class VmxImageHandler(ImageHandler):
 
-    __exports__ = ImageHandler.__exports__ + [
-        DBVar('memmap',        ty='str'),
-        DBVar('memmap_value',  ty='sxpr'),
-        # device channel?
-        ]
-    
     ostype = "vmx"
-    memmap = None
-    memmap_value = []
-    device_channel = None
-    pid = 0
+
+    def configure(self, imageConfig, deviceConfig):
+        ImageHandler.configure(self, imageConfig, deviceConfig)
+        
+        self.memmap = sxp.child_value(imageConfig, 'memmap')
+        self.dmargs = self.parseDeviceModelArgs(imageConfig, deviceConfig)
+        self.device_model = sxp.child_value(imageConfig, 'device_model')
+        if not self.device_model:
+            raise VmError("vmx: missing device model")
+        self.display = sxp.child_value(imageConfig, 'display')
+
+        self.vm.storeVm(("image/memmap", self.memmap),
+                        ("image/dmargs", " ".join(self.dmargs)),
+                        ("image/device-model", self.device_model),
+                        ("image/display", self.display))
+
+        self.device_channel = None
+        self.pid = 0
+        self.memmap_value = []
+
+        self.dmargs += self.configVNC(imageConfig)
+
+
     def createImage(self):
         """Create a VM for the VMX environment.
         """
-        self.configure()
         self.parseMemmap()
         self.createDomain()
 
     def buildDomain(self):
         # Create an event channel
-        self.device_channel = channel.eventChannel(0, self.vm.getDomain())
+        self.device_channel = channel.eventChannel(0, self.vm.getDomid())
         log.info("VMX device model port: %d", self.device_channel.port2)
         if self.vm.store_channel:
             store_evtchn = self.vm.store_channel.port2
         else:
             store_evtchn = 0
-        ret = xc.vmx_build(dom            = self.vm.getDomain(),
-                            image          = self.kernel,
-                            control_evtchn = self.device_channel.port2,
-                            store_evtchn   = store_evtchn,
-                            memsize        = self.vm.memory,
-                            memmap         = self.memmap_value,
-                            cmdline        = self.cmdline,
-                            ramdisk        = self.ramdisk,
-                            flags          = self.flags,
-                            vcpus          = self.vm.vcpus)
+
+        log.debug("dom            = %d", self.vm.getDomid())
+        log.debug("image          = %s", self.kernel)
+        log.debug("control_evtchn = %d", self.device_channel.port2)
+        log.debug("store_evtchn   = %d", store_evtchn)
+        log.debug("memsize        = %d", self.vm.getMemoryTarget() / 1024)
+        log.debug("memmap         = %s", self.memmap_value)
+        log.debug("cmdline        = %s", self.cmdline)
+        log.debug("ramdisk        = %s", self.ramdisk)
+        log.debug("flags          = %d", self.flags)
+        log.debug("vcpus          = %d", self.vm.getVCpuCount())
+
+        ret = xc.vmx_build(dom            = self.vm.getDomid(),
+                           image          = self.kernel,
+                           control_evtchn = self.device_channel.port2,
+                           store_evtchn   = store_evtchn,
+                           memsize        = self.vm.getMemoryTarget() / 1024,
+                           memmap         = self.memmap_value,
+                           cmdline        = self.cmdline,
+                           ramdisk        = self.ramdisk,
+                           flags          = self.flags,
+                           vcpus          = self.vm.getVCpuCount())
         if isinstance(ret, dict):
             self.set_vminfo(ret)
             return 0
         return ret
 
     def parseMemmap(self):
-        self.memmap = sxp.child_value(self.vm.config, "memmap")
         if self.memmap is None:
             return
         memmap = sxp.parse(open(self.memmap))[0]
@@ -337,12 +308,12 @@
         
     # Return a list of cmd line args to the device models based on the
     # xm config file
-    def parseDeviceModelArgs(self):
-       dmargs = [ 'cdrom', 'boot', 'fda', 'fdb',
-                   'localtime', 'serial', 'stdvga', 'isa' ] 
-       ret = []
-       for a in dmargs:
-                   v = sxp.child_value(self.vm.config, a)
+    def parseDeviceModelArgs(self, imageConfig, deviceConfig):
+        dmargs = [ 'cdrom', 'boot', 'fda', 'fdb',
+                   'localtime', 'serial', 'stdvga', 'isa', 'vcpus' ] 
+        ret = []
+        for a in dmargs:
+            v = sxp.child_value(imageConfig, a)
 
             # python doesn't allow '-' in variable names
             if a == 'stdvga': a = 'std-vga'
@@ -351,20 +322,17 @@
             if a in ['localtime', 'std-vga', 'isa']:
                 if v != None: v = int(v)
 
-           log.debug("args: %s, val: %s" % (a,v))
-           if v: 
-               ret.append("-%s" % a)
-               ret.append("%s" % v)
+            log.debug("args: %s, val: %s" % (a,v))
+            if v: 
+                ret.append("-%s" % a)
+                ret.append("%s" % v)
 
         # Handle disk/network related options
-        devices = sxp.children(self.vm.config, 'device')
-        for device in devices:
-            name = sxp.name(sxp.child0(device))
+        for (name, info) in deviceConfig:
             if name == 'vbd':
-               vbdinfo = sxp.child(device, 'vbd')
-               uname = sxp.child_value(vbdinfo, 'uname')
-               typedev = sxp.child_value(vbdinfo, 'dev')
-               (vbdtype, vbdparam) = string.split(uname, ':', 1)
+               uname = sxp.child_value(info, 'uname')
+               typedev = sxp.child_value(info, 'dev')
+               (_, vbdparam) = string.split(uname, ':', 1)
                if re.match('^ioemu:', typedev):
                   (emtype, vbddev) = string.split(typedev, ':', 1)
                else:
@@ -378,61 +346,59 @@
                ret.append("-%s" % vbddev)
                ret.append("%s" % vbdparam)
             if name == 'vif':
-               vifinfo = sxp.child(device, 'vif')
-               mac = sxp.child_value(vifinfo, 'mac')
+               mac = sxp.child_value(info, 'mac')
                ret.append("-macaddr")
                ret.append("%s" % mac)
             if name == 'vtpm':
-               vtpminfo = sxp.child(device, 'vtpm')
-               instance = sxp.child_value(vtpminfo, 'instance')
+               instance = sxp.child_value(info, 'instance')
                ret.append("-instance")
                ret.append("%s" % instance)
-
-       # Handle graphics library related options
-       vnc = sxp.child_value(self.vm.config, 'vnc')
-       sdl = sxp.child_value(self.vm.config, 'sdl')
-       nographic = sxp.child_value(self.vm.config, 'nographic')
-       if nographic:
-           ret.append('-nographic')
-           return ret
-       
-       if vnc and sdl:
-           ret = ret + ['-vnc-and-sdl', '-k', 'en-us']
-       elif vnc:
-           ret = ret + ['-vnc', '-k', 'en-us']
-       if vnc:
-           vncport = int(self.vm.getDomain()) + 5900
-           ret = ret + ['-vncport', '%d' % vncport]
-       return ret
-                 
+        return ret
+
+    def configVNC(self, config):
+        # Handle graphics library related options
+        vnc = sxp.child_value(config, 'vnc')
+        sdl = sxp.child_value(config, 'sdl')
+        ret = []
+        nographic = sxp.child_value(config, 'nographic')
+        if nographic:
+            ret.append('-nographic')
+            return ret
+
+        if vnc and sdl:
+            ret = ret + ['-vnc-and-sdl', '-k', 'en-us']
+        elif vnc:
+            ret = ret + ['-vnc', '-k', 'en-us']
+        if vnc:
+            vncport = int(self.vm.getDomid()) + 5900
+            ret = ret + ['-vncport', '%d' % vncport]
+        return ret
+
     def createDeviceModel(self):
-        device_model = sxp.child_value(self.vm.config, 'device_model')
-        if not device_model:
-            raise VmError("vmx: missing device model")
+        if self.pid:
+            return
         # Execute device model.
         #todo: Error handling
         # XXX RN: note that the order of args matter!
-        args = [device_model]
+        args = [self.device_model]
         vnc = self.vncParams()
         if len(vnc):
             args = args + vnc
-        args = args + ([ "-d",  "%d" % self.vm.getDomain(),
+        args = args + ([ "-d",  "%d" % self.vm.getDomid(),
                   "-p", "%d" % self.device_channel.port1,
-                  "-m", "%s" % self.vm.memory ])
-       args = args + self.parseDeviceModelArgs()
+                  "-m", "%s" % (self.vm.getMemoryTarget() / 1024)])
+        args = args + self.dmargs
         env = dict(os.environ)
-        env['DISPLAY'] = sxp.child_value(self.vm.config, 'display')
-        log.info("spawning device models: %s %s", device_model, args)
-        self.pid = os.spawnve(os.P_NOWAIT, device_model, args, env)
+        env['DISPLAY'] = self.display
+        log.info("spawning device models: %s %s", self.device_model, args)
+        self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env)
         log.info("device model pid: %d", self.pid)
-        return self.pid
 
     def vncParams(self):
         # see if a vncviewer was specified
         # XXX RN: bit of a hack. should unify this, maybe stick in config space
         vncconnect=[]
-        image = self.config
-        args = sxp.child_value(image, "args")
+        args = self.cmdline
         if args:
             arg_list = string.split(args)
             for arg in arg_list:
@@ -446,15 +412,16 @@
         channel.eventChannelClose(self.device_channel)
         import signal
         if not self.pid:
-            self.pid = self.vm.device_model_pid
+            return
         os.kill(self.pid, signal.SIGKILL)
-        (pid, status) = os.waitpid(self.pid, 0)
+        os.waitpid(self.pid, 0)
         self.pid = 0
 
-    def getDomainMemory(self, mem_mb):
+    def getDomainMemory(self, mem):
+        """@see ImageHandler.getDomainMemory"""
         # for ioreq_t and xenstore
         static_pages = 2
-        return (mem_mb * 1024) + self.getPageTableSize(mem_mb) + 4 * static_pages
+        return mem + self.getPageTableSize(mem / 1024) + 4 * static_pages
             
     def getPageTableSize(self, mem_mb):
         """Return the size of memory needed for 1:1 page tables for physical
@@ -466,8 +433,9 @@
         # 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
         if os.uname()[4] == 'x86_64':
             return (5 + ((mem_mb + 1) >> 1)) * 4
-       elif os.uname()[4] == 'ia64':
-           # XEN/IA64 has p2m table allocated on demand, so only return guest firmware size here.
-           return 16 * 1024
+        elif os.uname()[4] == 'ia64':
+            # XEN/IA64 has p2m table allocated on demand, so only return
+            # guest firmware size here.
+            return 16 * 1024
         else:
             return (1 + ((mem_mb + 3) >> 2)) * 4
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/scheduler.py
--- a/tools/python/xen/xend/scheduler.py        Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/scheduler.py        Thu Sep 22 17:42:01 2005
@@ -13,11 +13,12 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
 #============================================================================
 
 import threading
 
-def later(delay, fn, args=(), kwargs={}):
+def later(delay, fn, *args, **kwargs):
     """Schedule a function to be called later.
 
     @param delay:  delay in seconds
@@ -29,7 +30,7 @@
     timer.start()
     return timer
 
-def now(fn, args=(), kwargs={}):
+def now(fn, *args, **kwargs):
     """Schedule a function to be called now.
 
     @param fn:     function
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/SrvDaemon.py
--- a/tools/python/xen/xend/server/SrvDaemon.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/SrvDaemon.py Thu Sep 22 17:42:01 2005
@@ -25,7 +25,6 @@
 from xen.xend.XendLogging import log
 from xen.xend import XendRoot; xroot = XendRoot.instance()
 
-import controller
 import event
 import relocate
 from params import *
@@ -137,13 +136,6 @@
         else:
             return 0
 
-    def onSIGCHLD(self, signum, frame):
-        if self.child > 0: 
-            try: 
-                pid, sts = os.waitpid(self.child, os.WNOHANG)
-            except os.error, ex:
-                pass
-
     def fork_pid(self, pidfile):
         """Fork and write the pid of the child to 'pidfile'.
 
@@ -200,15 +192,29 @@
             # Trying to run an already-running service is a success.
             return 0
 
-        signal.signal(signal.SIGCHLD, self.onSIGCHLD)
+        ret = 0
+
+        # we use a pipe to communicate between the parent and the child process
+        # this way we know when the child has actually initialized itself so
+        # we can avoid a race condition during startup
+        
+        r,w = os.pipe()
         if self.fork_pid(XEND_PID_FILE):
-            #Parent. Sleep to give child time to start.
-            time.sleep(1)
+            os.close(w)
+            r = os.fdopen(r, 'r')
+            s = r.read()
+            r.close()
+            if not len(s):
+                ret = 1
+            else:
+                ret = int(s)
         else:
+            os.close(r)
             # Child
             self.tracing(trace)
-            self.run()
-        return 0
+            self.run(os.fdopen(w, 'w'))
+
+        return ret
 
     def tracing(self, traceon):
         """Turn tracing on or off.
@@ -290,20 +296,21 @@
     def stop(self):
         return self.cleanup(kill=True)
 
-    def run(self):
-        _enforce_dom0_cpus()
+    def run(self, status):
         try:
             log.info("Xend Daemon started")
             event.listenEvent(self)
             relocate.listenRelocation()
             servers = SrvServer.create()
             self.daemonize()
-            servers.start()
+            servers.start(status)
         except Exception, ex:
             print >>sys.stderr, 'Exception starting xend:', ex
             if XEND_DEBUG:
                 traceback.print_exc()
             log.exception("Exception starting xend (%s)" % ex)
+            status.write('1')
+            status.close()
             self.exit(1)
             
     def exit(self, rc=0):
@@ -314,32 +321,6 @@
         #sys.exit(rc)
         os._exit(rc)
 
-def _enforce_dom0_cpus():
-    dn = xroot.get_dom0_cpus()
-
-    for d in glob.glob("/sys/devices/system/cpu/cpu*"):
-        cpu = int(os.path.basename(d)[3:])
-        if (dn == 0) or (cpu < dn):
-            v = "1"
-        else:
-            v = "0"
-        try:
-            f = open("%s/online" %d, "r+")
-            c = f.read(1)
-            if (c != v):
-                if v == "0":
-                    log.info("dom0 is trying to give back cpu %d", cpu)
-                else:
-                    log.info("dom0 is trying to take cpu %d", cpu)
-                f.seek(0)
-                f.write(v)
-                f.close()
-                log.info("dom0 successfully enforced cpu %d", cpu)
-            else:
-                f.close()
-        except:
-            pass
-
 def instance():
     global inst
     try:
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/SrvDomainDir.py
--- a/tools/python/xen/xend/server/SrvDomainDir.py      Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/SrvDomainDir.py      Thu Sep 22 17:42:01 2005
@@ -85,7 +85,7 @@
     def _op_create_cb(self, dominfo, configstring, req):
         """Callback to handle domain creation.
         """
-        dom = dominfo.name
+        dom = dominfo.getName()
         domurl = "%s/%s" % (req.prePathURL(), dom)
         req.setResponseCode(http.CREATED, "created")
         req.setHeader("Location", domurl)
@@ -112,7 +112,7 @@
         fn = FormFn(self.xd.domain_restore,
                     [['file', 'str']])
         dominfo = fn(req.args)
-        dom = dominfo.name
+        dom = dominfo.getName()
         domurl = "%s/%s" % (req.prePathURL(), dom)
         req.setResponseCode(http.CREATED)
         req.setHeader("Location", domurl)
@@ -152,12 +152,12 @@
             domains = self.xd.list_sorted()
             req.write('<ul>')
             for d in domains:
-               req.write('<li><a href="%s%s"> Domain %s</a>'
-                         % (url, d.name, d.name))
-               req.write('id=%s' % d.domid)
-               req.write('memory=%d'% d.memory)
-               req.write('ssidref=%d'% d.ssidref)
-               req.write('</li>')
+                req.write('<li><a href="%s%s"> Domain %s</a>'
+                          % (url, d.getName(), d.getName()))
+                req.write('id=%s' % d.getDomain())
+                req.write('memory=%d'% d.getMemoryTarget())
+                req.write('ssidref=%d'% d.getSsidref())
+                req.write('</li>')
             req.write('</ul>')
 
     def form(self, req):
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/SrvNode.py
--- a/tools/python/xen/xend/server/SrvNode.py   Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/SrvNode.py   Thu Sep 22 17:42:01 2005
@@ -15,7 +15,6 @@
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
 #============================================================================
 
-import os
 
 from xen.web.SrvDir import SrvDir
 from xen.xend import sxp
@@ -32,15 +31,15 @@
         self.add('dmesg', 'SrvDmesg')
         self.add('log', 'SrvXendLog')
 
-    def op_shutdown(self, op, req):
+    def op_shutdown(self, _1, _2):
         val = self.xn.shutdown()
         return val
 
-    def op_reboot(self, op, req):
+    def op_reboot(self, _1, _2):
         val = self.xn.reboot()
         return val
 
-    def op_cpu_bvt_slice_set(self, op, req):
+    def op_cpu_bvt_slice_set(self, _, req):
         fn = FormFn(self.xn.cpu_bvt_slice_set,
                     [['ctx_allow', 'int']])
         val = fn(req.args, {})
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/SrvServer.py Thu Sep 22 17:42:01 2005
@@ -44,12 +44,17 @@
 
 from xen.web.httpserver import HttpServer, UnixHttpServer
 
-from xen.xend import XendRoot; xroot = XendRoot.instance()
+from xen.xend import XendRoot
 from xen.xend import Vifctl
 from xen.xend.XendLogging import log
 from xen.web.SrvDir import SrvDir
+import time
 
 from SrvRoot import SrvRoot
+
+
+xroot = XendRoot.instance()
+
 
 class XendServers:
 
@@ -59,13 +64,32 @@
     def add(self, server):
         self.servers.append(server)
 
-    def start(self):
+    def start(self, status):
         Vifctl.network('start')
         threads = []
         for server in self.servers:
             thread = Thread(target=server.run)
             thread.start()
             threads.append(thread)
+
+
+        # check for when all threads have initialized themselves and then
+        # close the status pipe
+
+        threads_left = True
+        while threads_left:
+            threads_left = False
+
+            for server in self.servers:
+                if not server.ready:
+                    threads_left = True
+                    break
+
+            if threads_left:
+                time.sleep(.5)
+
+        status.write('0')
+        status.close()
 
         for t in threads:
             t.join()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/blkif.py     Thu Sep 22 17:42:01 2005
@@ -13,322 +13,47 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
 #============================================================================
 
-"""Support for virtual block devices.
-"""
+
+import re
 import string
 
 from xen.util import blkif
-from xen.xend.XendError import XendError, VmError
-from xen.xend.XendRoot import get_component
-from xen.xend.XendLogging import log
 from xen.xend import sxp
-from xen.xend import Blkctl
-from xen.xend.xenstore import DBVar
 
-from xen.xend.server.controller import Dev, DevController
+from xen.xend.server.DevController import DevController
 
-class BlkifBackend:
-    """ Handler for the 'back-end' channel to a block device driver domain
-    on behalf of a front-end domain.
-    Must be connected using connect() before it can be used.
-    """
 
-    def __init__(self, controller, id, dom, recreate=False):
-        self.controller = controller
-        self.id = id
-        self.frontendDomain = self.controller.getDomain()
-        self.backendDomain = dom
-        self.destroyed = False
-        self.connected = False
-        self.status = None
-
-    def init(self, recreate=False, reboot=False):
-        self.destroyed = False
-        self.status = BLKIF_INTERFACE_STATUS_DISCONNECTED
-        self.frontendDomain = self.controller.getDomain()
-
-    def __str__(self):
-        return ('<BlkifBackend frontend=%d backend=%d id=%d>'
-                % (self.frontendDomain,
-                   self.backendDomain,
-                   self.id))
-
-    def getId(self):
-        return self.id
-
-    def connect(self, recreate=False):
-        """Connect to the blkif control interface.
-
-        @param recreate: true if after xend restart
-        """
-        log.debug("Connecting blkif %s", str(self))
-        if recreate or self.connected:
-            self.connected = True
-            pass
-        
-    def destroy(self, change=False, reboot=False):
-        """Disconnect from the blkif control interface and destroy it.
-        """
-        self.destroyed = True
-        # For change true need to notify front-end, or back-end will do it?
-
-    def connectInterface(self, val):
-        self.status = BLKIF_INTERFACE_STATUS_CONNECTED
-            
-    def interfaceDisconnected(self):
-        self.status = BLKIF_INTERFACE_STATUS_DISCONNECTED
-        
-class BlkDev(Dev):
-    """Info record for a block device.
-    """
-
-    __exports__ = Dev.__exports__ + [
-        DBVar('dev',          ty='str'),
-        DBVar('vdev',         ty='int'),
-        DBVar('mode',         ty='str'),
-        DBVar('viftype',      ty='str'),
-        DBVar('params',       ty='str'),
-        DBVar('node',         ty='str'),
-        DBVar('device',       ty='long'),
-        DBVar('dev_handle',   ty='long'),
-        DBVar('start_sector', ty='long'),
-        DBVar('nr_sectors',   ty='long'),
-        ]
-
-    def __init__(self, controller, id, config, recreate=False):
-        Dev.__init__(self, controller, id, config, recreate=recreate)
-        self.dev = None
-        self.uname = None
-        self.vdev = None
-        self.mode = None
-        self.type = None
-        self.params = None
-        self.node = None
-        self.device = None
-        self.dev_handle = 0
-        self.start_sector = None
-        self.nr_sectors = None
-        
-        self.frontendDomain = self.getDomain()
-        self.backendDomain = None
-        self.backendId = 0
-        self.configure(self.config, recreate=recreate)
-
-    def exportToDB(self, save=False):
-        Dev.exportToDB(self, save=save)
-        backend = self.getBackend()
-
-    def init(self, recreate=False, reboot=False):
-        self.frontendDomain = self.getDomain()
-        backend = self.getBackend()
-        self.backendId = backend.domid
-
-    def configure(self, config, change=False, recreate=False):
-        if change:
-            raise XendError("cannot reconfigure vbd")
-        self.config = config
-        self.uname = sxp.child_value(config, 'uname')
-        if not self.uname:
-            raise VmError('vbd: Missing uname')
-        # Split into type and type-specific params (which are passed to the
-        # type-specific control script).
-        (self.type, self.params) = string.split(self.uname, ':', 1)
-        self.dev = sxp.child_value(config, 'dev')
-        if not self.dev:
-            raise VmError('vbd: Missing dev')
-        self.mode = sxp.child_value(config, 'mode', 'r')
-        
-        self.vdev = blkif.blkdev_name_to_number(self.dev)
-        if not self.vdev:
-            raise VmError('vbd: Device not found: %s' % self.dev)
-        
-        try:
-            xd = get_component('xen.xend.XendDomain')
-            self.backendDomain = xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).domid
-        except:
-            raise XendError('invalid backend domain')
-
-        return self.config
-
-    def attach(self, recreate=False, change=False):
-        if recreate:
-            pass
-        else:
-            node = Blkctl.block('bind', self.type, self.params)
-            self.setNode(node)
-            self.attachBackend()
-        if change:
-            self.interfaceChanged()
-
-    def unbind(self):
-        if self.node is None: return
-        log.debug("Unbinding vbd (type %s) from %s"
-                  % (self.type, self.node))
-        Blkctl.block('unbind', self.type, self.node)
-
-    def setNode(self, node):
-    
-        # NOTE: 
-        # This clause is testing code for storage system experiments.
-        # Add a new disk type that will just pass an opaque id in the
-        # dev_handle and use an experimental device type.
-        # Please contact andrew.warfield@xxxxxxxxxxxx with any concerns.
-        if self.type == 'parallax':
-            self.node   = node
-            self.device =  61440 # (240,0)
-            self.dev_handle = long(self.params)
-            self.nr_sectors = long(0)
-            return
-        # done.
-            
-        mounted_mode = self.check_mounted(node)
-        if not '!' in self.mode and mounted_mode:
-            if mounted_mode == "w":
-                raise VmError("vbd: Segment %s is in writable use" %
-                              self.uname)
-            elif 'w' in self.mode:
-                raise VmError("vbd: Segment %s is in read-only use" %
-                              self.uname)
-            
-        segment = blkif.blkdev_segment(node)
-        if not segment:
-            raise VmError("vbd: Segment not found: uname=%s" % self.uname)
-        self.node = node
-        self.device = segment['device']
-        self.start_sector = segment['start_sector']
-        self.nr_sectors = segment['nr_sectors']
-
-    def check_mounted(self, name):
-        mode = blkif.mount_mode(name)
-        xd = get_component('xen.xend.XendDomain')
-        for vm in xd.list():
-            ctrl = vm.getDeviceController(self.getType(), error=False)
-            if (not ctrl): continue
-            for dev in ctrl.getDevices():
-                if dev is self: continue
-                if dev.type == 'phy' and name == blkif.expand_dev_name(dev.params):
-                    mode = dev.mode
-                    if 'w' in mode:
-                        return 'w'
-        if mode and 'r' in mode:
-            return 'r'
-        return None
-
-    def readonly(self):
-        return 'w' not in self.mode
-
-    def sxpr(self):
-        val = ['vbd',
-               ['id', self.id],
-               ['vdev', self.vdev],
-               ['device', self.device],
-               ['mode', self.mode]]
-        if self.dev:
-            val.append(['dev', self.dev])
-        if self.uname:
-            val.append(['uname', self.uname])
-        if self.node:
-            val.append(['node', self.node])
-        return val
-
-    def getBackend(self):
-        return self.controller.getBackend(self.backendDomain)
-
-    def refresh(self):
-        log.debug("Refreshing vbd domain=%d id=%s", self.frontendDomain,
-                  self.id)
-        self.interfaceChanged()
-
-    def destroy(self, change=False, reboot=False):
-        """Destroy the device. If 'change' is true notify the front-end interface.
-
-        @param change: change flag
-        """
-        self.destroyed = True
-        log.debug("Destroying vbd domain=%d id=%s", self.frontendDomain,
-                  self.id)
-        if change:
-            self.interfaceChanged()
-        self.unbind()
-
-    def interfaceChanged(self):
-        """Tell the back-end to notify the front-end that a device has been
-        added or removed.
-        """
-        self.getBackend().interfaceChanged()
-
-    def attachBackend(self):
-        """Attach the device to its controller.
-
-        """
-        self.getBackend().connect()
-        
 class BlkifController(DevController):
     """Block device interface controller. Handles all block devices
     for a domain.
     """
     
-    def __init__(self, vm, recreate=False):
+    def __init__(self, vm):
         """Create a block device controller.
         """
-        DevController.__init__(self, vm, recreate=recreate)
-        self.backends = {}
-        self.backendId = 0
+        DevController.__init__(self, vm)
 
-    def initController(self, recreate=False, reboot=False):
-        self.destroyed = False
-        if reboot:
-            self.rebootBackends()
-            self.rebootDevices()
 
-    def sxpr(self):
-        val = ['blkif', ['dom', self.getDomain()]]
-        return val
+    def getDeviceDetails(self, config):
+        """@see DevController.getDeviceDetails"""
+        
+        typedev = sxp.child_value(config, 'dev')
+        if re.match('^ioemu:', typedev):
+            return (0,{},{})
 
-    def rebootBackends(self):
-        for backend in self.backends.values():
-            backend.init(reboot=True)
+        devid = blkif.blkdev_name_to_number(sxp.child_value(config, 'dev'))
 
-    def getBackendById(self, id):
-        return self.backends.get(id)
+        (typ, params) = string.split(sxp.child_value(config, 'uname'), ':', 1)
+        back = { 'type' : typ,
+                 'params' : params
+                 }
 
-    def getBackendByDomain(self, dom):
-        for backend in self.backends.values():
-            if backend.backendDomain == dom:
-                return backend
-        return None
+        if 'r' == sxp.child_value(config, 'mode', 'r'):
+            back['read-only'] = ""  # existence indicates read-only
 
-    def getBackend(self, dom):
-        backend = self.getBackendByDomain(dom)
-        if backend: return backend
-        backend = BlkifBackend(self, self.backendId, dom)
-        self.backendId += 1
-        self.backends[backend.getId()] = backend
-        backend.init()
-        return backend
+        front = { 'virtual-device' : "%i" % devid }
 
-    def newDevice(self, id, config, recreate=False):
-        """Create a device..
-
-        @param id:      device id
-        @param config:   device configuration
-        @param recreate: if true it's being recreated (after xend restart)
-        @type  recreate: bool
-        @return: device
-        @rtype:  BlkDev
-        """
-        return BlkDev(self, id, config, recreate=recreate)
-        
-    def destroyController(self, reboot=False):
-        """Destroy the controller and all devices.
-        """
-        self.destroyed = True
-        log.debug("Destroying blkif domain=%d", self.getDomain())
-        self.destroyDevices(reboot=reboot)
-        self.destroyBackends(reboot=reboot)
-
-    def destroyBackends(self, reboot=False):
-        for backend in self.backends.values():
-            backend.destroy(reboot=reboot)
+        return (devid, back, front)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/channel.py
--- a/tools/python/xen/xend/server/channel.py   Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/channel.py   Thu Sep 22 17:42:01 2005
@@ -43,33 +43,6 @@
 
     interdomain = classmethod(interdomain)
 
-    def restoreFromDB(cls, db, dom1, dom2, port1=0, port2=0):
-        """Create an event channel using db info if available.
-        Inverse to saveToDB().
-
-        @param db db
-        @param dom1
-        @param dom2
-        @param port1
-        @param port2
-        """
-        try:
-            dom1  = int(db['dom1'].getData())
-        except: pass
-        try:
-            dom2  = int(db['dom2'].getData())
-        except: pass
-        try:
-            port1 = int(db['port1'].getData())
-        except: pass
-        try:
-            port2 = int(db['port2'].getData())
-        except: pass
-        evtchn = cls.interdomain(dom1, dom2, port1=port1, port2=port2)
-        return evtchn
-
-    restoreFromDB = classmethod(restoreFromDB)
-
     def __init__(self, dom1, dom2, d):
         d['dom1'] = dom1
         d['dom2'] = dom2
@@ -92,18 +65,6 @@
             print 'EventChannel>close>', self
         evtchn_close(self.dom1, self.port1)
         evtchn_close(self.dom2, self.port2)
-
-    def saveToDB(self, db, save=False):
-        """Save the event channel to the db so it can be restored later,
-        using restoreFromDB() on the class.
-
-        @param db db
-        """
-        db['dom1']  = str(self.dom1)
-        db['dom2']  = str(self.dom2)
-        db['port1'] = str(self.port1)
-        db['port2'] = str(self.port2)
-        db.saveDB(save=save)
 
     def sxpr(self):
         return ['event-channel',
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/event.py
--- a/tools/python/xen/xend/server/event.py     Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/event.py     Thu Sep 22 17:42:01 2005
@@ -174,11 +174,6 @@
         else:
             logging.removeLogStderr()
 
-    def op_debug_controller(self, name, v):
-        mode = v[1]
-        import controller
-        controller.DEBUG = (mode == 'on')
-
     def op_domain_ls(self, name, v):
         xd = xroot.get_component("xen.xend.XendDomain")
         return xd.list_names()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py     Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/netif.py     Thu Sep 22 17:42:01 2005
@@ -13,396 +13,58 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
 #============================================================================
+
 
 """Support for virtual network interfaces.
 """
 
-import random
-
-from xen.util.mac import macFromString, macToString
+import os
 
 from xen.xend import sxp
-from xen.xend import Vifctl
-from xen.xend.XendError import XendError, VmError
-from xen.xend.XendLogging import log
-from xen.xend import XendVnet
-from xen.xend.XendRoot import get_component
-from xen.xend.xenstore import DBVar
 
-from xen.xend.server.controller import Dev, DevController
+from xen.xend.server.DevController import DevController
 
-class NetDev(Dev):
-    """A network device.
-    """
 
-    # State:
-    # inherited + 
-    # ./config
-    # ./mac
-    # ./be_mac
-    # ./bridge
-    # ./script
-    # ./ipaddr ?
-    #
-    # ./credit
-    # ./period
-    #
-    # ./vifctl: up/down?
-    # ./vifname
-    #
-    #
-    # Poss should have no backend state here - except for ref to backend's own tree
-    # for the device? And a status - the one we want.
-    # ./back/dom
-    # ./back/devid - id for back-end (netif_handle) - same as front/devid
-    # ./back/id    - backend id (if more than one b/e per domain)
-    # ./back/status
-    # ./back/tx_shmem_frame  - actually these belong in back-end state
-    # ./back/rx_shmem_frame
-    #
-    # ./front/dom
-    # ./front/devid
-    # ./front/status - need 2: one for requested, one for actual? Or drive from dev status
-    # and this is front status only.
-    # ./front/tx_shmem_frame
-    # ./front/rx_shmem_frame
-    #
-    # ./evtchn/front - here or in front/back?
-    # ./evtchn/back
-    # ./evtchn/status ?
-    # At present created by dev: but should be created unbound by front/back
-    # separately and then bound (by back)?
-
-    __exports__ = Dev.__exports__ + [
-        DBVar('config',  ty='sxpr'),
-        DBVar('mac',     ty='mac'),
-        DBVar('be_mac',  ty='mac'),
-        DBVar('bridge',  ty='str'),
-        DBVar('script',  ty='str'),
-        DBVar('credit',  ty='int'),
-        DBVar('period',  ty='int'),
-        DBVar('vifname', ty='str'),
-        ]
-
-    def __init__(self, controller, id, config, recreate=False):
-        Dev.__init__(self, controller, id, config, recreate=recreate)
-        self.vif = int(self.id)
-        self.status = None
-        self.frontendDomain = self.getDomain()
-        self.backendDomain = None
-        self.credit = None
-        self.period = None
-        self.mac = None
-        self.be_mac = None
-        self.bridge = None
-        self.script = None
-        self.ipaddr = None
-        self.mtu = None
-        self.vifname = None
-        self.configure(self.config, recreate=recreate)
-
-    def exportToDB(self, save=False):
-        Dev.exportToDB(self, save=save)
-
-    def init(self, recreate=False, reboot=False):
-        self.destroyed = False
-        self.status = NETIF_INTERFACE_STATUS_DISCONNECTED
-        self.frontendDomain = self.getDomain()
-
-    def _get_config_mac(self, config):
-        vmac = sxp.child_value(config, 'mac')
-        if not vmac: return None
-        try:
-            mac = macFromString(vmac)
-        except:
-            raise XendError("invalid mac: %s" % vmac)
-        return mac
-
-    def _get_config_be_mac(self, config):
-        vmac = sxp.child_value(config, 'be_mac')
-        if not vmac: return None
-        try:
-            mac = macFromString(vmac)
-        except:
-            raise XendError("invalid backend mac: %s" % vmac)
-        return mac
-
-    def _get_config_ipaddr(self, config):
-        ips = sxp.children(config, elt='ip')
-        if ips:
-            val = []
-            for ipaddr in ips:
-                val.append(sxp.child0(ipaddr))
-        else:
-            val = None
-        return val
-
-    def _get_config_mtu(self, config):
-        mtu = sxp.child_value(config, 'mtu')
-        if not mtu: return None
-        try:
-            mtu = int(mtu)
-        except:
-            raise XendError("invalid mtu: %s" & mtu)
-        return mtu
-
-    def configure(self, config, change=False, recreate=False):
-        if change:
-            return self.reconfigure(config)
-        self.config = config
-        self.mac = None
-        self.be_mac = None
-        self.bridge = None
-        self.script = None
-        self.ipaddr = []
-        self.vifname = None
-
-        self.vifname = sxp.child_value(config, 'vifname')
-        if self.vifname is None:
-            self.vifname = self.default_vifname()
-        if len(self.vifname) > 15:
-            raise XendError('invalid vifname: too long: ' + self.vifname)
-        mac = self._get_config_mac(config)
-        if mac is None:
-            raise XendError("invalid mac")
-        self.mac = mac
-        self.be_mac = self._get_config_be_mac(config)
-        self.bridge = sxp.child_value(config, 'bridge')
-        self.script = sxp.child_value(config, 'script')
-        self.ipaddr = self._get_config_ipaddr(config) or []
-        self.mtu = self._get_config_mtu(config)
-        self._config_credit_limit(config)
-        
-        try:
-            if recreate:
-                self.backendDomain = int(sxp.child_value(config, 'backend', '0'))
-            else:
-                #todo: Code below will fail on xend restart when backend is not domain 0.
-                xd = get_component('xen.xend.XendDomain')
-                self.backendDomain = xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).domid
-        except:
-            raise XendError('invalid backend domain')
-        return self.config
-
-    def reconfigure(self, config):
-        """Reconfigure the interface with new values.
-        Not all configuration parameters can be changed:
-        bridge, script and ip addresses can,
-        backend and mac cannot.
-
-        To leave a parameter unchanged, omit it from the changes.
-
-        @param config configuration changes
-        @return updated interface configuration
-        @raise XendError on errors
-        """
-        changes = {}
-        mac = self._get_config_mac(config)
-        be_mac = self._get_config_be_mac(config)
-        bridge = sxp.child_value(config, 'bridge')
-        script = sxp.child_value(config, 'script')
-        ipaddr = self._get_config_ipaddr(config)
-        mtu = self._get_config_mtu(config)
-        
-        xd = get_component('xen.xend.XendDomain')
-        backendDomain = xd.domain_lookup_by_name(sxp.child_value(config, 'backend', '0')).domid
-
-        if (mac is not None) and (mac != self.mac):
-            raise XendError("cannot change mac")
-        if (be_mac is not None) and (be_mac != self.be_mac):
-            raise XendError("cannot change backend mac")
-        if (backendDomain is not None) and (backendDomain != self.backendDomain):
-            raise XendError("cannot change backend")
-        if (bridge is not None) and (bridge != self.bridge):
-            changes['bridge'] = bridge
-        if (script is not None) and (script != self.script):
-            changes['script'] = script
-        if (ipaddr is not None) and (ipaddr != self.ipaddr):
-            changes['ipaddr'] = ipaddr
-        if (mtu is not None) and (mtu != self.mtu):
-            changes['mtu'] = mtu
-
-        if changes:
-            self.vifctl("down")
-            for (k, v) in changes.items():
-                setattr(self, k, v)
-            self.config = sxp.merge(config, self.config)
-            self.vifctl("up")
-
-        self._config_credit_limit(config, change=True)
-        return self.config
-
-    def _config_credit_limit(self, config, change=False):
-        period = sxp.child_value(config, 'period')
-        credit = sxp.child_value(config, 'credit')
-        if period and credit:
-            try:
-                period = int(period)
-                credit = int(credit)
-            except ex:
-                raise XendError('vif: invalid credit limit')
-            if change:
-                self.setCreditLimit(credit, period)
-                self.config = sxp.merge([sxp.name(self.config),
-                                         ['credit', credit],
-                                         ['period', period]],
-                                        self.config)
-            else:
-                self.period = period
-                self.credit = credit
-        elif period or credit:
-            raise XendError('vif: invalid credit limit')
-
-    def sxpr(self):
-        vif = str(self.vif)
-        mac = self.get_mac()
-        val = ['vif',
-               ['id', self.id],
-               ['vif', vif],
-               ['mac', mac],
-               ['vifname', self.vifname],
-               ]
-
-        if self.be_mac:
-            val.append(['be_mac', self.get_be_mac()])
-        if self.bridge:
-            val.append(['bridge', self.bridge])
-        if self.script:
-            val.append(['script', self.script])
-        for ip in self.ipaddr:
-            val.append(['ip', ip])
-        if self.credit:
-            val.append(['credit', self.credit])
-        if self.period:
-            val.append(['period', self.period])
-        return val
-
-    def get_vifname(self):
-        """Get the virtual interface device name.
-        """
-        return self.vifname
-
-    def default_vifname(self):
-        return "vif%d.%d" % (self.frontendDomain, self.vif)
-    
-    def get_mac(self):
-        """Get the MAC address as a string.
-        """
-        return macToString(self.mac)
-
-    def get_be_mac(self):
-        """Get the backend MAC address as a string.
-        """
-        return macToString(self.be_mac)
-
-    def vifctl_params(self, vmname=None):
-        """Get the parameters to pass to vifctl.
-        """
-        dom = self.frontendDomain
-        if vmname is None:
-            xd = get_component('xen.xend.XendDomain')
-            try:
-                vm = xd.domain_lookup(dom)
-                vmname = vm.name
-            except:
-                vmname = 'Domain-%d' % dom
-        return { 'domain': vmname,
-                 'vif'   : self.get_vifname(), 
-                 'mac'   : self.get_mac(),
-                 'bridge': self.bridge,
-                 'script': self.script,
-                 'ipaddr': self.ipaddr, }
-
-    def vifctl(self, op, vmname=None):
-        """Bring the device up or down.
-        The vmname is needed when bringing a device up for a new domain because
-        the domain is not yet in the table so we can't look its name up.
-
-        @param op: operation name (up, down)
-        @param vmname: vmname
-        """
-        if op == 'up':
-            Vifctl.set_vif_name(self.default_vifname(), self.vifname)
-        Vifctl.vifctl(op, **self.vifctl_params(vmname=vmname))
-        vnet = XendVnet.instance().vnet_of_bridge(self.bridge)
-        if vnet:
-            vnet.vifctl(op, self.get_vifname(), self.get_mac())
-
-    def attach(self, recreate=False, change=False):
-        if recreate:
-            pass
-        else:
-            if self.credit and self.period:
-                #self.send_be_creditlimit(self.credit, self.period)
-                pass
-            self.vifctl('up', vmname=self.getDomainName())
-        
-    def destroy(self, change=False, reboot=False):
-        """Destroy the device's resources and disconnect from the back-end
-        device controller. If 'change' is true notify the front-end interface.
-
-        @param change: change flag
-        """
-        self.destroyed = True
-        self.status = NETIF_INTERFACE_STATUS_CLOSED
-        log.debug("Destroying vif domain=%d vif=%d", self.frontendDomain, self.vif)
-        self.vifctl('down')
-        if change:
-            self.reportStatus()
-
-    def setCreditLimit(self, credit, period):
-        #todo: these params should be in sxpr and vif config.
-        self.credit = credit
-        self.period = period
-
-    def getCredit(self):
-        return self.credit
-
-    def getPeriod(self):
-        return self.period
-        
-    def interfaceChanged(self):
-        """Notify the front-end that a device has been added or removed.
-        """
-        pass
-        
 class NetifController(DevController):
     """Network interface controller. Handles all network devices for a domain.
     """
     
-    def __init__(self, vm, recreate=False):
-        DevController.__init__(self, vm, recreate=recreate)
+    def __init__(self, vm):
+        DevController.__init__(self, vm)
 
-    def initController(self, recreate=False, reboot=False):
-        self.destroyed = False
-        if reboot:
-            self.rebootDevices()
 
-    def destroyController(self, reboot=False):
-        """Destroy the controller and all devices.
-        """
-        self.destroyed = True
-        log.debug("Destroying netif domain=%d", self.getDomain())
-        self.destroyDevices(reboot=reboot)
+    def getDeviceDetails(self, config):
+        """@see DevController.getDeviceDetails"""
 
-    def sxpr(self):
-        val = ['netif', ['dom', self.getDomain()]]
-        return val
-    
-    def newDevice(self, id, config, recreate=False):
-        """Create a network device.
+        from xen.xend import XendRoot
+        xroot = XendRoot.instance()
 
-        @param id: interface id
-        @param config: device configuration
-        @param recreate: recreate flag (true after xend restart)
-        """
-        return NetDev(self, id, config, recreate=recreate)
+        def _get_config_ipaddr(config):
+            val = []
+            for ipaddr in sxp.children(config, elt='ip'):
+                val.append(sxp.child0(ipaddr))
+            return val
 
-    def limitDevice(self, vif, credit, period):        
-        if vif not in self.devices:
-            raise XendError('device does not exist for credit limit: vif'
-                            + str(self.getDomain()) + '.' + str(vif))
-        
-        dev = self.devices[vif]
-        return dev.setCreditLimit(credit, period)
+        script = os.path.join(xroot.network_script_dir,
+                              sxp.child_value(config, 'script',
+                                              xroot.get_vif_script()))
+        bridge = sxp.child_value(config, 'bridge',
+                                 xroot.get_vif_bridge())
+        mac = sxp.child_value(config, 'mac')
+        ipaddr = _get_config_ipaddr(config)
+
+        devid = self.allocateDeviceID()
+
+        back = { 'script' : script,
+                 'mac' : mac,
+                 'bridge' : bridge,
+                 'handle' : "%i" % devid }
+        if ipaddr:
+            back['ip'] = ' '.join(ipaddr)
+
+        front = { 'handle' : "%i" % devid,
+                  'mac' : mac }
+
+        return (devid, back, front)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py     Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/pciif.py     Thu Sep 22 17:42:01 2005
@@ -13,16 +13,22 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #============================================================================
 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
 #============================================================================
+
 
 import types
 
-import xen.lowlevel.xc; xc = xen.lowlevel.xc.new()
+import xen.lowlevel.xc;
 
 from xen.xend import sxp
 from xen.xend.XendError import VmError
 
-from controller import Dev, DevController
+from xen.xend.server.DevController import DevController
+
+
+xc = xen.lowlevel.xc.new()
+
 
 def parse_pci(val):
     """Parse a pci field.
@@ -36,41 +42,41 @@
         v = val
     return v
 
-class PciDev(Dev):
 
-    def __init__(self, controller, id, config, recreate=False):
-        Dev.__init__(self, controller, id, config, recreate=recreate)
-        bus = sxp.child_value(self.config, 'bus')
-        if not bus:
-            raise VmError('pci: Missing bus')
-        dev = sxp.child_value(self.config, 'dev')
-        if not dev:
-            raise VmError('pci: Missing dev')
-        func = sxp.child_value(self.config, 'func')
-        if not func:
-            raise VmError('pci: Missing func')
-        try:
-            bus = parse_pci(bus)
-            dev = parse_pci(dev)
-            func = parse_pci(func)
-        except:
-            raise VmError('pci: invalid parameter')
+class PciController(DevController):
 
-    def attach(self, recreate=False, change=False):
-        rc = xc.physdev_pci_access_modify(dom    = self.getDomain(),
+    def __init__(self, vm):
+        DevController.__init__(self, vm)
+
+
+    def getDeviceDetails(self, config):
+        """@see DevController.getDeviceDetails"""
+
+        def get_param(field):
+            try:
+                val = sxp.child_value(config, field)
+
+                if not val:
+                    raise VmError('pci: Missing %s config setting' % field)
+
+                return parse_pci(val)
+            except:
+                raise VmError('pci: Invalid config setting %s: %s' %
+                              (field, val))
+        
+        bus  = get_param('bus')
+        dev  = get_param('dev')
+        func = get_param('func')
+
+        rc = xc.physdev_pci_access_modify(dom    = self.getDomid(),
                                           bus    = bus,
                                           dev    = dev,
                                           func   = func,
                                           enable = True)
         if rc < 0:
             #todo non-fatal
-            raise VmError('pci: Failed to configure device: bus=%s dev=%s func=%s' %
-                          (bus, dev, func))
+            raise VmError(
+                'pci: Failed to configure device: bus=%s dev=%s func=%s' %
+                (bus, dev, func))
 
-    def destroy(self, change=False, reboot=False):
-        pass
-
-class PciController(DevController):
-
-    def newDevice(self, id, config, recreate=False):
-        return PciDev(self, id, config, recreate=recreate)
+        return (dev, {}, {})
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/tpmif.py
--- a/tools/python/xen/xend/server/tpmif.py     Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/tpmif.py     Thu Sep 22 17:42:01 2005
@@ -1,45 +1,47 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
 # Copyright (C) 2005 IBM Corporation
-#   Authort: Stefan Berger, stefanb@xxxxxxxxxx
-# Derived from netif.py:
-# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
+#   Author: Stefan Berger, stefanb@xxxxxxxxxx
+# Copyright (C) 2005 XenSource Ltd
+#============================================================================
+
 """Support for virtual TPM interfaces.
 """
 
-import random
+from xen.xend import sxp
+from xen.xend.XendLogging import log
 
-from xen.xend import sxp
-from xen.xend.XendError import XendError, VmError
-from xen.xend.XendLogging import log
-from xen.xend.XendRoot import get_component
-from xen.xend.xenstore import DBVar
+from xen.xend.server.DevController import DevController
 
-from xen.xend.server.controller import Dev, DevController
 
 class TPMifController(DevController):
     """TPM interface controller. Handles all TPM devices for a domain.
     """
 
-    def __init__(self, vm, recreate=False):
-        DevController.__init__(self, vm, recreate=recreate)
+    def __init__(self, vm):
+        DevController.__init__(self, vm)
 
-    def initController(self, recreate=False, reboot=False):
-        self.destroyed = False
 
-    def destroyController(self, reboot=False):
-        """Destroy the controller and all devices.
-        """
-        self.destroyed = True
-        self.destroyDevices(reboot=reboot)
+    def getDeviceDetails(self, config):
+        """@see DevController.getDeviceDetails"""
+        
+        devid = int(sxp.child_value(config, 'instance', '0'))
+        log.debug("The domain has a TPM with instance %d." % devid)
 
-    def sxpr(self):
-        val = ['tpmif', ['dom', self.getDomain()]]
-        return val
+        back  = { 'instance' : "%i" % devid }
+        front = { 'handle' : "%i" % devid }
 
-    def newDevice(self, id, config, recreate=False):
-        """Create a TPM device.
-
-        @param id: interface id
-        @param config: device configuration
-        @param recreate: recreate flag (true after xend restart)
-        """
-        return None
+        return (devid, back, front)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/usbif.py
--- a/tools/python/xen/xend/server/usbif.py     Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/usbif.py     Thu Sep 22 17:42:01 2005
@@ -1,185 +1,42 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
 # Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx>
 # Copyright (C) 2004 Intel Research Cambridge
 # Copyright (C) 2004 Mark Williamson <mark.williamson@xxxxxxxxxxxx>
+# Copyright (C) 2005 XenSource Ltd
+#============================================================================
+
+
 """Support for virtual USB hubs.
 """
 
-from xen.xend import sxp
-from xen.xend.XendLogging import log
-from xen.xend.XendError import XendError
-from xen.xend.xenstore import DBVar
+from xen.xend.server.DevController import DevController
 
-from xen.xend.server.controller import Dev, DevController
-
-class UsbBackend:
-    """Handler for the 'back-end' channel to a USB device driver domain
-    on behalf of a front-end domain.
-    """
-    def __init__(self, controller, id, dom):
-        self.controller = controller
-        self.id = id
-        self.destroyed = False
-        self.connected = False
-        self.connecting = False
-        self.frontendDomain = self.controller.getDomain()
-        self.backendDomain = dom
-
-    def init(self, recreate=False, reboot=False):
-        pass
-    
-    def __str__(self):
-        return ('<UsbifBackend frontend=%d backend=%d id=%d>'
-                % (self.frontendDomain,
-                   self.backendDomain,
-                   self.id))
-
-    def connect(self, recreate=False):
-        """Connect the controller to the usbif control interface.
-
-        @param recreate: true if after xend restart
-        """
-        log.debug("Connecting usbif %s", str(self))
-        if recreate or self.connected or self.connecting:
-            pass
-        
-    def destroy(self, reboot=False):
-        """Disconnect from the usbif control interface and destroy it.
-        """
-        self.destroyed = True
-        
-    def interfaceChanged(self):
-        pass
-
-
-class UsbDev(Dev):
-
-    __exports__ = Dev.__exports__ + [
-        DBVar('port', ty='int'),
-        DBVar('path', ty='str'),
-        ]
-    
-    def __init__(self, controller, id, config, recreate=False):
-        Dev.__init__(self, controller, id, config, recreate=recreate)
-        self.port = id
-        self.path = None
-        self.frontendDomain = self.getDomain()
-        self.backendDomain = 0
-        self.configure(self.config, recreate=recreate)
-
-    def init(self, recreate=False, reboot=False):
-        self.destroyed = False
-        self.frontendDomain = self.getDomain()
-        
-    def configure(self, config, change=False, recreate=False):
-        if change:
-            raise XendError("cannot reconfigure usb")
-        #todo: FIXME: Use sxp access methods to get this value.
-        # Must not use direct indexing.
-        self.path = config[1][1]
-        
-        #todo: FIXME: Support configuring the backend domain.
-##         try:
-##             self.backendDomain = int(sxp.child_value(config, 'backend', '0'))
-##         except:
-##             raise XendError('invalid backend domain')
-
-    def attach(self, recreate=False, change=False):
-        if recreate:
-            pass
-        else:
-            self.attachBackend()
-        if change:
-            self.interfaceChanged()
-            
-    def sxpr(self):
-        val = ['usb',
-               ['id', self.id],
-               ['port', self.port],
-               ['path', self.path],
-               ]
-        return val
-
-    def getBackend(self):
-        return self.controller.getBackend(self.backendDomain)
-
-    def destroy(self, change=False, reboot=False):
-        """Destroy the device. If 'change' is true notify the front-end interface.
-
-        @param change: change flag
-        """
-        self.destroyed = True
-        log.debug("Destroying usb domain=%d id=%s", self.frontendDomain, self.id)
-        if change:
-            self.interfaceChanged()
-
-    def interfaceChanged(self):
-        """Tell the back-end to notify the front-end that a device has been
-        added or removed.
-        """
-        self.getBackend().interfaceChanged()
-
-    def attachBackend(self):
-        """Attach the device to its controller.
-
-        """
-        self.getBackend().connect()
 
 class UsbifController(DevController):
     """USB device interface controller. Handles all USB devices
     for a domain.
     """
     
-    def __init__(self, vm, recreate=False):
+    def __init__(self, vm):
         """Create a USB device controller.
         """
-        DevController.__init__(self, vm, recreate=recreate)
-        self.backends = {}
-        self.backendId = 0
+        DevController.__init__(self, vm)
 
-    def init(self, recreate=False, reboot=False):
-        self.destroyed = False
-        if reboot:
-            self.rebootBackends()
-            self.rebootDevices()
 
-    def sxpr(self):
-        val = ['usbif',
-               ['dom', self.getDomain()]]
-        return val
+    def getDeviceDetails(self, _):
+        """@see DevController.getDeviceDetails"""
 
-    def newDevice(self, id, config, recreate=False):
-        return UsbDev(self, id, config, recreate=recreate)
-
-    def destroyController(self, reboot=False):
-        """Destroy the controller and all devices.
-        """
-        self.destroyed = True
-        log.debug("Destroying blkif domain=%d", self.getDomain())
-        self.destroyDevices(reboot=reboot)
-        self.destroyBackends(reboot=reboot)
-
-    def rebootBackends(self):
-        for backend in self.backends.values():
-            backend.init(reboot=True)
-
-    def getBackendById(self, id):
-        return self.backends.get(id)
-
-    def getBackendByDomain(self, dom):
-        for backend in self.backends.values():
-            if backend.backendDomain == dom:
-                return backend
-        return None
-
-    def getBackend(self, dom):
-        backend = self.getBackendByDomain(dom)
-        if backend: return backend
-        backend = UsbBackend(self, self.backendId, dom)
-        self.backendId += 1
-        self.backends[backend.getId()] = backend
-        backend.init()
-        return backend
-    
-    def destroyBackends(self, reboot=False):
-        for backend in self.backends.values():
-            backend.destroy(reboot=reboot)
+        return (self.allocateDeviceID(), {}, {})
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/xenstore/xsnode.py
--- a/tools/python/xen/xend/xenstore/xsnode.py  Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/xenstore/xsnode.py  Thu Sep 22 17:42:01 2005
@@ -244,7 +244,9 @@
             if ex.args[0] == errno.ENOENT:
                 return False
             else:
-                raise
+                raise RuntimeError(ex.args[0],
+                                   ex.args[1] +
+                                   (', in exists(%s)' % (str(path))))
 
     def mkdirs(self, path):
         if self.exists(path):
@@ -255,7 +257,7 @@
             if x == "": continue
             p = os.path.join(p, x)
             if not self.exists(p):
-                self.getxs().write(p, "", create=True)
+                self.getxs().write(p, "")
 
     def read(self, path):
         try:
@@ -266,15 +268,17 @@
             else:
                 raise
 
-    def create(self, path, excl=False):
-        self.write(path, "", create=True, excl=excl)
-
-    def write(self, path, data, create=True, excl=False):
-        self.mkdirs(path)
-        try:
-            self.getxs().write(path, data, create=create, excl=excl)
-        except Exception, ex:
-            raise
+    def create(self, path):
+        self.write(path, "")
+
+    def write(self, path, data):
+        try:
+            self.getxs().write(path, data)
+        except RuntimeError, ex:
+            raise RuntimeError(ex.args[0],
+                               ex.args[1] +
+                               (', while writing %s : %s' % (str(path),
+                                                             str(data))))
 
     def begin(self, path):
         self.getxs().transaction_start(path)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/xenstore/xsobj.py
--- a/tools/python/xen/xend/xenstore/xsobj.py   Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/xenstore/xsobj.py   Thu Sep 22 17:42:01 2005
@@ -469,9 +469,6 @@
             n = n._addChild(x)
         return n
 
-    def getDB(self):
-        return self.__db__
-
     def setDB(self, db):
         if (db is not None) and not isinstance(db, XenNode):
             raise ValueError("invalid db")
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/xenstore/xstransact.py
--- a/tools/python/xen/xend/xenstore/xstransact.py      Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/xenstore/xstransact.py      Thu Sep 22 17:42:01 2005
@@ -41,7 +41,11 @@
 
     def _read(self, key):
         path = "%s/%s" % (self.path, key)
-        return xshandle().read(path)
+        try:
+            return xshandle().read(path)
+        except RuntimeError, ex:
+            raise RuntimeError(ex.args[0],
+                               '%s, while reading %s' % (ex.args[1], path))
 
     def read(self, *args):
         if len(args) == 0:
@@ -53,13 +57,16 @@
             ret.append(self._read(key))
         return ret
 
-    def _write(self, key, data, create=True, excl=False):
-        path = "%s/%s" % (self.path, key)
-        xshandle().write(path, data, create=create, excl=excl)
+    def _write(self, key, data):
+        path = "%s/%s" % (self.path, key)
+        try:
+            xshandle().write(path, data)
+        except RuntimeError, ex:
+            raise RuntimeError(ex.args[0],
+                               ('%s, while writing %s : %s' %
+                                (ex.args[1], path, str(data))))
 
     def write(self, *args, **opts):
-        create = opts.get('create') or True
-        excl = opts.get('excl') or False
         if len(args) == 0:
             raise TypeError
         if isinstance(args[0], dict):
@@ -67,15 +74,19 @@
                 if not isinstance(d, dict):
                     raise TypeError
                 for key in d.keys():
-                    self._write(key, d[key], create, excl)
+                    try:
+                        self._write(key, d[key])
+                    except TypeError, msg:
+                        raise TypeError('Writing %s: %s: %s' %
+                                        (key, str(d[key]), msg))
         elif isinstance(args[0], list):
             for l in args:
                 if not len(l) == 2:
                     raise TypeError
-                self._write(l[0], l[1], create, excl)
+                self._write(l[0], l[1])
         elif len(args) % 2 == 0:
             for i in range(len(args) / 2):
-                self._write(args[i * 2], args[i * 2 + 1], create, excl)
+                self._write(args[i * 2], args[i * 2 + 1])
         else:
             raise TypeError
 
@@ -84,10 +95,15 @@
         return xshandle().rm(path)
 
     def remove(self, *args):
-        if len(args) == 0:
-            raise TypeError
-        for key in args:
-            self._remove(key)
+        """If no arguments are given, remove this transaction's path.
+        Otherwise, treat each argument as a subpath to this transaction's
+        path, and remove each of those instead.
+        """
+        if len(args) == 0:
+            xshandle().rm(self.path)
+        else:
+            for key in args:
+                self._remove(key)
 
     def _list(self, key):
         path = "%s/%s" % (self.path, key)
@@ -114,10 +130,20 @@
                 defval = None
             else:
                 (key, fn, defval) = tup
-            try:
-                val = fn(self._read(key))
-            except TypeError:
+
+            val = self._read(key)
+            # If fn is str, then this will successfully convert None to
+            # 'None'.  If it is int, then it will throw TypeError on None, or
+            # on any other non-integer value.  We have to, therefore, both
+            # check explicitly for None, and catch TypeError.  Either failure
+            # will result in defval being used instead.
+            if val is None:
                 val = defval
+            else:
+                try:
+                    val = fn(val)
+                except TypeError:
+                    val = defval
             ret.append(val)
         if len(ret) == 1:
             return ret[0]
@@ -146,8 +172,8 @@
 
     def Read(cls, path, *args):
         while True:
-            try:
-                t = cls(path)
+            t = cls(path)
+            try:
                 v = t.read(*args)
                 t.commit()
                 return v
@@ -165,8 +191,8 @@
 
     def Write(cls, path, *args, **opts):
         while True:
-            try:
-                t = cls(path)
+            t = cls(path)
+            try:
                 t.write(*args, **opts)
                 t.commit()
                 return
@@ -183,9 +209,13 @@
     Write = classmethod(Write)
 
     def Remove(cls, path, *args):
-        while True:
-            try:
-                t = cls(path)
+        """If only one argument is given (path), remove it.  Otherwise, treat
+        each further argument as a subpath to the given path, and remove each
+        of those instead.  This operation is performed inside a transaction.
+        """
+        while True:
+            t = cls(path)
+            try:
                 t.remove(*args)
                 t.commit()
                 return
@@ -203,8 +233,8 @@
 
     def List(cls, path, *args):
         while True:
-            try:
-                t = cls(path)
+            t = cls(path)
+            try:
                 v = t.list(*args)
                 t.commit()
                 return v
@@ -222,8 +252,8 @@
 
     def Gather(cls, path, *args):
         while True:
-            try:
-                t = cls(path)
+            t = cls(path)
+            try:
                 v = t.gather(*args)
                 t.commit()
                 return v
@@ -241,8 +271,8 @@
 
     def Store(cls, path, *args):
         while True:
-            try:
-                t = cls(path)
+            t = cls(path)
+            try:
                 v = t.store(*args)
                 t.commit()
                 return v
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/xenstore/xsutil.py
--- a/tools/python/xen/xend/xenstore/xsutil.py  Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/xenstore/xsutil.py  Thu Sep 22 17:42:01 2005
@@ -18,3 +18,6 @@
 
 def IntroduceDomain(domid, page, port, path):
     return xshandle().introduce_domain(domid, page, port, path)
+
+def GetDomainPath(domid):
+    return xshandle().get_domain_path(domid)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/xenstore/xswatch.py
--- a/tools/python/xen/xend/xenstore/xswatch.py Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/xenstore/xswatch.py Thu Sep 22 17:42:01 2005
@@ -1,4 +1,5 @@
 # Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx>
+# Copyright (C) 2005 XenSource Ltd
 
 # This file is subject to the terms and conditions of the GNU General
 # Public License.  See the file "COPYING" in the main directory of
@@ -15,7 +16,7 @@
     xs = None
     xslock = threading.Lock()
     
-    def __init__(self, path, fn, args=(), kwargs={}):
+    def __init__(self, path, fn, *args, **kwargs):
         self.fn = fn
         self.args = args
         self.kwargs = kwargs
@@ -46,11 +47,11 @@
         cls.threadcond.release()
         while True:
             try:
-                (ord, owr, oer) = select.select([ cls.xs ], [], [])
+                (fd, _1, _2) = select.select([ cls.xs ], [], [])
                 cls.xslock.acquire()
                 # reconfirm ready to read with lock
-                (ord, owr, oer) = select.select([ cls.xs ], [], [], 0.001)
-                if not cls.xs in ord:
+                (fd, _1, _2) = select.select([ cls.xs ], [], [], 0.001)
+                if not cls.xs in fd:
                     cls.xslock.release()
                     continue
                 we = cls.xs.read_watch()
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xm/create.py     Thu Sep 22 17:42:01 2005
@@ -109,7 +109,7 @@
           The address of the vncviewer is passed to the domain on the kernel command
           line using 'VNC_SERVER=<host>:<port>'. The port used by vnc is 5500 + DISPLAY.
           A display value with a free port is chosen if possible.
-         Only valid when vnc=1.
+          Only valid when vnc=1.
           """)
 
 gopts.var('name', val='NAME',
@@ -141,7 +141,7 @@
           use="Domain memory in MB.")
 
 gopts.var('ssidref', val='SSIDREF',
-          fn=set_u32, default=-1, 
+          fn=set_u32, default=0, 
           use="Security Identifier.")
 
 gopts.var('maxmem', val='MEMORY',
@@ -342,7 +342,7 @@
     else:
         return s
 
-def configure_image(opts, config, vals):
+def configure_image(opts, vals):
     """Create the image config.
     """
     config_image = [ vals.builder ]
@@ -359,8 +359,7 @@
         config_image.append(['args', vals.extra])
     if vals.vcpus:
         config_image.append(['vcpus', vals.vcpus])
-    config.append(['image', config_image ])
-
+    return config_image
     
 def configure_disks(opts, config_devs, vals):
     """Create the config for disks (virtual block devices).
@@ -494,17 +493,17 @@
          config_vfr.append(['vif', ['id', idx], ['ip', ip]])
      config.append(config_vfr)
 
-def configure_vmx(opts, config_devs, vals):
+def configure_vmx(opts, config_image, vals):
     """Create the config for VMX devices.
     """
-    args = [ 'memmap', 'device_model', 'cdrom',
-            'boot', 'fda', 'fdb', 'localtime', 'serial', 'macaddr', 'stdvga', 
-             'isa', 'nographic', 'vnc', 'vncviewer', 'sdl', 'display']   
+    args = [ 'memmap', 'device_model', 'vcpus', 'cdrom',
+             'boot', 'fda', 'fdb', 'localtime', 'serial', 'macaddr', 'stdvga', 
+             'isa', 'nographic', 'vnc', 'vncviewer', 'sdl', 'display']
     for a in args:
-       if (vals.__dict__[a]):
-           config_devs.append([a, vals.__dict__[a]])
-
-def run_bootloader(opts, config, vals):
+        if (vals.__dict__[a]):
+            config_image.append([a, vals.__dict__[a]])
+
+def run_bootloader(opts, vals):
     if not os.access(vals.bootloader, os.X_OK):
         opts.err("Bootloader isn't executable")
     if len(vals.disk) < 1:
@@ -512,11 +511,8 @@
     (uname, dev, mode, backend) = vals.disk[0]
     file = blkif.blkdev_uname_to_file(uname)
 
-    blcfg = bootloader(vals.bootloader, file, not vals.console_autoconnect,
-                       vals.vcpus, vals.blentry)
-
-    config.append(['bootloader', vals.bootloader])
-    config.append(blcfg)
+    return bootloader(vals.bootloader, file, not vals.console_autoconnect,
+                      vals.vcpus, vals.blentry)
 
 def make_config(opts, vals):
     """Create the domain configuration.
@@ -542,16 +538,19 @@
         config.append(['restart', vals.restart])
 
     if vals.bootloader:
-        run_bootloader(opts, config, vals)
+        config.append(['bootloader', vals.bootloader])
+        config_image = run_bootloader(opts, vals)
     else:
-        configure_image(opts, config, vals)
+        config_image = configure_image(opts, vals)
+    configure_vmx(opts, config_image, vals)
+    config.append(['image', config_image ])
+
     config_devs = []
     configure_disks(opts, config_devs, vals)
     configure_pci(opts, config_devs, vals)
     configure_vifs(opts, config_devs, vals)
     configure_usb(opts, config_devs, vals)
     configure_vtpm(opts, config_devs, vals)
-    configure_vmx(opts, config_devs, vals)
     config += config_devs
 
     return config
@@ -673,7 +672,7 @@
             # Local port is field 3.
             y = x.split()[3]
             # Field is addr:port, split off the port.
-            y = y.split(':')[1]
+            y = y.split(':')[-1]
             r.append(int(y))
         return r
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm/Makefile
--- a/tools/vtpm/Makefile       Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm/Makefile       Thu Sep 22 17:42:01 2005
@@ -4,7 +4,7 @@
 include $(XEN_ROOT)/tools/vtpm/Rules.mk
 
 # Dir name for emulator (as dom0 tpm driver)
-TPM_EMULATOR_DIR = tpm_emulator-0.2
+TPM_EMULATOR_DIR = tpm_emulator
 # Dir name for vtpm instance
 VTPM_DIR = vtpm
 
@@ -13,7 +13,7 @@
 
 all: build
 
-build: $(TPM_EMULATOR_TARFILE) extract patch build_sub
+build: $(TPM_EMULATOR_DIR) $(VTPM_DIR) build_sub
 
 install: build
        $(MAKE) -C $(TPM_EMULATOR_DIR) $@
@@ -26,36 +26,32 @@
        if [ -d $(VTPM_DIR) ]; \
                then $(MAKE) -C $(VTPM_DIR) clean; \
        fi
+
+mrproper:
+       rm -f $(TPM_EMULATOR_TARFILE)
        rm -rf $(TPM_EMULATOR_DIR)
        rm -rf $(VTPM_DIR)
-
-mrproper: clean
-       rm -f $(TPM_EMULATOR_TARFILE)
 
 # Download Swiss emulator
 $(TPM_EMULATOR_TARFILE):
        wget http://download.berlios.de/tpm-emulator/$(TPM_EMULATOR_TARFILE)
 
 # Create vtpm and TPM emulator dirs
-extract: $(TPM_EMULATOR_DIR)/README $(VTPM_DIR)/README
-
-$(TPM_EMULATOR_DIR)/README:
-       -rm -rf $(TPM_EMULATOR_DIR)
-       tar -xzf $(TPM_EMULATOR_TARFILE)
-
-$(VTPM_DIR)/README:
-       -rm -rf $(VTPM_DIR)
-       cp -r --preserve $(TPM_EMULATOR_DIR) $(VTPM_DIR)
-
 # apply patches for 1) used as dom0 tpm driver 2) used as vtpm device instance
-patch: $(TPM_EMULATOR_DIR)/Makefile $(VTPM_DIR)/Makefile
-
-$(TPM_EMULATOR_DIR)/Makefile: tpm_emulator.patch
+$(TPM_EMULATOR_DIR): $(TPM_EMULATOR_TARFILE) 
+       tar -xzf $(TPM_EMULATOR_TARFILE);  
+       mv tpm_emulator-0.2 $(TPM_EMULATOR_DIR); 
+       
        -cd $(TPM_EMULATOR_DIR); \
+       patch -p1 < ../tpm_emulator-0.2b-x86_64.patch; \
        patch -p1 <../tpm_emulator.patch
 
-$(VTPM_DIR)/Makefile: vtpm.patch
+$(VTPM_DIR): $(TPM_EMULATOR_TARFILE)
+       tar -xzf $(TPM_EMULATOR_TARFILE);  
+       mv tpm_emulator-0.2 $(VTPM_DIR); 
+
        -cd $(VTPM_DIR); \
+       patch -p1 < ../tpm_emulator-0.2b-x86_64.patch; \
        patch -p1 <../vtpm.patch
 
 build_sub:
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm/README
--- a/tools/vtpm/README Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm/README Thu Sep 22 17:42:01 2005
@@ -23,6 +23,7 @@
 - xen-unstable 
 - IBM frontend/backend vtpm driver patch
 - vtpm_managerd
+- GNU MP Big number library (GMP)
 
 vtpmd Flow (for vtpm_manager. vtpmd never run by default)
 ============================
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm/tpm_emulator.patch
--- a/tools/vtpm/tpm_emulator.patch     Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm/tpm_emulator.patch     Thu Sep 22 17:42:01 2005
@@ -1,12 +1,12 @@
-diff -uprN orig/tpm_emulator-0.2/AUTHORS tpm_emulator-0.2/AUTHORS
---- orig/tpm_emulator-0.2/AUTHORS      2005-08-17 10:58:36.000000000 -0700
-+++ tpm_emulator-0.2/AUTHORS   2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/AUTHORS tpm_emulator/AUTHORS
+--- orig/tpm_emulator-0.2-x86_64/AUTHORS       2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator/AUTHORS       2005-09-14 20:27:22.000000000 -0700
 @@ -1 +1,2 @@
  Mario Strasser <mast@xxxxxxx>
 +INTEL Corp <>
-diff -uprN orig/tpm_emulator-0.2/ChangeLog tpm_emulator-0.2/ChangeLog
---- orig/tpm_emulator-0.2/ChangeLog    2005-08-17 10:58:36.000000000 -0700
-+++ tpm_emulator-0.2/ChangeLog 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/ChangeLog tpm_emulator/ChangeLog
+--- orig/tpm_emulator-0.2-x86_64/ChangeLog     2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator/ChangeLog     2005-09-14 20:27:22.000000000 -0700
 @@ -1,3 +1,7 @@
 +2005-08-16: INTEL Corp
 +      * Set default permissions to PCRs
@@ -15,10 +15,29 @@
  2005-08-15  Mario Strasser <mast@xxxxxxx>
        * all: some typos corrected
        * tpm_integrity.c: bug in TPM_Extend fixed
-diff -uprN orig/tpm_emulator-0.2/Makefile tpm_emulator-0.2/Makefile
---- orig/tpm_emulator-0.2/Makefile     2005-08-17 10:58:36.000000000 -0700
-+++ tpm_emulator-0.2/Makefile  2005-08-17 10:55:52.000000000 -0700
-@@ -1,15 +1,19 @@
+diff -uprN orig/tpm_emulator-0.2-x86_64/linux_module.h 
tpm_emulator/linux_module.h
+--- orig/tpm_emulator-0.2-x86_64/linux_module.h        2005-09-15 
19:21:14.844078720 -0700
++++ tpm_emulator/linux_module.h        2005-09-14 20:27:22.000000000 -0700
+@@ -1,5 +1,6 @@
+ /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
+  * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
++ * Copyright (C) 2005 INTEL Corp.
+  *
+  * This module is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published
+@@ -35,7 +36,7 @@
+ #include "tpm_version.h"
+ 
+ #define TPM_DEVICE_MINOR      224
+-#define TPM_DEVICE_NAME         "tpm"
++#define TPM_DEVICE_NAME         "tpm0"
+ #define TPM_MODULE_NAME       "tpm_emulator"
+ 
+ /* debug and log output functions */
+diff -uprN orig/tpm_emulator-0.2-x86_64/Makefile tpm_emulator/Makefile
+--- orig/tpm_emulator-0.2-x86_64/Makefile      2005-09-15 19:21:14.845078568 
-0700
++++ tpm_emulator/Makefile      2005-09-14 20:27:22.000000000 -0700
+@@ -1,16 +1,20 @@
  # Software-Based Trusted Platform Module (TPM) Emulator for Linux
  # Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>
 +# Copyright (C) 2005 INTEL Corp.
@@ -33,6 +52,7 @@
 -KERNEL_BUILD   := /lib/modules/$(KERNEL_RELEASE)/build
 +KERNEL_BUILD   := $(XEN_ROOT)/linux-2.6.12-xen0
  MOD_SUBDIR     := misc
+ COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/)
  
  # module settings
 -MODULE_NAME    := tpm_emulator
@@ -40,7 +60,7 @@
  VERSION_MAJOR  := 0
  VERSION_MINOR  := 2
  VERSION_BUILD  := $(shell date +"%s")
-@@ -27,11 +30,9 @@ DIRS           := . crypto tpm 
+@@ -34,11 +38,9 @@ DIRS           := . crypto tpm 
  SRCS           := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c))
  OBJS           := $(patsubst %.c, %.o, $(SRCS))
  SRCS           += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h))
@@ -54,7 +74,7 @@
  
  EXTRA_CFLAGS   += -I$(src) -I$(src)/crypto -I$(src)/tpm 
  
-@@ -42,23 +43,17 @@ all:       $(src)/crypto/gmp.h $(src)/crypto/l
+@@ -49,23 +51,17 @@ all:       $(src)/crypto/gmp.h $(src)/crypto/l
        @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules
  
  install:
@@ -84,9 +104,9 @@
  
  $(src)/crypto/libgmp.a:
        test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB) 
$(src)/crypto/libgmp.a
-diff -uprN orig/tpm_emulator-0.2/README tpm_emulator-0.2/README
---- orig/tpm_emulator-0.2/README       2005-08-17 10:58:36.000000000 -0700
-+++ tpm_emulator-0.2/README    2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/README tpm_emulator/README
+--- orig/tpm_emulator-0.2-x86_64/README        2005-08-15 00:58:57.000000000 
-0700
++++ tpm_emulator/README        2005-09-14 20:27:22.000000000 -0700
 @@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli
  Copyright
  --------------------------------------------------------------------------
@@ -97,28 +117,9 @@
                
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
-diff -uprN orig/tpm_emulator-0.2/linux_module.h tpm_emulator-0.2/linux_module.h
---- orig/tpm_emulator-0.2/linux_module.h       2005-08-17 10:58:36.000000000 
-0700
-+++ tpm_emulator-0.2/linux_module.h    2005-08-17 10:55:52.000000000 -0700
-@@ -1,5 +1,6 @@
- /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
-  * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
-+ * Copyright (C) 2005 INTEL Corp.
-  *
-  * This module is free software; you can redistribute it and/or modify
-  * it under the terms of the GNU General Public License as published
-@@ -33,7 +34,7 @@
- #include "tpm_version.h"
- 
- #define TPM_DEVICE_MINOR      224
--#define TPM_DEVICE_NAME         "tpm"
-+#define TPM_DEVICE_NAME         "tpm0"
- #define TPM_MODULE_NAME       "tpm_emulator"
- 
- /* debug and log output functions */
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_data.c tpm_emulator-0.2/tpm/tpm_data.c
---- orig/tpm_emulator-0.2/tpm/tpm_data.c       2005-08-17 10:58:36.000000000 
-0700
-+++ tpm_emulator-0.2/tpm/tpm_data.c    2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c 
tpm_emulator/tpm/tpm_data.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c        2005-09-15 
19:21:14.847078264 -0700
++++ tpm_emulator/tpm/tpm_data.c        2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -139,13 +140,3 @@
      tpmData.permanent.data.pcrAttrib[i].pcrReset = TRUE;
    }
    /* set tick type */
-diff -uprN orig/tpm_emulator-0.2/tpm_version.h tpm_emulator-0.2/tpm_version.h
---- orig/tpm_emulator-0.2/tpm_version.h        2005-08-17 10:58:36.000000000 
-0700
-+++ tpm_emulator-0.2/tpm_version.h     2005-08-17 10:55:53.000000000 -0700
-@@ -2,5 +2,5 @@
- #define _TPM_VERSION_H_
- #define VERSION_MAJOR 0
- #define VERSION_MINOR 2
--#define VERSION_BUILD 1123950310
-+#define VERSION_BUILD 1124301353
- #endif /* _TPM_VERSION_H_ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm/vtpm.patch
--- a/tools/vtpm/vtpm.patch     Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm/vtpm.patch     Thu Sep 22 17:42:01 2005
@@ -1,12 +1,12 @@
-diff -uprN orig/tpm_emulator-0.2/AUTHORS vtpm/AUTHORS
---- orig/tpm_emulator-0.2/AUTHORS      2005-08-17 10:58:36.000000000 -0700
-+++ vtpm/AUTHORS       2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/AUTHORS vtpm/AUTHORS
+--- orig/tpm_emulator-0.2-x86_64/AUTHORS       2005-08-15 00:58:57.000000000 
-0700
++++ vtpm/AUTHORS       2005-09-14 20:27:22.000000000 -0700
 @@ -1 +1,2 @@
  Mario Strasser <mast@xxxxxxx>
 +INTEL Corp <>
-diff -uprN orig/tpm_emulator-0.2/ChangeLog vtpm/ChangeLog
---- orig/tpm_emulator-0.2/ChangeLog    2005-08-17 10:58:36.000000000 -0700
-+++ vtpm/ChangeLog     2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/ChangeLog vtpm/ChangeLog
+--- orig/tpm_emulator-0.2-x86_64/ChangeLog     2005-08-15 00:58:57.000000000 
-0700
++++ vtpm/ChangeLog     2005-09-14 20:27:22.000000000 -0700
 @@ -1,3 +1,7 @@
 +2005-08-16 Intel Corp
 +      Moved module out of kernel to run as a ring 3 app
@@ -15,115 +15,9 @@
  2005-08-15  Mario Strasser <mast@xxxxxxx>
        * all: some typos corrected
        * tpm_integrity.c: bug in TPM_Extend fixed
-diff -uprN orig/tpm_emulator-0.2/Makefile vtpm/Makefile
---- orig/tpm_emulator-0.2/Makefile     2005-08-17 10:58:36.000000000 -0700
-+++ vtpm/Makefile      2005-08-17 10:55:52.000000000 -0700
-@@ -1,21 +1,29 @@
- # Software-Based Trusted Platform Module (TPM) Emulator for Linux
- # Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>
-+# Copyright (C) 2005 INTEL Corp.
- #
- # $Id: Makefile 10 2005-04-26 20:59:50Z mast $
- 
--# kernel settings
--KERNEL_RELEASE := $(shell uname -r)
--KERNEL_BUILD   := /lib/modules/$(KERNEL_RELEASE)/build
--MOD_SUBDIR     := misc
--
- # module settings
--MODULE_NAME    := tpm_emulator
-+BIN            := vtpmd
- VERSION_MAJOR  := 0
- VERSION_MINOR  := 2
- VERSION_BUILD  := $(shell date +"%s")
- 
--# enable/disable DEBUG messages
--EXTRA_CFLAGS   += -DDEBUG -g  
-+# Installation program and options
-+INSTALL         = install
-+INSTALL_PROG    = $(INSTALL) -m0755
-+INSTALL_DIR     = $(INSTALL) -d -m0755
-+
-+# Xen tools installation directory
-+TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
-+
-+CC      := gcc
-+CFLAGS  += -g -Wall $(INCLUDE) -DDEBUG
-+CFLAGS  += -I. -Itpm
-+
-+# Is the simulator running in it's own vm?
-+#CFLAGS += -DVTPM_MULTI_VM
- 
- # GNU MP configuration
- GMP_LIB        := /usr/lib/libgmp.a
-@@ -27,38 +35,31 @@ DIRS           := . crypto tpm 
- SRCS           := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c))
- OBJS           := $(patsubst %.c, %.o, $(SRCS))
- SRCS           += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h))
--DISTSRC        := ./README ./AUTHORS ./ChangeLog ./Makefile $(SRCS)
--DISTDIR        := tpm_emulator-$(VERSION_MAJOR).$(VERSION_MINOR)
- 
--obj-m               := $(MODULE_NAME).o
--$(MODULE_NAME)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
-+obj-m               := $(BIN)
-+$(BIN)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
- 
- EXTRA_CFLAGS   += -I$(src) -I$(src)/crypto -I$(src)/tpm 
- 
- # do not print "Entering directory ..."
- MAKEFLAGS      += --no-print-directory
- 
--all:  $(src)/crypto/gmp.h $(src)/crypto/libgmp.a version
--      @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules
-+all: $(BIN)
-+
-+$(BIN):       $(src)/crypto/gmp.h $(src)/crypto/libgmp.a version $(SRCS) 
$(OBJS)
-+      $(CC) $(CFLAGS) $(OBJS) $(src)/crypto/libgmp.a -o $(BIN)
-+
-+%.o: %.c
-+      $(CC) $(CFLAGS) -c $< -o $@
- 
- install:
--      @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules_install
--      test -d /var/tpm || mkdir /var/tpm
--      test -c /dev/tpm || mknod /dev/tpm c 10 224
--      chmod 666 /dev/tpm
--      depmod -a
-+      $(INSTALL_PROG) $(BIN) $(TOOLS_INSTALL_DIR)
- 
- clean:
--      @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) clean
--      rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a
-+      rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a $(OBJS)
- 
--dist: $(DISTSRC)
--      rm -rf $(DISTDIR)
--      mkdir $(DISTDIR)
--      cp --parents $(DISTSRC) $(DISTDIR)/
--      rm -f $(DISTDIR)/crypto/gmp.h 
--      tar -chzf $(DISTDIR).tar.gz $(DISTDIR)
--      rm -rf $(DISTDIR)
-+mrproper: clean
-+      rm -f $(BIN)
- 
- $(src)/crypto/libgmp.a:
-       test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB) 
$(src)/crypto/libgmp.a
-diff -uprN orig/tpm_emulator-0.2/README vtpm/README
---- orig/tpm_emulator-0.2/README       2005-08-17 10:58:36.000000000 -0700
-+++ vtpm/README        2005-08-17 10:55:52.000000000 -0700
-@@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli
- Copyright
- --------------------------------------------------------------------------
- Copyright (C) 2004 Mario Strasser <mast@xxxxxxx> and Swiss Federal 
--Institute of Technology (ETH) Zurich.
-+                   Institute of Technology (ETH) Zurich.
-+Copyright (C) 2005 INTEL Corp 
-               
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
-diff -uprN orig/tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c 
vtpm/crypto/gmp_kernel_wrapper.c
---- orig/tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c  2005-08-17 
10:58:36.000000000 -0700
-+++ vtpm/crypto/gmp_kernel_wrapper.c   2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/crypto/gmp_kernel_wrapper.c 
vtpm/crypto/gmp_kernel_wrapper.c
+--- orig/tpm_emulator-0.2-x86_64/crypto/gmp_kernel_wrapper.c   2005-09-15 
19:21:42.508873032 -0700
++++ vtpm/crypto/gmp_kernel_wrapper.c   2005-09-15 19:25:37.319176440 -0700
 @@ -1,5 +1,6 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -154,9 +48,9 @@
  {
 -  void *ret  = (void*)kmalloc(size, GFP_KERNEL);
 -  if (!ret) panic(KERN_CRIT TPM_MODULE_NAME 
--    "GMP: cannot allocate memory (size=%u)\n", size);
+-    "GMP: cannot allocate memory (size=%Zu)\n", size);
 +  void *ret  = (void*)malloc(size);
-+  if (!ret) error("GMP: cannot allocate memory (size=%u)\n", size);
++  if (!ret) error("GMP: cannot allocate memory (size=%Zu)\n", size);
    return ret;
  }
  
@@ -165,9 +59,10 @@
  {
 -  void *ret = (void*)kmalloc(new_size, GFP_KERNEL);
 -  if (!ret) panic(KERN_CRIT TPM_MODULE_NAME "GMP: Cannot reallocate memory "
+-    "(old_size=%Zu new_size=%Zu)\n", old_size, new_size);
 +  void *ret = (void*)malloc(new_size);
 +  if (!ret) error("GMP: Cannot reallocate memory "
-     "(old_size=%u new_size=%u)\n", old_size, new_size);
++    "(old_size=%Zu new_size=%Zu)\n", old_size, new_size);
    memcpy(ret, oldptr, old_size);
 -  kfree(oldptr);
 +  free(oldptr);
@@ -183,9 +78,9 @@
    }
  }
  
-diff -uprN orig/tpm_emulator-0.2/crypto/rsa.c vtpm/crypto/rsa.c
---- orig/tpm_emulator-0.2/crypto/rsa.c 2005-08-17 10:58:36.000000000 -0700
-+++ vtpm/crypto/rsa.c  2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/crypto/rsa.c vtpm/crypto/rsa.c
+--- orig/tpm_emulator-0.2-x86_64/crypto/rsa.c  2005-08-15 00:58:57.000000000 
-0700
++++ vtpm/crypto/rsa.c  2005-09-14 20:27:22.000000000 -0700
 @@ -1,5 +1,6 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -211,8 +106,8 @@
        sha1_final(&ctx, &msg[1]);
        if (memcmp(&msg[1], &msg[1 + SHA1_DIGEST_LENGTH], 
            SHA1_DIGEST_LENGTH) != 0) return -1;
-diff -uprN orig/tpm_emulator-0.2/linux_module.c vtpm/linux_module.c
---- orig/tpm_emulator-0.2/linux_module.c       2005-08-17 10:58:36.000000000 
-0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/linux_module.c vtpm/linux_module.c
+--- orig/tpm_emulator-0.2-x86_64/linux_module.c        2005-09-15 
19:22:40.343080896 -0700
 +++ vtpm/linux_module.c        1969-12-31 16:00:00.000000000 -0800
 @@ -1,163 +0,0 @@
 -/* Software-Based Trusted Platform Module (TPM) Emulator for Linux 
@@ -283,7 +178,7 @@
 -
 -static ssize_t tpm_read(struct file *file, char *buf, size_t count, loff_t 
*ppos)
 -{
--  debug("%s(%d)", __FUNCTION__, count);
+-  debug("%s(%Zu)", __FUNCTION__, count);
 -  down(&tpm_mutex);
 -  if (tpm_response.data != NULL) {
 -    count = min(count, (size_t)tpm_response.size - (size_t)*ppos);
@@ -298,7 +193,7 @@
 -
 -static ssize_t tpm_write(struct file *file, const char *buf, size_t count, 
loff_t *ppos)
 -{
--  debug("%s(%d)", __FUNCTION__, count);
+-  debug("%s(%Zu)", __FUNCTION__, count);
 -  down(&tpm_mutex);
 -  *ppos = 0;
 -  if (tpm_response.data != NULL) kfree(tpm_response.data);
@@ -378,9 +273,9 @@
 -  return (ticks > 0) ? ticks : 1;
 -}
 -
-diff -uprN orig/tpm_emulator-0.2/linux_module.h vtpm/linux_module.h
---- orig/tpm_emulator-0.2/linux_module.h       2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/linux_module.h        2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/linux_module.h vtpm/linux_module.h
+--- orig/tpm_emulator-0.2-x86_64/linux_module.h        2005-09-15 
19:21:14.844078720 -0700
++++ vtpm/linux_module.h        2005-09-14 20:27:22.000000000 -0700
 @@ -1,5 +1,6 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -416,17 +311,20 @@
  
 +/* module settings */
 +#define min(A,B) ((A)<(B)?(A):(B))
+ #ifndef STR
  #define STR(s) __STR__(s)
  #define __STR__(s) #s
- #include "tpm_version.h"
-@@ -39,32 +45,35 @@
+@@ -39,34 +45,38 @@
+ #define TPM_MODULE_NAME       "tpm_emulator"
+ 
  /* debug and log output functions */
++extern int dmi_id; 
  
  #ifdef DEBUG
 -#define debug(fmt, ...) printk(KERN_DEBUG "%s %s:%d: Debug: " fmt "\n", \
 -                        TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__)
-+#define debug(fmt, ...) printf("%s:%d: Debug: " fmt "\n", \
-+                        __FILE__, __LINE__, ## __VA_ARGS__)
++#define debug(fmt, ...) printf("TPMD[%d]: %s:%d: Debug: " fmt "\n", \
++                        dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
  #else
  #define debug(fmt, ...) 
  #endif
@@ -436,12 +334,12 @@
 -                        TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__)
 -#define alert(fmt, ...) printk(KERN_ALERT "%s %s:%d: Alert: " fmt "\n", \
 -                        TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__)
-+#define info(fmt, ...)  printf("%s:%d: Info: " fmt "\n", \
-+                        __FILE__, __LINE__, ## __VA_ARGS__)
-+#define error(fmt, ...) printf("%s:%d: Error: " fmt "\n", \
-+                        __FILE__, __LINE__, ## __VA_ARGS__)
-+#define alert(fmt, ...) printf("%s:%d: Alert: " fmt "\n", \
-+                        __FILE__, __LINE__, ## __VA_ARGS__)
++#define info(fmt, ...)  printf("TPMD[%d]: %s:%d: Info: " fmt "\n", \
++                        dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
++#define error(fmt, ...) printf("TPMD[%d]: %s:%d: Error: " fmt "\n", \
++                        dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
++#define alert(fmt, ...) printf("TPMD[%d]: %s:%d: Alert: " fmt "\n", \
++                        dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
  
  /* memory allocation */
  
@@ -465,7 +363,7 @@
  static inline void tpm_get_random_bytes(void *buf, int nbytes)
  {
    get_random_bytes(buf, nbytes);
-@@ -84,9 +93,9 @@ uint64_t tpm_get_ticks(void);
+@@ -86,9 +96,9 @@ uint64_t tpm_get_ticks(void);
  #define CPU_TO_LE16(x) __cpu_to_le16(x)
  
  #define BE64_TO_CPU(x) __be64_to_cpu(x)
@@ -477,9 +375,116 @@
  #define BE16_TO_CPU(x) __be16_to_cpu(x)
  #define LE16_TO_CPU(x) __le16_to_cpu(x)
  
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_audit.c vtpm/tpm/tpm_audit.c
---- orig/tpm_emulator-0.2/tpm/tpm_audit.c      2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/tpm/tpm_audit.c       2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/Makefile vtpm/Makefile
+--- orig/tpm_emulator-0.2-x86_64/Makefile      2005-09-15 19:21:14.845078568 
-0700
++++ vtpm/Makefile      2005-09-14 20:27:22.000000000 -0700
+@@ -1,22 +1,31 @@
+ # Software-Based Trusted Platform Module (TPM) Emulator for Linux
+ # Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>
++# Copyright (C) 2005 INTEL Corp.
+ #
+ # $Id: Makefile 10 2005-04-26 20:59:50Z mast $
+ 
+-# kernel settings
+-KERNEL_RELEASE := $(shell uname -r)
+-KERNEL_BUILD   := /lib/modules/$(KERNEL_RELEASE)/build
+-MOD_SUBDIR     := misc
+ COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/)
+ 
+ # module settings
+-MODULE_NAME    := tpm_emulator
++BIN            := vtpmd
+ VERSION_MAJOR  := 0
+ VERSION_MINOR  := 2
+ VERSION_BUILD  := $(shell date +"%s")
+ 
+-# enable/disable DEBUG messages
+-EXTRA_CFLAGS   += -DDEBUG -g  
++# Installation program and options
++INSTALL         = install
++INSTALL_PROG    = $(INSTALL) -m0755
++INSTALL_DIR     = $(INSTALL) -d -m0755
++
++# Xen tools installation directory
++TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
++
++CC      := gcc
++CFLAGS  += -g -Wall $(INCLUDE) -DDEBUG
++CFLAGS  += -I. -Itpm
++
++# Is the simulator running in it's own vm?
++#CFLAGS += -DVTPM_MULTI_VM
+ 
+ ifeq ($(COMPILE_ARCH),x86_64)
+ LIBDIR = lib64
+@@ -34,38 +43,31 @@ DIRS           := . crypto tpm 
+ SRCS           := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c))
+ OBJS           := $(patsubst %.c, %.o, $(SRCS))
+ SRCS           += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h))
+-DISTSRC        := ./README ./AUTHORS ./ChangeLog ./Makefile $(SRCS)
+-DISTDIR        := tpm_emulator-$(VERSION_MAJOR).$(VERSION_MINOR)
+ 
+-obj-m               := $(MODULE_NAME).o
+-$(MODULE_NAME)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
++obj-m               := $(BIN)
++$(BIN)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
+ 
+ EXTRA_CFLAGS   += -I$(src) -I$(src)/crypto -I$(src)/tpm 
+ 
+ # do not print "Entering directory ..."
+ MAKEFLAGS      += --no-print-directory
+ 
+-all:  $(src)/crypto/gmp.h $(src)/crypto/libgmp.a version
+-      @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules
++all: $(BIN)
++
++$(BIN):       $(src)/crypto/gmp.h $(src)/crypto/libgmp.a version $(SRCS) 
$(OBJS)
++      $(CC) $(CFLAGS) $(OBJS) $(src)/crypto/libgmp.a -o $(BIN)
++
++%.o: %.c
++      $(CC) $(CFLAGS) -c $< -o $@
+ 
+ install:
+-      @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules_install
+-      test -d /var/tpm || mkdir /var/tpm
+-      test -c /dev/tpm || mknod /dev/tpm c 10 224
+-      chmod 666 /dev/tpm
+-      depmod -a
++      $(INSTALL_PROG) $(BIN) $(TOOLS_INSTALL_DIR)
+ 
+ clean:
+-      @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) clean
+-      rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a
++      rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a $(OBJS)
+ 
+-dist: $(DISTSRC)
+-      rm -rf $(DISTDIR)
+-      mkdir $(DISTDIR)
+-      cp --parents $(DISTSRC) $(DISTDIR)/
+-      rm -f $(DISTDIR)/crypto/gmp.h 
+-      tar -chzf $(DISTDIR).tar.gz $(DISTDIR)
+-      rm -rf $(DISTDIR)
++mrproper: clean
++      rm -f $(BIN) tpm_version.h
+ 
+ $(src)/crypto/libgmp.a:
+       test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB) 
$(src)/crypto/libgmp.a
+diff -uprN orig/tpm_emulator-0.2-x86_64/README vtpm/README
+--- orig/tpm_emulator-0.2-x86_64/README        2005-08-15 00:58:57.000000000 
-0700
++++ vtpm/README        2005-09-14 20:27:22.000000000 -0700
+@@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli
+ Copyright
+ --------------------------------------------------------------------------
+ Copyright (C) 2004 Mario Strasser <mast@xxxxxxx> and Swiss Federal 
+-Institute of Technology (ETH) Zurich.
++                   Institute of Technology (ETH) Zurich.
++Copyright (C) 2005 INTEL Corp 
+               
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_audit.c vtpm/tpm/tpm_audit.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_audit.c       2005-08-15 
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_audit.c       2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -542,9 +547,9 @@
    return TPM_SUCCESS;
  }
 -
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_authorization.c 
vtpm/tpm/tpm_authorization.c
---- orig/tpm_emulator-0.2/tpm/tpm_authorization.c      2005-08-17 
10:58:36.000000000 -0700
-+++ vtpm/tpm/tpm_authorization.c       2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_authorization.c 
vtpm/tpm/tpm_authorization.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_authorization.c       2005-08-15 
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_authorization.c       2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -568,9 +573,9 @@
  }
 -
 -
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_capability.c vtpm/tpm/tpm_capability.c
---- orig/tpm_emulator-0.2/tpm/tpm_capability.c 2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/tpm/tpm_capability.c  2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_capability.c 
vtpm/tpm/tpm_capability.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_capability.c  2005-08-15 
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_capability.c  2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -593,9 +598,9 @@
    }
  }
 -
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_cmd_handler.c 
vtpm/tpm/tpm_cmd_handler.c
---- orig/tpm_emulator-0.2/tpm/tpm_cmd_handler.c        2005-08-17 
10:58:36.000000000 -0700
-+++ vtpm/tpm/tpm_cmd_handler.c 2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_cmd_handler.c 
vtpm/tpm/tpm_cmd_handler.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_cmd_handler.c 2005-08-15 
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_cmd_handler.c 2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -658,9 +663,9 @@
    return 0;
  }
 -
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_crypto.c vtpm/tpm/tpm_crypto.c
---- orig/tpm_emulator-0.2/tpm/tpm_crypto.c     2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/tpm/tpm_crypto.c      2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_crypto.c vtpm/tpm/tpm_crypto.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_crypto.c      2005-09-15 
19:21:14.846078416 -0700
++++ vtpm/tpm/tpm_crypto.c      2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -678,14 +683,14 @@
      memcpy(&buf[30], areaToSign, areaToSignSize);
      if (rsa_sign(&key->key, RSA_SSA_PKCS1_SHA1, 
          buf, areaToSignSize + 30, *sig)) {
-@@ -379,4 +380,3 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL
+@@ -383,4 +384,3 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL
    }  
    return TPM_SUCCESS;
  }
 -
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_data.c vtpm/tpm/tpm_data.c
---- orig/tpm_emulator-0.2/tpm/tpm_data.c       2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/tpm/tpm_data.c        2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c vtpm/tpm/tpm_data.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c        2005-09-15 
19:21:14.847078264 -0700
++++ vtpm/tpm/tpm_data.c        2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1005,7 +1010,7 @@
  }
  
  #else
-@@ -231,7 +431,6 @@ int tpm_restore_permanent_data(void)
+@@ -232,7 +432,6 @@ int tpm_restore_permanent_data(void)
  
  int tpm_erase_permanent_data(void)
  {
@@ -1014,9 +1019,9 @@
    return res;
  }
 -
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_deprecated.c vtpm/tpm/tpm_deprecated.c
---- orig/tpm_emulator-0.2/tpm/tpm_deprecated.c 2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/tpm/tpm_deprecated.c  2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_deprecated.c 
vtpm/tpm/tpm_deprecated.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_deprecated.c  2005-08-15 
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_deprecated.c  2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1043,9 +1048,9 @@
                          authContextSize, &contextBlob);
    if (res != TPM_SUCCESS) return res;
    len = *authContextSize;
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_emulator.h vtpm/tpm/tpm_emulator.h
---- orig/tpm_emulator-0.2/tpm/tpm_emulator.h   2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/tpm/tpm_emulator.h    2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_emulator.h 
vtpm/tpm/tpm_emulator.h
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_emulator.h    2005-08-15 
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_emulator.h    2005-09-14 20:27:22.000000000 -0700
 @@ -1,5 +1,6 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1063,9 +1068,9 @@
  
  /**
   * tpm_emulator_init - initialises and starts the TPM emulator
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_integrity.c vtpm/tpm/tpm_integrity.c
---- orig/tpm_emulator-0.2/tpm/tpm_integrity.c  2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/tpm/tpm_integrity.c   2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_integrity.c 
vtpm/tpm/tpm_integrity.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_integrity.c   2005-08-15 
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_integrity.c   2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1079,9 +1084,9 @@
    return TPM_SUCCESS;
  }
 -
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_structures.h vtpm/tpm/tpm_structures.h
---- orig/tpm_emulator-0.2/tpm/tpm_structures.h 2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/tpm/tpm_structures.h  2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_structures.h 
vtpm/tpm/tpm_structures.h
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_structures.h  2005-08-15 
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_structures.h  2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1099,9 +1104,9 @@
  #include "crypto/rsa.h"
  
  /*
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_testing.c vtpm/tpm/tpm_testing.c
---- orig/tpm_emulator-0.2/tpm/tpm_testing.c    2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/tpm/tpm_testing.c     2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_testing.c 
vtpm/tpm/tpm_testing.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_testing.c     2005-08-15 
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_testing.c     2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1217,9 +1222,9 @@
    rsa_private_key_t priv_key;
    rsa_public_key_t pub_key;
  
-diff -uprN orig/tpm_emulator-0.2/tpm/tpm_ticks.c vtpm/tpm/tpm_ticks.c
---- orig/tpm_emulator-0.2/tpm/tpm_ticks.c      2005-08-17 10:58:36.000000000 
-0700
-+++ vtpm/tpm/tpm_ticks.c       2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_ticks.c vtpm/tpm/tpm_ticks.c
+--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_ticks.c       2005-08-15 
00:58:57.000000000 -0700
++++ vtpm/tpm/tpm_ticks.c       2005-09-14 20:27:22.000000000 -0700
 @@ -1,6 +1,7 @@
  /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>,
@@ -1302,9 +1307,9 @@
  }
    
  
-diff -uprN orig/tpm_emulator-0.2/tpm/vtpm_manager.h vtpm/tpm/vtpm_manager.h
---- orig/tpm_emulator-0.2/tpm/vtpm_manager.h   1969-12-31 16:00:00.000000000 
-0800
-+++ vtpm/tpm/vtpm_manager.h    2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/vtpm_manager.h 
vtpm/tpm/vtpm_manager.h
+--- orig/tpm_emulator-0.2-x86_64/tpm/vtpm_manager.h    1969-12-31 
16:00:00.000000000 -0800
++++ vtpm/tpm/vtpm_manager.h    2005-09-14 20:27:22.000000000 -0700
 @@ -0,0 +1,126 @@
 +// ===================================================================
 +// 
@@ -1432,9 +1437,9 @@
 +*********************************************************************/
 +
 +#endif //_VTPM_MANAGER_H_
-diff -uprN orig/tpm_emulator-0.2/tpmd.c vtpm/tpmd.c
---- orig/tpm_emulator-0.2/tpmd.c       1969-12-31 16:00:00.000000000 -0800
-+++ vtpm/tpmd.c        2005-08-17 10:55:52.000000000 -0700
+diff -uprN orig/tpm_emulator-0.2-x86_64/tpmd.c vtpm/tpmd.c
+--- orig/tpm_emulator-0.2-x86_64/tpmd.c        1969-12-31 16:00:00.000000000 
-0800
++++ vtpm/tpmd.c        2005-09-15 19:28:55.783005352 -0700
 @@ -0,0 +1,207 @@
 +/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
 + * Copyright (C) 2005 INTEL Corp
@@ -1468,9 +1473,9 @@
 +#else
 + #define GUEST_RX_FIFO_D "/var/vtpm/fifos/guest-to-%d.fifo"
 + #define GUEST_TX_FIFO "/var/vtpm/fifos/guest-from-all.fifo"
++#endif
 +
 + int dmi_id;
-+#endif
 +                                              
 +#define BUFFER_SIZE 2048
 +
@@ -1506,7 +1511,7 @@
 +{
 +  uint8_t in[BUFFER_SIZE], *out, *addressed_out;
 +  uint32_t out_size;
-+  int in_size, written ;
++  int in_size, written;
 +  int i, guest_id=-1;
 + 
 +  int vtpm_tx_fh=-1, vtpm_rx_fh=-1;
@@ -1602,7 +1607,7 @@
 +      written = write(vtpm_tx_fh, ctrl_msg, sizeof(ctrl_msg));
 +
 +      if (written != sizeof(ctrl_msg)) {
-+        printf("ERROR: Part of response not written %d/%d.\n", written, 
sizeof(ctrl_msg));
++        printf("ERROR: Part of response not written %d/%Zu.\n", written, 
sizeof(ctrl_msg));
 +      } else {
 +        printf("Send Ctrl Message confermation\n");
 +      }
@@ -1623,7 +1628,7 @@
 +          printf("%x ", addressed_out[i]);
 +        printf("\n");
 +      } else {
-+        printf("Sent[%d]: ", out_size + sizeof(uint32_t));
++        printf("Sent[%Zu]: ", out_size + sizeof(uint32_t));
 +        for (i=0; i< out_size+ sizeof(uint32_t); i++)
 +          printf("%x ", addressed_out[i]);
 +        printf("\n");
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/README
--- a/tools/vtpm_manager/README Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/README Thu Sep 22 17:42:01 2005
@@ -51,14 +51,24 @@
 DUMMY_BACKEND                -> vtpm_manager listens on /tmp/in.fifo and 
                                 /tmp/out.fifo rather than backend
 
-MANUAL_DM_LAUNCH             -> User must manually launch & kill VTPMs
+MANUAL_DM_LAUNCH             -> Must manually launch & kill VTPMs
 
-USE_FIXED_SRK_AUTH           -> Do not randomly generate a random SRK & Owner 
auth
+WELL_KNOWN_SRK_AUTH          -> Rather than randomly generating the password
+                                for the SRK, use a well-known value. This is
+                                necessary for sharing use of the SRK across
+                                applications, such as VTPM and Dom0
+                                measurement software.
+
+WELL_KNOWN_OWNER_AUTH        -> Rather than randomly generating the password
+                                for the owner, use a well-known value. This is
+                                useful for debugging and for poor BIOSes which
+                                do not support clearing the TPM if OwnerAuth
+                                is lost. However, this offers no protection
+                                from a malicious app issuing a TPM_OwnerClear
+                                to wipe the TPM.
 
 Requirements
 ============
 - xen-unstable 
-- IBM frontend/backend vtpm driver patch
+- vtpm frontend/backend driver patch
+- OpenSSL Library
 
 Single-VM Flow
 ============================
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/Rules.mk
--- a/tools/vtpm_manager/Rules.mk       Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/Rules.mk       Thu Sep 22 17:42:01 2005
@@ -57,7 +57,8 @@
 #CFLAGS += -DMANUAL_DM_LAUNCH
 
 # Fixed SRK
-CFLAGS += -DUSE_FIXED_SRK_AUTH
+CFLAGS += -DWELL_KNOWN_SRK_AUTH
+#CFLAGS += -DWELL_KNOWN_OWNER_AUTH
 
 # TPM Hardware Device or TPM Simulator
 #CFLAGS += -DTPM_HWDEV
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/crypto/Makefile
--- a/tools/vtpm_manager/crypto/Makefile        Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/crypto/Makefile        Thu Sep 22 17:42:01 2005
@@ -13,6 +13,7 @@
        rm -f *.a *.so *.o *.rpm $(DEP_FILES)
 
 mrproper: clean
+       rm -f *~
 
 $(BIN): $(OBJS)
        $(AR) rcs $(BIN) $(OBJS)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/manager/Makefile
--- a/tools/vtpm_manager/manager/Makefile       Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/manager/Makefile       Thu Sep 22 17:42:01 2005
@@ -17,7 +17,7 @@
        rm -f *.a *.so *.o *.rpm $(DEP_FILES)
 
 mrproper: clean
-       rm -f $(BIN)
+       rm -f $(BIN) *~
 
 $(BIN): $(OBJS)
        $(CC) $(LDFLAGS) $^ $(LIBS) -o $@
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/manager/dmictl.c
--- a/tools/vtpm_manager/manager/dmictl.c       Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/manager/dmictl.c       Thu Sep 22 17:42:01 2005
@@ -1,339 +1,344 @@
-// ===================================================================
-// 
-// Copyright (c) 2005, Intel Corp.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without 
-// modification, are permitted provided that the following conditions 
-// are met:
-//
-//   * Redistributions of source code must retain the above copyright 
-//     notice, this list of conditions and the following disclaimer.
-//   * Redistributions in binary form must reproduce the above 
-//     copyright notice, this list of conditions and the following 
-//     disclaimer in the documentation and/or other materials provided 
-//     with the distribution.
-//   * Neither the name of Intel Corporation nor the names of its 
-//     contributors may be used to endorse or promote products derived
-//     from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-// ===================================================================
-// 
-//   dmictl.c
-// 
-//     Functions for creating and destroying DMIs
-//
-// ==================================================================
-
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-
-#ifndef VTPM_MUTLI_VM
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <fcntl.h>
- #include <signal.h>
- #include <wait.h>
-#endif
-
-#include "vtpmpriv.h"
-#include "bsg.h"
-#include "buffer.h"
-#include "log.h"
-#include "hashtable.h"
-#include "hashtable_itr.h"
-
-#define TPM_EMULATOR_PATH "/usr/bin/vtpmd"
-
-TPM_RESULT close_dmi( VTPM_DMI_RESOURCE *dmi_res) {
-       TPM_RESULT status = TPM_FAIL;
-       
-       if (dmi_res == NULL) 
-               return TPM_SUCCESS;
-       
-       status = TCS_CloseContext(dmi_res->TCSContext);
-       free ( dmi_res->NVMLocation );
-       dmi_res->connected = FALSE;
-
-#ifndef VTPM_MULTI_VM  
-       free(dmi_res->guest_tx_fname);
-       free(dmi_res->vtpm_tx_fname);
-               
-       close(dmi_res->guest_tx_fh); dmi_res->guest_tx_fh = -1;
-       close(dmi_res->vtpm_tx_fh);  dmi_res->vtpm_tx_fh = -1; 
-       
-               
- #ifndef MANUAL_DM_LAUNCH
-  if (dmi_res->dmi_id != VTPM_CTL_DM) {
-    if (dmi_res->dmi_pid != 0) {
-      vtpmloginfo(VTPM_LOG_VTPM, "Killing dmi on pid %d.\n", dmi_res->dmi_pid);
-      if ((kill(dmi_res->dmi_pid, SIGKILL) !=0) ||
-         (waitpid(dmi_res->dmi_pid, NULL, 0) != dmi_res->dmi_pid)){
-        vtpmlogerror(VTPM_LOG_VTPM, "Could not kill dmi on pid %d.\n", 
dmi_res->dmi_pid);
-        status = TPM_FAIL;
-      }
-    } else 
-      vtpmlogerror(VTPM_LOG_VTPM, "Could not kill dmi because it's pid was 
0.\n");
-  }
- #endif
-#endif
-
-       return status;
-}
-       
-TPM_RESULT VTPM_Handle_New_DMI( const buffer_t *param_buf) {
-  
-  VTPM_DMI_RESOURCE *new_dmi=NULL;
-  TPM_RESULT status=TPM_FAIL;
-  BYTE type;
-  UINT32 dmi_id, domain_id, *dmi_id_key; 
-  int fh;
-
-#ifndef VTPM_MUTLI_VM
-  char dmi_id_str[11]; // UINT32s are up to 10 digits + NULL
-  struct stat file_info;
-#endif
-  
-  if (param_buf == NULL) { // Assume creation of Dom 0 control
-    type = 0;
-    domain_id = VTPM_CTL_DM;
-    dmi_id = VTPM_CTL_DM;
-  } else if (buffer_len(param_buf) != sizeof(BYTE) + sizeof(UINT32) *2) {
-    vtpmloginfo(VTPM_LOG_VTPM, "New DMI command wrong length: %d.\n", 
buffer_len(param_buf));
-    status = TPM_BAD_PARAMETER;
-    goto abort_egress;
-  } else {
-    BSG_UnpackList( param_buf->bytes, 3,
-                   BSG_TYPE_BYTE, &type,
-                   BSG_TYPE_UINT32, &domain_id,
-                   BSG_TYPE_UINT32,  &dmi_id);
-  }
-  
-  new_dmi = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, 
&dmi_id);
-  if (new_dmi == NULL) { 
-    vtpmloginfo(VTPM_LOG_VTPM, "Creating new DMI instance %d attached on 
domain %d.\n", dmi_id, domain_id);
-    // Brand New DMI. Initialize the persistent pieces
-    if ((new_dmi = (VTPM_DMI_RESOURCE *) malloc (sizeof(VTPM_DMI_RESOURCE))) 
== NULL) {
-      status = TPM_RESOURCES;
-      goto abort_egress;
-    }
-    memset(new_dmi, 0, sizeof(VTPM_DMI_RESOURCE));
-    new_dmi->dmi_id = dmi_id;
-    new_dmi->connected = FALSE;
-    
-    if ((dmi_id_key = (UINT32 *) malloc (sizeof(UINT32))) == NULL) {
-      status = TPM_RESOURCES;
-      goto abort_egress;
-    }      
-    *dmi_id_key = new_dmi->dmi_id;
-    
-    // install into map
-    if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, new_dmi)){
-      free(new_dmi);
-      free(dmi_id_key);
-      status = TPM_FAIL;
-      goto egress;
-    }
-    
-  } else 
-    vtpmloginfo(VTPM_LOG_VTPM, "Re-attaching DMI instance %d on domain %d 
.\n", dmi_id, domain_id);
-  
-  if (new_dmi->connected) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Attempt to re-attach, currently attached 
instance %d. Ignoring\n", dmi_id);
-    status = TPM_BAD_PARAMETER;
-    goto egress;
-  }
-  
-  // Initialize the Non-persistent pieces
-  new_dmi->dmi_domain_id = domain_id;
-  new_dmi->NVMLocation = NULL;
-  
-  new_dmi->TCSContext = 0;
-  TPMTRYRETURN( TCS_OpenContext(&new_dmi->TCSContext) );
-  
-  new_dmi->NVMLocation = (char *) malloc(11 + strlen(DMI_NVM_FILE));
-  sprintf(new_dmi->NVMLocation, DMI_NVM_FILE, (uint32_t) new_dmi->dmi_id);
-  
-  // Measure DMI
-  // FIXME: This will measure DMI. Until then use a fixed DMI_Measurement value
-  /*
-  fh = open(TPM_EMULATOR_PATH, O_RDONLY);
-  stat_ret = fstat(fh, &file_stat);
-  if (stat_ret == 0) 
-    dmi_size = file_stat.st_size;
-  else {
-       vtpmlogerror(VTPM_LOG_VTPM, "Could not open tpm_emulator!!\n");
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  dmi_buffer
-  */
-  memset(&new_dmi->DMI_measurement, 0xcc, sizeof(TPM_DIGEST));
-  
-#ifndef VTPM_MULTI_VM
-  if (dmi_id != VTPM_CTL_DM) {
-    // Create a pair of fifo pipes
-               if( (new_dmi->guest_tx_fname = (char *) malloc(11 + 
strlen(GUEST_TX_FIFO))) == NULL){ 
-                       status = TPM_RESOURCES;
-                       goto abort_egress;
-               }
-               sprintf(new_dmi->guest_tx_fname, GUEST_TX_FIFO, (uint32_t) 
dmi_id);
-    
-               if ((new_dmi->vtpm_tx_fname = (char *) malloc(11 + 
strlen(VTPM_TX_FIFO))) == NULL) {
-                       status = TPM_RESOURCES;
-                       goto abort_egress;
-               }
-               sprintf(new_dmi->vtpm_tx_fname, VTPM_TX_FIFO, (uint32_t) 
dmi_id);
-    
-    new_dmi->guest_tx_fh = -1;
-    new_dmi->vtpm_tx_fh= -1;
-    
-    if ( stat(new_dmi->guest_tx_fname, &file_info) == -1) {
-      if ( mkfifo(new_dmi->guest_tx_fname, S_IWUSR | S_IRUSR ) ){
-                               status = TPM_FAIL;
-                               goto abort_egress;
-      }
-    }
-            
-    if ( (fh = open(new_dmi->vtpm_tx_fname, O_RDWR)) == -1) {
-      if ( mkfifo(new_dmi->vtpm_tx_fname, S_IWUSR | S_IRUSR ) ) {
-       status = TPM_FAIL;
-       goto abort_egress;
-      }
-    }
-                
-    // Launch DMI
-    sprintf(dmi_id_str, "%d", (int) dmi_id);
-#ifdef MANUAL_DM_LAUNCH
-    vtpmlogerror(VTPM_LOG_VTPM, "FAKING starting vtpm with dmi=%s\n", 
dmi_id_str);
-    new_dmi->dmi_pid = 0;
-#else
-    pid_t pid = fork();
-    
-    if (pid == -1) {
-                       vtpmlogerror(VTPM_LOG_VTPM, "Could not fork to launch 
vtpm\n");
-                 status = TPM_RESOURCES;
-      goto abort_egress;
-               } else if (pid == 0) {
-                 if ( stat(new_dmi->NVMLocation, &file_info) == -1)
-                               execl (TPM_EMULATOR_PATH, "vtmpd", "clear", 
dmi_id_str, NULL);
-                       else 
-                               execl (TPM_EMULATOR_PATH, "vtpmd", "save", 
dmi_id_str, NULL);
-                       
-                       // Returning from these at all is an error.
-                       vtpmlogerror(VTPM_LOG_VTPM, "Could not exec to launch 
vtpm\n");
-    } else {
-      new_dmi->dmi_pid = pid;
-      vtpmloginfo(VTPM_LOG_VTPM, "Launching DMI on PID = %d\n", pid);
-    }
-#endif // MANUAL_DM_LAUNCH
-  }
-#else // VTPM_MUTLI_VM
-  // FIXME: Measure DMI through call to Measurement agent in platform.
-#endif 
-       
-  vtpm_globals->DMI_table_dirty = TRUE;
-  new_dmi->connected = TRUE;  
-  status=TPM_SUCCESS;
-  goto egress;
-  
- abort_egress:
-       close_dmi( new_dmi );
-       
- egress:
-  return status;
-}
-
-TPM_RESULT VTPM_Handle_Close_DMI( const buffer_t *param_buf) {
-  
-  TPM_RESULT status=TPM_FAIL;
-  VTPM_DMI_RESOURCE *dmi_res=NULL;
-  UINT32 dmi_id;
-  
-  if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.");
-    status = TPM_BAD_PARAMETER;
-    goto abort_egress;
-  }
-  
-  BSG_UnpackList( param_buf->bytes, 1,
-                 BSG_TYPE_UINT32, &dmi_id);
-  
-  vtpmloginfo(VTPM_LOG_VTPM, "Closing DMI %d.\n", dmi_id);
-  
-  dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, 
&dmi_id);
-  if (dmi_res == NULL ) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Trying to close nonexistent DMI.\n");
-    status = TPM_BAD_PARAMETER;
-    goto abort_egress;
-  }
-       
-       if (!dmi_res->connected) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Closing non-connected DMI.\n");
-    status = TPM_BAD_PARAMETER;
-    goto abort_egress;
-  }
-  
-  // Close Dmi
-       TPMTRYRETURN(close_dmi( dmi_res ));
-  
-  status=TPM_SUCCESS;    
-  goto egress;
-  
- abort_egress:
- egress:
-  
-  return status;
-}
-
-TPM_RESULT VTPM_Handle_Delete_DMI( const buffer_t *param_buf) {
-  
-  TPM_RESULT status=TPM_FAIL;
-  VTPM_DMI_RESOURCE *dmi_res=NULL;
-  UINT32 dmi_id;
-    
-  if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.\n");
-    status = TPM_BAD_PARAMETER;
-    goto abort_egress;
-  }
-  
-  BSG_UnpackList( param_buf->bytes, 1,
-                 BSG_TYPE_UINT32, &dmi_id);
-  
-  vtpmloginfo(VTPM_LOG_VTPM, "Deleting DMI %d.\n", dmi_id);    
-  
-  dmi_res = (VTPM_DMI_RESOURCE *) hashtable_remove(vtpm_globals->dmi_map, 
&dmi_id);
-  if (dmi_res == NULL) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Closing non-existent DMI.\n");
-    status = TPM_BAD_PARAMETER;
-    goto abort_egress;
-  }
-  
-       //TODO: Automatically delete file dmi_res->NVMLocation
-  
-  // Close DMI first
-  TPMTRYRETURN(close_dmi( dmi_res ));
-       free ( dmi_res );
-       
-  status=TPM_SUCCESS;    
-  goto egress;
-  
- abort_egress:
- egress:
-  
-  return status;
-}
+// ===================================================================
+// 
+// Copyright (c) 2005, Intel Corp.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without 
+// modification, are permitted provided that the following conditions 
+// are met:
+//
+//   * Redistributions of source code must retain the above copyright 
+//     notice, this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above 
+//     copyright notice, this list of conditions and the following 
+//     disclaimer in the documentation and/or other materials provided 
+//     with the distribution.
+//   * Neither the name of Intel Corporation nor the names of its 
+//     contributors may be used to endorse or promote products derived
+//     from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+// OF THE POSSIBILITY OF SUCH DAMAGE.
+// ===================================================================
+// 
+//   dmictl.c
+// 
+//     Functions for creating and destroying DMIs
+//
+// ==================================================================
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+
+#ifndef VTPM_MUTLI_VM
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <signal.h>
+ #include <wait.h>
+#endif
+
+#include "vtpmpriv.h"
+#include "bsg.h"
+#include "buffer.h"
+#include "log.h"
+#include "hashtable.h"
+#include "hashtable_itr.h"
+
+#define TPM_EMULATOR_PATH "/usr/bin/vtpmd"
+
+// close_dmi
+//
+//   Releases the per-connection state of a DMI: closes its TCS context,
+//   frees the NVM location string and marks the resource disconnected.
+//   Unless VTPM_MULTI_VM is defined it also frees both FIFO names and
+//   closes both FIFO descriptors and, unless MANUAL_DM_LAUNCH is defined,
+//   SIGKILLs and reaps the DMI process of every DMI except VTPM_CTL_DM.
+//
+//   The VTPM_DMI_RESOURCE itself is NOT freed here.
+//
+//   Every pointer/descriptor/pid released here is reset afterwards, so a
+//   second close_dmi() on the same resource cannot double-free, re-close a
+//   descriptor, or signal an unrelated process that reused the old pid.
+//
+//   Returns TPM_SUCCESS when dmi_res is NULL. Otherwise returns the result
+//   of TCS_CloseContext(), replaced by TPM_FAIL when the DMI process could
+//   not be reaped or no pid was recorded for it.
+TPM_RESULT close_dmi( VTPM_DMI_RESOURCE *dmi_res) {
+  TPM_RESULT status = TPM_FAIL;
+
+  if (dmi_res == NULL)
+    return TPM_SUCCESS;
+
+  status = TCS_CloseContext(dmi_res->TCSContext);
+  free ( dmi_res->NVMLocation );
+  dmi_res->NVMLocation = NULL;
+  dmi_res->connected = FALSE;
+
+#ifndef VTPM_MULTI_VM
+  free(dmi_res->guest_tx_fname);
+  dmi_res->guest_tx_fname = NULL;
+  free(dmi_res->vtpm_tx_fname);
+  dmi_res->vtpm_tx_fname = NULL;
+
+  // -1 marks "not open"; skip the pointless close() in that case.
+  if (dmi_res->guest_tx_fh >= 0)
+    close(dmi_res->guest_tx_fh);
+  dmi_res->guest_tx_fh = -1;
+
+  if (dmi_res->vtpm_tx_fh >= 0)
+    close(dmi_res->vtpm_tx_fh);
+  dmi_res->vtpm_tx_fh = -1;
+
+ #ifndef MANUAL_DM_LAUNCH
+  if (dmi_res->dmi_id != VTPM_CTL_DM) {
+    if (dmi_res->dmi_pid != 0) {
+      vtpmloginfo(VTPM_LOG_VTPM, "Killing dmi on pid %d.\n",
+                  dmi_res->dmi_pid);
+      if (kill(dmi_res->dmi_pid, SIGKILL) !=0) {
+        vtpmloginfo(VTPM_LOG_VTPM, "DMI on pid %d is already dead.\n",
+                    dmi_res->dmi_pid);
+      } else if (waitpid(dmi_res->dmi_pid, NULL, 0) != dmi_res->dmi_pid) {
+        vtpmlogerror(VTPM_LOG_VTPM, "DMI on pid %d failed to stop.\n",
+                     dmi_res->dmi_pid);
+        status = TPM_FAIL;
+      }
+      // The pid no longer belongs to this DMI; forget it so that it can
+      // never be signalled again after the kernel recycles it.
+      dmi_res->dmi_pid = 0;
+    } else {
+      vtpmlogerror(VTPM_LOG_VTPM,
+                   "Could not kill dmi because it's pid was 0.\n");
+      status = TPM_FAIL;
+    }
+  }
+ #endif
+#endif
+
+  return status;
+}
+       
+// VTPM_Handle_New_DMI
+//
+//   Creates a new DMI instance, or re-attaches an existing disconnected
+//   one, and marks it connected.
+//
+//   param_buf  NULL to create the Dom0 control DMI (VTPM_CTL_DM), otherwise
+//              a buffer holding exactly one BYTE (type) followed by two
+//              UINT32s (domain id, DMI id).
+//
+//   A DMI that is not yet in vtpm_globals->dmi_map is allocated and
+//   inserted. Then a TCS context is opened and the NVM file name is built.
+//   Unless VTPM_MULTI_VM is defined, every DMI other than VTPM_CTL_DM also
+//   gets its two FIFOs created and, unless MANUAL_DM_LAUNCH is defined, a
+//   vtpmd process forked and exec'ed for it.
+//
+//   Returns TPM_SUCCESS on success, TPM_BAD_PARAMETER for a malformed
+//   buffer or an already connected DMI, TPM_RESOURCES when an allocation or
+//   fork() fails, TPM_IOERROR when a FIFO cannot be created, TPM_FAIL when
+//   the map insert fails, or whatever status TPMTRYRETURN propagates.
+TPM_RESULT VTPM_Handle_New_DMI( const buffer_t *param_buf) {
+
+  VTPM_DMI_RESOURCE *new_dmi=NULL;
+  TPM_RESULT status=TPM_FAIL;
+  BYTE type;
+  // Zero-initialised: abort_egress logs dmi_id even when the buffer had the
+  // wrong length and nothing was unpacked.
+  UINT32 dmi_id=0, domain_id=0, *dmi_id_key;
+
+#ifndef VTPM_MULTI_VM
+  int fh;
+  char dmi_id_str[11]; // UINT32s are up to 10 digits + NULL
+  struct stat file_info;
+#endif
+
+  if (param_buf == NULL) { // Assume creation of Dom 0 control
+    type = 0;
+    domain_id = VTPM_CTL_DM;
+    dmi_id = VTPM_CTL_DM;
+  } else if (buffer_len(param_buf) != sizeof(BYTE) + sizeof(UINT32) *2) {
+    vtpmloginfo(VTPM_LOG_VTPM, "New DMI command wrong length: %d.\n",
+                buffer_len(param_buf));
+    status = TPM_BAD_PARAMETER;
+    goto abort_egress;
+  } else {
+    BSG_UnpackList( param_buf->bytes, 3,
+                    BSG_TYPE_BYTE, &type,
+                    BSG_TYPE_UINT32, &domain_id,
+                    BSG_TYPE_UINT32,  &dmi_id);
+  }
+
+  new_dmi = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map,
+                                                   &dmi_id);
+  if (new_dmi == NULL) {
+    vtpmloginfo(VTPM_LOG_VTPM,
+                "Creating new DMI instance %d attached on domain %d.\n",
+                dmi_id, domain_id);
+    // Brand New DMI. Initialize the persistent pieces
+    new_dmi = (VTPM_DMI_RESOURCE *) malloc (sizeof(VTPM_DMI_RESOURCE));
+    if (new_dmi == NULL) {
+      status = TPM_RESOURCES;
+      goto abort_egress;
+    }
+    memset(new_dmi, 0, sizeof(VTPM_DMI_RESOURCE));
+    new_dmi->dmi_id = dmi_id;
+    new_dmi->connected = FALSE;
+
+    if ((dmi_id_key = (UINT32 *) malloc (sizeof(UINT32))) == NULL) {
+      // Not in the map yet, so nothing else owns new_dmi: release it here
+      // and keep close_dmi() away from the zero-filled structure.
+      free(new_dmi);
+      new_dmi = NULL;
+      status = TPM_RESOURCES;
+      goto abort_egress;
+    }
+    *dmi_id_key = new_dmi->dmi_id;
+
+    // install into map
+    if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, new_dmi)){
+      free(new_dmi);
+      free(dmi_id_key);
+      status = TPM_FAIL;
+      goto egress;
+    }
+
+  } else
+    vtpmloginfo(VTPM_LOG_VTPM,
+                "Re-attaching DMI instance %d on domain %d .\n",
+                dmi_id, domain_id);
+
+  if (new_dmi->connected) {
+    vtpmlogerror(VTPM_LOG_VTPM,
+       "Attempt to re-attach, currently attached instance %d. Ignoring\n",
+       dmi_id);
+    status = TPM_BAD_PARAMETER;
+    goto egress;
+  }
+
+  // Initialize the Non-persistent pieces
+  new_dmi->dmi_domain_id = domain_id;
+  new_dmi->NVMLocation = NULL;
+#ifndef VTPM_MULTI_VM
+  // The DMI is not connected here, so any FIFO name or pid still stored
+  // in it is stale. Reset them now, so that an abort before the FIFO/launch
+  // section cannot make close_dmi() free or kill leftovers.
+  new_dmi->guest_tx_fname = NULL;
+  new_dmi->vtpm_tx_fname = NULL;
+  new_dmi->dmi_pid = 0;
+  if (dmi_id != VTPM_CTL_DM) {
+    // Mark the descriptors as "not open" before anything can fail. A
+    // freshly zeroed resource would otherwise make close_dmi() close fd 0.
+    // NOTE(review): the control DMI's descriptors are left untouched, as
+    // before - confirm how they are initialised elsewhere.
+    new_dmi->guest_tx_fh = -1;
+    new_dmi->vtpm_tx_fh = -1;
+  }
+#endif
+
+  new_dmi->TCSContext = 0;
+  TPMTRYRETURN( TCS_OpenContext(&new_dmi->TCSContext) );
+
+  new_dmi->NVMLocation = (char *) malloc(11 + strlen(DMI_NVM_FILE));
+  if (new_dmi->NVMLocation == NULL) {
+    status = TPM_RESOURCES;
+    goto abort_egress;
+  }
+  sprintf(new_dmi->NVMLocation, DMI_NVM_FILE, (uint32_t) new_dmi->dmi_id);
+
+  // Measure DMI
+  // FIXME: This will measure DMI. Until then use a fixed DMI_Measurement value
+  /*
+  fh = open(TPM_EMULATOR_PATH, O_RDONLY);
+  stat_ret = fstat(fh, &file_stat);
+  if (stat_ret == 0) 
+    dmi_size = file_stat.st_size;
+  else {
+      vtpmlogerror(VTPM_LOG_VTPM, "Could not open tpm_emulator!!\n");
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  dmi_buffer
+  */
+  memset(&new_dmi->DMI_measurement, 0xcc, sizeof(TPM_DIGEST));
+
+#ifndef VTPM_MULTI_VM
+  if (dmi_id != VTPM_CTL_DM) {
+    // Create a pair of fifo pipes
+    new_dmi->guest_tx_fname = (char *) malloc(11 + strlen(GUEST_TX_FIFO));
+    if (new_dmi->guest_tx_fname == NULL) {
+      status = TPM_RESOURCES;
+      goto abort_egress;
+    }
+    sprintf(new_dmi->guest_tx_fname, GUEST_TX_FIFO, (uint32_t) dmi_id);
+
+    new_dmi->vtpm_tx_fname = (char *) malloc(11 + strlen(VTPM_TX_FIFO));
+    if (new_dmi->vtpm_tx_fname == NULL) {
+      status = TPM_RESOURCES;
+      goto abort_egress;
+    }
+    sprintf(new_dmi->vtpm_tx_fname, VTPM_TX_FIFO, (uint32_t) dmi_id);
+
+    if ( stat(new_dmi->guest_tx_fname, &file_info) == -1) {
+      if ( mkfifo(new_dmi->guest_tx_fname, S_IWUSR | S_IRUSR ) ){
+        vtpmlogerror(VTPM_LOG_VTPM, "Failed to create dmi fifo.\n");
+        status = TPM_IOERROR;
+        goto abort_egress;
+      }
+    }
+
+    // NOTE(review): when this open() succeeds the descriptor is never
+    // stored or closed. It may be deliberate (keeping the FIFO open) or a
+    // leak - confirm before changing.
+    if ( (fh = open(new_dmi->vtpm_tx_fname, O_RDWR)) == -1) {
+      if ( mkfifo(new_dmi->vtpm_tx_fname, S_IWUSR | S_IRUSR ) ) {
+        vtpmlogerror(VTPM_LOG_VTPM, "Failed to create dmi fifo.\n");
+        status = TPM_IOERROR;
+        goto abort_egress;
+      }
+    }
+
+    // Launch DMI
+    // Printed unsigned and bounded: "%d" of an id above INT_MAX needs 12
+    // bytes and would overflow dmi_id_str.
+    snprintf(dmi_id_str, sizeof(dmi_id_str), "%u", (unsigned int) dmi_id);
+#ifdef MANUAL_DM_LAUNCH
+    vtpmlogerror(VTPM_LOG_VTPM, "FAKING starting vtpm with dmi=%s\n",
+                 dmi_id_str);
+    new_dmi->dmi_pid = 0;
+#else
+    pid_t pid = fork();
+
+    if (pid == -1) {
+      vtpmlogerror(VTPM_LOG_VTPM, "Could not fork to launch vtpm\n");
+      status = TPM_RESOURCES;
+      goto abort_egress;
+    } else if (pid == 0) {
+      // Child: start from a clear state when no NVM file exists yet,
+      // otherwise resume from the saved one.
+      if ( stat(new_dmi->NVMLocation, &file_info) == -1)
+        execl (TPM_EMULATOR_PATH, "vtpmd", "clear", dmi_id_str,
+               (char *) NULL);
+      else
+        execl (TPM_EMULATOR_PATH, "vtpmd", "save", dmi_id_str,
+               (char *) NULL);
+
+      // Returning from these at all is an error.
+      vtpmlogerror(VTPM_LOG_VTPM, "Could not exec to launch vtpm\n");
+      // Terminate the child here; falling through would leave a second
+      // copy of the manager running the rest of this function.
+      _exit(1);
+    } else {
+      new_dmi->dmi_pid = pid;
+      vtpmloginfo(VTPM_LOG_VTPM, "Launching DMI on PID = %d\n", pid);
+    }
+#endif // MANUAL_DM_LAUNCH
+  }
+#else // VTPM_MULTI_VM
+  // FIXME: Measure DMI through call to Measurement agent in platform.
+#endif
+
+  vtpm_globals->DMI_table_dirty = TRUE;
+  new_dmi->connected = TRUE;
+  status=TPM_SUCCESS;
+  goto egress;
+
+ abort_egress:
+  vtpmlogerror(VTPM_LOG_VTPM,
+       "Failed to create DMI id=%d due to status=%s. Cleaning.\n",
+       dmi_id, tpm_get_error_name(status));
+  close_dmi( new_dmi );
+
+ egress:
+  return status;
+}
+
+// VTPM_Handle_Close_DMI
+//
+//   Disconnects a DMI but leaves its entry in vtpm_globals->dmi_map.
+//
+//   param_buf  buffer holding exactly one UINT32, the id of the DMI.
+//
+//   Returns TPM_SUCCESS on success, TPM_BAD_PARAMETER when the buffer is
+//   missing or has the wrong size, when no DMI with that id is in the map,
+//   or when the DMI is not connected. A close_dmi() failure is propagated
+//   through TPMTRYRETURN.
+TPM_RESULT VTPM_Handle_Close_DMI( const buffer_t *param_buf) {
+
+  TPM_RESULT status=TPM_FAIL;
+  VTPM_DMI_RESOURCE *dmi_res=NULL;
+  UINT32 dmi_id;
+
+  if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
+    // Newline added: every other log message in this file ends with one.
+    vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.\n");
+    status = TPM_BAD_PARAMETER;
+    goto abort_egress;
+  }
+
+  BSG_UnpackList( param_buf->bytes, 1,
+                  BSG_TYPE_UINT32, &dmi_id);
+
+  vtpmloginfo(VTPM_LOG_VTPM, "Closing DMI %d.\n", dmi_id);
+
+  dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map,
+                                                   &dmi_id);
+  if (dmi_res == NULL ) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Trying to close nonexistent DMI.\n");
+    status = TPM_BAD_PARAMETER;
+    goto abort_egress;
+  }
+
+  if (!dmi_res->connected) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Closing non-connected DMI.\n");
+    status = TPM_BAD_PARAMETER;
+    goto abort_egress;
+  }
+
+  // Close Dmi
+  TPMTRYRETURN(close_dmi( dmi_res ));
+
+  status=TPM_SUCCESS;
+  goto egress;
+
+ abort_egress:
+ egress:
+
+  return status;
+}
+
+// VTPM_Handle_Delete_DMI
+//
+//   Removes a DMI from vtpm_globals->dmi_map, closes it if it is still
+//   connected, and frees its VTPM_DMI_RESOURCE.
+//
+//   param_buf  buffer holding exactly one UINT32, the id of the DMI.
+//
+//   Returns TPM_SUCCESS on success, TPM_BAD_PARAMETER when the buffer is
+//   missing or has the wrong size or when no DMI with that id is in the
+//   map. A close_dmi() failure is propagated through TPMTRYRETURN; the
+//   resource is freed in that case too, because it has already left the
+//   map and nothing else refers to it.
+TPM_RESULT VTPM_Handle_Delete_DMI( const buffer_t *param_buf) {
+
+  TPM_RESULT status=TPM_FAIL;
+  VTPM_DMI_RESOURCE *dmi_res=NULL;
+  UINT32 dmi_id;
+
+  if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Deleting DMI has bad size.\n");
+    status = TPM_BAD_PARAMETER;
+    goto abort_egress;
+  }
+
+  BSG_UnpackList( param_buf->bytes, 1,
+                  BSG_TYPE_UINT32, &dmi_id);
+
+  vtpmloginfo(VTPM_LOG_VTPM, "Deleting DMI %d.\n", dmi_id);
+
+  dmi_res = (VTPM_DMI_RESOURCE *) hashtable_remove(vtpm_globals->dmi_map,
+                                                   &dmi_id);
+  if (dmi_res == NULL) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Deleting non-existent DMI.\n");
+    status = TPM_BAD_PARAMETER;
+    goto abort_egress;
+  }
+
+  //TODO: Automatically delete file dmi_res->NVMLocation
+
+  // Close DMI first - but only while it is still connected. A disconnected
+  // DMI has already been through close_dmi() (or never acquired anything),
+  // and closing it again would release the same resources twice.
+  if (dmi_res->connected) {
+    TPMTRYRETURN(close_dmi( dmi_res ));
+  }
+
+  status=TPM_SUCCESS;
+  goto egress;
+
+ abort_egress:
+ egress:
+  // dmi_res is non-NULL only after it was removed from the map, so it is
+  // owned here on both the success and the failure path (free(NULL) is a
+  // no-op for the early aborts).
+  free ( dmi_res );
+
+  return status;
+}
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/manager/securestorage.c
--- a/tools/vtpm_manager/manager/securestorage.c        Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/manager/securestorage.c        Thu Sep 22 17:42:01 2005
@@ -1,401 +1,401 @@
-// ===================================================================
-// 
-// Copyright (c) 2005, Intel Corp.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without 
-// modification, are permitted provided that the following conditions 
-// are met:
-//
-//   * Redistributions of source code must retain the above copyright 
-//     notice, this list of conditions and the following disclaimer.
-//   * Redistributions in binary form must reproduce the above 
-//     copyright notice, this list of conditions and the following 
-//     disclaimer in the documentation and/or other materials provided 
-//     with the distribution.
-//   * Neither the name of Intel Corporation nor the names of its 
-//     contributors may be used to endorse or promote products derived
-//     from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-// ===================================================================
-// 
-// securestorage.c
-// 
-//  Functions regarding securely storing DMI secrets.
-//
-// ==================================================================
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <string.h>
-
-#include "tcg.h"
-#include "vtpm_manager.h"
-#include "vtpmpriv.h"
-#include "vtsp.h"
-#include "bsg.h"
-#include "crypto.h"
-#include "hashtable.h"
-#include "hashtable_itr.h"
-#include "buffer.h"
-#include "log.h"
-
-TPM_RESULT VTPM_Handle_Save_NVM(VTPM_DMI_RESOURCE *myDMI, 
-                               const buffer_t *inbuf, 
-                               buffer_t *outbuf) {
-  
-  TPM_RESULT status = TPM_SUCCESS;
-  symkey_t    symkey;
-  buffer_t    state_cipher = NULL_BUF,
-              symkey_cipher = NULL_BUF;
-  int fh;
-  long bytes_written;
-  BYTE *sealed_NVM=NULL;
-  UINT32 sealed_NVM_size, i;
-  struct pack_constbuf_t symkey_cipher32, state_cipher32;
-  
-  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Save_NVMing[%d]: 0x", buffer_len(inbuf));
-  for (i=0; i< buffer_len(inbuf); i++)
-    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", inbuf->bytes[i]);
-  vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-  
-  // Generate a sym key and encrypt state with it
-  TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_genkey (&symkey) );
-  TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_encrypt (&symkey, inbuf, 
&state_cipher) );
-  
-  // Encrypt symmetric key
-  TPMTRYRETURN( VTSP_Bind(    &vtpm_globals->storageKey, 
-                             &symkey.key, 
-                             &symkey_cipher) );
-  
-  // Create output blob: symkey_size + symkey_cipher + state_cipher_size + 
state_cipher
-  
-  symkey_cipher32.size = buffer_len(&symkey_cipher);
-  symkey_cipher32.data = symkey_cipher.bytes;
-  
-  state_cipher32.size = buffer_len(&state_cipher);
-  state_cipher32.data = state_cipher.bytes;
-  
-  sealed_NVM = (BYTE *) malloc( 2 * sizeof(UINT32) + symkey_cipher32.size + 
state_cipher32.size);
-  
-  sealed_NVM_size = BSG_PackList(sealed_NVM, 2,
-                                BSG_TPM_SIZE32_DATA, &symkey_cipher32,
-                                BSG_TPM_SIZE32_DATA, &state_cipher32);
-  
-  // Mark DMI Table so new save state info will get pushed to disk on return.
-  vtpm_globals->DMI_table_dirty = TRUE;
-  
-  // Write sealed blob off disk from NVMLocation
-  // TODO: How to properly return from these. Do we care if we return failure
-  //       after writing the file? We can't get the old one back.
-  // TODO: Backup old file and try and recover that way.
-  fh = open(myDMI->NVMLocation, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
-  if ( (bytes_written = write(fh, sealed_NVM, sealed_NVM_size) ) != (long) 
sealed_NVM_size) {
-    vtpmlogerror(VTPM_LOG_VTPM, "We just overwrote a DMI_NVM and failed to 
finish. %ld/%ld bytes.\n", bytes_written, (long)sealed_NVM_size);
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  close(fh);
-  
-  Crypto_SHA1Full (sealed_NVM, sealed_NVM_size, (BYTE *) 
&myDMI->NVM_measurement);   
-  
-  vtpmloginfo(VTPM_LOG_VTPM, "Saved %d bytes of E(symkey) + %d bytes of 
E(NVM)\n", buffer_len(&symkey_cipher), buffer_len(&state_cipher));
-  goto egress;
-  
- abort_egress:
-  vtpmlogerror(VTPM_LOG_VTPM, "Failed to load NVM\n.");
-  
- egress:
-  
-  buffer_free ( &state_cipher);
-  buffer_free ( &symkey_cipher);
-  free(sealed_NVM);
-  Crypto_symcrypto_freekey (&symkey);
-  
-  return status;
-}
-
-
-/* inbuf = null outbuf = sealed blob size, sealed blob.*/
-TPM_RESULT VTPM_Handle_Load_NVM(VTPM_DMI_RESOURCE *myDMI, 
-                               const buffer_t *inbuf, 
-                               buffer_t *outbuf) {
-  
-  TPM_RESULT status = TPM_SUCCESS;
-  symkey_t    symkey;
-  buffer_t    state_cipher = NULL_BUF, 
-              symkey_clear = NULL_BUF, 
-              symkey_cipher = NULL_BUF;
-  struct pack_buf_t symkey_cipher32, state_cipher32;
-  
-  UINT32 sealed_NVM_size;
-  BYTE *sealed_NVM = NULL;
-  long fh_size;
-  int fh, stat_ret, i;
-  struct stat file_stat;
-  TPM_DIGEST sealedNVMHash;
-  
-  memset(&symkey, 0, sizeof(symkey_t));
-  
-  if (myDMI->NVMLocation == NULL) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Unable to load NVM because the file name 
NULL.\n");
-    status = TPM_AUTHFAIL;
-    goto abort_egress;
-  }
-  
-  //Read sealed blob off disk from NVMLocation
-  fh = open(myDMI->NVMLocation, O_RDONLY);
-  stat_ret = fstat(fh, &file_stat);
-  if (stat_ret == 0) 
-    fh_size = file_stat.st_size;
-  else {
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  
-  sealed_NVM = (BYTE *) malloc(fh_size);
-  if (read(fh, sealed_NVM, fh_size) != fh_size) {
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  close(fh);
-  
-  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Load_NVMing[%ld]: 0x", fh_size);
-  for (i=0; i< fh_size; i++)
-    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", sealed_NVM[i]);
-  vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-  
-  sealed_NVM_size = BSG_UnpackList(sealed_NVM, 2,
-                                  BSG_TPM_SIZE32_DATA, &symkey_cipher32,
-                                  BSG_TPM_SIZE32_DATA, &state_cipher32);
-  
-  TPMTRYRETURN( buffer_init_convert (&symkey_cipher, 
-                                    symkey_cipher32.size, 
-                                    symkey_cipher32.data) );
-  
-  TPMTRYRETURN( buffer_init_convert (&state_cipher, 
-                                    state_cipher32.size, 
-                                    state_cipher32.data) );
-  
-  Crypto_SHA1Full(sealed_NVM, sealed_NVM_size, (BYTE *) &sealedNVMHash);    
-  
-  // Verify measurement of sealed blob.
-  if (memcmp(&sealedNVMHash, &myDMI->NVM_measurement, sizeof(TPM_DIGEST)) ) {
-    vtpmlogerror(VTPM_LOG_VTPM, "VTPM LoadNVM NVM measurement check 
failed.\n");
-    vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Correct hash: ");
-    for (i=0; i< sizeof(TPM_DIGEST); i++)
-      vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", 
((BYTE*)&myDMI->NVM_measurement)[i]);
-    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-
-    vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Measured hash: ");
-    for (i=0; i< sizeof(TPM_DIGEST); i++)
-      vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", ((BYTE*)&sealedNVMHash)[i]);
-    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-    
-    status = TPM_AUTHFAIL;
-    goto abort_egress;
-  }
-  
-  // Decrypt Symmetric Key
-  TPMTRYRETURN( VTSP_Unbind(  myDMI->TCSContext,
-                             vtpm_globals->storageKeyHandle,
-                             &symkey_cipher,
-                             (const 
TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth,
-                             &symkey_clear,
-                             &(vtpm_globals->keyAuth) ) );
-  
-  // create symmetric key using saved bits
-  Crypto_symcrypto_initkey (&symkey, &symkey_clear);
-  
-  // Decrypt State
-  TPMTRY(TPM_DECRYPT_ERROR, Crypto_symcrypto_decrypt (&symkey, &state_cipher, 
outbuf) );
-  
-  goto egress;
-  
- abort_egress:
-  vtpmlogerror(VTPM_LOG_VTPM, "Failed to load NVM\n.");
-  
- egress:
-  
-  buffer_free ( &state_cipher);
-  buffer_free ( &symkey_clear);
-  buffer_free ( &symkey_cipher);
-  free( sealed_NVM );
-  Crypto_symcrypto_freekey (&symkey);
-  
-  return status;
-}
-
-TPM_RESULT VTPM_SaveService(void) {
-  TPM_RESULT status=TPM_SUCCESS;
-  int fh, dmis=-1;
-  
-  BYTE *flat_global;
-  int flat_global_size, bytes_written;
-  UINT32 storageKeySize = buffer_len(&vtpm_globals->storageKeyWrap);
-  struct pack_buf_t storage_key_pack = {storageKeySize, 
vtpm_globals->storageKeyWrap.bytes};
-  
-  struct hashtable_itr *dmi_itr;
-  VTPM_DMI_RESOURCE *dmi_res;
-  
-  UINT32 flat_global_full_size;
-  
-  // Global Values needing to be saved
-  flat_global_full_size = 3*sizeof(TPM_DIGEST) + // Auths
-    sizeof(UINT32) +       // storagekeysize
-    storageKeySize +       // storage key
-    hashtable_count(vtpm_globals->dmi_map) * // num DMIS
-    (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
-  
-  
-  flat_global = (BYTE *) malloc( flat_global_full_size);
-  
-  flat_global_size = BSG_PackList(flat_global, 4,
-                                 BSG_TPM_AUTHDATA, 
&vtpm_globals->owner_usage_auth,
-                                 BSG_TPM_AUTHDATA, 
&vtpm_globals->srk_usage_auth,
-                                 BSG_TPM_SECRET,   
&vtpm_globals->storage_key_usage_auth,
-                                 BSG_TPM_SIZE32_DATA, &storage_key_pack);
-  
-  // Per DMI values to be saved
-  if (hashtable_count(vtpm_globals->dmi_map) > 0) {
-    
-    dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
-    do {
-      dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
-      dmis++;
-
-      // No need to save dmi0.
-      if (dmi_res->dmi_id == 0)        
-       continue;
-      
-      
-      flat_global_size += BSG_PackList( flat_global + flat_global_size, 3,
-                                       BSG_TYPE_UINT32, &dmi_res->dmi_id,
-                                       BSG_TPM_DIGEST, 
&dmi_res->NVM_measurement,
-                                       BSG_TPM_DIGEST, 
&dmi_res->DMI_measurement);
-      
-    } while (hashtable_iterator_advance(dmi_itr));
-  }
-  
-  //FIXME: Once we have a way to protect a TPM key, we should use it to 
-  //       encrypt this blob. BUT, unless there is a way to ensure the key is
-  //       not used by other apps, this encryption is useless.
-  fh = open(STATE_FILE, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
-  if (fh == -1) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Unable to open %s file for write.\n", 
STATE_FILE);
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  
-  if ( (bytes_written = write(fh, flat_global, flat_global_size)) != 
flat_global_size ) {
-    vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data. %d/%d bytes 
written.\n", bytes_written, flat_global_size);
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  vtpm_globals->DMI_table_dirty = FALSE; 
-  
-  goto egress;
-  
- abort_egress:
- egress:
-  
-  free(flat_global);
-  close(fh);
-  
-  vtpmloginfo(VTPM_LOG_VTPM, "Saved VTPM Service state (status = %d, dmis = 
%d)\n", (int) status, dmis);
-  return status;
-}
-
-TPM_RESULT VTPM_LoadService(void) {
-  
-  TPM_RESULT status=TPM_SUCCESS;
-  int fh, stat_ret, dmis=0;
-  long fh_size = 0, step_size;
-  BYTE *flat_global=NULL;
-  struct pack_buf_t storage_key_pack;
-  UINT32 *dmi_id_key;
-  
-  VTPM_DMI_RESOURCE *dmi_res;
-  struct stat file_stat;
-  
-  fh = open(STATE_FILE, O_RDONLY );
-  stat_ret = fstat(fh, &file_stat);
-  if (stat_ret == 0) 
-    fh_size = file_stat.st_size;
-  else {
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  
-  flat_global = (BYTE *) malloc(fh_size);
-  
-  if ((long) read(fh, flat_global, fh_size) != fh_size ) {
-    status = TPM_IOERROR;
-    goto abort_egress;
-  }
-  
-  // Global Values needing to be saved
-  step_size = BSG_UnpackList( flat_global, 4,
-                             BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
-                             BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
-                             BSG_TPM_SECRET,   
&vtpm_globals->storage_key_usage_auth,
-                             BSG_TPM_SIZE32_DATA, &storage_key_pack);
-  
-  TPMTRYRETURN(buffer_init(&vtpm_globals->storageKeyWrap, 0, 0) );
-  TPMTRYRETURN(buffer_append_raw(&vtpm_globals->storageKeyWrap, 
storage_key_pack.size, storage_key_pack.data) );
-  
-  // Per DMI values to be saved
-  while ( step_size < fh_size ){
-    if (fh_size - step_size < (long) (sizeof(UINT32) + 2*sizeof(TPM_DIGEST))) {
-      vtpmlogerror(VTPM_LOG_VTPM, "Encountered %ld extra bytes at end of 
manager state.\n", fh_size-step_size);
-      step_size = fh_size;
-    } else {
-      dmi_res = (VTPM_DMI_RESOURCE *) malloc(sizeof(VTPM_DMI_RESOURCE));
-      dmis++;
-      
-      dmi_res->connected = FALSE;
-      
-      step_size += BSG_UnpackList(flat_global + step_size, 3,
-                                 BSG_TYPE_UINT32, &dmi_res->dmi_id, 
-                                 BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
-                                 BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
-      
-      // install into map
-      dmi_id_key = (UINT32 *) malloc (sizeof(UINT32));
-      *dmi_id_key = dmi_res->dmi_id;
-      if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, dmi_res)) {
-       status = TPM_FAIL;
-       goto abort_egress;
-      }
-      
-    }
-    
-  }
-  
-  goto egress;
-  
- abort_egress:
-  vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data\n");
- egress:
-  
-  if (flat_global)
-    free(flat_global);
-  close(fh);
-  
-  vtpmloginfo(VTPM_LOG_VTPM, "Previously saved state reloaded (status = %d, 
dmis = %d).\n", (int) status, dmis);
-  return status;
-}
+// ===================================================================
+// 
+// Copyright (c) 2005, Intel Corp.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without 
+// modification, are permitted provided that the following conditions 
+// are met:
+//
+//   * Redistributions of source code must retain the above copyright 
+//     notice, this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above 
+//     copyright notice, this list of conditions and the following 
+//     disclaimer in the documentation and/or other materials provided 
+//     with the distribution.
+//   * Neither the name of Intel Corporation nor the names of its 
+//     contributors may be used to endorse or promote products derived
+//     from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+// OF THE POSSIBILITY OF SUCH DAMAGE.
+// ===================================================================
+// 
+// securestorage.c
+// 
+//  Functions regarding securely storing DMI secrets.
+//
+// ==================================================================
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "tcg.h"
+#include "vtpm_manager.h"
+#include "vtpmpriv.h"
+#include "vtsp.h"
+#include "bsg.h"
+#include "crypto.h"
+#include "hashtable.h"
+#include "hashtable_itr.h"
+#include "buffer.h"
+#include "log.h"
+
+// Seal a DMI's NVM state and write it to the file named by myDMI->NVMLocation.
+//
+// The state in inbuf is encrypted with a freshly generated symmetric key, the
+// symmetric key is bound with vtpm_globals->storageKey, and both ciphertexts
+// are packed as two size-prefixed fields into one blob. After a successful
+// write the SHA1 of that blob is stored in myDMI->NVM_measurement.
+//
+// myDMI  - DMI whose NVMLocation and NVM_measurement are used/updated.
+// inbuf  - cleartext state to save.
+// outbuf - not used by this function.
+//
+// Returns TPM_SUCCESS, TPM_ENCRYPT_ERROR if the symmetric crypto fails,
+// TPM_IOERROR if the file cannot be opened or fully written, TPM_FAIL if the
+// blob cannot be allocated, or the status propagated from VTSP_Bind.
+TPM_RESULT VTPM_Handle_Save_NVM(VTPM_DMI_RESOURCE *myDMI, 
+                               const buffer_t *inbuf, 
+                               buffer_t *outbuf) {
+  
+  TPM_RESULT status = TPM_SUCCESS;
+  symkey_t    symkey;
+  buffer_t    state_cipher = NULL_BUF,
+              symkey_cipher = NULL_BUF;
+  int fh = -1;
+  long bytes_written;
+  BYTE *sealed_NVM=NULL;
+  UINT32 sealed_NVM_size, i;
+  struct pack_constbuf_t symkey_cipher32, state_cipher32;
+  
+  // Zero the key up front (as VTPM_Handle_Load_NVM does) so the cleanup at
+  // egress never operates on uninitialised memory if key generation fails.
+  memset(&symkey, 0, sizeof(symkey_t));
+  
+  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Save_NVMing[%d]: 0x", buffer_len(inbuf));
+  for (i=0; i< buffer_len(inbuf); i++)
+    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", inbuf->bytes[i]);
+  vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+  
+  // Generate a sym key and encrypt state with it
+  TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_genkey (&symkey) );
+  TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_encrypt (&symkey, inbuf, &state_cipher) );
+  
+  // Encrypt symmetric key
+  TPMTRYRETURN( VTSP_Bind(    &vtpm_globals->storageKey, 
+                             &symkey.key, 
+                             &symkey_cipher) );
+  
+  // Create output blob: symkey_size + symkey_cipher + state_cipher_size + state_cipher
+  
+  symkey_cipher32.size = buffer_len(&symkey_cipher);
+  symkey_cipher32.data = symkey_cipher.bytes;
+  
+  state_cipher32.size = buffer_len(&state_cipher);
+  state_cipher32.data = state_cipher.bytes;
+  
+  sealed_NVM = (BYTE *) malloc( 2 * sizeof(UINT32) + symkey_cipher32.size + state_cipher32.size);
+  if (sealed_NVM == NULL) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Unable to allocate sealed NVM blob.\n");
+    status = TPM_FAIL;
+    goto abort_egress;
+  }
+  
+  sealed_NVM_size = BSG_PackList(sealed_NVM, 2,
+                                BSG_TPM_SIZE32_DATA, &symkey_cipher32,
+                                BSG_TPM_SIZE32_DATA, &state_cipher32);
+  
+  // Mark DMI Table so new save state info will get pushed to disk on return.
+  vtpm_globals->DMI_table_dirty = TRUE;
+  
+  // Write sealed blob to disk at NVMLocation
+  // TODO: How to properly return from these. Do we care if we return failure
+  //       after writing the file? We can't get the old one back.
+  // TODO: Backup old file and try and recover that way.
+  // O_TRUNC: a blob shorter than the previous one must not leave stale bytes
+  // of the old blob at the end of the file.
+  fh = open(myDMI->NVMLocation, O_WRONLY | O_CREAT | O_TRUNC, S_IREAD | S_IWRITE);
+  if (fh == -1) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Unable to open DMI NVM file for write.\n");
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  if ( (bytes_written = write(fh, sealed_NVM, sealed_NVM_size) ) != (long) sealed_NVM_size) {
+    vtpmlogerror(VTPM_LOG_VTPM, "We just overwrote a DMI_NVM and failed to finish. %ld/%ld bytes.\n", bytes_written, (long)sealed_NVM_size);
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  // Only record the new measurement once the blob is completely on disk.
+  Crypto_SHA1Full (sealed_NVM, sealed_NVM_size, (BYTE *) &myDMI->NVM_measurement);   
+  
+  vtpmloginfo(VTPM_LOG_VTPM, "Saved %d bytes of E(symkey) + %d bytes of E(NVM)\n", buffer_len(&symkey_cipher), buffer_len(&state_cipher));
+  goto egress;
+  
+ abort_egress:
+  vtpmlogerror(VTPM_LOG_VTPM, "Failed to save NVM.\n");
+  
+ egress:
+  
+  // The descriptor is closed here so that error paths do not leak it.
+  if (fh != -1)
+    close(fh);
+  buffer_free ( &state_cipher);
+  buffer_free ( &symkey_cipher);
+  free(sealed_NVM);
+  Crypto_symcrypto_freekey (&symkey);
+  
+  return status;
+}
+
+
+/* Read a DMI's sealed NVM blob from myDMI->NVMLocation, check it against
+ * myDMI->NVM_measurement and decrypt it into outbuf.
+ *
+ * The blob is expected to hold two size-prefixed fields: the bound symmetric
+ * key followed by the encrypted state (the layout VTPM_Handle_Save_NVM writes).
+ *
+ * myDMI  - DMI whose NVMLocation, NVM_measurement and TCSContext are used.
+ * inbuf  - not used by this function.
+ * outbuf - receives the decrypted state.
+ *
+ * Returns TPM_SUCCESS, TPM_AUTHFAIL if no file name is set or the measurement
+ * does not match, TPM_IOERROR if the file cannot be read or is malformed,
+ * TPM_FAIL if the read buffer cannot be allocated, TPM_DECRYPT_ERROR if the
+ * state cannot be decrypted, or a status propagated through TPMTRYRETURN.
+ */
+TPM_RESULT VTPM_Handle_Load_NVM(VTPM_DMI_RESOURCE *myDMI, 
+                               const buffer_t *inbuf, 
+                               buffer_t *outbuf) {
+  
+  TPM_RESULT status = TPM_SUCCESS;
+  symkey_t    symkey;
+  buffer_t    state_cipher = NULL_BUF, 
+              symkey_clear = NULL_BUF, 
+              symkey_cipher = NULL_BUF;
+  struct pack_buf_t symkey_cipher32, state_cipher32;
+  
+  UINT32 sealed_NVM_size;
+  BYTE *sealed_NVM = NULL;
+  long fh_size, i;
+  int fh = -1, stat_ret;
+  struct stat file_stat;
+  TPM_DIGEST sealedNVMHash;
+  
+  // Zeroed so the cleanup at egress is safe on the early-error paths.
+  memset(&symkey, 0, sizeof(symkey_t));
+  
+  if (myDMI->NVMLocation == NULL) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Unable to load NVM because the file name NULL.\n");
+    status = TPM_AUTHFAIL;
+    goto abort_egress;
+  }
+  
+  //Read sealed blob off disk from NVMLocation
+  fh = open(myDMI->NVMLocation, O_RDONLY);
+  if (fh == -1) {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  stat_ret = fstat(fh, &file_stat);
+  if (stat_ret == 0) 
+    fh_size = file_stat.st_size;
+  else {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  // A valid blob carries at least the two UINT32 size prefixes.
+  if (fh_size < (long) (2 * sizeof(UINT32))) {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  sealed_NVM = (BYTE *) malloc(fh_size);
+  if (sealed_NVM == NULL) {
+    status = TPM_FAIL;
+    goto abort_egress;
+  }
+  
+  if (read(fh, sealed_NVM, fh_size) != fh_size) {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Load_NVMing[%ld]: 0x", fh_size);
+  for (i=0; i< fh_size; i++)
+    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", sealed_NVM[i]);
+  vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+  
+  sealed_NVM_size = BSG_UnpackList(sealed_NVM, 2,
+                                  BSG_TPM_SIZE32_DATA, &symkey_cipher32,
+                                  BSG_TPM_SIZE32_DATA, &state_cipher32);
+  
+  TPMTRYRETURN( buffer_init_convert (&symkey_cipher, 
+                                    symkey_cipher32.size, 
+                                    symkey_cipher32.data) );
+  
+  TPMTRYRETURN( buffer_init_convert (&state_cipher, 
+                                    state_cipher32.size, 
+                                    state_cipher32.data) );
+  
+  // The size fields come from the file; refuse to hash past the end of what
+  // was actually read if they claim more data than the file holds.
+  if ((long) sealed_NVM_size > fh_size) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Sealed NVM blob is larger than its file.\n");
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  Crypto_SHA1Full(sealed_NVM, sealed_NVM_size, (BYTE *) &sealedNVMHash);    
+  
+  // Verify measurement of sealed blob.
+  if (memcmp(&sealedNVMHash, &myDMI->NVM_measurement, sizeof(TPM_DIGEST)) ) {
+    vtpmlogerror(VTPM_LOG_VTPM, "VTPM LoadNVM NVM measurement check failed.\n");
+    vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Correct hash: ");
+    for (i=0; i< (long) sizeof(TPM_DIGEST); i++)
+      vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", ((BYTE*)&myDMI->NVM_measurement)[i]);
+    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
+    vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Measured hash: ");
+    for (i=0; i< (long) sizeof(TPM_DIGEST); i++)
+      vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", ((BYTE*)&sealedNVMHash)[i]);
+    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+    
+    status = TPM_AUTHFAIL;
+    goto abort_egress;
+  }
+  
+  // Decrypt Symmetric Key
+  TPMTRYRETURN( VTSP_Unbind(  myDMI->TCSContext,
+                             vtpm_globals->storageKeyHandle,
+                             &symkey_cipher,
+                             (const TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth,
+                             &symkey_clear,
+                             &(vtpm_globals->keyAuth) ) );
+  
+  // create symmetric key using saved bits
+  Crypto_symcrypto_initkey (&symkey, &symkey_clear);
+  
+  // Decrypt State
+  TPMTRY(TPM_DECRYPT_ERROR, Crypto_symcrypto_decrypt (&symkey, &state_cipher, outbuf) );
+  
+  goto egress;
+  
+ abort_egress:
+  vtpmlogerror(VTPM_LOG_VTPM, "Failed to load NVM.\n");
+  
+ egress:
+  
+  // The descriptor is closed here so that error paths do not leak it.
+  if (fh != -1)
+    close(fh);
+  buffer_free ( &state_cipher);
+  buffer_free ( &symkey_clear);
+  buffer_free ( &symkey_cipher);
+  free( sealed_NVM );
+  Crypto_symcrypto_freekey (&symkey);
+  
+  return status;
+}
+
+// Flatten the manager's persistent state and write it to STATE_FILE.
+//
+// The file holds the three usage auths and the size-prefixed wrapped storage
+// key, followed by one record (dmi_id, NVM_measurement, DMI_measurement) for
+// every DMI in vtpm_globals->dmi_map except dmi 0. On success
+// vtpm_globals->DMI_table_dirty is cleared.
+//
+// Returns TPM_SUCCESS, TPM_IOERROR if the file cannot be opened or fully
+// written, or TPM_FAIL if a working buffer or iterator cannot be allocated.
+TPM_RESULT VTPM_SaveService(void) {
+  TPM_RESULT status=TPM_SUCCESS;
+  int fh = -1, dmis = 0;
+  
+  BYTE *flat_global = NULL;
+  int flat_global_size, bytes_written;
+  UINT32 storageKeySize = buffer_len(&vtpm_globals->storageKeyWrap);
+  struct pack_buf_t storage_key_pack = {storageKeySize, vtpm_globals->storageKeyWrap.bytes};
+  
+  struct hashtable_itr *dmi_itr = NULL;
+  VTPM_DMI_RESOURCE *dmi_res;
+  
+  UINT32 flat_global_full_size;
+  
+  // Global Values needing to be saved
+  flat_global_full_size = 3*sizeof(TPM_DIGEST) + // Auths
+    sizeof(UINT32) +       // storagekeysize
+    storageKeySize +       // storage key
+    hashtable_count(vtpm_globals->dmi_map) * // num DMIS
+    (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info
+  
+  
+  flat_global = (BYTE *) malloc( flat_global_full_size);
+  if (flat_global == NULL) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Unable to allocate buffer for service state.\n");
+    status = TPM_FAIL;
+    goto abort_egress;
+  }
+  
+  flat_global_size = BSG_PackList(flat_global, 4,
+                                 BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+                                 BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
+                                 BSG_TPM_SECRET,   &vtpm_globals->storage_key_usage_auth,
+                                 BSG_TPM_SIZE32_DATA, &storage_key_pack);
+  
+  // Per DMI values to be saved
+  if (hashtable_count(vtpm_globals->dmi_map) > 0) {
+    
+    dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
+    if (dmi_itr == NULL) {
+      status = TPM_FAIL;
+      goto abort_egress;
+    }
+    
+    do {
+      dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
+
+      // No need to save dmi0.
+      if (dmi_res->dmi_id == 0)        
+       continue;
+      
+      // Count only the records that are actually written to the file.
+      dmis++;
+      
+      flat_global_size += BSG_PackList( flat_global + flat_global_size, 3,
+                                       BSG_TYPE_UINT32, &dmi_res->dmi_id,
+                                       BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
+                                       BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
+      
+    } while (hashtable_iterator_advance(dmi_itr));
+  }
+  
+  //FIXME: Once we have a way to protect a TPM key, we should use it to 
+  //       encrypt this blob. BUT, unless there is a way to ensure the key is
+  //       not used by other apps, this encryption is useless.
+  // O_TRUNC: VTPM_LoadService parses DMI records up to the end of the file,
+  // so leftovers from a previous, longer state file must not survive.
+  fh = open(STATE_FILE, O_WRONLY | O_CREAT | O_TRUNC, S_IREAD | S_IWRITE);
+  if (fh == -1) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Unable to open %s file for write.\n", STATE_FILE);
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  if ( (bytes_written = write(fh, flat_global, flat_global_size)) != flat_global_size ) {
+    vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data. %d/%d bytes written.\n", bytes_written, flat_global_size);
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  vtpm_globals->DMI_table_dirty = FALSE; 
+  
+  goto egress;
+  
+ abort_egress:
+ egress:
+  
+  // The iterator is heap-allocated by hashtable_iterator(); free(NULL) is a
+  // no-op when the map was empty or the iterator was never created.
+  free(dmi_itr);
+  free(flat_global);
+  if (fh != -1)
+    close(fh);
+  
+  vtpmloginfo(VTPM_LOG_VTPM, "Saved VTPM Service state (status = %d, dmis = %d)\n", (int) status, dmis);
+  return status;
+}
+
+// Reload the manager's persistent state from STATE_FILE.
+//
+// Reads the three usage auths and the wrapped storage key into vtpm_globals,
+// then creates one unconnected VTPM_DMI_RESOURCE per saved DMI record and
+// inserts it into vtpm_globals->dmi_map keyed by a heap-allocated copy of its
+// dmi_id. Trailing bytes too short to form a record are logged and ignored.
+//
+// Returns TPM_SUCCESS, TPM_IOERROR if the file cannot be read or is too short
+// to hold the global values, TPM_FAIL on allocation or map-insert failure, or
+// a status propagated through TPMTRYRETURN.
+TPM_RESULT VTPM_LoadService(void) {
+  
+  TPM_RESULT status=TPM_SUCCESS;
+  int fh = -1, stat_ret, dmis=0;
+  long fh_size = 0, step_size;
+  BYTE *flat_global=NULL;
+  struct pack_buf_t storage_key_pack;
+  UINT32 *dmi_id_key;
+  
+  VTPM_DMI_RESOURCE *dmi_res;
+  struct stat file_stat;
+  
+  fh = open(STATE_FILE, O_RDONLY );
+  if (fh == -1) {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  stat_ret = fstat(fh, &file_stat);
+  if (stat_ret == 0) 
+    fh_size = file_stat.st_size;
+  else {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  // The fixed part written by VTPM_SaveService is three auths plus the
+  // storage key size field; anything shorter cannot be unpacked safely.
+  if (fh_size < (long) (3*sizeof(TPM_DIGEST) + sizeof(UINT32))) {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  flat_global = (BYTE *) malloc(fh_size);
+  if (flat_global == NULL) {
+    status = TPM_FAIL;
+    goto abort_egress;
+  }
+  
+  if ((long) read(fh, flat_global, fh_size) != fh_size ) {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  // Global Values needing to be saved
+  step_size = BSG_UnpackList( flat_global, 4,
+                             BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth,
+                             BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth,
+                             BSG_TPM_SECRET,   &vtpm_globals->storage_key_usage_auth,
+                             BSG_TPM_SIZE32_DATA, &storage_key_pack);
+  
+  // The storage key size comes from the file; reject a header that claims
+  // more bytes than were read.
+  if (step_size > fh_size) {
+    status = TPM_IOERROR;
+    goto abort_egress;
+  }
+  
+  TPMTRYRETURN(buffer_init(&vtpm_globals->storageKeyWrap, 0, 0) );
+  TPMTRYRETURN(buffer_append_raw(&vtpm_globals->storageKeyWrap, storage_key_pack.size, storage_key_pack.data) );
+  
+  // Per DMI values to be saved
+  while ( step_size < fh_size ){
+    if (fh_size - step_size < (long) (sizeof(UINT32) + 2*sizeof(TPM_DIGEST))) {
+      vtpmlogerror(VTPM_LOG_VTPM, "Encountered %ld extra bytes at end of manager state.\n", fh_size-step_size);
+      step_size = fh_size;
+    } else {
+      dmi_res = (VTPM_DMI_RESOURCE *) malloc(sizeof(VTPM_DMI_RESOURCE));
+      dmi_id_key = (UINT32 *) malloc (sizeof(UINT32));
+      if ((dmi_res == NULL) || (dmi_id_key == NULL)) {
+       free(dmi_res);
+       free(dmi_id_key);
+       status = TPM_FAIL;
+       goto abort_egress;
+      }
+      
+      // Start from an all-zero record so fields that are not stored in the
+      // state file (e.g. NVMLocation, which VTPM_Handle_Load_NVM tests
+      // against NULL) do not hold indeterminate values.
+      memset(dmi_res, 0, sizeof(VTPM_DMI_RESOURCE));
+      dmi_res->connected = FALSE;
+      
+      step_size += BSG_UnpackList(flat_global + step_size, 3,
+                                 BSG_TYPE_UINT32, &dmi_res->dmi_id, 
+                                 BSG_TPM_DIGEST, &dmi_res->NVM_measurement,
+                                 BSG_TPM_DIGEST, &dmi_res->DMI_measurement);
+      
+      // install into map
+      *dmi_id_key = dmi_res->dmi_id;
+      if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, dmi_res)) {
+       // The map did not take the entry, so key and record are still ours.
+       free(dmi_id_key);
+       free(dmi_res);
+       status = TPM_FAIL;
+       goto abort_egress;
+      }
+      dmis++;
+      
+    }
+    
+  }
+  
+  vtpmloginfo(VTPM_LOG_VTPM, "Loaded saved state (dmis = %d).\n", dmis);
+  goto egress;
+  
+ abort_egress:
+  vtpmlogerror(VTPM_LOG_VTPM, "Failed to load service data with error = %s\n", tpm_get_error_name(status));
+ egress:
+  
+  if (flat_global)
+    free(flat_global);
+  if (fh != -1)
+    close(fh);
+  
+  return status;
+}
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/manager/vtpm_manager.c
--- a/tools/vtpm_manager/manager/vtpm_manager.c Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/manager/vtpm_manager.c Thu Sep 22 17:42:01 2005
@@ -1,735 +1,811 @@
-// ===================================================================
-// 
-// Copyright (c) 2005, Intel Corp.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without 
-// modification, are permitted provided that the following conditions 
-// are met:
-//
-//   * Redistributions of source code must retain the above copyright 
-//     notice, this list of conditions and the following disclaimer.
-//   * Redistributions in binary form must reproduce the above 
-//     copyright notice, this list of conditions and the following 
-//     disclaimer in the documentation and/or other materials provided 
-//     with the distribution.
-//   * Neither the name of Intel Corporation nor the names of its 
-//     contributors may be used to endorse or promote products derived
-//     from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
-// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
-// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-// OF THE POSSIBILITY OF SUCH DAMAGE.
-// ===================================================================
-// 
-// vtpm_manager.c
-// 
-//  This file will house the main logic of the VTPM Manager
-//
-// ==================================================================
-
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <string.h>
-
-#ifndef VTPM_MULTI_VM
-#include <pthread.h>
-#include <errno.h>
-#include <aio.h>
-#include <time.h>
-#endif
-
-#include "vtpm_manager.h"
-#include "vtpmpriv.h"
-#include "vtsp.h"
-#include "bsg.h"
-#include "hashtable.h"
-#include "hashtable_itr.h"
-
-#include "log.h"
-#include "buffer.h"
-
-VTPM_GLOBALS *vtpm_globals=NULL;
-
-#ifdef VTPM_MULTI_VM
- #define vtpmhandlerloginfo(module,fmt,args...) vtpmloginfo (module, fmt, 
##args );
- #define vtpmhandlerloginfomore(module,fmt,args...) vtpmloginfomore (module, 
fmt, ##args );
- #define vtpmhandlerlogerror(module,fmt,args...) vtpmlogerror (module, fmt, 
##args );
-#else 
- #define vtpmhandlerloginfo(module,fmt,args...) vtpmloginfo (module, "[%d]: " 
fmt, threadType, ##args );
- #define vtpmhandlerloginfomore(module,fmt,args...) vtpmloginfomore (module, 
fmt, ##args );
- #define vtpmhandlerlogerror(module,fmt,args...) vtpmlogerror (module, "[%d]: 
" fmt, threadType, ##args );
-#endif
-
-// --------------------------- Static Auths --------------------------
-#ifdef USE_FIXED_SRK_AUTH
-
-static BYTE FIXED_SRK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff,
-                                  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff};
-
-static BYTE FIXED_EK_AUTH[20] =  {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff,
-                                  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff};
-
-#endif
-                                  
-// -------------------------- Hash table functions --------------------
-
-static unsigned int hashfunc32(void *ky) {
-  return (* (UINT32 *) ky);
-}
-
-static int equals32(void *k1, void *k2) {
-  return (*(UINT32 *) k1 == *(UINT32 *) k2);
-}
-
-// --------------------------- Functions ------------------------------
-
-TPM_RESULT VTPM_Create_Service(){
-  
-  TPM_RESULT status = TPM_SUCCESS;
-  
-  // Generate Auth's for SRK & Owner
-#ifdef USE_FIXED_SRK_AUTH
-  memcpy(vtpm_globals->owner_usage_auth, FIXED_SRK_AUTH, sizeof(TPM_AUTHDATA));
-  memcpy(vtpm_globals->srk_usage_auth, FIXED_EK_AUTH, sizeof(TPM_AUTHDATA));
-#else    
-  Crypto_GetRandom(vtpm_globals->owner_usage_auth, sizeof(TPM_AUTHDATA) );
-  Crypto_GetRandom(vtpm_globals->srk_usage_auth, sizeof(TPM_AUTHDATA) );  
-#endif
-  
-  // Take Owership of TPM
-  CRYPTO_INFO ek_cryptoInfo;
-  
-  vtpmloginfo(VTPM_LOG_VTPM, "Attempting Pubek Read. NOTE: Failure is ok.\n");
-  status = VTSP_ReadPubek(vtpm_globals->manager_tcs_handle, &ek_cryptoInfo);
-  
-  // If we can read PubEK then there is no owner and we should take it.
-  if (status == TPM_SUCCESS) { 
-    TPMTRYRETURN(VTSP_TakeOwnership(vtpm_globals->manager_tcs_handle,
-                                   (const 
TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth, 
-                                   (const 
TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
-                                   &ek_cryptoInfo,
-                                   &vtpm_globals->keyAuth)); 
-  
-    TPMTRYRETURN(VTSP_DisablePubekRead(vtpm_globals->manager_tcs_handle,
-                                       (const 
TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth,  
-                                       &vtpm_globals->keyAuth));     
-  }
-  
-  // Generate storage key's auth
-  Crypto_GetRandom(  &vtpm_globals->storage_key_usage_auth, 
-                    sizeof(TPM_AUTHDATA) );
-  
-  TCS_AUTH osap;
-  TPM_AUTHDATA sharedsecret;
-  
-  TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
-                         TPM_ET_SRK,
-                         0, 
-                         (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
-                         &sharedsecret, 
-                         &osap) ); 
-  
-  TPMTRYRETURN( VTSP_CreateWrapKey( vtpm_globals->manager_tcs_handle,
-                                   TPM_KEY_BIND,
-                                   (const 
TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth,
-                                   TPM_SRK_KEYHANDLE, 
-                                   (const TPM_AUTHDATA*)&sharedsecret,
-                                   &vtpm_globals->storageKeyWrap,
-                                   &osap) );
-  
-  vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
-  
-  goto egress;
-  
- abort_egress:
-  exit(1);
-  
- egress:
-  vtpmloginfo(VTPM_LOG_VTPM, "New VTPM Service initialized (Status = %d).\n", 
status);
-  return status;
-  
-}
-
-
-//////////////////////////////////////////////////////////////////////////////
-#ifdef VTPM_MULTI_VM
-int VTPM_Service_Handler(){
-#else
-void *VTPM_Service_Handler(void *threadTypePtr){
-#endif
-  TPM_RESULT      status =  TPM_FAIL; // Should never return
-  UINT32          dmi, in_param_size, cmd_size, out_param_size, 
out_message_size, out_message_size_full, dmi_cmd_size;
-  BYTE            *cmd_header, *in_param, *out_message, *dmi_cmd;
-  buffer_t        *command_buf=NULL, *result_buf=NULL;
-  TPM_TAG         tag;
-  TPM_COMMAND_CODE ord;
-  VTPM_DMI_RESOURCE *dmi_res;
-  int  size_read, size_write, i;
-  
-#ifndef VTPM_MULTI_VM
-  int threadType = *(int *) threadTypePtr;
-  
-  // async io structures
-  struct aiocb dmi_aio;
-  struct aiocb *dmi_aio_a[1];
-  dmi_aio_a[0] = &dmi_aio;
-#endif
-  
-#ifdef DUMMY_BACKEND
-  int dummy_rx;  
-#endif
-  
-  // TODO: Reinsert ifdefs to enable support for MULTI-VM 
-  
-  cmd_header = (BYTE *) malloc(VTPM_COMMAND_HEADER_SIZE_SRV);
-  command_buf = (buffer_t *) malloc(sizeof(buffer_t));
-  result_buf = (buffer_t *) malloc(sizeof(buffer_t));
-  
-#ifndef VTPM_MULTI_VM
-  TPM_RESULT *ret_value = (TPM_RESULT *) malloc(sizeof(TPM_RESULT));
-#endif
-  
-  int *tx_fh, *rx_fh;
-  
-#ifdef VTPM_MULTI_VM
-  rx_fh = &vtpm_globals->be_fh;
-#else
-  if (threadType == BE_LISTENER_THREAD) {
-#ifdef DUMMY_BACKEND    
-    dummy_rx = -1;
-    rx_fh = &dummy_rx;
-#else
-    rx_fh = &vtpm_globals->be_fh;
-#endif
-  } else { // DMI_LISTENER_THREAD
-    rx_fh = &vtpm_globals->vtpm_rx_fh;
-  }
-#endif
-  
-#ifndef VTPM_MULTI_VM
-  int fh;
-  if (threadType == BE_LISTENER_THREAD) {
-    tx_fh = &vtpm_globals->be_fh;
-    if ( (fh = open(GUEST_RX_FIFO, O_RDWR)) == -1) {
-      if ( mkfifo(GUEST_RX_FIFO, S_IWUSR | S_IRUSR ) ){
-                               *ret_value = TPM_FAIL;
-                               pthread_exit(ret_value);
-      }
-    } else 
-      close(fh);
-    
-  } else { // else DMI_LISTENER_THREAD
-    // tx_fh will be set once the DMI is identified
-    // But we need to make sure the read pip is created.
-    if ( (fh = open(VTPM_RX_FIFO, O_RDWR)) == -1) {
-      if ( mkfifo(VTPM_RX_FIFO, S_IWUSR | S_IRUSR ) ){
-       *ret_value = TPM_FAIL;
-       pthread_exit(ret_value);
-      }
-    } else 
-      close(fh);
-    
-  }
-#endif
-  
-  while(1) {
-    
-    if (threadType == BE_LISTENER_THREAD) {
-      vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for Guest requests & ctrl 
messages.\n");
-    } else 
-      vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for DMI messages.\n");
-    
-    
-    if (*rx_fh < 0) {
-      if (threadType == BE_LISTENER_THREAD) 
-#ifdef DUMMY_BACKEND
-       *rx_fh = open("/tmp/in.fifo", O_RDWR);
-#else
-        *rx_fh = open(VTPM_BE_DEV, O_RDWR);
-#endif
-      else  // DMI Listener   
-       *rx_fh = open(VTPM_RX_FIFO, O_RDWR);
-      
-    }
-    
-    if (*rx_fh < 0) {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh.\n");
-#ifdef VTPM_MULTI_VM
-      return TPM_IOERROR; 
-#else
-      *ret_value = TPM_IOERROR;
-      pthread_exit(ret_value);
-#endif
-    }
-    
-    size_read = read(*rx_fh, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
-    if (size_read > 0) {
-      vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "RECV[%d}: 0x", size_read);
-      for (i=0; i<size_read; i++) 
-               vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", 
cmd_header[i]);
-    } else {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't read from BE. Aborting... \n");
-      close(*rx_fh);
-      *rx_fh = -1;
-      goto abort_command;
-    }
-
-    if (size_read < (int) VTPM_COMMAND_HEADER_SIZE_SRV) {
-      vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "\n");
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command shorter than normal header 
(%d bytes). Aborting...\n", size_read);
-      goto abort_command;
-    }
-    
-    BSG_UnpackList(cmd_header, 4,
-                  BSG_TYPE_UINT32, &dmi,
-                  BSG_TPM_TAG, &tag,
-                  BSG_TYPE_UINT32, &in_param_size,
-                  BSG_TPM_COMMAND_CODE, &ord );
-    
-    // Note that in_param_size is in the client's context
-    cmd_size = in_param_size - VTPM_COMMAND_HEADER_SIZE_CLT;
-    if (cmd_size > 0) {
-      in_param = (BYTE *) malloc(cmd_size);
-      size_read = read( *rx_fh, in_param, cmd_size);
-      if (size_read > 0) {
-       for (i=0; i<size_read; i++) 
-         vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
-       
-      } else {
-        vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from BE. Aborting... 
\n");
-       close(*rx_fh);
-       *rx_fh = -1;
-       goto abort_command;
-      }
-      vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-      
-      if (size_read < (int) cmd_size) {
-       vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-       vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command read(%d) is shorter than 
header indicates(%d). Aborting...\n", size_read, cmd_size);
-       goto abort_command;
-      }
-    } else {
-      in_param = NULL;
-      vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-    }            
-    
-    if ((threadType != BE_LISTENER_THREAD) && (dmi == 0)) {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to access dom0 commands from 
DMI interface. Aborting...\n");
-      goto abort_command;
-    }
-    
-    dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, 
&dmi);
-    if (dmi_res == NULL) {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempted access to non-existent DMI 
in domain: %d. Aborting...\n", dmi);
-      goto abort_command;
-    }
-    if (!dmi_res->connected) {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempted access to disconnected DMI 
in domain: %d. Aborting...\n", dmi);
-      goto abort_command;
-    }
-    
-    if (threadType != BE_LISTENER_THREAD) 
-      tx_fh = &dmi_res->vtpm_tx_fh;
-    // else we set this before the while loop since it doesn't change.
-    
-    if ( (buffer_init_convert(command_buf, cmd_size, in_param) != TPM_SUCCESS) 
|| 
-        (buffer_init(result_buf, 0, 0) != TPM_SUCCESS) ) {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Failed to setup buffers. 
Aborting...\n");
-      goto abort_command;
-    }
-    
-    // Dispatch it as either control or user request.
-    if (tag == VTPM_TAG_REQ) { 
-      if (dmi_res->dmi_id == VTPM_CTL_DM){ 
-       switch (ord) {
-       case VTPM_ORD_OPEN:
-         status = VTPM_Handle_New_DMI(command_buf);
-         break;
-          
-       case VTPM_ORD_CLOSE:
-         status = VTPM_Handle_Close_DMI(command_buf);
-         break;
-          
-       case VTPM_ORD_DELETE:
-         status = VTPM_Handle_Delete_DMI(command_buf);
-         break;
-       default:
-         status = TPM_BAD_ORDINAL; 
-       } // switch
-      } else {
-       
-       switch (ord) {                
-       case VTPM_ORD_SAVENVM:
-         status= VTPM_Handle_Save_NVM(dmi_res,
-                                      command_buf, 
-                                      result_buf);
-         break;
-       case VTPM_ORD_LOADNVM:
-         status= VTPM_Handle_Load_NVM(dmi_res, 
-                                      command_buf, 
-                                      result_buf);
-         break;
-         
-       case VTPM_ORD_TPMCOMMAND:
-         status= VTPM_Handle_TPM_Command(dmi_res, 
-                                         command_buf, 
-                                         result_buf);
-         break;
-         
-       default:
-         status = TPM_BAD_ORDINAL; 
-       } // switch
-      }
-    } else { // This is not a VTPM Command at all
-      
-      if (threadType == BE_LISTENER_THREAD) {
-       if (dmi == 0) {
-         // This usually indicates a FE/BE driver.
-         vtpmhandlerlogerror(VTPM_LOG_VTPM, "Illegal use of TPM command from 
dom0\n");
-         status = TPM_FAIL;
-       } else {
-         vtpmhandlerloginfo(VTPM_LOG_VTPM, "Forwarding command to DMI.\n");
-         
-         if (dmi_res->guest_tx_fh < 0)
-           dmi_res->guest_tx_fh = open(dmi_res->guest_tx_fname, O_WRONLY | 
O_NONBLOCK);
-          
-         if (dmi_res->guest_tx_fh < 0){
-           vtpmhandlerlogerror(VTPM_LOG_VTPM, "VTPM ERROR: Can't open outbound 
fh to dmi.\n");
-           status = TPM_IOERROR;
-           goto abort_with_error;
-         }        
-          
-         //Note: Send message + dmi_id
-         if (cmd_size) {
-           dmi_cmd = (BYTE *) malloc(VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size);
-           dmi_cmd_size = VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size;
-           memcpy(dmi_cmd, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
-           memcpy(dmi_cmd + VTPM_COMMAND_HEADER_SIZE_SRV, in_param, cmd_size);
-           size_write = write(dmi_res->guest_tx_fh, dmi_cmd, dmi_cmd_size);
-           
-           if (size_write > 0) {
-             vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "SENT (DMI): 0x");
-             for (i=0; i<VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size; i++) {
-               vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", dmi_cmd[i]);
-             }
-             vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-           } else {
-              vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to DMI. 
Aborting... \n");
-             close(dmi_res->guest_tx_fh);
-             dmi_res->guest_tx_fh = -1;
-              status = TPM_IOERROR;
-             goto abort_with_error;
-           }
-           free(dmi_cmd);
-         } else {
-           dmi_cmd_size = VTPM_COMMAND_HEADER_SIZE_SRV;
-           size_write = write(dmi_res->guest_tx_fh, cmd_header, 
VTPM_COMMAND_HEADER_SIZE_SRV );
-           if (size_write > 0) {
-             for (i=0; i<VTPM_COMMAND_HEADER_SIZE_SRV; i++) 
-               vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", 
cmd_header[i]);
-             
-             vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-           } else {
-              vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to DMI. 
Aborting... \n");
-             close(dmi_res->guest_tx_fh);
-             dmi_res->guest_tx_fh = -1;
-              status = TPM_IOERROR;
-             goto abort_with_error;
-           }
-         }
-          
-         if (size_write != (int) dmi_cmd_size) 
-           vtpmhandlerlogerror(VTPM_LOG_VTPM, "Could not write entire command 
to DMI (%d/%d)\n", size_write, dmi_cmd_size);
-         buffer_free(command_buf);
-         
-         if (vtpm_globals->guest_rx_fh < 0) 
-           vtpm_globals->guest_rx_fh = open(GUEST_RX_FIFO, O_RDONLY);
-          
-         if (vtpm_globals->guest_rx_fh < 0){
-           vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh to 
dmi.\n");
-            status = TPM_IOERROR;
-           goto abort_with_error;
-         }                  
-         
-          size_read = read( vtpm_globals->guest_rx_fh, cmd_header, 
VTPM_COMMAND_HEADER_SIZE_SRV);
-         if (size_read > 0) {
-           vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "RECV (DMI): 0x");
-           for (i=0; i<size_read; i++) 
-             vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cmd_header[i]);
-           
-         } else {
-            vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from DMI. 
Aborting... \n");
-           close(vtpm_globals->guest_rx_fh);
-           vtpm_globals->guest_rx_fh = -1;
-            status = TPM_IOERROR;
-           goto abort_with_error;
-         }
-          
-         if (size_read < (int) VTPM_COMMAND_HEADER_SIZE_SRV) {
-           //vtpmdeepsublog("\n");
-           vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command from DMI shorter than 
normal header. Aborting...\n");
-            status = TPM_IOERROR;
-           goto abort_with_error;
-         }
-          
-         BSG_UnpackList(cmd_header, 4,
-                        BSG_TYPE_UINT32, &dmi,
-                        BSG_TPM_TAG, &tag,
-                        BSG_TYPE_UINT32, &in_param_size,
-                        BSG_TPM_COMMAND_CODE, &status );
-        
-         // Note that in_param_size is in the client's context
-         cmd_size = in_param_size - VTPM_COMMAND_HEADER_SIZE_CLT;
-         if (cmd_size > 0) {
-           in_param = (BYTE *) malloc(cmd_size);
-           size_read = read( vtpm_globals->guest_rx_fh, in_param, cmd_size);
-           if (size_read > 0) {
-             for (i=0; i<size_read; i++) 
-               vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
-             
-           } else {
-              vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from BE. 
Aborting... \n");
-             close(vtpm_globals->guest_rx_fh);
-             vtpm_globals->guest_rx_fh = -1;
-              status = TPM_IOERROR;
-             goto abort_with_error;
-           }
-           vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
-            
-           if (size_read < (int)cmd_size) {
-             vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
-             vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command read(%d) from DMI is 
shorter than header indicates(%d). Aborting...\n", size_read, cmd_size);
-              status = TPM_IOERROR;
-             goto abort_with_error;
-           }
-         } else {
-           in_param = NULL;
-           vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
-         }
-                           
-         if (buffer_init_convert(result_buf, cmd_size, in_param) != 
TPM_SUCCESS) {
-           vtpmhandlerlogerror(VTPM_LOG_VTPM, "Failed to setup buffers. 
Aborting...\n");
-            status = TPM_FAIL;
-           goto abort_with_error;
-         }
-         
-         vtpmhandlerloginfo(VTPM_LOG_VTPM, "Sending DMI's response to 
guest.\n");
-       } // end else for if (dmi==0)
-        
-      } else { // This is a DMI lister thread. Thus this is from a DMI
-#ifdef VTPM_MULTI_VM
-       vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to use unsupported direct 
access to TPM.\n");
-       vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "Bad Command. dmi:%d, tag:%d, 
size:%d, ord:%d, Params: ", dmi, tag, in_param_size, ord);
-       for (UINT32 q=0; q<cmd_size; q++) 
-         vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[q]);
-       
-       vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
-        
-       status = TPM_FAIL;
-#else
-       
-#endif
-      } // end else for if BE Listener
-    } // end else for is VTPM Command
-    
-    // Send response to Backend
-    if (*tx_fh < 0) {
-      if (threadType == BE_LISTENER_THREAD) 
-#ifdef DUMMY_BACKEND
-       *tx_fh = open("/tmp/out.fifo", O_RDWR);
-#else
-        *tx_fh = open(VTPM_BE_DEV, O_RDWR);
-#endif
-      else  // DMI Listener
-       *tx_fh = open(dmi_res->vtpm_tx_fname, O_WRONLY);
-    }
-    
-    if (*tx_fh < 0) {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "VTPM ERROR: Can't open outbound 
fh.\n");
-#ifdef VTPM_MULTI_VM
-      return TPM_IOERROR; 
-#else
-      *ret_value = TPM_IOERROR;
-      pthread_exit(ret_value);
-#endif
-    }        
-    
- abort_with_error:
-    // Prepend VTPM header with destination DM stamped
-    out_param_size = buffer_len(result_buf);
-    out_message_size = VTPM_COMMAND_HEADER_SIZE_CLT + out_param_size;
-    out_message_size_full = VTPM_COMMAND_HEADER_SIZE_SRV + out_param_size;
-    out_message = (BYTE *) malloc (out_message_size_full);
-    
-    BSG_PackList(out_message, 4,
-                BSG_TYPE_UINT32, (BYTE *) &dmi,
-                BSG_TPM_TAG, (BYTE *) &tag,
-                BSG_TYPE_UINT32, (BYTE *) &out_message_size,
-                BSG_TPM_RESULT, (BYTE *) &status);
-    
-    if (buffer_len(result_buf) > 0) 
-      memcpy(out_message + VTPM_COMMAND_HEADER_SIZE_SRV, result_buf->bytes, 
out_param_size);
-    
-    
-    //Note: Send message + dmi_id
-    size_write = write(*tx_fh, out_message, out_message_size_full );
-    if (size_write > 0) {
-      vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "SENT: 0x");
-      for (i=0; i < out_message_size_full; i++) 
-       vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", out_message[i]);
-      
-      vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");            
-    } else {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to BE. Aborting... 
\n");
-      close(*tx_fh);
-      *tx_fh = -1;
-      goto abort_command;
-    }
-    free(out_message);
-    
-    if (size_write < (int)out_message_size_full) {
-      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to write full command to BE 
(%d/%d)\n", size_write, out_message_size_full);
-      goto abort_command;
-    }
-    
-  abort_command:
-    //free buffers
-    bzero(cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
-    //free(in_param); // This was converted to command_buf. No need to free 
-    if (command_buf != result_buf) 
-      buffer_free(result_buf);
-    
-    buffer_free(command_buf);
-    
-#ifndef VTPM_MULTI_VM
-    if (threadType != BE_LISTENER_THREAD) {
-#endif
-      if ( (vtpm_globals->DMI_table_dirty) &&
-          (VTPM_SaveService() != TPM_SUCCESS) ) {
-       vtpmhandlerlogerror(VTPM_LOG_VTPM, "ERROR: Unable to save manager 
data.\n");
-      }
-#ifndef VTPM_MULTI_VM
-    }
-#endif
-    
-  } // End while(1)
-  
-}
-
-
-///////////////////////////////////////////////////////////////////////////////
-TPM_RESULT VTPM_Init_Service() {
-  TPM_RESULT status = TPM_FAIL;   
-  BYTE *randomsead;
-       UINT32 randomsize;
-       
-  if ((vtpm_globals = (VTPM_GLOBALS *) malloc(sizeof(VTPM_GLOBALS))) == NULL){
-               status = TPM_FAIL;
-               goto abort_egress;
-       }
-       memset(vtpm_globals, 0, sizeof(VTPM_GLOBALS));
-  vtpm_globals->be_fh = -1;
-
-#ifndef VTPM_MULTI_VM
-  vtpm_globals->vtpm_rx_fh = -1;
-  vtpm_globals->guest_rx_fh = -1;
-#endif
-  if ((vtpm_globals->dmi_map = create_hashtable(10, hashfunc32, equals32)) == 
NULL){
-               status = TPM_FAIL;
-               goto abort_egress;
-       }
-  
-  vtpm_globals->DMI_table_dirty = FALSE;
-  
-  // Create new TCS Object
-  vtpm_globals->manager_tcs_handle = 0;
-  
-  TPMTRYRETURN(TCS_create());
-  
-  // Create TCS Context for service
-  TPMTRYRETURN( TCS_OpenContext(&vtpm_globals->manager_tcs_handle ) );
-
-       TPMTRYRETURN( TCSP_GetRandom(vtpm_globals->manager_tcs_handle, 
-                                                                               
                                         &randomsize, 
-                                                                               
                                         &randomsead));
-
-       Crypto_Init(randomsead, randomsize);
-       TPMTRYRETURN( TCS_FreeMemory (vtpm_globals->manager_tcs_handle, 
randomsead)); 
-       
-  // Create OIAP session for service's authorized commands
-  TPMTRYRETURN( VTSP_OIAP( vtpm_globals->manager_tcs_handle, 
-                          &vtpm_globals->keyAuth) );
-  vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
-
-       // If failed, create new Service.
-  if (VTPM_LoadService() != TPM_SUCCESS)
-    TPMTRYRETURN( VTPM_Create_Service() );    
-
-  
-  //Load Storage Key 
-  TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
-                             TPM_SRK_KEYHANDLE,
-                             &vtpm_globals->storageKeyWrap,
-                             (const 
TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
-                             &vtpm_globals->storageKeyHandle,
-                             &vtpm_globals->keyAuth,
-                             &vtpm_globals->storageKey) );
-  
-  // Create entry for Dom0 for control messages
-  TPMTRYRETURN( VTPM_Handle_New_DMI(NULL) );
-  
-  // --------------------- Command handlers ---------------------------
-  
-  goto egress;
-  
- abort_egress:
- egress:
-  
-  return(status);
-}
- 
-void VTPM_Stop_Service() {
-  VTPM_DMI_RESOURCE *dmi_res;
-  struct hashtable_itr *dmi_itr;
-  
-  // Close all the TCS contexts. TCS should evict keys based on this
-  if (hashtable_count(vtpm_globals->dmi_map) > 0) {
-    dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
-    do {
-      dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
-      if (dmi_res->connected) 
-                               if (close_dmi( dmi_res ) != TPM_SUCCESS) 
-                                       vtpmlogerror(VTPM_LOG_VTPM, "Failed to 
close dmi %d properly.\n", dmi_res->dmi_id);
-      
-    } while (hashtable_iterator_advance(dmi_itr));
-               free (dmi_itr);
-  }
-  
-       
-  TCS_CloseContext(vtpm_globals->manager_tcs_handle);
-  
-  if ( (vtpm_globals->DMI_table_dirty) &&
-       (VTPM_SaveService() != TPM_SUCCESS) )
-    vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
-  
-  hashtable_destroy(vtpm_globals->dmi_map, 1);
-  free(vtpm_globals);
-  
-  close(vtpm_globals->be_fh);
-  Crypto_Exit();
-       
-  vtpmloginfo(VTPM_LOG_VTPM, "VTPM Manager stopped.\n");
-}
+// ===================================================================
+// 
+// Copyright (c) 2005, Intel Corp.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without 
+// modification, are permitted provided that the following conditions 
+// are met:
+//
+//   * Redistributions of source code must retain the above copyright 
+//     notice, this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above 
+//     copyright notice, this list of conditions and the following 
+//     disclaimer in the documentation and/or other materials provided 
+//     with the distribution.
+//   * Neither the name of Intel Corporation nor the names of its 
+//     contributors may be used to endorse or promote products derived
+//     from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+// OF THE POSSIBILITY OF SUCH DAMAGE.
+// ===================================================================
+// 
+// vtpm_manager.c
+// 
+//  This file will house the main logic of the VTPM Manager
+//
+// ==================================================================
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+
+#ifndef VTPM_MULTI_VM
+#include <pthread.h>
+#include <errno.h>
+#include <aio.h>
+#include <time.h>
+#endif
+
+#include "vtpm_manager.h"
+#include "vtpmpriv.h"
+#include "vtsp.h"
+#include "bsg.h"
+#include "hashtable.h"
+#include "hashtable_itr.h"
+
+#include "log.h"
+#include "buffer.h"
+
+VTPM_GLOBALS *vtpm_globals=NULL;
+
+#ifdef VTPM_MULTI_VM
+ #define vtpmhandlerloginfo(module,fmt,args...) vtpmloginfo (module, fmt, 
##args );
+ #define vtpmhandlerloginfomore(module,fmt,args...) vtpmloginfomore (module, 
fmt, ##args );
+ #define vtpmhandlerlogerror(module,fmt,args...) vtpmlogerror (module, fmt, 
##args );
+#else 
+ #define vtpmhandlerloginfo(module,fmt,args...) vtpmloginfo (module, "[%d]: " 
fmt, threadType, ##args );
+ #define vtpmhandlerloginfomore(module,fmt,args...) vtpmloginfomore (module, 
fmt, ##args );
+ #define vtpmhandlerlogerror(module,fmt,args...) vtpmlogerror (module, "[%d]: 
" fmt, threadType, ##args );
+#endif
+
+// --------------------------- Well Known Auths --------------------------
+#ifdef WELL_KNOWN_SRK_AUTH
+static BYTE FIXED_SRK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff,
+                                  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff};
+#endif
+
+#ifdef WELL_KNOWN_OWNER_AUTH
+static BYTE FIXED_OWNER_AUTH[20] =  {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff,
+                                  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff};
+#endif
+                                  
+// -------------------------- Hash table functions --------------------
+
+static unsigned int hashfunc32(void *ky) { // Hash function for keys that point to a UINT32.
+  return (* (UINT32 *) ky); // Identity hash: the UINT32 that ky points to is returned as the hash value.
+}
+
+static int equals32(void *k1, void *k2) { // Key comparison for keys that point to a UINT32.
+  return (*(UINT32 *) k1 == *(UINT32 *) k2); // Nonzero when both pointed-to UINT32 values are equal, 0 otherwise.
+}
+
+// --------------------------- Functions ------------------------------
+
+TPM_RESULT VTPM_Create_Service(){
+  // Initializes a new VTPM service in vtpm_globals: picks SRK/owner auth values, takes TPM ownership when the Pubek is readable, and requests a wrapped storage key. Returns status; the failure label below exits the process instead of returning.
+  TPM_RESULT status = TPM_SUCCESS;
+  
+  // Choose auth values for SRK & Owner: the fixed well-known bytes when the matching WELL_KNOWN_* macro is defined, random bytes otherwise.
+#ifdef WELL_KNOWN_SRK_AUTH 
+  memcpy(vtpm_globals->srk_usage_auth, FIXED_SRK_AUTH, sizeof(TPM_AUTHDATA));
+#else    
+  Crypto_GetRandom(vtpm_globals->srk_usage_auth, sizeof(TPM_AUTHDATA) );  
+#endif
+  
+#ifdef WELL_KNOWN_OWNER_AUTH 
+  memcpy(vtpm_globals->owner_usage_auth, FIXED_OWNER_AUTH, 
sizeof(TPM_AUTHDATA));
+#else    
+  Crypto_GetRandom(vtpm_globals->owner_usage_auth, sizeof(TPM_AUTHDATA) );
+#endif
+
+  // Take Ownership of TPM
+  CRYPTO_INFO ek_cryptoInfo;
+  // The Pubek read doubles as the "does the TPM already have an owner?" probe, so a failure here is tolerated (see the log text).
+  vtpmloginfo(VTPM_LOG_VTPM, "Attempting Pubek Read. NOTE: Failure is ok.\n");
+  status = VTSP_ReadPubek(vtpm_globals->manager_tcs_handle, &ek_cryptoInfo);
+  
+  // If we can read PubEK then there is no owner and we should take it.
+  if (status == TPM_SUCCESS) { 
+    TPMTRYRETURN(VTSP_TakeOwnership(vtpm_globals->manager_tcs_handle,
+                                   (const 
TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth, 
+                                   (const 
TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+                                   &ek_cryptoInfo,
+                                   &vtpm_globals->keyAuth)); 
+  // Once ownership is taken, disable further Pubek reads using the owner auth.
+    TPMTRYRETURN(VTSP_DisablePubekRead(vtpm_globals->manager_tcs_handle,
+                                       (const 
TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth,  
+                                       &vtpm_globals->keyAuth));     
+  }
+  
+  // Generate storage key's auth (always random; no WELL_KNOWN_* switch applies to it)
+  Crypto_GetRandom(  &vtpm_globals->storage_key_usage_auth, 
+                    sizeof(TPM_AUTHDATA) );
+  // osap and sharedsecret are filled in by VTSP_OSAP and then handed to VTSP_CreateWrapKey.
+  TCS_AUTH osap;
+  TPM_AUTHDATA sharedsecret;
+  // Open an OSAP session for entity type TPM_ET_SRK, authorized with srk_usage_auth.
+  TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle,
+                         TPM_ET_SRK,
+                         0, 
+                         (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+                         &sharedsecret, 
+                         &osap) ); 
+  // Request a TPM_KEY_BIND key with TPM_SRK_KEYHANDLE as the handle argument; presumably the wrapped key lands in vtpm_globals->storageKeyWrap - TODO confirm against VTSP_CreateWrapKey.
+  TPMTRYRETURN( VTSP_CreateWrapKey( vtpm_globals->manager_tcs_handle,
+                                   TPM_KEY_BIND,
+                                   (const 
TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth,
+                                   TPM_SRK_KEYHANDLE, 
+                                   (const TPM_AUTHDATA*)&sharedsecret,
+                                   &vtpm_globals->storageKeyWrap,
+                                   &osap) );
+  // Mark the keyAuth session as one that continues.
+  vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
+  
+  goto egress;
+  // Failure path: terminates the whole process. NOTE(review): presumably reached from TPMTRYRETURN, whose definition is not visible here - confirm.
+ abort_egress:
+  exit(1);
+  // Normal path: log the final status and return it to the caller.
+ egress:
+  vtpmloginfo(VTPM_LOG_VTPM, "Finished initialized new VTPM service (Status = 
%d).\n", status);
+  return status;
+  
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Endless request loop of the VTPM manager. Each iteration reads one command
+// (server-side header followed by its parameters) from rx_fh, looks up the
+// DMI named in the header, dispatches the command and writes the response to
+// tx_fh.
+//
+// With VTPM_MULTI_VM a single loop serves vtpm_globals->be_fh and a fatal
+// open failure returns TPM_IOERROR. Otherwise threadTypePtr points to an int
+// that selects BE_LISTENER_THREAD or the DMI listener, and fatal failures end
+// the thread through pthread_exit() with a heap-allocated TPM_RESULT.
+//
+// NOTE(review): abort_command calls buffer_free() on command_buf/result_buf
+// even when they were not initialised during the current iteration; this
+// relies on buffer_free() tolerating that - confirm against buffer.c.
+#ifdef VTPM_MULTI_VM
+int VTPM_Service_Handler(){
+#else
+void *VTPM_Service_Handler(void *threadTypePtr){
+#endif
+  TPM_RESULT      status =  TPM_FAIL; // Should never return
+  TPM_RESULT      convert_status;     // result of buffer_init_convert()
+  UINT32          dmi, in_param_size, cmd_size, out_param_size, out_message_size, out_message_size_full;
+  BYTE            *cmd_header, *in_param, *out_message;
+  BYTE            *unowned_param = NULL; // in_param until a buffer_t takes it over
+  buffer_t        *command_buf=NULL, *result_buf=NULL;
+  TPM_TAG         tag;
+  TPM_COMMAND_CODE ord;
+  VTPM_DMI_RESOURCE *dmi_res;
+  int  size_read, size_write, i;
+
+#ifndef VTPM_MULTI_VM
+  UINT32 dmi_cmd_size;
+  BYTE *dmi_cmd;
+  int threadType = *(int *) threadTypePtr;
+#endif
+
+#ifdef DUMMY_BACKEND
+  int dummy_rx;
+#endif
+
+  cmd_header = (BYTE *) malloc(VTPM_COMMAND_HEADER_SIZE_SRV);
+  command_buf = (buffer_t *) malloc(sizeof(buffer_t));
+  result_buf = (buffer_t *) malloc(sizeof(buffer_t));
+
+#ifndef VTPM_MULTI_VM
+  TPM_RESULT *ret_value = (TPM_RESULT *) malloc(sizeof(TPM_RESULT));
+  if (ret_value == NULL) {
+    vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to allocate thread result.\n");
+    free(cmd_header);
+    free(command_buf);
+    free(result_buf);
+    pthread_exit(NULL); // nothing left to report the failure through
+  }
+#endif
+
+  // Without these three buffers no command can be handled at all.
+  if (cmd_header == NULL || command_buf == NULL || result_buf == NULL) {
+    vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to allocate command buffers.\n");
+    free(cmd_header);
+    free(command_buf);
+    free(result_buf);
+#ifdef VTPM_MULTI_VM
+    return TPM_FAIL;
+#else
+    *ret_value = TPM_FAIL;
+    pthread_exit(ret_value);
+#endif
+  }
+
+  int *tx_fh, // Pointer to the filehandle this function will write to
+      *rx_fh; // Pointer to the filehandle this function will read from
+              // For a multi VM VTPM system, this function tx/rx with the BE
+              //   via vtpm_globals->be_fh.
+              // For a single VM system, the BE_LISTENER_THREAD tx/rx with the BE
+              //   via vtpm_globals->be_fh, and the DMI_LISTENER_THREAD rx from
+              //   vtpm_globals->vtpm_rx_fh and tx to dmi_res->vtpm_tx_fh
+
+  // Set rx_fh to point to the correct fh based on this mode.
+#ifdef VTPM_MULTI_VM
+  rx_fh = &vtpm_globals->be_fh;
+#else
+  if (threadType == BE_LISTENER_THREAD) {
+ #ifdef DUMMY_BACKEND
+    dummy_rx = -1;
+    rx_fh = &dummy_rx;
+ #else
+    rx_fh = &vtpm_globals->be_fh;
+ #endif
+  } else { // DMI_LISTENER_THREAD
+    rx_fh = &vtpm_globals->vtpm_rx_fh;
+  }
+#endif
+
+  // Set tx_fh to point to the correct fh based on this mode (If static)
+  // Create any fifos that these fh will use.
+#ifndef VTPM_MULTI_VM
+  int fh;
+  if (threadType == BE_LISTENER_THREAD) {
+    tx_fh = &vtpm_globals->be_fh;
+    if ( (fh = open(GUEST_RX_FIFO, O_RDWR)) == -1) {
+      if ( mkfifo(GUEST_RX_FIFO, S_IWUSR | S_IRUSR ) ){
+        vtpmlogerror(VTPM_LOG_VTPM, "Unable to create FIFO: %s.\n", GUEST_RX_FIFO);
+        *ret_value = TPM_FAIL;
+        pthread_exit(ret_value);
+      }
+    } else
+      close(fh);
+
+  } else { // else DMI_LISTENER_THREAD
+    // tx_fh will be set once the DMI is identified
+    // But we need to make sure the read pipe is created.
+    if ( (fh = open(VTPM_RX_FIFO, O_RDWR)) == -1) {
+      if ( mkfifo(VTPM_RX_FIFO, S_IWUSR | S_IRUSR ) ){
+        vtpmlogerror(VTPM_LOG_VTPM, "Unable to create FIFO: %s.\n", VTPM_RX_FIFO);
+        *ret_value = TPM_FAIL;
+        pthread_exit(ret_value);
+      }
+    } else
+      close(fh);
+
+  }
+#else
+  tx_fh = &vtpm_globals->be_fh;
+#endif
+
+  ////////////////////////// Main Loop //////////////////////////////////
+  while(1) {
+
+#ifdef VTPM_MULTI_VM
+    vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for DMI messages.\n");
+#else
+    if (threadType == BE_LISTENER_THREAD) {
+      vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for Guest requests & ctrl messages.\n");
+    } else
+      vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for DMI messages.\n");
+#endif
+
+    // Check status of rx_fh. If necessary attempt to re-open it.
+    if (*rx_fh < 0) {
+#ifdef VTPM_MULTI_VM
+      *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+#else
+      if (threadType == BE_LISTENER_THREAD)
+  #ifdef DUMMY_BACKEND
+        *rx_fh = open("/tmp/in.fifo", O_RDWR);
+  #else
+        *rx_fh = open(VTPM_BE_DEV, O_RDWR);
+  #endif
+      else  // DMI Listener
+        *rx_fh = open(VTPM_RX_FIFO, O_RDWR);
+#endif
+    }
+
+    // Respond to failures to open rx_fh
+    if (*rx_fh < 0) {
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh.\n");
+#ifdef VTPM_MULTI_VM
+      return TPM_IOERROR;
+#else
+      *ret_value = TPM_IOERROR;
+      pthread_exit(ret_value);
+#endif
+    }
+
+    // Read command header from rx_fh
+    size_read = read(*rx_fh, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
+    if (size_read > 0) {
+      vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "RECV[%d]: 0x", size_read);
+      for (i=0; i<size_read; i++)
+        vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cmd_header[i]);
+    } else {
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't read from BE. Aborting... \n");
+      close(*rx_fh);
+      *rx_fh = -1;
+      goto abort_command;
+    }
+
+    if (size_read < (int) VTPM_COMMAND_HEADER_SIZE_SRV) {
+      vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "\n");
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command shorter than normal header (%d bytes). Aborting...\n", size_read);
+      goto abort_command;
+    }
+
+    // Unpack header
+    BSG_UnpackList(cmd_header, 4,
+                   BSG_TYPE_UINT32, &dmi,
+                   BSG_TPM_TAG, &tag,
+                   BSG_TYPE_UINT32, &in_param_size,
+                   BSG_TPM_COMMAND_CODE, &ord );
+
+    // Using the header info, read from rx_fh the parameters of the command
+    // Note that in_param_size is in the client's context
+    // The size comes off the wire: reject values smaller than the client
+    // header, which would otherwise wrap the unsigned subtraction below.
+    if (in_param_size < VTPM_COMMAND_HEADER_SIZE_CLT) {
+      vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command size (%d) is smaller than its header. Aborting...\n", in_param_size);
+      goto abort_command;
+    }
+    cmd_size = in_param_size - VTPM_COMMAND_HEADER_SIZE_CLT;
+    if (cmd_size > 0) {
+      in_param = (BYTE *) malloc(cmd_size);
+      if (in_param == NULL) {
+        vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+        vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to allocate %d bytes for command. Aborting...\n", cmd_size);
+        // The parameters are still unread; drop the fh so the stream resyncs.
+        close(*rx_fh);
+        *rx_fh = -1;
+        goto abort_command;
+      }
+      unowned_param = in_param; // ours to free until converted to command_buf
+      size_read = read( *rx_fh, in_param, cmd_size);
+      if (size_read > 0) {
+        for (i=0; i<size_read; i++)
+          vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
+
+      } else {
+        vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from cmd. Aborting... \n");
+        close(*rx_fh);
+        *rx_fh = -1;
+        goto abort_command;
+      }
+      vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
+      // size_read is positive here, so the unsigned comparison is exact even
+      // for cmd_size values above INT_MAX.
+      if ((UINT32) size_read < cmd_size) {
+        vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+        vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command read(%d) is shorter than header indicates(%d). Aborting...\n", size_read, cmd_size);
+        goto abort_command;
+      }
+    } else {
+      in_param = NULL;
+      vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+    }
+
+#ifndef VTPM_MULTI_VM
+    // It's illegal to receive a Dom0 command from a DMI.
+    if ((threadType != BE_LISTENER_THREAD) && (dmi == 0)) {
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to access dom0 commands from DMI interface. Aborting...\n");
+      goto abort_command;
+    }
+#endif
+
+    // Fetch information about the DMI issuing the request.
+    dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, &dmi);
+    if (dmi_res == NULL) {
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempted access to non-existent DMI in domain: %d. Aborting...\n", dmi);
+      goto abort_command;
+    }
+    if (!dmi_res->connected) {
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempted access to disconnected DMI in domain: %d. Aborting...\n", dmi);
+      goto abort_command;
+    }
+
+#ifndef VTPM_MULTI_VM
+    // Now that we know which DMI this is, we can set the tx_fh handle.
+    if (threadType != BE_LISTENER_THREAD)
+      tx_fh = &dmi_res->vtpm_tx_fh;
+    // else we set this before the while loop since it doesn't change.
+#endif
+
+    // Init the buffers used to handle the command and the response.
+    // From here on in_param is treated as belonging to command_buf, so it is
+    // no longer freed directly (it is still read for logging below).
+    convert_status = buffer_init_convert(command_buf, cmd_size, in_param);
+    unowned_param = NULL;
+    if ( (convert_status != TPM_SUCCESS) ||
+         (buffer_init(result_buf, 0, 0) != TPM_SUCCESS) ) {
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Failed to setup buffers. Aborting...\n");
+      goto abort_command;
+    }
+
+    // Dispatch it as either control or user request.
+    if (tag == VTPM_TAG_REQ) {
+      if (dmi_res->dmi_id == VTPM_CTL_DM){
+        switch (ord) {
+        case VTPM_ORD_OPEN:
+          status = VTPM_Handle_New_DMI(command_buf);
+          break;
+
+        case VTPM_ORD_CLOSE:
+          status = VTPM_Handle_Close_DMI(command_buf);
+          break;
+
+        case VTPM_ORD_DELETE:
+          status = VTPM_Handle_Delete_DMI(command_buf);
+          break;
+        default:
+          status = TPM_BAD_ORDINAL;
+        } // switch
+      } else {
+
+        switch (ord) {
+        case VTPM_ORD_SAVENVM:
+          status= VTPM_Handle_Save_NVM(dmi_res,
+                                       command_buf,
+                                       result_buf);
+          break;
+        case VTPM_ORD_LOADNVM:
+          status= VTPM_Handle_Load_NVM(dmi_res,
+                                       command_buf,
+                                       result_buf);
+          break;
+
+        case VTPM_ORD_TPMCOMMAND:
+          status= VTPM_Handle_TPM_Command(dmi_res,
+                                          command_buf,
+                                          result_buf);
+          break;
+
+        default:
+          status = TPM_BAD_ORDINAL;
+        } // switch
+      }
+    } else { // This is not a VTPM Command at all.
+             // This happens in two cases.
+             // MULTI_VM = A DMI illegally sent a raw TPM command to the manager
+             // Single VM:
+             //   BE_LISTENER_THREAD: Guest issued a TPM command.
+             //                       Send this to DMI and wait for response
+             //   DMI_LISTENER_THREAD: A DMI illegally sent a raw TPM command.
+
+#ifdef VTPM_MULTI_VM
+      // Raw TPM commands are not supported from the DMI
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to use unsupported direct access to TPM.\n");
+      vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "Bad Command. dmi:%d, tag:%d, size:%d, ord:%d, Params: ", dmi, tag, in_param_size, ord);
+      for (i=0; i<cmd_size; i++)
+        vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
+
+      vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+      status = TPM_FAIL;
+
+#else
+      // If BE_LISTENER_THREAD then this is a TPM command from a guest
+      if (threadType == BE_LISTENER_THREAD) {
+        // Dom0 can't talk to the BE, so this must be a broken FE/BE or badness
+        if (dmi == 0) {
+          vtpmhandlerlogerror(VTPM_LOG_VTPM, "Illegal use of TPM command from dom0\n");
+          status = TPM_FAIL;
+        } else {
+          vtpmhandlerloginfo(VTPM_LOG_VTPM, "Forwarding command to DMI.\n");
+
+          // open the dmi_res->guest_tx_fh to send command to DMI
+          if (dmi_res->guest_tx_fh < 0)
+            dmi_res->guest_tx_fh = open(dmi_res->guest_tx_fname, O_WRONLY | O_NONBLOCK);
+
+          // handle failed opens dmi_res->guest_tx_fh
+          if (dmi_res->guest_tx_fh < 0){
+            vtpmhandlerlogerror(VTPM_LOG_VTPM, "VTPM ERROR: Can't open outbound fh to dmi.\n");
+            status = TPM_IOERROR;
+            goto abort_with_error;
+          }
+
+          //Forward TPM CMD stamped with dmi_id to DMI for handling
+          if (cmd_size) {
+            dmi_cmd = (BYTE *) malloc(VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size);
+            if (dmi_cmd == NULL) {
+              vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to allocate command for DMI. Aborting...\n");
+              status = TPM_FAIL;
+              goto abort_with_error;
+            }
+            dmi_cmd_size = VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size;
+            memcpy(dmi_cmd, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
+            memcpy(dmi_cmd + VTPM_COMMAND_HEADER_SIZE_SRV, in_param, cmd_size);
+            size_write = write(dmi_res->guest_tx_fh, dmi_cmd, dmi_cmd_size);
+
+            if (size_write > 0) {
+              vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "SENT (DMI): 0x");
+              for (i=0; i<VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size; i++) {
+                vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", dmi_cmd[i]);
+              }
+              vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+            } else {
+              vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to DMI. Aborting... \n");
+              close(dmi_res->guest_tx_fh);
+              dmi_res->guest_tx_fh = -1;
+              free(dmi_cmd); // do not leak the staging copy on failure
+              status = TPM_IOERROR;
+              goto abort_with_error;
+            }
+            free(dmi_cmd);
+          } else {
+            dmi_cmd_size = VTPM_COMMAND_HEADER_SIZE_SRV;
+            size_write = write(dmi_res->guest_tx_fh, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV );
+            if (size_write > 0) {
+              for (i=0; i<VTPM_COMMAND_HEADER_SIZE_SRV; i++)
+                vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cmd_header[i]);
+
+              vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+            } else {
+              vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to DMI. Aborting... \n");
+              close(dmi_res->guest_tx_fh);
+              dmi_res->guest_tx_fh = -1;
+              status = TPM_IOERROR;
+              goto abort_with_error;
+            }
+          }
+
+          if (size_write != (int) dmi_cmd_size)
+            vtpmhandlerlogerror(VTPM_LOG_VTPM, "Could not write entire command to DMI (%d/%d)\n", size_write, dmi_cmd_size);
+          buffer_free(command_buf);
+
+          // Open vtpm_globals->guest_rx_fh to receive DMI response
+          if (vtpm_globals->guest_rx_fh < 0)
+            vtpm_globals->guest_rx_fh = open(GUEST_RX_FIFO, O_RDONLY);
+
+          // Handle open failures
+          if (vtpm_globals->guest_rx_fh < 0){
+            vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh to dmi.\n");
+            status = TPM_IOERROR;
+            goto abort_with_error;
+          }
+
+          // Read header for response to TPM command from DMI
+          size_read = read( vtpm_globals->guest_rx_fh, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
+          if (size_read > 0) {
+            vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "RECV (DMI): 0x");
+            for (i=0; i<size_read; i++)
+              vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cmd_header[i]);
+
+          } else {
+            vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from DMI. Aborting... \n");
+            close(vtpm_globals->guest_rx_fh);
+            vtpm_globals->guest_rx_fh = -1;
+            status = TPM_IOERROR;
+            goto abort_with_error;
+          }
+
+          if (size_read < (int) VTPM_COMMAND_HEADER_SIZE_SRV) {
+            //vtpmdeepsublog("\n");
+            vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command from DMI shorter than normal header. Aborting...\n");
+            status = TPM_IOERROR;
+            goto abort_with_error;
+          }
+
+          // Unpack response from DMI for TPM command
+          BSG_UnpackList(cmd_header, 4,
+                         BSG_TYPE_UINT32, &dmi,
+                         BSG_TPM_TAG, &tag,
+                         BSG_TYPE_UINT32, &in_param_size,
+                         BSG_TPM_COMMAND_CODE, &status );
+
+          // If response has parameters, read them.
+          // Note that in_param_size is in the client's context
+          // Same wrap-around guard as for the request header above.
+          if (in_param_size < VTPM_COMMAND_HEADER_SIZE_CLT) {
+            vtpmhandlerlogerror(VTPM_LOG_VTPM, "Response size (%d) from DMI is smaller than its header. Aborting...\n", in_param_size);
+            status = TPM_IOERROR;
+            goto abort_with_error;
+          }
+          cmd_size = in_param_size - VTPM_COMMAND_HEADER_SIZE_CLT;
+          if (cmd_size > 0) {
+            in_param = (BYTE *) malloc(cmd_size);
+            if (in_param == NULL) {
+              vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to allocate %d bytes for DMI response. Aborting...\n", cmd_size);
+              // The response body is still unread; drop the fh to resync.
+              close(vtpm_globals->guest_rx_fh);
+              vtpm_globals->guest_rx_fh = -1;
+              status = TPM_FAIL;
+              goto abort_with_error;
+            }
+            unowned_param = in_param; // ours to free until converted to result_buf
+            size_read = read( vtpm_globals->guest_rx_fh, in_param, cmd_size);
+            if (size_read > 0) {
+              for (i=0; i<size_read; i++)
+                vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
+
+            } else {
+              vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from BE. Aborting... \n");
+              close(vtpm_globals->guest_rx_fh);
+              vtpm_globals->guest_rx_fh = -1;
+              status = TPM_IOERROR;
+              goto abort_with_error;
+            }
+            vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
+
+            if ((UINT32) size_read < cmd_size) {
+              vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
+              vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command read(%d) from DMI is shorter than header indicates(%d). Aborting...\n", size_read, cmd_size);
+              status = TPM_IOERROR;
+              goto abort_with_error;
+            }
+          } else {
+            in_param = NULL;
+            vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n");
+          }
+
+          convert_status = buffer_init_convert(result_buf, cmd_size, in_param);
+          unowned_param = NULL; // treated as belonging to result_buf from here on
+          if (convert_status != TPM_SUCCESS) {
+            vtpmhandlerlogerror(VTPM_LOG_VTPM, "Failed to setup buffers. Aborting...\n");
+            status = TPM_FAIL;
+            goto abort_with_error;
+          }
+
+          vtpmhandlerloginfo(VTPM_LOG_VTPM, "Sending DMI's response to guest.\n");
+        } // end else for if (dmi==0)
+
+      } else { // This is a DMI listener thread. Thus this is from a DMI
+        // Raw TPM commands are not supported from the DMI
+        vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to use unsupported direct access to TPM.\n");
+        vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "Bad Command. dmi:%d, tag:%d, size:%d, ord:%d, Params: ", dmi, tag, in_param_size, ord);
+        for (i=0; i<cmd_size; i++)
+          vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]);
+
+        vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+
+        status = TPM_FAIL;
+      } // end else for if BE Listener
+#endif
+
+    } // end else for is VTPM Command
+
+    // This marks the beginning of preparing response to be sent out.
+    // Errors while handling responses jump here to reply with error messages
+    // NOTE: Currently there are no recoverable errors in multi-VM mode. If one
+    //       is added to the code, this ifdef should be removed.
+    //       Also note this is NOT referring to errors in commands, but rather
+    //       this is about I/O errors and such.
+#ifndef VTPM_MULTI_VM
+ abort_with_error:
+#endif
+
+    // Open tx_fh in preparation to send response back
+    if (*tx_fh < 0) {
+#ifdef VTPM_MULTI_VM
+      *tx_fh = open(VTPM_BE_DEV, O_RDWR);
+#else
+      if (threadType == BE_LISTENER_THREAD)
+ #ifdef DUMMY_BACKEND
+        *tx_fh = open("/tmp/out.fifo", O_RDWR);
+ #else
+        *tx_fh = open(VTPM_BE_DEV, O_RDWR);
+ #endif
+      else  // DMI Listener
+        *tx_fh = open(dmi_res->vtpm_tx_fname, O_WRONLY);
+#endif
+    }
+
+
+    // Handle failed open
+    if (*tx_fh < 0) {
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "VTPM ERROR: Can't open outbound fh.\n");
+#ifdef VTPM_MULTI_VM
+      return TPM_IOERROR;
+#else
+      *ret_value = TPM_IOERROR;
+      pthread_exit(ret_value);
+#endif
+    }
+
+    // Prepend VTPM header with destination DM stamped
+    out_param_size = buffer_len(result_buf);
+    out_message_size = VTPM_COMMAND_HEADER_SIZE_CLT + out_param_size;
+    out_message_size_full = VTPM_COMMAND_HEADER_SIZE_SRV + out_param_size;
+    out_message = (BYTE *) malloc (out_message_size_full);
+    if (out_message == NULL) {
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to allocate %d bytes for response. Aborting...\n", out_message_size_full);
+      goto abort_command;
+    }
+
+    BSG_PackList(out_message, 4,
+                 BSG_TYPE_UINT32, (BYTE *) &dmi,
+                 BSG_TPM_TAG, (BYTE *) &tag,
+                 BSG_TYPE_UINT32, (BYTE *) &out_message_size,
+                 BSG_TPM_RESULT, (BYTE *) &status);
+
+    if (buffer_len(result_buf) > 0)
+      memcpy(out_message + VTPM_COMMAND_HEADER_SIZE_SRV, result_buf->bytes, out_param_size);
+
+
+    //Note: Send message + dmi_id
+    size_write = write(*tx_fh, out_message, out_message_size_full );
+    if (size_write > 0) {
+      vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "SENT: 0x");
+      for (i=0; i < out_message_size_full; i++)
+        vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", out_message[i]);
+
+      vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
+    } else {
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to BE. Aborting... \n");
+      close(*tx_fh);
+      *tx_fh = -1;
+      free(out_message); // do not leak the response on a failed write
+      goto abort_command;
+    }
+    free(out_message);
+
+    if (size_write < (int)out_message_size_full) {
+      vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to write full command to BE (%d/%d)\n", size_write, out_message_size_full);
+      goto abort_command;
+    }
+
+    // On certain failures an error message cannot be sent.
+    // This marks the beginning of cleanup in preparation for the next command.
+  abort_command:
+    //free buffers
+    bzero(cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV);
+    // Parameters that were read but never handed to a buffer_t are still
+    // ours; once converted they are released through buffer_free() instead.
+    free(unowned_param);
+    unowned_param = NULL;
+    if (command_buf != result_buf)
+      buffer_free(result_buf);
+
+    buffer_free(command_buf);
+
+#ifndef VTPM_MULTI_VM
+    if (threadType != BE_LISTENER_THREAD) {
+#endif
+      if ( (vtpm_globals->DMI_table_dirty) &&
+           (VTPM_SaveService() != TPM_SUCCESS) ) {
+        vtpmhandlerlogerror(VTPM_LOG_VTPM, "ERROR: Unable to save manager data.\n");
+      }
+#ifndef VTPM_MULTI_VM
+    }
+#endif
+
+  } // End while(1)
+
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Bring up the VTPM manager: allocate the zeroed global state and the DMI
+// map, open the TCS and a context on it, seed the crypto layer from the TPM's
+// random source, start the OIAP session, load (or, failing that, create) the
+// service, load the storage key and register the Dom0 control DMI.
+// Returns the resulting TPM_RESULT; TPM_FAIL when an allocation fails.
+TPM_RESULT VTPM_Init_Service() {
+  TPM_RESULT status = TPM_FAIL;
+  BYTE *seed_bytes;
+  UINT32 seed_len;
+
+  // Zero-filled global state; status is still TPM_FAIL if this fails
+  vtpm_globals = (VTPM_GLOBALS *) calloc(1, sizeof(VTPM_GLOBALS));
+  if (vtpm_globals == NULL)
+    goto abort_egress;
+
+  // No file handles are open yet
+  vtpm_globals->be_fh = -1;
+#ifndef VTPM_MULTI_VM
+  vtpm_globals->vtpm_rx_fh = -1;
+  vtpm_globals->guest_rx_fh = -1;
+#endif
+
+  // Map from DMI id to its resources
+  vtpm_globals->dmi_map = create_hashtable(10, hashfunc32, equals32);
+  if (vtpm_globals->dmi_map == NULL)
+    goto abort_egress;
+
+  vtpm_globals->DMI_table_dirty = FALSE;
+
+  // New TCS object and a context on it for the service
+  vtpm_globals->manager_tcs_handle = 0;
+  TPMTRYRETURN(TCS_create());
+  TPMTRYRETURN( TCS_OpenContext(&vtpm_globals->manager_tcs_handle ) );
+
+  // Seed the crypto layer with random bytes obtained through the TCS
+  TPMTRYRETURN( TCSP_GetRandom(vtpm_globals->manager_tcs_handle,
+                               &seed_len,
+                               &seed_bytes));
+  Crypto_Init(seed_bytes, seed_len);
+  TPMTRYRETURN( TCS_FreeMemory (vtpm_globals->manager_tcs_handle, seed_bytes));
+
+  // OIAP session for the service's authorized commands
+  TPMTRYRETURN( VTSP_OIAP( vtpm_globals->manager_tcs_handle,
+                           &vtpm_globals->keyAuth) );
+  vtpm_globals->keyAuth.fContinueAuthSession = TRUE;
+
+  // When no saved service can be loaded, create a new one
+  if (VTPM_LoadService() != TPM_SUCCESS)
+    TPMTRYRETURN( VTPM_Create_Service() );
+
+  // Load the storage key under the SRK
+  TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle,
+                              TPM_SRK_KEYHANDLE,
+                              &vtpm_globals->storageKeyWrap,
+                              (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth,
+                              &vtpm_globals->storageKeyHandle,
+                              &vtpm_globals->keyAuth,
+                              &vtpm_globals->storageKey) );
+
+  // Entry for Dom0, used for control messages
+  TPMTRYRETURN( VTPM_Handle_New_DMI(NULL) );
+
+  goto egress;
+
+ abort_egress:
+ egress:
+
+  return(status);
+}
+ 
+// Shut the VTPM manager down: close every connected DMI, close the manager's
+// TCS context, save the manager data if the DMI table is dirty, then release
+// the DMI map, the backend file handle and the global state.
+// NOTE(review): VTPM_SaveService() runs after the manager TCS context has
+// been closed - confirm it does not need that context.
+void VTPM_Stop_Service() {
+  VTPM_DMI_RESOURCE *dmi_res;
+  struct hashtable_itr *dmi_itr;
+
+  // Close all the TCS contexts. TCS should evict keys based on this
+  if (hashtable_count(vtpm_globals->dmi_map) > 0) {
+    dmi_itr = hashtable_iterator(vtpm_globals->dmi_map);
+    if (dmi_itr != NULL) { // skip the walk if no iterator could be created
+      do {
+        dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr);
+        if (dmi_res->connected)
+          close_dmi( dmi_res ); // Not really interested in return code
+
+      } while (hashtable_iterator_advance(dmi_itr));
+      free (dmi_itr);
+    }
+  }
+
+
+  TCS_CloseContext(vtpm_globals->manager_tcs_handle);
+
+  if ( (vtpm_globals->DMI_table_dirty) &&
+       (VTPM_SaveService() != TPM_SUCCESS) )
+    vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n");
+
+  hashtable_destroy(vtpm_globals->dmi_map, 1);
+
+  // be_fh lives inside vtpm_globals, so it must be closed before the
+  // structure is freed (the previous order read freed memory).
+  if (vtpm_globals->be_fh >= 0)
+    close(vtpm_globals->be_fh);
+
+  free(vtpm_globals);
+  vtpm_globals = NULL; // do not leave a dangling global behind
+
+  Crypto_Exit();
+
+  vtpmloginfo(VTPM_LOG_VTPM, "VTPM Manager stopped.\n");
+}
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/manager/vtpmpriv.h
--- a/tools/vtpm_manager/manager/vtpmpriv.h     Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/manager/vtpmpriv.h     Thu Sep 22 17:42:01 2005
@@ -47,8 +47,8 @@
 
 #define STATE_FILE    "/var/vtpm/VTPM"
 #define DMI_NVM_FILE  "/var/vtpm/vtpm_dm_%d.data"
-#define VTPM_BE_DEV   "/dev/vtpm"
-#define VTPM_CTL_DM         0
+#define VTPM_BE_DEV   "/dev/vtpm0"
+#define VTPM_CTL_DM   0
 
 #ifndef VTPM_MUTLI_VM
  #include <sys/types.h>
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/Makefile
--- a/tools/vtpm_manager/tcs/Makefile   Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/Makefile   Thu Sep 22 17:42:01 2005
@@ -13,6 +13,7 @@
        rm -f *.a *.so *.o *.rpm $(DEP_FILES)
 
 mrproper: clean
+       rm -f *~
 
 $(BIN): $(OBJS)
        $(AR) rcs $(BIN) $(OBJS)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/contextmgr.c
--- a/tools/vtpm_manager/tcs/contextmgr.c       Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/contextmgr.c       Thu Sep 22 17:42:01 2005
@@ -43,6 +43,7 @@
 #include "tcs.h"
 #include "contextmgr.h"
 #include "log.h"
+#include "hashtable.h"
 
 BYTE* AddMemBlock(CONTEXT_HANDLE* pContextHandle, // in
                  int    BlockSize)  { // in
@@ -131,12 +132,14 @@
   return bFound;
 }
 
-BOOL AddHandleToList(CONTEXT_HANDLE* pContextHandle, // in
+BOOL AddHandleToList(TCS_CONTEXT_HANDLE hContext, // in
                     TPM_RESOURCE_TYPE type, // in
                     TPM_HANDLE    handle)  { // in
   HANDLE_LIST* pNewHandle = NULL;
-  
+
   vtpmloginfo(VTPM_LOG_TCS_DEEP, "Adding Handle to list\n");
+  CONTEXT_HANDLE* pContextHandle = LookupContext(hContext);
+
   if (pContextHandle == NULL)
     return 0;
   
@@ -154,11 +157,13 @@
   return 1;
 }
 
-BOOL DeleteHandleFromList(   CONTEXT_HANDLE*     pContextHandle, // in
+BOOL DeleteHandleFromList(   TCS_CONTEXT_HANDLE hContext, // in                
     
                              TPM_HANDLE          handle) { // in
     
+  CONTEXT_HANDLE* pContextHandle = LookupContext(hContext);
+
   HANDLE_LIST *pCurrentHandle = pContextHandle->pHandleList, 
-    *pLastHandle = pCurrentHandle;
+              *pLastHandle = pCurrentHandle;
   
   vtpmloginfo(VTPM_LOG_TCS_DEEP, "Deleting Handle from list\n");
   
@@ -202,10 +207,10 @@
     
     switch (pCurrentHandle->type) {
     case TPM_RT_KEY:
-      returncode = returncode && !TCSP_EvictKey((TCS_CONTEXT_HANDLE) 
pContextHandle, pCurrentHandle->handle);
+      returncode = returncode && !TCSP_EvictKey(pContextHandle->handle, 
pCurrentHandle->handle);
       break;
     case TPM_RT_AUTH:
-      returncode = returncode && !TCSP_TerminateHandle((TCS_CONTEXT_HANDLE) 
pContextHandle, pCurrentHandle->handle);
+      returncode = returncode && !TCSP_TerminateHandle(pContextHandle->handle, 
pCurrentHandle->handle);
       break;
     default:
       returncode = FALSE;
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/contextmgr.h
--- a/tools/vtpm_manager/tcs/contextmgr.h       Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/contextmgr.h       Thu Sep 22 17:42:01 2005
@@ -57,6 +57,7 @@
 } HANDLE_LIST;
 
 typedef struct context_handle {
+  TCS_CONTEXT_HANDLE handle;
   int nBlockCount;
   BLOCK* pTopBlock;
   HANDLE_LIST* pHandleList;
@@ -69,11 +70,11 @@
                     BYTE*           pTCPA_BYTEs); // in
 
 
-BOOL AddHandleToList(   CONTEXT_HANDLE*     pContextHandle, // in
+BOOL AddHandleToList(   TCS_CONTEXT_HANDLE hContext, // in     
                         TPM_RESOURCE_TYPE   type, // in
                         TPM_HANDLE          handle); // in
 
-BOOL DeleteHandleFromList(   CONTEXT_HANDLE*     pContextHandle, // in
+BOOL DeleteHandleFromList(   TCS_CONTEXT_HANDLE hContext, // in        
                              TPM_HANDLE          handle); // in
 
 BOOL FreeHandleList(    CONTEXT_HANDLE*     pContextHandle); // in
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/tcs.c
--- a/tools/vtpm_manager/tcs/tcs.c      Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/tcs.c      Thu Sep 22 17:42:01 2005
@@ -47,9 +47,10 @@
 #include "contextmgr.h"
 #include "tpmddl.h"
 #include "log.h"
+#include "hashtable.h"
+#include "hashtable_itr.h"
 
 // Static Global Vars for the TCS
-static BOOL TCS_m_bConnected;
 static int TCS_m_nCount = 0;
 
 #define TCPA_MAX_BUFFER_LENGTH 0x2000
@@ -57,6 +58,21 @@
 static BYTE InBuf [TCPA_MAX_BUFFER_LENGTH];
 static BYTE OutBuf[TCPA_MAX_BUFFER_LENGTH];
 
+struct hashtable *context_ht;
+
+// -------------------------- Hash table functions --------------------
+
+// Hash callback for context_ht: the UINT32 key value is used as the hash.
+static unsigned int hashfunc32(void *ky) {
+  const UINT32 *key32 = (const UINT32 *) ky;
+
+  return *key32;
+}
+
+// Key-equality callback for context_ht: compares the two UINT32 keys.
+static int equals32(void *k1, void *k2) {
+  const UINT32 lhs = *(UINT32 *) k1;
+  const UINT32 rhs = *(UINT32 *) k2;
+
+  return (lhs == rhs);
+}
+
+// Translate a TCS_CONTEXT_HANDLE into the CONTEXT_HANDLE stored for it in
+// context_ht. Returns whatever hashtable_search() yields for that key
+// (presumably NULL when the handle is unknown - callers check for NULL).
+CONTEXT_HANDLE *LookupContext( TCS_CONTEXT_HANDLE  hContext) {
+  void *entry = hashtable_search(context_ht, &hContext);
+
+  return (CONTEXT_HANDLE *) entry;
+}
 
 // 
---------------------------------------------------------------------------------
 // Initialization/Uninitialization SubComponent API
@@ -64,34 +80,50 @@
 TPM_RESULT TCS_create() {
   TDDL_RESULT hRes = TDDL_E_FAIL;
   TPM_RESULT result = TPM_FAIL;
-  TCS_m_bConnected = FALSE;
   
   if (TCS_m_nCount == 0) {
     vtpmloginfo(VTPM_LOG_TCS, "Constructing new TCS:\n");
     hRes = TDDL_Open();
-    
-    if (hRes == TDDL_SUCCESS) {
-      TCS_m_bConnected = TRUE;
+
+    context_ht = create_hashtable(10, hashfunc32, equals32);
+         
+    if ((hRes == TDDL_SUCCESS) && (context_ht != NULL)) {
       result = TPM_SUCCESS;
+      TCS_m_nCount++;
+    } else {
+      result = TPM_IOERROR;
+      hashtable_destroy(context_ht, 1);
     }
   } else
-    TCS_m_bConnected = TRUE;
-  
-  TCS_m_nCount++;
-  
+    TCS_m_nCount++;
+    
   return(result);
 }
 
 
 void TCS_destroy()
 {
-  // FIXME: Should iterate through all open contexts and close them.
   TCS_m_nCount--;
   
-  if (TCS_m_bConnected == TRUE && TCS_m_nCount == 0) {
+  if (TCS_m_nCount == 0) {
     vtpmloginfo(VTPM_LOG_TCS, "Destructing TCS:\n");
     TDDL_Close();
-    TCS_m_bConnected = FALSE;
+
+    struct hashtable_itr *context_itr;
+    TCS_CONTEXT_HANDLE  *hContext;
+    
+    // Close all the TCS contexts. TCS should evict keys based on this
+    if (hashtable_count(context_ht) > 0) {
+      context_itr = hashtable_iterator(context_ht);
+      do {
+        hContext = (TCS_CONTEXT_HANDLE *) hashtable_iterator_key(context_itr);
+       if (TCS_CloseContext(*hContext) != TPM_SUCCESS) 
+           vtpmlogerror(VTPM_LOG_TCS, "Failed to close context %d 
properly.\n", *hContext);
+      
+      } while (hashtable_iterator_advance(context_itr));
+      free(context_itr);
+    }
+    hashtable_destroy(context_ht, 1);
   }
   
 }
@@ -101,7 +133,7 @@
                         BYTE**              ppMemPtr) {// out
 
   TPM_RESULT returnCode = TPM_FAIL;
-  CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE*)hContext;
+  CONTEXT_HANDLE* pContextHandle = LookupContext(hContext);
   
   if (pContextHandle != NULL && ppMemPtr != NULL) {
     *ppMemPtr = (BYTE *)AddMemBlock(pContextHandle, MemSize);
@@ -114,7 +146,7 @@
 TPM_RESULT TCS_FreeMemory(  TCS_CONTEXT_HANDLE  hContext, // in
                             BYTE*               pMemory) { // in
   TPM_RESULT returnCode = TPM_FAIL;
-  CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE*)hContext;
+  CONTEXT_HANDLE* pContextHandle = LookupContext(hContext);
   
   if ( (pContextHandle != NULL && pMemory != NULL) &&
        (DeleteMemBlock(pContextHandle, pMemory) == TRUE) )
@@ -126,15 +158,15 @@
 
 TPM_RESULT TCS_OpenContext(TCS_CONTEXT_HANDLE* hContext) { // out
   TPM_RESULT returnCode = TPM_FAIL;
+  TCS_CONTEXT_HANDLE *newContext;
   
   vtpmloginfo(VTPM_LOG_TCS, "Calling TCS_OpenContext:\n");
   
   // hContext must point to a null memory context handle
   if(*hContext == HANDLE_NULL) {
-    CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE *)malloc(sizeof(CONTEXT_HANDLE));
+    CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE *) malloc(sizeof(CONTEXT_HANDLE));
     if (pContextHandle == NULL) 
       return TPM_SIZE;
-    
     
     // initialize to 0
     pContextHandle->nBlockCount = 0;
@@ -144,19 +176,32 @@
     // Create New Block
     AddMemBlock(pContextHandle, BLOCK_SIZE);
     
-    *hContext = (TCS_CONTEXT_HANDLE)pContextHandle;
-    returnCode = TPM_SUCCESS;
+    newContext = (TCS_CONTEXT_HANDLE *) malloc(sizeof(TCS_CONTEXT_HANDLE));
+    *newContext = (TCS_CONTEXT_HANDLE) (((uintptr_t) pContextHandle >> 2) & 0xffffffff);
+    
+    if (hashtable_search(context_ht, &newContext) !=NULL)
+       *newContext += 1;
+    
+    pContextHandle->handle = *newContext;
+    if (!hashtable_insert(context_ht, newContext, pContextHandle)) {
+        free(newContext);
+        free(pContextHandle);
+       returnCode = TPM_FAIL;
+    } else {
+       *hContext = *newContext;
+       returnCode = TPM_SUCCESS;
+    }
   }
   
   return(returnCode);
 }
 
 TPM_RESULT TCS_CloseContext(TCS_CONTEXT_HANDLE hContext) {// in
-  //FIXME: TCS SHOULD Track track failed auths and make sure
+  //FIXME: TCS SHOULD Track failed auths and make sure
   //we don't try and re-free them here.
   TPM_RESULT returnCode = TPM_FAIL;
   
-  CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE*)hContext;
+  CONTEXT_HANDLE* pContextHandle = LookupContext(hContext);
   
   if(pContextHandle != NULL) {
     // Print test info
@@ -171,6 +216,9 @@
       vtpmlogerror(VTPM_LOG_TCS, "Not all handles evicted from TPM.\n");
     
     // Release the TPM's resources
+    if (hashtable_remove(context_ht, &hContext) == NULL) 
+      vtpmlogerror(VTPM_LOG_TCS, "Not all handles evicted from TPM.\n");
+    
     free(pContextHandle);
     returnCode = TPM_SUCCESS;
   }
@@ -255,7 +303,7 @@
                     BSG_TYPE_UINT32, authHandle, 
                     BSG_TPM_NONCE, nonce0);
       
-      if (!AddHandleToList((CONTEXT_HANDLE *)hContext, TPM_RT_AUTH, *authHandle)) 
+      if (!AddHandleToList(hContext, TPM_RT_AUTH, *authHandle)) 
         vtpmlogerror(VTPM_LOG_TCS, "New AuthHandle not recorded\n");
       
       vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize);
@@ -321,7 +369,7 @@
                     BSG_TPM_NONCE, nonceEven, 
                     BSG_TPM_NONCE, nonceEvenOSAP);
       
-      if (!AddHandleToList((CONTEXT_HANDLE *)hContext, TPM_RT_AUTH, *authHandle)) {
+      if (!AddHandleToList(hContext, TPM_RT_AUTH, *authHandle)) {
            vtpmlogerror(VTPM_LOG_TCS, "New AuthHandle not recorded\n");
       }
       
@@ -498,7 +546,7 @@
                           BSG_TYPE_UINT32, &paramSize, 
                           BSG_TPM_COMMAND_CODE, &returnCode);
     
-    if (!DeleteHandleFromList((CONTEXT_HANDLE *)hContext, handle)) 
+    if (!DeleteHandleFromList(hContext, handle)) 
       vtpmlogerror(VTPM_LOG_TCS, "KeyHandle not removed from list\n");
        
     
@@ -897,7 +945,7 @@
                      phKeyTCSI);
       unpackAuth(pAuth, OutBuf+i);
       
-      if (!AddHandleToList((CONTEXT_HANDLE *)hContext, TPM_RT_KEY, *phKeyTCSI)) {
+      if (!AddHandleToList(hContext, TPM_RT_KEY, *phKeyTCSI)) {
         vtpmlogerror(VTPM_LOG_TCS, "New KeyHandle not recorded\n");
       }
       
@@ -942,7 +990,7 @@
                           BSG_TYPE_UINT32, &paramSize, 
                           BSG_TPM_COMMAND_CODE, &returnCode);
     
-    if (!DeleteHandleFromList((CONTEXT_HANDLE *)hContext, hKey)) {
+    if (!DeleteHandleFromList(hContext, hKey)) {
       vtpmlogerror(VTPM_LOG_TCS, "KeyHandle not removed from list\n");
     }   
     
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/tcs.h
--- a/tools/vtpm_manager/tcs/tcs.h      Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/tcs.h      Thu Sep 22 17:42:01 2005
@@ -41,6 +41,7 @@
 #define __TCS_H__
 
 #include "tcg.h"
+#include "contextmgr.h"
 #include "buffer.h"
 
 #define HANDLE_NULL 0
@@ -235,4 +236,7 @@
                                UINT32 *outDataSize,// in/out
                                BYTE *outData);     // out
 
+///////////// Private Functions ////////////////////
+CONTEXT_HANDLE* LookupContext( TCS_CONTEXT_HANDLE hContext);
+
 #endif //TCS_H
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/tcs/transmit.c
--- a/tools/vtpm_manager/tcs/transmit.c Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/tcs/transmit.c Thu Sep 22 17:42:01 2005
@@ -69,7 +69,7 @@
     ERRORDIE (TPM_IOERROR);
   }
   else if ((TDDL_UINT32) size < insize) {
-    vtpmlogerror(VTPM_LOG_TXDATA, "Wrote %d instead of %d bytes!\n", size, insize);
+    vtpmlogerror(VTPM_LOG_TXDATA, "Wrote %d instead of %d bytes!\n", (int) size, insize);
     // ... ?
   }
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/util/Makefile
--- a/tools/vtpm_manager/util/Makefile  Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/util/Makefile  Thu Sep 22 17:42:01 2005
@@ -13,6 +13,7 @@
        rm -f *.a *.so *.o *.rpm $(DEP_FILES)
 
 mrproper: clean
+       rm -f *~
 
 $(BIN): $(OBJS)
        $(AR) rcs $(BIN) $(OBJS)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm_manager/util/tcg.h
--- a/tools/vtpm_manager/util/tcg.h     Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm_manager/util/tcg.h     Thu Sep 22 17:42:01 2005
@@ -453,14 +453,14 @@
 // DEPENDS: local var 'status' of type TPM_RESULT
 // DEPENDS: label 'abort_egress' which cleans up and returns the status
 #define ERRORDIE(s) do { status = s; \
-                         fprintf (stderr, "*** ERRORDIE in %s, line %i\n", __func__, __LINE__); \
+                         fprintf (stderr, "*** ERRORDIE in %s at %s: %i\n", __func__, __FILE__, __LINE__); \
                          goto abort_egress; } \
                     while (0)
 
 // ASSUME: the return value used after the abort_egress label has been set
 // already (eg. the 'status' local var)
 #define STATUSCHECK(s) if (s != TPM_SUCCESS) { \
-                            fprintf (stderr, "*** ERR in %s, line %i\n", __func__, __LINE__); \
+                            fprintf (stderr, "*** ERR in %s at %s:%i\n", __func__, __FILE__, __LINE__); \
                             goto abort_egress; \
                         }
 
@@ -475,7 +475,7 @@
 // Try command c. If it fails, print error message, set status to actual return code. Goto shame
 #define TPMTRYRETURN(c) do { status = c; \
                              if (status != TPM_SUCCESS) { \
-                               printf("ERROR in %s:%i code: %s.\n", __func__, __LINE__, tpm_get_error_name(status)); \
+                               printf("ERROR in %s at %s:%i code: %s.\n", __func__, __FILE__, __LINE__, tpm_get_error_name(status)); \
                                goto abort_egress; \
                              } \
                         } while(0)    
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/01simple.test
--- a/tools/xenstore/testsuite/01simple.test    Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/01simple.test    Thu Sep 22 17:42:01 2005
@@ -1,4 +1,4 @@
 # Create an entry, read it.
-write /test create contents
+write /test contents
 expect contents
 read /test
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/02directory.test
--- a/tools/xenstore/testsuite/02directory.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/02directory.test Thu Sep 22 17:42:01 2005
@@ -3,7 +3,7 @@
 dir /
 
 # Create a file.
-write /test create contents
+write /test contents
 
 # Directory shows it.
 expect test
@@ -21,16 +21,14 @@
 dir /dir
 
 # Create a file, check it exists.
-write /dir/test2 create contents2
+write /dir/test2 contents2
 expect test2
 dir /dir
 expect contents2
 read /dir/test2
 
-# Creating dir over the top should fail.
-expect mkdir failed: File exists
+# Creating dir over the top should succeed.
 mkdir /dir
-expect mkdir failed: File exists
 mkdir /dir/test2
 
 # Mkdir implicitly creates directories.
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/03write.test
--- a/tools/xenstore/testsuite/03write.test     Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/03write.test     Thu Sep 22 17:42:01 2005
@@ -1,31 +1,20 @@
-# Write without create fails.
-expect write failed: No such file or directory
-write /test none contents
-
-# Exclusive write succeeds
-write /test excl contents
+# Write succeeds
+write /test contents
 expect contents
 read /test
 
-# Exclusive write fails to overwrite.
-expect write failed: File exists
-write /test excl contents
-
-# Non-exclusive overwrite succeeds.
-write /test none contents2
+# Overwrite succeeds.
+write /test contents2
 expect contents2
-read /test
-write /test create contents3
-expect contents3
 read /test
 
 # Write should implicitly create directories
-write /dir/test create contents
+write /dir/test contents
 expect test
 dir /dir
 expect contents
 read /dir/test
-write /dir/1/2/3/4 excl contents4
+write /dir/1/2/3/4 contents4
 expect test
 expect 1
 dir /dir
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/04rm.test
--- a/tools/xenstore/testsuite/04rm.test        Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/04rm.test        Thu Sep 22 17:42:01 2005
@@ -1,11 +1,10 @@
-# Remove non-existant fails.
-expect rm failed: No such file or directory
+# Remove non-existant is OK, as long as parent exists
 rm /test
 expect rm failed: No such file or directory
 rm /dir/test
 
 # Create file and remove it
-write /test excl contents
+write /test contents
 rm /test
 
 # Create directory and remove it.
@@ -14,5 +13,5 @@
 
 # Create directory, create file, remove all.
 mkdir /dir
-write /dir/test excl contents
+write /dir/test contents
 rm /dir
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/05filepermissions.test
--- a/tools/xenstore/testsuite/05filepermissions.test   Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/05filepermissions.test   Thu Sep 22 17:42:01 2005
@@ -5,7 +5,7 @@
 getperm /dir/test
 
 # Create file: inherits from root (0 READ)
-write /test excl contents
+write /test contents
 expect 0 READ
 getperm /test
 setid 1
@@ -14,7 +14,7 @@
 expect contents
 read /test
 expect write failed: Permission denied
-write /test none contents
+write /test contents
 
 # Take away read access to file.
 setid 0
@@ -25,7 +25,7 @@
 expect read failed: Permission denied
 read /test
 expect write failed: Permission denied
-write /test none contents
+write /test contents
 
 # Grant everyone write access to file.
 setid 0
@@ -35,7 +35,7 @@
 getperm /test
 expect read failed: Permission denied
 read /test
-write /test none contents2
+write /test contents2
 setid 0
 expect contents2
 read /test
@@ -47,7 +47,7 @@
 getperm /test
 expect contents2
 read /test
-write /test none contents3
+write /test contents3
 expect contents3
 read /test
 
@@ -59,7 +59,7 @@
 getperm /test
 expect contents3
 read /test
-write /test none contents4
+write /test contents4
 
 # User 2 can do nothing.
 setid 2
@@ -70,7 +70,7 @@
 expect read failed: Permission denied
 read /test
 expect write failed: Permission denied
-write /test none contents4
+write /test contents4
 
 # Tools can always access things.
 setid 0
@@ -78,4 +78,4 @@
 getperm /test
 expect contents4
 read /test
-write /test none contents5
+write /test contents5
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/06dirpermissions.test
--- a/tools/xenstore/testsuite/06dirpermissions.test    Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/06dirpermissions.test    Thu Sep 22 17:42:01 2005
@@ -11,7 +11,7 @@
 getperm /dir
 dir /dir
 expect write failed: Permission denied
-write /dir/test create contents2
+write /dir/test contents2
 
 # Remove everyone's read access to directoy.
 setid 0
@@ -22,7 +22,7 @@
 expect read failed: Permission denied
 read /dir/test create contents2
 expect write failed: Permission denied
-write /dir/test create contents2
+write /dir/test contents2
 
 # Grant everyone write access to directory.
 setid 0
@@ -32,7 +32,7 @@
 getperm /dir
 expect dir failed: Permission denied
 dir /dir
-write /dir/test create contents
+write /dir/test contents
 setid 0
 expect 1 WRITE
 getperm /dir/test
@@ -47,7 +47,7 @@
 getperm /dir
 expect test
 dir /dir
-write /dir/test2 create contents
+write /dir/test2 contents
 expect contents
 read /dir/test2
 setperm /dir/test2 1 NONE
@@ -60,7 +60,7 @@
 expect test
 expect test2
 dir /dir
-write /dir/test3 create contents
+write /dir/test3 contents
 
 # User 2 can do nothing.  Can't even tell if file exists.
 setid 2
@@ -79,17 +79,9 @@
 expect read failed: Permission denied
 read /dir/test4
 expect write failed: Permission denied
-write /dir/test none contents
+write /dir/test contents
 expect write failed: Permission denied
-write /dir/test create contents
-expect write failed: Permission denied
-write /dir/test excl contents
-expect write failed: Permission denied
-write /dir/test4 none contents
-expect write failed: Permission denied
-write /dir/test4 create contents
-expect write failed: Permission denied
-write /dir/test4 excl contents
+write /dir/test4 contents
 
 # Tools can always access things.
 setid 0
@@ -99,13 +91,13 @@
 expect test2
 expect test3
 dir /dir
-write /dir/test4 create contents
+write /dir/test4 contents
 
 # Inherited by child.
 mkdir /dir/subdir
 expect 1 NONE
 getperm /dir/subdir
-write /dir/subfile excl contents
+write /dir/subfile contents
 expect 1 NONE
 getperm /dir/subfile
 
@@ -114,12 +106,12 @@
 expect 2 READ/WRITE
 getperm /dir/subdir
 setid 3
-write /dir/subdir/subfile excl contents
+write /dir/subdir/subfile contents
 expect 3 READ/WRITE
 getperm /dir/subdir/subfile
 
 # Inheritence works through multiple directories, too.
-write /dir/subdir/1/2/3/4 excl contents
+write /dir/subdir/1/2/3/4 contents
 expect 3 READ/WRITE
 getperm /dir/subdir/1/2/3/4
 mkdir /dir/subdir/a/b/c/d
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/07watch.test
--- a/tools/xenstore/testsuite/07watch.test     Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/07watch.test     Thu Sep 22 17:42:01 2005
@@ -1,8 +1,8 @@
 # Watch something, write to it, check watch has fired.
-write /test create contents
+write /test contents
 
 1 watch /test token
-2 write /test create contents2
+2 write /test contents2
 expect 1:/test:token
 1 waitwatch
 1 ackwatch token
@@ -44,7 +44,7 @@
 
 # ignore watches while doing commands, should work.
 watch /dir token
-1 write /dir/test create contents
+1 write /dir/test contents
 expect contents
 read /dir/test
 expect /dir/test:token
@@ -56,7 +56,7 @@
 1 watch /dir token1
 3 watch /dir token3
 2 watch /dir token2
-write /dir/test create contents
+write /dir/test contents
 expect 3:/dir/test:token3
 3 waitwatch
 3 ackwatch token3
@@ -73,7 +73,7 @@
 # If one dies (without acking), the other should still get ack.
 1 watch /dir token1
 2 watch /dir token2
-write /dir/test create contents
+write /dir/test contents
 expect 2:/dir/test:token2
 2 waitwatch
 2 close
@@ -85,7 +85,7 @@
 # If one dies (without reading at all), the other should still get ack.
 1 watch /dir token1
 2 watch /dir token2
-write /dir/test create contents
+write /dir/test contents
 2 close
 expect 1:/dir/test:token1
 1 waitwatch
@@ -97,7 +97,7 @@
 1 watch /dir token1
 1 unwatch /dir token1
 1 watch /dir token2
-2 write /dir/test2 create contents
+2 write /dir/test2 contents
 expect 1:/dir/test2:token2
 1 waitwatch
 1 unwatch /dir token2
@@ -107,7 +107,7 @@
 # unwatch while watch pending.  Other watcher still gets the event.
 1 watch /dir token1
 2 watch /dir token2
-write /dir/test create contents
+write /dir/test contents
 2 unwatch /dir token2
 expect 1:/dir/test:token1
 1 waitwatch
@@ -117,17 +117,17 @@
 
 # unwatch while watch pending.  Should clear this so we get next event.
 1 watch /dir token1
-write /dir/test create contents
+write /dir/test contents
 1 unwatch /dir token1
 1 watch /dir/test token2
-write /dir/test none contents2
+write /dir/test contents2
 expect 1:/dir/test:token2
 1 waitwatch
 1 ackwatch token2
 
 # check we only get notified once.
 1 watch /test token
-2 write /test create contents2
+2 write /test contents2
 expect 1:/test:token
 1 waitwatch
 1 ackwatch token
@@ -137,9 +137,9 @@
 
 # watches are queued in order.
 1 watch / token
-2 write /test1 create contents
-2 write /test2 create contents
-2 write /test3 create contents
+2 write /test1 contents
+2 write /test2 contents
+2 write /test3 contents
 expect 1:/test1:token
 1 waitwatch
 1 ackwatch token
@@ -153,8 +153,8 @@
 
 # Creation of subpaths should be covered correctly.
 1 watch / token
-2 write /test/subnode create contents2
-2 write /test/subnode/subnode create contents2
+2 write /test/subnode contents2
+2 write /test/subnode/subnode contents2
 expect 1:/test/subnode:token
 1 waitwatch
 1 ackwatch token
@@ -167,11 +167,13 @@
 
 # Watch event must have happened before we registered interest.
 1 watch / token
-2 write /test/subnode create contents2
-1 watch / token2 0
+2 write /test/subnode contents2
+1 watchnoack / token2 0
 expect 1:/test/subnode:token
 1 waitwatch
 1 ackwatch token
+expect 1:/:token2
+1 waitwatch
 expect 1: waitwatch failed: Connection timed out
 1 waitwatch
 1 close
@@ -185,7 +187,7 @@
 
 # Watch should not double-send after we ack, even if we did something in between.
 1 watch /test2 token
-2 write /test2/foo create contents2
+2 write /test2/foo contents2
 expect 1:/test2/foo:token
 1 waitwatch
 expect 1:contents2
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/08transaction.slowtest
--- a/tools/xenstore/testsuite/08transaction.slowtest   Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/08transaction.slowtest   Thu Sep 22 17:42:01 2005
@@ -1,7 +1,7 @@
 # Test transaction timeouts.  Take a second each.
 
 mkdir /test
-write /test/entry1 create contents
+write /test/entry1 contents
 
 # Transactions can take as long as the want...
 start /test
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/08transaction.test
--- a/tools/xenstore/testsuite/08transaction.test       Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/08transaction.test       Thu Sep 22 17:42:01 2005
@@ -4,7 +4,7 @@
 
 # Simple transaction: create a file inside transaction.
 1 start /test
-1 write /test/entry1 create contents
+1 write /test/entry1 contents
 2 dir /test
 expect 1:entry1
 1 dir /test
@@ -16,14 +16,14 @@
 
 # Create a file and abort transaction.
 1 start /test
-1 write /test/entry1 create contents
+1 write /test/entry1 contents
 2 dir /test
 expect 1:entry1
 1 dir /test
 1 abort
 2 dir /test
 
-write /test/entry1 create contents
+write /test/entry1 contents
 # Delete in transaction, commit
 1 start /test
 1 rm /test/entry1
@@ -34,7 +34,7 @@
 2 dir /test
 
 # Delete in transaction, abort.
-write /test/entry1 create contents
+write /test/entry1 contents
 1 start /test
 1 rm /test/entry1
 expect 2:entry1
@@ -84,8 +84,8 @@
 # Multiple events from single transaction don't trigger assert
 1 watch /test token
 2 start /test
-2 write /test/1 create contents
-2 write /test/2 create contents
+2 write /test/1 contents
+2 write /test/2 contents
 2 commit
 expect 1:/test/1:token
 1 waitwatch
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/09domain.test
--- a/tools/xenstore/testsuite/09domain.test    Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/09domain.test    Thu Sep 22 17:42:01 2005
@@ -3,7 +3,7 @@
 # Create a domain, write an entry.
 expect handle is 1
 introduce 1 100 7 /my/home
-1 write /entry1 create contents
+1 write /entry1 contents
 expect entry1
 expect tool
 dir /
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/10domain-homedir.test
--- a/tools/xenstore/testsuite/10domain-homedir.test    Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/10domain-homedir.test    Thu Sep 22 17:42:01 2005
@@ -4,7 +4,7 @@
 mkdir /home
 expect handle is 1
 introduce 1 100 7 /home
-1 write entry1 create contents
+1 write entry1 contents
 expect contents
 read /home/entry1
 expect entry1
@@ -13,7 +13,7 @@
 # Place a watch using a relative path: expect relative answer.
 1 mkdir foo
 1 watch foo token
-write /home/foo/bar create contents
+write /home/foo/bar contents
 expect 1:foo/bar:token
 1 waitwatch
 1 ackwatch token
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/11domain-watch.test
--- a/tools/xenstore/testsuite/11domain-watch.test      Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/11domain-watch.test      Thu Sep 22 17:42:01 2005
@@ -1,13 +1,13 @@
 # Test watching from a domain.
 
 # Watch something, write to it, check watch has fired.
-write /test create contents
+write /test contents
 mkdir /dir
 
 expect handle is 1
 introduce 1 100 7 /my/home
 1 watch /test token
-write /test create contents2
+write /test contents2
 expect 1:/test:token
 1 waitwatch
 1 ackwatch token
@@ -19,10 +19,10 @@
 expect handle is 1
 introduce 1 100 7 /my/home
 1 watch /dir token
-write /dir/test create contents
-1 write /dir/test2 create contents2
-1 write /dir/test3 create contents3
-1 write /dir/test4 create contents4
+write /dir/test contents
+1 write /dir/test2 contents2
+1 write /dir/test3 contents3
+1 write /dir/test4 contents4
 expect 1:/dir/test:token
 1 waitwatch
 1 ackwatch token
@@ -35,7 +35,7 @@
 1 watch /dir token1
 1 unwatch /dir token1
 1 watch /dir token2
-write /dir/test2 create contents
+write /dir/test2 contents
 expect 1:/dir/test2:token2
 1 waitwatch
 1 unwatch /dir token2
@@ -46,7 +46,7 @@
 expect handle is 1
 introduce 1 100 7 /my/home
 1 watch /dir token1
-write /dir/test2 create contents
+write /dir/test2 contents
 1 unwatch /dir token1
 release 1
 1 close
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/12readonly.test
--- a/tools/xenstore/testsuite/12readonly.test  Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/12readonly.test  Thu Sep 22 17:42:01 2005
@@ -1,6 +1,6 @@
 # Test that read only connection can't alter store.
 
-write /test create contents
+write /test contents
 
 readonly
 expect test
@@ -20,9 +20,9 @@
 
 # These don't work
 expect write failed: Read-only file system
-write /test2 create contents
+write /test2 contents
 expect write failed: Read-only file system
-write /test create contents
+write /test contents
 expect setperm failed: Read-only file system
 setperm /test 100 NONE
 expect setperm failed: Read-only file system
@@ -35,7 +35,7 @@
 # Check that watches work like normal.
 watch / token
 1 readwrite
-1 write /test create contents
+1 write /test contents
 expect /test:token
 waitwatch
 ackwatch token
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/13watch-ack.test
--- a/tools/xenstore/testsuite/13watch-ack.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/13watch-ack.test Thu Sep 22 17:42:01 2005
@@ -13,10 +13,10 @@
 1 watch /test/1 token1
 1 watch /test/2 token2
 1 watch /test/3 token3
-2 write /test/2 create contents2
+2 write /test/2 contents2
 expect 1:/test/2:token2
 1 waitwatch
-3 write /test/1 create contents1
-4 write /test/3 create contents3
+3 write /test/1 contents1
+4 write /test/3 contents3
 1 ackwatch token2
 1 close
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/14complexperms.test
--- a/tools/xenstore/testsuite/14complexperms.test      Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/14complexperms.test      Thu Sep 22 17:42:01 2005
@@ -12,13 +12,7 @@
 expect *Permission denied
 read /dir/file 
 expect *Permission denied
-write /dir/file none value 
-expect *Permission denied
-write /dir/file create value 
-expect *Permission denied
-write /dir/file excl value 
-expect write failed: Invalid argument
-write /dir/file crap value 
+write /dir/file value 
 expect *Permission denied
 mkdir /dir/file 
 expect *Permission denied
@@ -29,8 +23,9 @@
 getperm /dir/file 
 expect *Permission denied
 setperm /dir/file 0 NONE 
-watch /dir/file token 
-1 write /dir/file create contents
+# We get no watch event when there's no permission.  It's a corner case.
+watchnoack /dir/file token 
+1 write /dir/file contents
 1 rm /dir/file
 expect waitwatch failed: Connection timed out
 waitwatch
@@ -50,7 +45,7 @@
 
 # Now it exists
 setid 0
-write /dir/file create contents
+write /dir/file contents
 
 setid 1
 expect *Permission denied
@@ -58,13 +53,7 @@
 expect *Permission denied
 read /dir/file 
 expect *Permission denied
-write /dir/file none value 
-expect *Permission denied
-write /dir/file create value 
-expect *Permission denied
-write /dir/file excl value 
-expect write failed: Invalid argument
-write /dir/file crap value 
+write /dir/file value 
 expect *Permission denied
 mkdir /dir/file 
 expect *Permission denied
@@ -75,8 +64,8 @@
 getperm /dir/file 
 expect *Permission denied
 setperm /dir/file 0 NONE 
-watch /dir/file token 
-1 write /dir/file create contents
+watchnoack /dir/file token 
+1 write /dir/file contents
 1 rm /dir/file
 expect waitwatch failed: Connection timed out
 waitwatch
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/15nowait.test
--- a/tools/xenstore/testsuite/15nowait.test    Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/15nowait.test    Thu Sep 22 17:42:01 2005
@@ -1,10 +1,10 @@
 # If we don't wait for an ack, we can crash daemon as it never expects to be
 # sending out two replies on top of each other.
-noackwrite /1 create 1
-noackwrite /2 create 2
-noackwrite /3 create 3
-noackwrite /4 create 4
-noackwrite /5 create 5
+noackwrite /1 1
+noackwrite /2 2
+noackwrite /3 3
+noackwrite /4 4
+noackwrite /5 5
 readack
 readack
 readack
@@ -13,11 +13,11 @@
 
 expect handle is 1
 introduce 1 100 7 /my/home
-1 noackwrite /1 create 1
-1 noackwrite /2 create 2
-1 noackwrite /3 create 3
-1 noackwrite /4 create 4
-1 noackwrite /5 create 5
+1 noackwrite /1 1
+1 noackwrite /2 2
+1 noackwrite /3 3
+1 noackwrite /4 4
+1 noackwrite /5 5
 1 readack
 1 readack
 1 readack
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/testsuite/16block-watch-crash.test
--- a/tools/xenstore/testsuite/16block-watch-crash.test Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/testsuite/16block-watch-crash.test Thu Sep 22 17:42:01 2005
@@ -4,8 +4,8 @@
 watch /test token
 1 start /test
 # This will block on above
-noackwrite /test/entry create contents
-1 write /test/entry2 create contents
+noackwrite /test/entry contents
+1 write /test/entry2 contents
 1 commit
 readack
 expect /test/entry2:token
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xenstore_client.c
--- a/tools/xenstore/xenstore_client.c  Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xenstore_client.c  Thu Sep 22 17:42:01 2005
@@ -102,7 +102,7 @@
        optind++;
 #elif defined(CLIENT_write)
        success = xs_write(xsh, argv[optind], argv[optind + 1],
-                          strlen(argv[optind + 1]), O_CREAT);
+                          strlen(argv[optind + 1]));
        if (!success) {
            warnx("could not write path %s", argv[optind]);
            ret = 1;
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c   Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xenstored_core.c   Thu Sep 22 17:42:01 2005
@@ -961,14 +961,19 @@
        return dir;
 }
 
-/* path, flags, data... */
+static bool node_exists(struct connection *conn, const char *node)
+{
+       struct stat st;
+
+       return lstat(node_dir(conn->transaction, node), &st) == 0;
+}
+
+/* path, data... */
 static void do_write(struct connection *conn, struct buffered_data *in)
 {
        unsigned int offset, datalen;
-       char *vec[2];
+       char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
        char *node, *tmppath;
-       enum xs_perm_type mode;
-       struct stat st;
 
        /* Extra "strings" can be created by binary data. */
        if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
@@ -985,37 +990,20 @@
        if (transaction_block(conn, node))
                return;
 
-       offset = strlen(vec[0]) + strlen(vec[1]) + 2;
+       offset = strlen(vec[0]) + 1;
        datalen = in->used - offset;
 
-       if (streq(vec[1], XS_WRITE_NONE))
-               mode = XS_PERM_WRITE;
-       else if (streq(vec[1], XS_WRITE_CREATE))
-               mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
-       else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
-               mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
-       else {
-               send_error(conn, EINVAL);
-               return;
-       }
-
-       if (!check_node_perms(conn, node, mode)) {
+       if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
                send_error(conn, errno);
                return;
        }
 
-       if (lstat(node_dir(conn->transaction, node), &st) != 0) {
+       if (!node_exists(conn, node)) {
                char *dir;
 
                /* Does not exist... */
                if (errno != ENOENT) {
                        send_error(conn, errno);
-                       return;
-               }
-
-               /* Not going to create it? */
-               if (streq(vec[1], XS_WRITE_NONE)) {
-                       send_error(conn, ENOENT);
                        return;
                }
 
@@ -1027,11 +1015,6 @@
                
        } else {
                /* Exists... */
-               if (streq(vec[1], XS_WRITE_CREATE_EXCL)) {
-                       send_error(conn, EEXIST);
-                       return;
-               }
-
                tmppath = tempfile(node_datafile(conn->transaction, node),
                                   in->buffer + offset, datalen);
                if (!tmppath) {
@@ -1050,7 +1033,6 @@
 static void do_mkdir(struct connection *conn, const char *node)
 {
        char *dir;
-       struct stat st;
 
        node = canonicalize(conn, node);
        if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
@@ -1066,9 +1048,9 @@
        if (transaction_block(conn, node))
                return;
 
-       /* Must not already exist. */
-       if (lstat(node_dir(conn->transaction, node), &st) == 0) {
-               send_error(conn, EEXIST);
+       /* If it already exists, fine. */
+       if (node_exists(conn, node)) {
+               send_ack(conn, XS_MKDIR);
                return;
        }
 
@@ -1089,6 +1071,15 @@
 
        node = canonicalize(conn, node);
        if (!check_node_perms(conn, node, XS_PERM_WRITE)) {
+               /* Didn't exist already?  Fine, if parent exists. */
+               if (errno == ENOENT) {
+                       if (node_exists(conn, get_parent(node))) {
+                               send_ack(conn, XS_RM);
+                               return;
+                       }
+                       /* Restore errno, just in case. */
+                       errno = ENOENT;
+               }
                send_error(conn, errno);
                return;
        }
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xenstored_watch.c
--- a/tools/xenstore/xenstored_watch.c  Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xenstored_watch.c  Thu Sep 22 17:42:01 2005
@@ -236,6 +236,9 @@
        trace_create(watch, "watch");
        talloc_set_destructor(watch, destroy_watch);
        send_ack(conn, XS_WATCH);
+
+       /* We fire once up front: simplifies clients and restart. */
+       add_event(conn, watch, watch->node);
 }
 
 void do_watch_ack(struct connection *conn, const char *token)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs.c
--- a/tools/xenstore/xs.c       Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs.c       Thu Sep 22 17:42:01 2005
@@ -326,38 +326,23 @@
 }
 
 /* Write the value of a single file.
- * Returns false on failure.  createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ * Returns false on failure.
  */
 bool xs_write(struct xs_handle *h, const char *path,
-             const void *data, unsigned int len, int createflags)
-{
-       const char *flags;
-       struct iovec iovec[3];
-
-       /* Format: Flags (as string), path, data. */
-       if (createflags == 0)
-               flags = XS_WRITE_NONE;
-       else if (createflags == O_CREAT)
-               flags = XS_WRITE_CREATE;
-       else if (createflags == (O_CREAT|O_EXCL))
-               flags = XS_WRITE_CREATE_EXCL;
-       else {
-               errno = EINVAL;
-               return false;
-       }
+             const void *data, unsigned int len)
+{
+       struct iovec iovec[2];
 
        iovec[0].iov_base = (void *)path;
        iovec[0].iov_len = strlen(path) + 1;
-       iovec[1].iov_base = (void *)flags;
-       iovec[1].iov_len = strlen(flags) + 1;
-       iovec[2].iov_base = (void *)data;
-       iovec[2].iov_len = len;
+       iovec[1].iov_base = (void *)data;
+       iovec[1].iov_len = len;
 
        return xs_bool(xs_talkv(h, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
 }
 
 /* Create a new directory.
- * Returns false on failure.
+ * Returns false on failure, or success if it already exists.
  */
 bool xs_mkdir(struct xs_handle *h, const char *path)
 {
@@ -365,7 +350,7 @@
 }
 
 /* Destroy a file or directory (directories must be empty).
- * Returns false on failure.
+ * Returns false on failure, or success if it doesn't exist.
  */
 bool xs_rm(struct xs_handle *h, const char *path)
 {
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs.h
--- a/tools/xenstore/xs.h       Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs.h       Thu Sep 22 17:42:01 2005
@@ -53,18 +53,18 @@
 void *xs_read(struct xs_handle *h, const char *path, unsigned int *len);
 
 /* Write the value of a single file.
- * Returns false on failure.  createflags can be 0, O_CREAT, or O_CREAT|O_EXCL.
+ * Returns false on failure.
  */
 bool xs_write(struct xs_handle *h, const char *path, const void *data,
-             unsigned int len, int createflags);
+             unsigned int len);
 
 /* Create a new directory.
- * Returns false on failure.
+ * Returns false on failure, or success if it already exists.
  */
 bool xs_mkdir(struct xs_handle *h, const char *path);
 
 /* Destroy a file or directory (and children).
- * Returns false on failure.
+ * Returns false on failure, or success if it doesn't exist.
  */
 bool xs_rm(struct xs_handle *h, const char *path);
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs_crashme.c
--- a/tools/xenstore/xs_crashme.c       Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs_crashme.c       Thu Sep 22 17:42:01 2005
@@ -267,17 +267,12 @@
                free(xs_read(h, name, &num));
                break;
        case 2: {
-               int flags = random_flags(&state);
                char *contents = talloc_asprintf(NULL, "%i",
                                                 get_randomness(&state));
                unsigned int len = get_randomness(&state)%(strlen(contents)+1);
                if (verbose)
-                       printf("WRITE %s %s %.*s\n", name,
-                              flags == O_CREAT ? "O_CREAT" 
-                              : flags == (O_CREAT|O_EXCL) ? "O_CREAT|O_EXCL"
-                              : flags == 0 ? "0" : "CRAPFLAGS",
-                              len, contents);
-               xs_write(h, name, contents, len, flags);
+                       printf("WRITE %s %.*s\n", name, len, contents);
+               xs_write(h, name, contents, len);
                break;
        }
        case 3:
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs_random.c
--- a/tools/xenstore/xs_random.c        Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs_random.c        Thu Sep 22 17:42:01 2005
@@ -26,7 +26,7 @@
        void *(*read)(void *h, const char *path, unsigned int *len);
 
        bool (*write)(void *h, const char *path, const void *data,
-                     unsigned int len, int createflags);
+                     unsigned int len);
 
        bool (*mkdir)(void *h, const char *path);
 
@@ -74,9 +74,9 @@
 static void maybe_convert_to_directory(const char *filename)
 {
        struct stat st;
-       char *dirname = talloc_asprintf(filename, "%.*s", 
-                                       strrchr(filename, '/') - filename,
-                                       filename);
+       char *dirname = talloc_asprintf(
+               filename, "%.*s",
+               (int)(strrchr(filename, '/') - filename), filename);
        if (lstat(dirname, &st) == 0 && S_ISREG(st.st_mode))
                convert_to_dir(dirname);
 }
@@ -249,7 +249,7 @@
 
        /* Copy permissions from parent */
        command = talloc_asprintf(filename, "cp %.*s/.perms %s",
-                                 strrchr(filename, '/') - filename,
+                                 (int)(strrchr(filename, '/') - filename),
                                  filename, permfile);
        do_command(command);
 }      
@@ -308,7 +308,7 @@
        char *slash = strrchr(name + 1, '/');
        if (!slash)
                return talloc_strdup(name, "/");
-       return talloc_asprintf(name, "%.*s", slash-name, name);
+       return talloc_asprintf(name, "%.*s", (int)(slash-name), name);
 }
 
 static void make_dirs(const char *filename)
@@ -333,40 +333,18 @@
 
 static bool file_write(struct file_ops_info *info,
                       const char *path, const void *data,
-                      unsigned int len, int createflags)
+                      unsigned int len)
 {
        char *filename = filename_to_data(path_to_name(info, path));
        int fd;
 
-       /* Kernel isn't strict, but library is. */
-       if (createflags & ~(O_CREAT|O_EXCL)) {
-               errno = EINVAL;
-               return false;
-       }
-
        if (!write_ok(info, path))
                return false;
 
-       /* We regard it as existing if dir exists. */
-       if (strends(filename, ".DATA")) {
-               if (!createflags)
-                       createflags = O_CREAT;
-               if (createflags & O_EXCL) {
-                       errno = EEXIST;
-                       return false;
-               }
-       }
-
-       if (createflags & O_CREAT)
-               make_dirs(parent_filename(filename));
-
-       fd = open(filename, createflags|O_TRUNC|O_WRONLY, 0600);
-       if (fd < 0) {
-               /* FIXME: Another hack. */
-               if (!(createflags & O_CREAT) && errno == EISDIR)
-                       errno = EEXIST;
+       make_dirs(parent_filename(filename));
+       fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0600);
+       if (fd < 0)
                return false;
-       }
 
        if (write(fd, data, len) != (int)len)
                barf_perror("Bad write to %s", filename);
@@ -385,7 +363,7 @@
 
        make_dirs(parent_filename(dirname));
        if (mkdir(dirname, 0700) != 0)
-               return false;
+               return (errno == EEXIST);
 
        init_perms(dirname);
        return true;
@@ -401,8 +379,11 @@
                return false;
        }
 
-       if (lstat(filename, &st) != 0)
-               return false;
+       if (lstat(filename, &st) != 0) {
+               if (lstat(parent_filename(filename), &st) != 0)
+                       return false;
+               return true;
+       }
 
        if (!write_ok(info, path))
                return false;
@@ -843,20 +824,6 @@
        return ret;
 }
 
-static int random_flags(int *state)
-{
-       switch (get_randomness(state) % 4) {
-       case 0:
-               return 0;
-       case 1:
-               return O_CREAT;
-       case 2:
-               return O_CREAT|O_EXCL;
-       default:
-               return get_randomness(state);
-       }
-}
-
 /* Do the next operation, return the results. */
 static char *do_next_op(struct ops *ops, void *h, int state, bool verbose)
 {
@@ -880,18 +847,12 @@
                ret = linearize_read(ops->read(h, name, &num), &num);
                break;
        case 2: {
-               int flags = random_flags(&state);
                char *contents = talloc_asprintf(NULL, "%i",
                                                 get_randomness(&state));
                unsigned int len = get_randomness(&state)%(strlen(contents)+1);
                if (verbose)
-                       printf("WRITE %s %s %.*s\n", name,
-                              flags == O_CREAT ? "O_CREAT" 
-                              : flags == (O_CREAT|O_EXCL) ? "O_CREAT|O_EXCL"
-                              : flags == 0 ? "0" : "CRAPFLAGS",
-                              len, contents);
-               ret = bool_to_errstring(ops->write(h, name, contents, len,
-                                                  flags));
+                       printf("WRITE %s %.*s\n", name, len, contents);
+               ret = bool_to_errstring(ops->write(h, name, contents, len));
                talloc_steal(ret, contents);
                break;
        }
@@ -1102,7 +1063,8 @@
 
                ret = do_next_op(data->ops, h, i + data->seed, verbose);
                if (verbose)
-                       printf("-> %.*s\n", strchr(ret, '\n') - ret, ret);
+                       printf("-> %.*s\n",
+                              (int)(strchr(ret, '\n') - ret), ret);
                if (streq(ret, "FAILED:Bad file descriptor"))
                        goto out;
                if (kill(daemon_pid, 0) != 0)
@@ -1373,13 +1335,14 @@
 
                file = do_next_op(&file_ops, fileh, i+data->seed, verbose);
                if (verbose)
-                       printf("-> %.*s\n", strchr(file, '/') - file, file);
+                       printf("-> %.*s\n",
+                              (int)(strchr(file, '/') - file), file);
                
                if (verbose)
                        printf("XS: ");
                xs = do_next_op(&xs_ops, xsh, i+data->seed, verbose);
                if (verbose)
-                       printf("-> %.*s\n", strchr(xs, '/') - xs, xs);
+                       printf("-> %.*s\n", (int)(strchr(xs, '/') - xs), xs);
 
                if (!streq(file, xs))
                        goto out;
@@ -1547,7 +1510,8 @@
                        aborted++;
 
                if (verbose)
-                       printf("-> %.*s\n", strchr(ret, '\n') - ret, ret);
+                       printf("-> %.*s\n",
+                              (int)(strchr(ret, '\n') - ret), ret);
 
                talloc_free(ret);
 
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs_stress.c
--- a/tools/xenstore/xs_stress.c        Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs_stress.c        Thu Sep 22 17:42:01 2005
@@ -61,7 +61,7 @@
                        barf_perror("%i: can't read %s iter %i",
                                    childnum, file, i);
                sprintf(tmp, "%i", atoi(contents) + 1);
-               if (!xs_write(h, file, tmp, strlen(tmp)+1, 0))
+               if (!xs_write(h, file, tmp, strlen(tmp)+1))
                        barf_perror("%i: can't write %s iter %i",
                                    childnum, file, i);
 
@@ -91,7 +91,7 @@
 
        if (togo == 0) {
                sprintf(filename, "%s/count", base);
-               if (!xs_write(h, filename, "0", 2, O_EXCL|O_CREAT))
+               if (!xs_write(h, filename, "0", 1))
                        barf_perror("Writing to %s", filename);
                return;
        }
diff -r 97dbd9524a7e -r 06d84bf87159 tools/xenstore/xs_test.c
--- a/tools/xenstore/xs_test.c  Thu Sep 22 17:34:14 2005
+++ b/tools/xenstore/xs_test.c  Thu Sep 22 17:42:01 2005
@@ -192,7 +192,7 @@
             "Reads commands from stdin, one per line:"
             "  dir <path>\n"
             "  read <path>\n"
-            "  write <path> <flags> <value>...\n"
+            "  write <path> <value>...\n"
             "  setid <id>\n"
             "  mkdir <path>\n"
             "  rm <path>\n"
@@ -200,6 +200,7 @@
             "  setperm <path> <id> <flags> ...\n"
             "  shutdown\n"
             "  watch <path> <token>\n"
+            "  watchnoack <path> <token>\n"
             "  waitwatch\n"
             "  ackwatch <token>\n"
             "  unwatch <path> <token>\n"
@@ -213,7 +214,7 @@
             "  notimeout\n"
             "  readonly\n"
             "  readwrite\n"
-            "  noackwrite <path> <flags> <value>...\n"
+            "  noackwrite <path> <value>...\n"
             "  readack\n"
             "  dump\n");
 }
@@ -348,47 +349,22 @@
                output("%.*s\n", len, value);
 }
 
-static void do_write(unsigned int handle, char *path, char *flags, char *data)
-{
-       int f;
-
-       if (streq(flags, "none"))
-               f = 0;
-       else if (streq(flags, "create"))
-               f = O_CREAT;
-       else if (streq(flags, "excl"))
-               f = O_CREAT | O_EXCL;
-       else if (streq(flags, "crap"))
-               f = 100;
-       else
-               barf("write flags 'none', 'create' or 'excl' only");
-
-       if (!xs_write(handles[handle], path, data, strlen(data), f))
+static void do_write(unsigned int handle, char *path, char *data)
+{
+       if (!xs_write(handles[handle], path, data, strlen(data)))
                failed(handle);
 }
 
 static void do_noackwrite(unsigned int handle,
-                         char *path, const char *flags, char *data)
+                         char *path, char *data)
 {
        struct xsd_sockmsg msg;
 
-       /* Format: Flags (as string), path, data. */
-       if (streq(flags, "none"))
-               flags = XS_WRITE_NONE;
-       else if (streq(flags, "create"))
-               flags = XS_WRITE_CREATE;
-       else if (streq(flags, "excl"))
-               flags = XS_WRITE_CREATE_EXCL;
-       else
-               barf("noackwrite flags 'none', 'create' or 'excl' only");
-
-       msg.len = strlen(path) + 1 + strlen(flags) + 1 + strlen(data);
+       msg.len = strlen(path) + 1 + strlen(data);
        msg.type = XS_WRITE;
        if (!write_all_choice(handles[handle]->fd, &msg, sizeof(msg)))
                failed(handle);
        if (!write_all_choice(handles[handle]->fd, path, strlen(path) + 1))
-               failed(handle);
-       if (!write_all_choice(handles[handle]->fd, flags, strlen(flags) + 1))
                failed(handle);
        if (!write_all_choice(handles[handle]->fd, data, strlen(data)))
                failed(handle);
@@ -505,10 +481,20 @@
                failed(handle);
 }
 
-static void do_watch(unsigned int handle, const char *node, const char *token)
+static void do_watch(unsigned int handle, const char *node, const char *token,
+                    bool swallow_event)
 {
        if (!xs_watch(handles[handle], node, token))
                failed(handle);
+
+       /* Convenient for testing... */
+       if (swallow_event) {
+               char **vec = xs_read_watch(handles[handle]);
+               if (!vec || !streq(vec[0], node) || !streq(vec[1], token))
+                       failed(handle);
+               if (!xs_acknowledge_watch(handles[handle], token))
+                       failed(handle);
+       }
 }
 
 static void set_timeout(void)
@@ -778,8 +764,7 @@
        else if (streq(command, "read"))
                do_read(handle, arg(line, 1));
        else if (streq(command, "write"))
-               do_write(handle,
-                        arg(line, 1), arg(line, 2), arg(line, 3));
+               do_write(handle, arg(line, 1), arg(line, 2));
        else if (streq(command, "setid"))
                do_setid(handle, arg(line, 1));
        else if (streq(command, "mkdir"))
@@ -793,7 +778,9 @@
        else if (streq(command, "shutdown"))
                do_shutdown(handle);
        else if (streq(command, "watch"))
-               do_watch(handle, arg(line, 1), arg(line, 2));
+               do_watch(handle, arg(line, 1), arg(line, 2), true);
+       else if (streq(command, "watchnoack"))
+               do_watch(handle, arg(line, 1), arg(line, 2), false);
        else if (streq(command, "waitwatch"))
                do_waitwatch(handle);
        else if (streq(command, "ackwatch"))
@@ -832,7 +819,7 @@
                xs_daemon_close(handles[handle]);
                handles[handle] = NULL;
        } else if (streq(command, "noackwrite"))
-               do_noackwrite(handle, arg(line,1), arg(line,2), arg(line,3));
+               do_noackwrite(handle, arg(line,1), arg(line,2));
        else if (streq(command, "readack"))
                do_readack(handle);
        else
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/ia64/xen/grant_table.c
--- a/xen/arch/ia64/xen/grant_table.c   Thu Sep 22 17:34:14 2005
+++ b/xen/arch/ia64/xen/grant_table.c   Thu Sep 22 17:42:01 2005
@@ -850,7 +850,7 @@
 #endif
 
 static long
-gnttab_donate(gnttab_donate_t *uop, unsigned int count)
+gnttab_transfer(gnttab_transfer_t *uop, unsigned int count)
 {
     struct domain *d = current->domain;
     struct domain *e;
@@ -864,27 +864,27 @@
     return GNTST_general_error;
 #else
     for (i = 0; i < count; i++) {
-        gnttab_donate_t *gop = &uop[i];
+        gnttab_transfer_t *gop = &uop[i];
 #if GRANT_DEBUG
-        printk("gnttab_donate: i=%d mfn=%lx domid=%d gref=%08x\n",
+        printk("gnttab_transfer: i=%d mfn=%lx domid=%d gref=%08x\n",
                i, gop->mfn, gop->domid, gop->handle);
 #endif
         page = &frame_table[gop->mfn];
         
         if (unlikely(IS_XEN_HEAP_FRAME(page))) { 
-            printk("gnttab_donate: xen heap frame mfn=%lx\n", 
+            printk("gnttab_transfer: xen heap frame mfn=%lx\n", 
                    (unsigned long) gop->mfn);
             gop->status = GNTST_bad_virt_addr;
             continue;
         }
         if (unlikely(!pfn_valid(page_to_pfn(page)))) {
-            printk("gnttab_donate: invalid pfn for mfn=%lx\n", 
+            printk("gnttab_transfer: invalid pfn for mfn=%lx\n", 
                    (unsigned long) gop->mfn);
             gop->status = GNTST_bad_virt_addr;
             continue;
         }
         if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
-            printk("gnttab_donate: can't find domain %d\n", gop->domid);
+            printk("gnttab_transfer: can't find domain %d\n", gop->domid);
             gop->status = GNTST_bad_domain;
             continue;
         }
@@ -904,7 +904,7 @@
             x = y;
             if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
                          (1 | PGC_allocated)) || unlikely(_nd != _d)) {
-                printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+                printk("gnttab_transfer: Bad page values %p: ed=%p(%u), sd=%p,"
                        " caf=%08x, taf=%" PRtype_info "\n", 
                        (void *) page_to_pfn(page),
                         d, d->domain_id, unpickle_domptr(_nd), x, 
@@ -947,14 +947,14 @@
             break;
         }
         if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags))) {
-            printk("gnttab_donate: target domain is dying\n");
+            printk("gnttab_transfer: target domain is dying\n");
             spin_unlock(&e->page_alloc_lock);
             put_domain(e);
             result = GNTST_general_error;
             break;
         }
-        if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
-            printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+        if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->ref))) {
+            printk("gnttab_transfer: gnttab_prepare_for_transfer fails\n");
             spin_unlock(&e->page_alloc_lock);
             put_domain(e);
             result = GNTST_general_error;
@@ -964,10 +964,10 @@
         ASSERT(e->tot_pages <= e->max_pages);
         if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
             unlikely(e->tot_pages == e->max_pages) ||
-            unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
-            printk("gnttab_donate: Transferee has no reservation headroom (%d,"
+            unlikely(!gnttab_prepare_for_transfer(e, d, gop->ref))) {
+            printk("gnttab_transfer: Transferee has no reservation headroom (%d,"
                    "%d) or provided a bad grant ref (%08x) or is dying (%p)\n",
-                   e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+                   e->tot_pages, e->max_pages, gop->ref, e->d_flags);
             spin_unlock(&e->page_alloc_lock);
             put_domain(e);
             result = GNTST_general_error;
@@ -987,7 +987,7 @@
          * Transfer is all done: tell the guest about its new page
          * frame.
          */
-        gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+        gnttab_notify_transfer(e, d, gop->ref, gop->mfn);
         
         put_domain(e);
         
@@ -1037,11 +1037,11 @@
             rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
             break;
 #endif
-        case GNTTABOP_donate:
+        case GNTTABOP_transfer:
             if (unlikely(!array_access_ok(uop, count, 
-                                          sizeof(gnttab_donate_t))))
+                                          sizeof(gnttab_transfer_t))))
                 goto out;
-            rc = gnttab_donate(uop, count);
+            rc = gnttab_transfer(uop, count);
             break;
         default:
             rc = -ENOSYS;
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/shadow.c     Thu Sep 22 17:42:01 2005
@@ -697,6 +697,8 @@
         }
     }
 
+    __shadow_get_l2e(v, va, &sl2e);
+
     if ( shadow_mode_refcounts(d) )
     {
         l1_pgentry_t old_spte;
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/shadow32.c   Thu Sep 22 17:42:01 2005
@@ -399,22 +399,26 @@
         perfc_decr(shadow_l1_pages);
         shadow_demote(d, gpfn, gmfn);
         free_shadow_l1_table(d, smfn);
+        d->arch.shadow_page_count--;
         break;
 
     case PGT_l2_shadow:
         perfc_decr(shadow_l2_pages);
         shadow_demote(d, gpfn, gmfn);
         free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
+        d->arch.shadow_page_count--;
         break;
 
     case PGT_hl2_shadow:
         perfc_decr(hl2_table_pages);
         shadow_demote(d, gpfn, gmfn);
         free_shadow_hl2_table(d, smfn);
+        d->arch.hl2_page_count--;
         break;
 
     case PGT_snapshot:
         perfc_decr(snapshot_pages);
+        d->arch.snapshot_page_count--;
         break;
 
     default:
@@ -422,8 +426,6 @@
                page_to_pfn(page), page->u.inuse.type_info);
         break;
     }
-
-    d->arch.shadow_page_count--;
 
     // No TLB flushes are needed the next time this page gets allocated.
     //
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/shadow_public.c      Thu Sep 22 17:42:01 2005
@@ -595,18 +595,21 @@
         perfc_decr(shadow_l1_pages);
         shadow_demote(d, gpfn, gmfn);
         free_shadow_l1_table(d, smfn);
+        d->arch.shadow_page_count--;
         break;
 #if defined (__i386__)
     case PGT_l2_shadow:
         perfc_decr(shadow_l2_pages);
         shadow_demote(d, gpfn, gmfn);
         free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
+        d->arch.shadow_page_count--;
         break;
 
     case PGT_hl2_shadow:
         perfc_decr(hl2_table_pages);
         shadow_demote(d, gpfn, gmfn);
         free_shadow_hl2_table(d, smfn);
+        d->arch.hl2_page_count--;
         break;
 #else
     case PGT_l2_shadow:
@@ -614,12 +617,13 @@
     case PGT_l4_shadow:
         shadow_demote(d, gpfn, gmfn);
         free_shadow_tables(d, smfn, shadow_type_to_level(type));
+        d->arch.shadow_page_count--;
         break;
 
     case PGT_fl1_shadow:
         free_shadow_fl1_table(d, smfn);
+        d->arch.shadow_page_count--;
         break;
-
 #endif
 
     case PGT_snapshot:
@@ -631,8 +635,6 @@
                page_to_pfn(page), page->u.inuse.type_info);
         break;
     }
-
-    d->arch.shadow_page_count--;
 
     // No TLB flushes are needed the next time this page gets allocated.
     //
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/vmx.c        Thu Sep 22 17:42:01 2005
@@ -377,12 +377,13 @@
 
 static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs) 
 {
-    unsigned long eip;
     unsigned long gpa; /* FIXME: PAE */
     int result;
 
-#if VMX_DEBUG
+#if 0 /* keep for debugging */
     {
+        unsigned long eip;
+
         __vmread(GUEST_RIP, &eip);
         VMX_DBG_LOG(DBG_LEVEL_VMMU, 
                     "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
@@ -429,9 +430,9 @@
         
     clts();
     setup_fpu(current);
-    __vmread(CR0_READ_SHADOW, &cr0);
+    __vmread_vcpu(CR0_READ_SHADOW, &cr0);
     if (!(cr0 & X86_CR0_TS)) {
-        __vmread(GUEST_CR0, &cr0);
+        __vmread_vcpu(GUEST_CR0, &cr0);
         cr0 &= ~X86_CR0_TS;
         __vmwrite(GUEST_CR0, cr0);
     }
@@ -470,6 +471,8 @@
         }
 #endif
 
+        /* Unsupportable for virtualised CPUs. */
+        clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
     }
 
     regs->eax = (unsigned long) eax;
@@ -1100,6 +1103,11 @@
                     d->arch.arch_vmx.cpu_cr3, mfn);
     }
 
+    if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
+        if(d->arch.arch_vmx.cpu_cr3)
+            put_page(pfn_to_page(get_mfn_from_pfn(
+                      d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)));
+
     /*
      * VMX does not implement real-mode virtualization. We emulate
      * real-mode by performing a world switch to VMXAssist whenever
@@ -1124,9 +1132,7 @@
                 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
             }
         }
-        __vmread(GUEST_RIP, &eip);
-        VMX_DBG_LOG(DBG_LEVEL_1,
-                    "Disabling CR0.PE at %%eip 0x%lx\n", eip);
+
         if (vmx_assist(d, VMX_ASSIST_INVOKE)) {
             set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &d->arch.arch_vmx.cpu_state);
             __vmread(GUEST_RIP, &eip);
@@ -1365,17 +1371,17 @@
         clts();
         setup_fpu(current);
 
-        __vmread(GUEST_CR0, &value);
+        __vmread_vcpu(GUEST_CR0, &value);
         value &= ~X86_CR0_TS; /* clear TS */
         __vmwrite(GUEST_CR0, value);
 
-        __vmread(CR0_READ_SHADOW, &value);
+        __vmread_vcpu(CR0_READ_SHADOW, &value);
         value &= ~X86_CR0_TS; /* clear TS */
         __vmwrite(CR0_READ_SHADOW, value);
         break;
     case TYPE_LMSW:
         TRACE_VMEXIT(1,TYPE_LMSW);
-        __vmread(CR0_READ_SHADOW, &value);
+        __vmread_vcpu(CR0_READ_SHADOW, &value);
         value = (value & ~0xF) |
             (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
         return vmx_set_cr0(value);
@@ -1451,17 +1457,13 @@
                 (unsigned long)regs->edx);
 }
 
+volatile unsigned long do_hlt_count;
 /*
  * Need to use this exit to reschedule
  */
-static inline void vmx_vmexit_do_hlt(void)
-{
-#if VMX_DEBUG
-    unsigned long eip;
-    __vmread(GUEST_RIP, &eip);
-#endif
-    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%lx", eip);
-    raise_softirq(SCHEDULE_SOFTIRQ);
+void vmx_vmexit_do_hlt(void)
+{
+    do_block();
 }
 
 static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
@@ -1511,16 +1513,6 @@
     }
 }
 
-static inline void vmx_vmexit_do_mwait(void)
-{
-#if VMX_DEBUG
-    unsigned long eip;
-    __vmread(GUEST_RIP, &eip);
-#endif
-    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%lx", eip);
-    raise_softirq(SCHEDULE_SOFTIRQ);
-}
-
 #define BUF_SIZ     256
 #define MAX_LINE    80
 char print_buf[BUF_SIZ];
@@ -1626,9 +1618,13 @@
         return;
     }
 
-    __vmread(GUEST_RIP, &eip);
-    TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
-    TRACE_VMEXIT(0,exit_reason);
+#ifdef TRACE_BUFFER
+    {
+        __vmread(GUEST_RIP, &eip);
+        TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
+        TRACE_VMEXIT(0,exit_reason);
+    }
+#endif
 
     switch (exit_reason) {
     case EXIT_REASON_EXCEPTION_NMI:
@@ -1798,9 +1794,7 @@
         __update_guest_eip(inst_len);
         break;
     case EXIT_REASON_MWAIT_INSTRUCTION:
-        __get_instruction_length(inst_len);
-        __update_guest_eip(inst_len);
-        vmx_vmexit_do_mwait();
+        __vmx_bug(&regs);
         break;
     default:
         __vmx_bug(&regs);       /* should not happen */
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c      Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/vmx_intercept.c      Thu Sep 22 17:42:01 2005
@@ -28,6 +28,7 @@
 #include <xen/sched.h>
 #include <asm/current.h>
 #include <io_ports.h>
+#include <xen/event.h>
 
 #ifdef CONFIG_VMX
 
@@ -205,6 +206,7 @@
     /* Set the pending intr bit, and send evtchn notification to myself. */
     if (test_and_set_bit(vpit->vector, vpit->intr_bitmap))
         vpit->pending_intr_nr++; /* already set, then count the pending intr */
+    evtchn_set_pending(vpit->v, iopacket_port(vpit->v->domain));
 
     /* pick up missed timer tick */
     if ( missed_ticks > 0 ) {
@@ -281,6 +283,7 @@
         }
 
         vpit->intr_bitmap = intr;
+        vpit->v = d;
 
         vpit->scheduled = NOW() + vpit->period;
         set_ac_timer(&vpit->pit_timer, vpit->scheduled);
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/vmx_io.c     Thu Sep 22 17:42:01 2005
@@ -891,7 +891,7 @@
     struct vcpu *v = current;
 
     highest_vector = find_highest_pending_irq(v, &intr_type);
-    __vmread(CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control);
+    __vmread_vcpu(CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control);
 
     if (highest_vector == -1) {
         disable_irq_window(cpu_exec_control);
@@ -948,14 +948,6 @@
 void vmx_do_resume(struct vcpu *d) 
 {
     vmx_stts();
-    if ( vmx_paging_enabled(d) )
-        __vmwrite(GUEST_CR3, pagetable_get_paddr(d->arch.shadow_table));
-    else
-        // paging is not enabled in the guest
-        __vmwrite(GUEST_CR3, pagetable_get_paddr(d->domain->arch.phys_table));
-
-    __vmwrite(HOST_CR3, pagetable_get_paddr(d->arch.monitor_table));
-    __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
 
     if (event_pending(d)) {
         vmx_check_events(d);
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/vmx_platform.c
--- a/xen/arch/x86/vmx_platform.c       Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/vmx_platform.c       Thu Sep 22 17:42:01 2005
@@ -671,13 +671,13 @@
     if (inst->operand[0] & REGISTER) { /* dest is memory */
         index = operand_index(inst->operand[0]);
         value = get_reg_value(size, index, 0, regs);
-        send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0);
+        send_mmio_req(type, gpa, 1, inst->op_size, value, IOREQ_WRITE, 0);
     } else if (inst->operand[0] & IMMEDIATE) { /* dest is memory */
         value = inst->immediate;
-        send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0);
+        send_mmio_req(type, gpa, 1, inst->op_size, value, IOREQ_WRITE, 0);
     } else if (inst->operand[0] & MEMORY) { /* dest is register */
         /* send the request and wait for the value */
-        send_mmio_req(type, gpa, 1, size, 0, IOREQ_READ, 0);
+        send_mmio_req(type, gpa, 1, inst->op_size, 0, IOREQ_READ, 0);
     } else {
         printf("mmio_operands: invalid operand\n");
         domain_crash_synchronous();
diff -r 97dbd9524a7e -r 06d84bf87159 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Thu Sep 22 17:34:14 2005
+++ b/xen/arch/x86/vmx_vmcs.c   Thu Sep 22 17:42:01 2005
@@ -67,9 +67,6 @@
 
     error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL, 
                        MONITOR_PIN_BASED_EXEC_CONTROLS);
-
-    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL, 
-                       MONITOR_CPU_BASED_EXEC_CONTROLS);
 
     error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
 
@@ -117,12 +114,6 @@
     unsigned long fs_base; 
     unsigned long gs_base; 
 #endif 
-
-    /* control registers */
-    unsigned long cr3;
-    unsigned long cr0;
-    unsigned long cr4;
-    unsigned long dr7;
 };
 
 #define round_pgdown(_p) ((_p)&PAGE_MASK) /* coped from domain.c */
@@ -217,8 +208,32 @@
 /* Update CR3, GDT, LDT, TR */
     unsigned int  error = 0;
     unsigned long pfn = 0;
+    unsigned long cr0, cr4;
     struct pfn_info *page;
     struct cpu_user_regs *regs = guest_cpu_user_regs();
+
+    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (cr0) : );
+
+    error |= __vmwrite(GUEST_CR0, cr0);
+    cr0 &= ~X86_CR0_PG;
+    error |= __vmwrite(CR0_READ_SHADOW, cr0);
+    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL, 
+                       MONITOR_CPU_BASED_EXEC_CONTROLS);
+
+    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (cr4) : );
+
+#ifdef __x86_64__
+    error |= __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
+#else
+    error |= __vmwrite(GUEST_CR4, cr4);
+#endif
+
+#ifdef __x86_64__
+    cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
+#else
+    cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE);
+#endif
+    error |= __vmwrite(CR4_READ_SHADOW, cr4);
 
     vmx_stts();
 
@@ -254,7 +269,7 @@
     int error = 0;
     union vmcs_arbytes arbytes;
     unsigned long dr7;
-    unsigned long eflags, shadow_cr;
+    unsigned long eflags;
 
     /* MSR */
     error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
@@ -326,27 +341,7 @@
 
     arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
     error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);
-
-    error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */
-
-    /* Initally PG, PE are not set*/
-    shadow_cr = host_env->cr0;
-    shadow_cr &= ~X86_CR0_PG;
-    error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
     /* CR3 is set in vmx_final_setup_guest */
-#ifdef __x86_64__
-    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PSE);
-#else
-    error |= __vmwrite(GUEST_CR4, host_env->cr4);
-#endif
-    shadow_cr = host_env->cr4;
-
-#ifdef __x86_64__
-    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
-#else
-    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
-#endif
-    error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);
 
     error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
     error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
@@ -403,12 +398,10 @@
     host_env->cs_base = 0;
 
     __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : );
-    host_env->cr0 = crn;
     error |= __vmwrite(HOST_CR0, crn); /* same CR0 */
 
     /* CR3 is set in vmx_final_setup_hostos */
     __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : ); 
-    host_env->cr4 = crn;
     error |= __vmwrite(HOST_CR4, crn);
 
     error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
diff -r 97dbd9524a7e -r 06d84bf87159 xen/common/grant_table.c
--- a/xen/common/grant_table.c  Thu Sep 22 17:34:14 2005
+++ b/xen/common/grant_table.c  Thu Sep 22 17:42:01 2005
@@ -797,7 +797,7 @@
 #endif
 
 static long
-gnttab_donate(gnttab_donate_t *uop, unsigned int count)
+gnttab_transfer(gnttab_transfer_t *uop, unsigned int count)
 {
     struct domain *d = current->domain;
     struct domain *e;
@@ -805,19 +805,20 @@
     u32 _d, _nd, x, y;
     int i;
     int result = GNTST_okay;
+    grant_entry_t *sha;
 
     for ( i = 0; i < count; i++ )
     {
-        gnttab_donate_t *gop = &uop[i];
+        gnttab_transfer_t *gop = &uop[i];
 #if GRANT_DEBUG
-        printk("gnttab_donate: i=%d mfn=%lx domid=%d gref=%08x\n",
+        printk("gnttab_transfer: i=%d mfn=%lx domid=%d gref=%08x\n",
                i, gop->mfn, gop->domid, gop->handle);
 #endif
         page = &frame_table[gop->mfn];
         
         if ( unlikely(IS_XEN_HEAP_FRAME(page)))
         { 
-            printk("gnttab_donate: xen heap frame mfn=%lx\n", 
+            printk("gnttab_transfer: xen heap frame mfn=%lx\n", 
                    (unsigned long) gop->mfn);
             gop->status = GNTST_bad_virt_addr;
             continue;
@@ -825,7 +826,7 @@
         
         if ( unlikely(!pfn_valid(page_to_pfn(page))) )
         {
-            printk("gnttab_donate: invalid pfn for mfn=%lx\n", 
+            printk("gnttab_transfer: invalid pfn for mfn=%lx\n", 
                    (unsigned long) gop->mfn);
             gop->status = GNTST_bad_virt_addr;
             continue;
@@ -833,7 +834,7 @@
 
         if ( unlikely((e = find_domain_by_id(gop->domid)) == NULL) )
         {
-            printk("gnttab_donate: can't find domain %d\n", gop->domid);
+            printk("gnttab_transfer: can't find domain %d\n", gop->domid);
             gop->status = GNTST_bad_domain;
             continue;
         }
@@ -853,7 +854,7 @@
             x = y;
             if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
                          (1 | PGC_allocated)) || unlikely(_nd != _d)) {
-                printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+                printk("gnttab_transfer: Bad page values %p: ed=%p(%u), sd=%p,"
                        " caf=%08x, taf=%" PRtype_info "\n", 
                        (void *) page_to_pfn(page),
                         d, d->domain_id, unpickle_domptr(_nd), x, 
@@ -888,12 +889,12 @@
          */
         if ( unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
              unlikely(e->tot_pages >= e->max_pages) ||
-             unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle)) )
-        {
-            DPRINTK("gnttab_donate: Transferee has no reservation headroom "
+             unlikely(!gnttab_prepare_for_transfer(e, d, gop->ref)) )
+        {
+            DPRINTK("gnttab_transfer: Transferee has no reservation headroom "
                     "(%d,%d) or provided a bad grant ref (%08x) or "
                     "is dying (%lx)\n",
-                    e->tot_pages, e->max_pages, gop->handle, e->domain_flags);
+                    e->tot_pages, e->max_pages, gop->ref, e->domain_flags);
             spin_unlock(&e->page_alloc_lock);
             put_domain(e);
             gop->status = result = GNTST_general_error;
@@ -908,11 +909,11 @@
         
         spin_unlock(&e->page_alloc_lock);
         
-        /*
-         * Transfer is all done: tell the guest about its new page
-         * frame.
-         */
-        gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+        /* Tell the guest about its new page frame. */
+        sha = &e->grant_table->shared[gop->ref];
+        sha->frame = gop->mfn;
+        wmb();
+        sha->flags |= GTF_transfer_completed;
         
         put_domain(e);
         
@@ -960,11 +961,11 @@
         rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
         break;
 #endif
-    case GNTTABOP_donate:
+    case GNTTABOP_transfer:
         if (unlikely(!array_access_ok(
-            uop, count, sizeof(gnttab_donate_t))))
+            uop, count, sizeof(gnttab_transfer_t))))
             goto out;
-        rc = gnttab_donate(uop, count);
+        rc = gnttab_transfer(uop, count);
         break;
     default:
         rc = -ENOSYS;
@@ -1171,46 +1172,6 @@
     return 0;
 }
 
-void 
-gnttab_notify_transfer(
-    struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame)
-{
-    grant_entry_t  *sha;
-    unsigned long   pfn;
-
-#if GRANT_DEBUG_VERBOSE
-    DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
-            rd->domain_id, ld->domain_id, ref);
-#endif
-
-    sha = &rd->grant_table->shared[ref];
-
-    spin_lock(&rd->grant_table->lock);
-
-    pfn = sha->frame;
-
-    if ( unlikely(pfn >= max_page ) )
-        DPRINTK("Bad pfn (%lx)\n", pfn);
-    else
-    {
-        set_pfn_from_mfn(frame, pfn);
-
-        if ( unlikely(shadow_mode_log_dirty(ld)))
-             mark_dirty(ld, frame);
-
-        if (shadow_mode_translate(ld))
-            set_mfn_from_pfn(pfn, frame);
-    }
-    sha->frame = __mfn_to_gpfn(rd, frame);
-    sha->domid = rd->domain_id;
-    wmb();
-    sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
-
-    spin_unlock(&rd->grant_table->lock);
-
-    return;
-}
-
 int 
 grant_table_create(
     struct domain *d)
diff -r 97dbd9524a7e -r 06d84bf87159 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Thu Sep 22 17:34:14 2005
+++ b/xen/common/sched_sedf.c   Thu Sep 22 17:42:01 2005
@@ -846,7 +846,7 @@
  *      the domain can't finish it's workload in the period
  *     -in addition to that the domain can be treated prioritised when
  *      extratime is available
- *     -addition: experiments hve shown that this may have a HUGE impact on
+ *     -addition: experiments have shown that this may have a HUGE impact on
  *      performance of other domains, becaus it can lead to excessive context
  *      switches
  
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Thu Sep 22 17:34:14 2005
+++ b/xen/include/asm-x86/shadow.h      Thu Sep 22 17:42:01 2005
@@ -1595,6 +1595,8 @@
         }
     }
 
+    __shadow_get_l2e(v, va, &sl2e);
+
     if ( shadow_mode_refcounts(d) )
     {
         l1_pgentry_t old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Thu Sep 22 17:34:14 2005
+++ b/xen/include/asm-x86/vmx.h Thu Sep 22 17:42:01 2005
@@ -314,6 +314,57 @@
     return 0;
 }
 
+
+static always_inline void __vmwrite_vcpu(unsigned long field, unsigned long value)
+{
+    struct vcpu *v = current;
+
+    switch(field) {
+    case CR0_READ_SHADOW:
+       v->arch.arch_vmx.cpu_shadow_cr0 = value;
+       break;
+    case GUEST_CR0:
+       v->arch.arch_vmx.cpu_cr0 = value;
+       break;
+    case CPU_BASED_VM_EXEC_CONTROL:
+       v->arch.arch_vmx.cpu_based_exec_control = value;
+       break;
+    default:
+       printk("__vmwrite_cpu: invalid field %lx\n", field);
+       break;
+    }
+}
+
+static always_inline void __vmread_vcpu(unsigned long field, unsigned long *value)
+{
+    struct vcpu *v = current;
+
+    switch(field) {
+    case CR0_READ_SHADOW:
+       *value = v->arch.arch_vmx.cpu_shadow_cr0;
+       break;
+    case GUEST_CR0:
+       *value = v->arch.arch_vmx.cpu_cr0;
+       break;
+    case CPU_BASED_VM_EXEC_CONTROL:
+       *value = v->arch.arch_vmx.cpu_based_exec_control;
+       break;
+    default:
+       printk("__vmread_cpu: invalid field %lx\n", field);
+       break;
+    }
+
+   /* 
+    * __vmwrite() can be used for non-current vcpu, and it's possible that
+    * the vcpu field is not initialized at that case.
+    * 
+    */
+    if (!*value) {
+       __vmread(field, value);
+       __vmwrite_vcpu(field, *value);
+    }
+}
+
 static inline int __vmwrite (unsigned long field, unsigned long value)
 {
     unsigned long eflags;
@@ -326,6 +377,15 @@
     __save_flags(eflags);
     if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
         return -1;
+
+    switch(field) {
+    case CR0_READ_SHADOW:
+    case GUEST_CR0:
+    case CPU_BASED_VM_EXEC_CONTROL:
+       __vmwrite_vcpu(field, value);
+       break;
+    }
+
     return 0;
 }
 
@@ -379,11 +439,12 @@
 {
     unsigned long cr0;
 
-    __vmread(GUEST_CR0, &cr0);
-    if (!(cr0 & X86_CR0_TS))
+    __vmread_vcpu(GUEST_CR0, &cr0);
+    if (!(cr0 & X86_CR0_TS)) {
         __vmwrite(GUEST_CR0, cr0 | X86_CR0_TS);
-
-    __vmread(CR0_READ_SHADOW, &cr0);
+    }
+
+    __vmread_vcpu(CR0_READ_SHADOW, &cr0);
     if (!(cr0 & X86_CR0_TS))
        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
 }
@@ -393,7 +454,7 @@
 {
     unsigned long cr0;
 
-    __vmread(CR0_READ_SHADOW, &cr0);
+    __vmread_vcpu(CR0_READ_SHADOW, &cr0);
     return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
 }
 
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/asm-x86/vmx_virpit.h
--- a/xen/include/asm-x86/vmx_virpit.h  Thu Sep 22 17:34:14 2005
+++ b/xen/include/asm-x86/vmx_virpit.h  Thu Sep 22 17:42:01 2005
@@ -35,8 +35,8 @@
 
     unsigned int count;  /* the 16 bit channel count */
     unsigned int init_val; /* the init value for the counter */
-
-} ;
+    struct vcpu *v;
+};
 
 /* to hook the ioreq packet to get the PIT initializaiton info */
 extern void vmx_hooks_assist(struct vcpu *d);
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h    Thu Sep 22 17:34:14 2005
+++ b/xen/include/asm-x86/vmx_vmcs.h    Thu Sep 22 17:42:01 2005
@@ -74,9 +74,12 @@
 struct arch_vmx_struct {
     struct vmcs_struct      *vmcs;  /* VMCS pointer in virtual */
     unsigned long           flags;  /* VMCS flags */
+    unsigned long           cpu_cr0; /* copy of guest CR0 */
+    unsigned long           cpu_shadow_cr0; /* copy of guest read shadow CR0 */
     unsigned long           cpu_cr2; /* save CR2 */
     unsigned long           cpu_cr3;
     unsigned long           cpu_state;
+    unsigned long           cpu_based_exec_control;
     struct msr_state        msr_content;
     void                   *io_bitmap_a, *io_bitmap_b;
 };
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h  Thu Sep 22 17:34:14 2005
+++ b/xen/include/public/grant_table.h  Thu Sep 22 17:42:01 2005
@@ -215,18 +215,19 @@
 } gnttab_dump_table_t;
 
 /*
- * GNTTABOP_donate_grant_ref: Donate <frame> to a foreign domain.  The
+ * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain.  The
  * foreign domain has previously registered the details of the transfer.
  * These can be identified from <handle>, a grant reference.
  */
-#define GNTTABOP_donate                4
+#define GNTTABOP_transfer                4
 typedef struct {
-    unsigned long mfn;               /*  0 */
-    domid_t     domid;               /*  4 */
-    u16         handle;               /*  8 */
-    s16         status;               /*  10: GNTST_* */
-    u32         __pad;
-} gnttab_donate_t;           /*  14 bytes */
+    /* IN parameters. */
+    unsigned long mfn;
+    domid_t     domid;
+    grant_ref_t ref;
+    /* OUT parameters. */
+    s16         status;
+} gnttab_transfer_t;
 
 /*
  * Bitfield values for update_pin_status.flags.
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h     Thu Sep 22 17:34:14 2005
+++ b/xen/include/public/io/netif.h     Thu Sep 22 17:42:01 2005
@@ -10,10 +10,11 @@
 #define __XEN_PUBLIC_IO_NETIF_H__
 
 typedef struct netif_tx_request {
-    unsigned long addr;   /* Machine address of packet.  */
+    grant_ref_t gref;      /* Reference to buffer page */
+    u16      offset:15;    /* Offset within buffer page */
     u16      csum_blank:1; /* Proto csum field blank?   */
-    u16      id:15;  /* Echoed in response message. */
-    u16      size;   /* Packet size in bytes.       */
+    u16      id;           /* Echoed in response message. */
+    u16      size;         /* Packet size in bytes.       */
 } netif_tx_request_t;
 
 typedef struct netif_tx_response {
@@ -22,21 +23,15 @@
 } netif_tx_response_t;
 
 typedef struct {
-    u16       id;    /* Echoed in response message.        */
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    grant_ref_t gref;  /* 2: Reference to incoming granted frame */
-#endif
+    u16       id;       /* Echoed in response message.        */
+    grant_ref_t gref;  /* Reference to incoming granted frame */
 } netif_rx_request_t;
 
 typedef struct {
-#ifdef CONFIG_XEN_NETDEV_GRANT
-    u32      addr;   /*  0: Offset in page of start of received packet  */
-#else
-    unsigned long addr; /* Machine address of packet.              */
-#endif
-    u16      csum_valid:1; /* Protocol checksum is validated?       */
-    u16      id:15;
-    s16      status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
+    u16      offset;     /* Offset in page of start of received packet  */
+    u16      csum_valid; /* Protocol checksum is validated?       */
+    u16      id;
+    s16      status;     /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
 } netif_rx_response_t;
 
 /*
@@ -53,18 +48,8 @@
 #define MASK_NETIF_RX_IDX(_i) ((_i)&(NETIF_RX_RING_SIZE-1))
 #define MASK_NETIF_TX_IDX(_i) ((_i)&(NETIF_TX_RING_SIZE-1))
 
-#ifdef __x86_64__
-/*
- * This restriction can be lifted when we move netfront/netback to use
- * grant tables. This will remove memory_t fields from the above structures
- * and thus relax natural alignment restrictions.
- */
-#define NETIF_TX_RING_SIZE 128
-#define NETIF_RX_RING_SIZE 128
-#else
 #define NETIF_TX_RING_SIZE 256
 #define NETIF_RX_RING_SIZE 256
-#endif
 
 /* This structure must fit in a memory page. */
 typedef struct netif_tx_interface {
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/public/io/tpmif.h
--- a/xen/include/public/io/tpmif.h     Thu Sep 22 17:34:14 2005
+++ b/xen/include/public/io/tpmif.h     Thu Sep 22 17:42:01 2005
@@ -20,8 +20,7 @@
     unsigned long addr;   /* Machine address of packet.   */
     int      ref;         /* grant table access reference */
     u16      id;          /* Echoed in response message.  */
-    u16      size:15;     /* Packet size in bytes.        */
-    u16      mapped:1;
+    u16      size;        /* Packet size in bytes.        */
 } tpmif_tx_request_t;
 
 /*
@@ -30,13 +29,16 @@
  */
 typedef u32 TPMIF_RING_IDX;
 
-#define TPMIF_TX_RING_SIZE 16
+#define TPMIF_TX_RING_SIZE 10
 
 /* This structure must fit in a memory page. */
+
 typedef struct {
-    union {
-        tpmif_tx_request_t  req;
-    } ring[TPMIF_TX_RING_SIZE];
+    tpmif_tx_request_t req;
+} tpmif_ring_t;
+
+typedef struct {
+    tpmif_ring_t ring[TPMIF_TX_RING_SIZE];
 } tpmif_tx_interface_t;
 
 #endif
diff -r 97dbd9524a7e -r 06d84bf87159 xen/include/xen/grant_table.h
--- a/xen/include/xen/grant_table.h     Thu Sep 22 17:34:14 2005
+++ b/xen/include/xen/grant_table.h     Thu Sep 22 17:42:01 2005
@@ -106,12 +106,6 @@
 gnttab_prepare_for_transfer(
     struct domain *rd, struct domain *ld, grant_ref_t ref);
 
-/* Notify 'rd' of a completed transfer via an already-locked grant entry. */
-void 
-gnttab_notify_transfer(
-    struct domain *rd, struct domain *ld,
-    grant_ref_t ref, unsigned long frame);
-
 /* Domain death release of granted device mappings of other domains.*/
 void
 gnttab_release_dev_mappings(grant_table_t *gt);
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/architecture.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/architecture.tex       Thu Sep 22 17:42:01 2005
@@ -0,0 +1,140 @@
+\chapter{Virtual Architecture}
+
+On a Xen-based system, the hypervisor itself runs in {\it ring 0}.  It
+has full access to the physical memory available in the system and is
+responsible for allocating portions of it to the domains.  Guest
+operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
+they see fit. Segmentation is used to prevent the guest OS from
+accessing the portion of the address space that is reserved for Xen.
+We expect most guest operating systems will use ring 1 for their own
+operation and place applications in ring 3.
+
+In this chapter we consider the basic virtual architecture provided by
+Xen: the basic CPU state, exception and interrupt handling, and time.
+Other aspects such as memory and device access are discussed in later
+chapters.
+
+
+\section{CPU state}
+
+All privileged state must be handled by Xen.  The guest OS has no
+direct access to CR3 and is not permitted to update privileged bits in
+EFLAGS. Guest OSes use \emph{hypercalls} to invoke operations in Xen;
+these are analogous to system calls but occur from ring 1 to ring 0.
+
+A list of all hypercalls is given in Appendix~\ref{a:hypercalls}.
+
+
+\section{Exceptions}
+
+A virtual IDT is provided --- a domain can submit a table of trap
+handlers to Xen via the {\tt set\_trap\_table()} hypercall.  Most trap
+handlers are identical to native x86 handlers, although the page-fault
+handler is somewhat different.
+
+
+\section{Interrupts and events}
+
+Interrupts are virtualized by mapping them to \emph{events}, which are
+delivered asynchronously to the target domain using a callback
+supplied via the {\tt set\_callbacks()} hypercall.  A guest OS can map
+these events onto its standard interrupt dispatch mechanisms.  Xen is
+responsible for determining the target domain that will handle each
+physical interrupt source. For more details on the binding of event
+sources to events, see Chapter~\ref{c:devices}.
+
+
+\section{Time}
+
+Guest operating systems need to be aware of the passage of both real
+(or wallclock) time and their own `virtual time' (the time for which
+they have been executing). Furthermore, Xen has a notion of time which
+is used for scheduling. The following notions of time are provided:
+
+\begin{description}
+\item[Cycle counter time.]
+
+  This provides a fine-grained time reference.  The cycle counter time
+  is used to accurately extrapolate the other time references.  On SMP
+  machines it is currently assumed that the cycle counter time is
+  synchronized between CPUs.  The current x86-based implementation
+  achieves this within inter-CPU communication latencies.
+
+\item[System time.]
+
+  This is a 64-bit counter which holds the number of nanoseconds that
+  have elapsed since system boot.
+
+\item[Wall clock time.]
+
+  This is the time of day in a Unix-style {\tt struct timeval}
+  (seconds and microseconds since 1 January 1970, adjusted by leap
+  seconds).  An NTP client hosted by {\it domain 0} can keep this
+  value accurate.
+
+\item[Domain virtual time.]
+
+  This progresses at the same pace as system time, but only while a
+  domain is executing --- it stops while a domain is de-scheduled.
+  Therefore the share of the CPU that a domain receives is indicated
+  by the rate at which its virtual time increases.
+
+\end{description}
+
+
+Xen exports timestamps for system time and wall-clock time to guest
+operating systems through a shared page of memory.  Xen also provides
+the cycle counter time at the instant the timestamps were calculated,
+and the CPU frequency in Hertz.  This allows the guest to extrapolate
+system and wall-clock times accurately based on the current cycle
+counter time.
+
+Since all time stamps need to be updated and read \emph{atomically},
+two version numbers are also stored in the shared info page. The first
+is incremented prior to an update, while the second is only
+incremented afterwards. Thus a guest can be sure that it read a
+consistent state by checking the two version numbers are equal.
+
+Xen includes a periodic ticker which sends a timer event to the
+currently executing domain every 10ms.  The Xen scheduler also sends a
+timer event whenever a domain is scheduled; this allows the guest OS
+to adjust for the time that has passed while it has been inactive.  In
+addition, Xen allows each domain to request that it receive a timer
+event sent at a specified system time by using the {\tt
+  set\_timer\_op()} hypercall.  Guest OSes may use this timer to
+implement timeout values when they block.
+
+
+
+%% % akw: demoting this to a section -- not sure if there is any point
+%% % though, maybe just remove it.
+
+\section{Xen CPU Scheduling}
+
+Xen offers a uniform API for CPU schedulers.  It is possible to choose
+from a number of schedulers at boot and it should be easy to add more.
+The BVT, Atropos and Round Robin schedulers are part of the normal Xen
+distribution.  BVT provides proportional fair shares of the CPU to the
+running domains.  Atropos can be used to reserve absolute shares of
+the CPU for each domain.  Round-robin is provided as an example of
+Xen's internal scheduler API.
+
+\paragraph*{Note: SMP host support}
+Xen has always supported SMP host systems.  Domains are statically
+assigned to CPUs, either at creation time or when manually pinning to
+a particular CPU.  The current schedulers then run locally on each CPU
+to decide which of the assigned domains should be run there. The
+user-level control software can be used to perform coarse-grain
+load-balancing between CPUs.
+
+
+%% More information on the characteristics and use of these schedulers
+%% is available in {\tt Sched-HOWTO.txt}.
+
+
+\section{Privileged operations}
+
+Xen exports an extended interface to privileged domains (viz.\ {\it
+  Domain 0}). This allows such domains to build and boot other domains
+on the server, and provides control interfaces for managing
+scheduling, memory, networking, and block devices.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/debugging.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/debugging.tex  Thu Sep 22 17:42:01 2005
@@ -0,0 +1,62 @@
+\chapter{Debugging}
+
+Xen provides tools for debugging both Xen and guest OSes.  Currently, the
+Pervasive Debugger provides a GDB stub, which provides facilities for symbolic
+debugging of Xen itself and of OS kernels running on top of Xen.  The Trace
+Buffer provides a lightweight means to log data about Xen's internal state and
+behaviour at runtime, for later analysis.
+
+\section{Pervasive Debugger}
+
+Information on using the pervasive debugger is available in pdb.txt.
+
+
+\section{Trace Buffer}
+
+The trace buffer provides a means to observe Xen's operation from domain 0.
+Trace events, inserted at key points in Xen's code, record data that can be
+read by the {\tt xentrace} tool.  Recording these events has a low overhead
+and hence the trace buffer may be useful for debugging timing-sensitive
+behaviours.
+
+\subsection{Internal API}
+
+To use the trace buffer functionality from within Xen, you must {\tt \#include
+<xen/trace.h>}, which contains definitions related to the trace buffer.  Trace
+events are inserted into the buffer using the {\tt TRACE\_xD} ({\tt x} = 0, 1,
+2, 3, 4 or 5) macros.  These all take an event number, plus {\tt x} additional
+(32-bit) data as their arguments.  For trace buffer-enabled builds of Xen these
+will insert the event ID and data into the trace buffer, along with the current
+value of the CPU cycle-counter.  For builds without the trace buffer enabled,
+the macros expand to no-ops and thus can be left in place without incurring
+overheads.
+
+\subsection{Trace-enabled builds}
+
+By default, the trace buffer is enabled only in debug builds (i.e.\ {\tt NDEBUG}
+is not defined).  It can be enabled separately by defining {\tt TRACE\_BUFFER},
+either in {\tt <xen/config.h>} or on the gcc command line.
+
+The size (in pages) of the per-CPU trace buffers can be specified using the
+{\tt tbuf\_size=n} boot parameter to Xen.  If the size is set to 0, the trace
+buffers will be disabled.
+
+\subsection{Dumping trace data}
+
+When running a trace buffer build of Xen, trace data are written continuously
+into the buffer data areas, with newer data overwriting older data.  This data
+can be captured using the {\tt xentrace} program in domain 0.
+
+The {\tt xentrace} tool uses {\tt /dev/mem} in domain 0 to map the trace
+buffers into its address space.  It then periodically polls all the buffers for
+new data, dumping out any new records from each buffer in turn.  As a result,
+for machines with multiple (logical) CPUs, the trace buffer output will not be
+in overall chronological order.
+
+The output from {\tt xentrace} can be post-processed using {\tt
+xentrace\_cpusplit} (used to split trace data out into per-cpu log files) and
+{\tt xentrace\_format} (used to pretty-print trace data).  For the predefined
+trace points, there is an example format file in {\tt tools/xentrace/formats}.
+
+For more information, see the manual pages for {\tt xentrace}, {\tt
+xentrace\_format} and {\tt xentrace\_cpusplit}.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/devices.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/devices.tex    Thu Sep 22 17:42:01 2005
@@ -0,0 +1,178 @@
+\chapter{Devices}
+\label{c:devices}
+
+Devices such as network and disk are exported to guests using a split
+device driver.  The device driver domain, which accesses the physical
+device directly, also runs a \emph{backend} driver, serving requests to
+that device from guests.  Each guest will use a simple \emph{frontend}
+driver, to access the backend.  Communication between these domains is
+composed of two parts: First, data is placed onto a shared memory page
+between the domains.  Second, an event channel between the two domains
+is used to pass notification that data is outstanding.  This
+separation of notification from data transfer allows message batching,
+and results in very efficient device access.
+
+Event channels are used extensively in device virtualization; each
+domain has a number of end-points or \emph{ports} each of which may be
+bound to one of the following \emph{event sources}:
+\begin{itemize}
+  \item a physical interrupt from a real device, 
+  \item a virtual interrupt (callback) from Xen, or 
+  \item a signal from another domain 
+\end{itemize}
+
+Events are lightweight and do not carry much information beyond the
+source of the notification. Hence when performing bulk data transfer,
+events are typically used as synchronization primitives over a shared
+memory transport. Event channels are managed via the {\tt
+  event\_channel\_op()} hypercall; for more details see
+Section~\ref{s:idc}.
+
+This chapter focuses on some individual device interfaces available to
+Xen guests.
+
+
+\section{Network I/O}
+
+Virtual network device services are provided by shared memory
+communication with a backend domain.  From the point of view of other
+domains, the backend may be viewed as a virtual ethernet switch
+element with each domain having one or more virtual network interfaces
+connected to it.
+
+\subsection{Backend Packet Handling}
+
+The backend driver is responsible for a variety of actions relating to
+the transmission and reception of packets from the physical device.
+With regard to transmission, the backend performs these key actions:
+
+\begin{itemize}
+\item {\bf Validation:} To ensure that domains do not attempt to
+  generate invalid (e.g. spoofed) traffic, the backend driver may
+  validate headers ensuring that source MAC and IP addresses match the
+  interface that they have been sent from.
+
+  Validation functions can be configured using standard firewall rules
+  ({\small{\tt iptables}} in the case of Linux).
+  
+\item {\bf Scheduling:} Since a number of domains can share a single
+  physical network interface, the backend must mediate access when
+  several domains each have packets queued for transmission.  This
+  general scheduling function subsumes basic shaping or rate-limiting
+  schemes.
+  
+\item {\bf Logging and Accounting:} The backend domain can be
+  configured with classifier rules that control how packets are
+  accounted or logged.  For example, log messages might be generated
+  whenever a domain attempts to send a TCP packet containing a SYN.
+\end{itemize}
+
+On receipt of incoming packets, the backend acts as a simple
+demultiplexer: Packets are passed to the appropriate virtual interface
+after any necessary logging and accounting have been carried out.
+
+\subsection{Data Transfer}
+
+Each virtual interface uses two ``descriptor rings'', one for
+transmit, the other for receive.  Each descriptor identifies a block
+of contiguous physical memory allocated to the domain.
+
+The transmit ring carries packets to transmit from the guest to the
+backend domain.  The return path of the transmit ring carries messages
+indicating that the contents have been physically transmitted and the
+backend no longer requires the associated pages of memory.
+
+To receive packets, the guest places descriptors of unused pages on
+the receive ring.  The backend will return received packets by
+exchanging these pages in the domain's memory with new pages
+containing the received data, and passing back descriptors regarding
+the new packets on the ring.  This zero-copy approach allows the
+backend to maintain a pool of free pages to receive packets into, and
+then deliver them to appropriate domains after examining their
+headers.
+
+% Real physical addresses are used throughout, with the domain
+% performing translation from pseudo-physical addresses if that is
+% necessary.
+
+If a domain does not keep its receive ring stocked with empty buffers
+then packets destined to it may be dropped.  This provides some
+defence against receive livelock problems because an overloaded domain
+will cease to receive further data.  Similarly, on the transmit path,
+it provides the application with feedback on the rate at which packets
+are able to leave the system.
+
+Flow control on rings is achieved by including a pair of producer
+indexes on the shared ring page.  Each side will maintain a private
+consumer index indicating the next outstanding message.  In this
+manner, the domains cooperate to divide the ring into two message
+lists, one in each direction.  Notification is decoupled from the
+immediate placement of new messages on the ring; the event channel
+will be used to generate notification when {\em either} a certain
+number of outstanding messages are queued, {\em or} a specified number
+of nanoseconds have elapsed since the oldest message was placed on the
+ring.
+
+%% Not sure if my version is any better -- here is what was here
+%% before: Synchronization between the backend domain and the guest is
+%% achieved using counters held in shared memory that is accessible to
+%% both.  Each ring has associated producer and consumer indices
+%% indicating the area in the ring that holds descriptors that contain
+%% data.  After receiving {\it n} packets or {\t nanoseconds} after
+%% receiving the first packet, the hypervisor sends an event to the
+%% domain.
+
+
+\section{Block I/O}
+
+All guest OS disk access goes through the virtual block device (VBD)
+interface.  This interface allows domains access to portions of block
+storage devices visible to the block backend device.  The VBD
+interface is a split driver, similar to the network interface
+described above.  A single shared memory ring is used between the
+frontend and backend drivers, across which read and write messages are
+sent.
+
+Any block device accessible to the backend domain, including
+network-based block (iSCSI, *NBD, etc), loopback and LVM/MD devices,
+can be exported as a VBD.  Each VBD is mapped to a device node in the
+guest, specified in the guest's startup configuration.
+
+Old (Xen 1.2) virtual disks are not supported under Xen 2.0, since
+similar functionality can be achieved using the more complete LVM
+system, which is already in widespread use.
+
+\subsection{Data Transfer}
+
+The single ring between the guest and the block backend supports three
+messages:
+
+\begin{description}
+\item [{\small {\tt PROBE}}:] Return a list of the VBDs available to
+  this guest from the backend.  The request includes a descriptor of a
+  free page into which the reply will be written by the backend.
+
+\item [{\small {\tt READ}}:] Read data from the specified block
+  device.  The front end identifies the device and location to read
+  from and attaches pages for the data to be copied to (typically via
+  DMA from the device).  The backend acknowledges completed read
+  requests as they finish.
+
+\item [{\small {\tt WRITE}}:] Write data to the specified block
+  device.  This functions essentially as {\small {\tt READ}}, except
+  that the data moves to the device instead of from it.
+\end{description}
+
+%% um... some old text: In overview, the same style of descriptor-ring
+%% that is used for network packets is used here.  Each domain has one
+%% ring that carries operation requests to the hypervisor and carries
+%% the results back again.
+
+%% Rather than copying data, the backend simply maps the domain's
+%% buffers in order to enable direct DMA to them.  The act of mapping
+%% the buffers also increases the reference counts of the underlying
+%% pages, so that the unprivileged domain cannot try to return them to
+%% the hypervisor, install them as page tables, or any other unsafe
+%% behaviour.
+%%
+%% % block API here
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/further_info.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/further_info.tex       Thu Sep 22 17:42:01 2005
@@ -0,0 +1,49 @@
+\chapter{Further Information}
+
+If you have questions that are not answered by this manual, the
+sources of information listed below may be of interest to you.  Note
+that bug reports, suggestions and contributions related to the
+software (or the documentation) should be sent to the Xen developers'
+mailing list (address below).
+
+
+\section{Other documentation}
+
+If you are mainly interested in using (rather than developing for)
+Xen, the \emph{Xen Users' Manual} is distributed in the {\tt docs/}
+directory of the Xen source distribution.
+
+% Various HOWTOs are also available in {\tt docs/HOWTOS}.
+
+
+\section{Online references}
+
+The official Xen web site is found at:
+\begin{quote}
+{\tt http://www.cl.cam.ac.uk/Research/SRG/netos/xen/}
+\end{quote}
+
+This contains links to the latest versions of all on-line
+documentation.
+
+
+\section{Mailing lists}
+
+There are currently four official Xen mailing lists:
+
+\begin{description}
+\item[xen-devel@xxxxxxxxxxxxxxxxxxx] Used for development
+  discussions and bug reports.  Subscribe at: \\
+  {\small {\tt http://lists.xensource.com/xen-devel}}
+\item[xen-users@xxxxxxxxxxxxxxxxxxx] Used for installation and usage
+  discussions and requests for help.  Subscribe at: \\
+  {\small {\tt http://lists.xensource.com/xen-users}}
+\item[xen-announce@xxxxxxxxxxxxxxxxxxx] Used for announcements only.
+  Subscribe at: \\
+  {\small {\tt http://lists.xensource.com/xen-announce}}
+\item[xen-changelog@xxxxxxxxxxxxxxxxxxx] Changelog feed
+  from the unstable and 2.0 trees - developer oriented.  Subscribe at: \\
+  {\small {\tt http://lists.xensource.com/xen-changelog}}
+\end{description}
+
+Of these, xen-devel is the most active.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/hypercalls.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/hypercalls.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,524 @@
+
+\newcommand{\hypercall}[1]{\vspace{2mm}{\sf #1}}
+
+\chapter{Xen Hypercalls}
+\label{a:hypercalls}
+
+Hypercalls represent the procedural interface to Xen; this appendix 
+categorizes and describes the current set of hypercalls. 
+
+\section{Invoking Hypercalls} 
+
+Hypercalls are invoked in a manner analogous to system calls in a
+conventional operating system; a software interrupt is issued which
+vectors to an entry point within Xen. On x86\_32 machines the
+instruction required is {\tt int \$0x82}; the (real) IDT is set up so
+that this may only be issued from within ring 1. The particular 
+hypercall to be invoked is contained in {\tt EAX} --- a list 
+mapping these values to symbolic hypercall names can be found 
+in {\tt xen/include/public/xen.h}. 
+
+On some occasions a set of hypercalls will be required to carry
+out a higher-level function; a good example is when a guest 
+operating system wishes to context switch to a new process which 
+requires updating various privileged CPU state. As an optimization
+for these cases, there is a generic mechanism to issue a set of 
+hypercalls as a batch: 
+
+\begin{quote}
+\hypercall{multicall(void *call\_list, int nr\_calls)}
+
+Execute a series of hypervisor calls; {\tt nr\_calls} is the length of
+the array of {\tt multicall\_entry\_t} structures pointed to by {\tt
+call\_list}. Each entry contains the hypercall operation code followed
+by up to 7 word-sized arguments.
+\end{quote}
+
+Note that multicalls are provided purely as an optimization; there is
+no requirement to use them when first porting a guest operating
+system.
+
+
+\section{Virtual CPU Setup} 
+
+At start of day, a guest operating system needs to set up the virtual
+CPU it is executing on. This includes installing vectors for the
+virtual IDT so that the guest OS can handle interrupts, page faults,
+etc. However the very first thing a guest OS must set up is a pair 
+of hypervisor callbacks: these are the entry points which Xen will
+use when it wishes to notify the guest OS of an occurrence. 
+
+\begin{quote}
+\hypercall{set\_callbacks(unsigned long event\_selector, unsigned long
+  event\_address, unsigned long failsafe\_selector, unsigned long
+  failsafe\_address) }
+
+Register the normal (``event'') and failsafe callbacks for 
+event processing. In each case the code segment selector and 
+address within that segment are provided. The selectors must
+have RPL 1; in XenLinux we simply use the kernel's CS for both 
+{\tt event\_selector} and {\tt failsafe\_selector}.
+
+The value {\tt event\_address} specifies the address of the guest OS's
+event handling and dispatch routine; the {\tt failsafe\_address}
+specifies a separate entry point which is used only if a fault occurs
+when Xen attempts to use the normal callback. 
+\end{quote} 
+
+
+After installing the hypervisor callbacks, the guest OS can 
+install a `virtual IDT' by using the following hypercall: 
+
+\begin{quote} 
+\hypercall{set\_trap\_table(trap\_info\_t *table)} 
+
+Install one or more entries into the per-domain 
+trap handler table (essentially a software version of the IDT). 
+Each entry in the array pointed to by {\tt table} includes the 
+exception vector number with the corresponding segment selector 
+and entry point. Most guest OSes can use the same handlers on 
+Xen as when running on the real hardware; an exception is the 
+page fault handler (exception vector 14) where a modified 
+stack-frame layout is used. 
+
+
+\end{quote} 
+
+
+
+\section{Scheduling and Timer}
+
+Domains are preemptively scheduled by Xen according to the 
+parameters installed by domain 0 (see Section~\ref{s:dom0ops}). 
+In addition, however, a domain may choose to explicitly 
+control certain behavior with the following hypercall: 
+
+\begin{quote} 
+\hypercall{sched\_op(unsigned long op)} 
+
+Request scheduling operation from hypervisor. The options are: {\it
+yield}, {\it block}, and {\it shutdown}.  {\it yield} keeps the
+calling domain runnable but may cause a reschedule if other domains
+are runnable.  {\it block} removes the calling domain from the run
+queue and causes it to sleep until an event is delivered to it.  {\it
+shutdown} is used to end the domain's execution; the caller can
+additionally specify whether the domain should reboot, halt or
+suspend.
+\end{quote} 
+
+To aid the implementation of a process scheduler within a guest OS,
+Xen provides a virtual programmable timer:
+
+\begin{quote}
+\hypercall{set\_timer\_op(uint64\_t timeout)} 
+
+Request a timer event to be sent at the specified system time (time 
+in nanoseconds since system boot). The hypercall actually passes the 
+64-bit timeout value as a pair of 32-bit values. 
+
+\end{quote} 
+
+Note that calling {\tt set\_timer\_op()} prior to {\tt sched\_op} 
+allows block-with-timeout semantics. 
+
+
+\section{Page Table Management} 
+
+Since guest operating systems have read-only access to their page 
+tables, Xen must be involved when making any changes. The following
+multi-purpose hypercall can be used to modify page-table entries, 
+update the machine-to-physical mapping table, flush the TLB, install 
+a new page-table base pointer, and more.
+
+\begin{quote} 
+\hypercall{mmu\_update(mmu\_update\_t *req, int count, int *success\_count)} 
+
+Update the page table for the domain; a set of {\tt count} updates are
+submitted for processing in a batch, with {\tt success\_count} being 
+updated to report the number of successful updates.  
+
+Each element of {\tt req[]} contains a pointer (address) and value; 
+the two least significant bits of the pointer are used to distinguish 
+the type of update requested as follows:
+\begin{description} 
+
+\item[\it MMU\_NORMAL\_PT\_UPDATE:] update a page directory entry or
+page table entry to the associated value; Xen will check that the
+update is safe, as described in Chapter~\ref{c:memory}.
+
+\item[\it MMU\_MACHPHYS\_UPDATE:] update an entry in the
+  machine-to-physical table. The calling domain must own the machine
+  page in question (or be privileged).
+
+\item[\it MMU\_EXTENDED\_COMMAND:] perform additional MMU operations.
+The set of additional MMU operations is considerable, and includes
+updating {\tt cr3} (or just re-installing it for a TLB flush),
+flushing the cache, installing a new LDT, or pinning \& unpinning
+page-table pages (to ensure their reference count doesn't drop to zero
+which would require a revalidation of all entries).
+
+Further extended commands are used to deal with granting and 
+acquiring page ownership; see Section~\ref{s:idc}. 
+
+
+\end{description}
+
+More details on the precise format of all commands can be 
+found in {\tt xen/include/public/xen.h}. 
+
+
+\end{quote}
+
+Explicitly updating batches of page table entries is extremely
+efficient, but can require a number of alterations to the guest
+OS. Using the writable page table mode (Chapter~\ref{c:memory}) is
+recommended for new OS ports.
+
+Regardless of which page table update mode is being used, however,
+there are some occasions (notably handling a demand page fault) where
+a guest OS will wish to modify exactly one PTE rather than a
+batch. This is catered for by the following:
+
+\begin{quote} 
+\hypercall{update\_va\_mapping(unsigned long page\_nr, unsigned long
+val, \\ unsigned long flags)}
+
+Update the currently installed PTE for the page {\tt page\_nr} to 
+{\tt val}. As with {\tt mmu\_update()}, Xen checks the modification 
+is safe before applying it. The {\tt flags} determine which kind
+of TLB flush, if any, should follow the update. 
+
+\end{quote} 
+
+Finally, sufficiently privileged domains may occasionally wish to manipulate 
+the pages of others: 
+\begin{quote}
+
+\hypercall{update\_va\_mapping\_otherdomain(unsigned long page\_nr,
+unsigned long val, unsigned long flags, uint16\_t domid)}
+
+Identical to {\tt update\_va\_mapping()} save that the pages being
+mapped must belong to the domain {\tt domid}. 
+
+\end{quote}
+
+This privileged operation is currently used by backend virtual device
+drivers to safely map pages containing I/O data. 
+
+
+
+\section{Segmentation Support}
+
+Xen allows guest OSes to install a custom GDT if they require it; 
+this is context switched transparently whenever a domain is 
+[de]scheduled.  The following hypercall is effectively a 
+`safe' version of {\tt lgdt}: 
+
+\begin{quote}
+\hypercall{set\_gdt(unsigned long *frame\_list, int entries)} 
+
+Install a global descriptor table for a domain; {\tt frame\_list} is
+an array of up to 16 machine page frames within which the GDT resides,
+with {\tt entries} being the actual number of descriptor-entry
+slots. All page frames must be mapped read-only within the guest's
+address space, and the table must be large enough to contain Xen's
+reserved entries (see {\tt xen/include/public/arch-x86\_32.h}).
+
+\end{quote}
+
+Many guest OSes will also wish to install LDTs; this is achieved by
+using {\tt mmu\_update()} with an extended command, passing the
+linear address of the LDT base along with the number of entries. No
+special safety checks are required; Xen needs to perform this task
+simply since {\tt lldt} requires CPL 0.
+
+
+Xen also allows guest operating systems to update just an 
+individual segment descriptor in the GDT or LDT:  
+
+\begin{quote}
+\hypercall{update\_descriptor(unsigned long ma, unsigned long word1,
+unsigned long word2)}
+
+Update the GDT/LDT entry at machine address {\tt ma}; the new
+8-byte descriptor is stored in {\tt word1} and {\tt word2}.
+Xen performs a number of checks to ensure the descriptor is 
+valid. 
+
+\end{quote}
+
+Guest OSes can use the above in place of context switching entire 
+LDTs (or the GDT) when the number of changing descriptors is small. 
+
+\section{Context Switching} 
+
+When a guest OS wishes to context switch between two processes, 
+it can use the page table and segmentation hypercalls described
+above to perform the bulk of the privileged work. In addition, 
+however, it will need to invoke Xen to switch the kernel (ring 1) 
+stack pointer: 
+
+\begin{quote} 
+\hypercall{stack\_switch(unsigned long ss, unsigned long esp)} 
+
+Request kernel stack switch from hypervisor; {\tt ss} is the new 
+stack segment, while {\tt esp} is the new stack pointer. 
+
+\end{quote} 
+
+A final useful hypercall for context switching allows ``lazy'' 
+save and restore of floating point state: 
+
+\begin{quote}
+\hypercall{fpu\_taskswitch(void)} 
+
+This call instructs Xen to set the {\tt TS} bit in the {\tt cr0}
+control register; this means that the next attempt to use floating
+point will cause a trap which the guest OS can handle. Typically it will
+then save/restore the FP state, and clear the {\tt TS} bit. 
+\end{quote} 
+
+This is provided as an optimization only; guest OSes can also choose
+to save and restore FP state on all context switches for simplicity. 
+
+
+\section{Physical Memory Management}
+
+As mentioned previously, each domain has a maximum and current 
+memory allocation. The maximum allocation, set at domain creation 
+time, cannot be modified. However a domain can choose to reduce 
+and subsequently grow its current allocation by using the
+following call: 
+
+\begin{quote} 
+\hypercall{dom\_mem\_op(unsigned int op, unsigned long *extent\_list,
+  unsigned long nr\_extents, unsigned int extent\_order)}
+
+Increase or decrease current memory allocation (as determined by 
+the value of {\tt op}). Each invocation provides a list of 
+extents each of which is $2^s$ pages in size, 
+where $s$ is the value of {\tt extent\_order}. 
+
+\end{quote} 
+
+In addition to simply reducing or increasing the current memory
+allocation via a `balloon driver', this call is also useful for 
+obtaining contiguous regions of machine memory when required (e.g. 
+for certain PCI devices, or if using superpages).  
+
+
+\section{Inter-Domain Communication}
+\label{s:idc} 
+
+Xen provides a simple asynchronous notification mechanism via
+\emph{event channels}. Each domain has a set of end-points (or
+\emph{ports}) which may be bound to an event source (e.g. a physical
+IRQ, a virtual IRQ, or a port in another domain). When a pair of
+end-points in two different domains are bound together, then a `send'
+operation on one will cause an event to be received by the destination
+domain.
+
+The control and use of event channels involves the following hypercall: 
+
+\begin{quote}
+\hypercall{event\_channel\_op(evtchn\_op\_t *op)} 
+
+Inter-domain event-channel management; {\tt op} is a discriminated 
+union which allows the following 7 operations: 
+
+\begin{description} 
+
+\item[\it alloc\_unbound:] allocate a free (unbound) local
+  port and prepare for connection from a specified domain. 
+\item[\it bind\_virq:] bind a local port to a virtual 
+IRQ; any particular VIRQ can be bound to at most one port per domain. 
+\item[\it bind\_pirq:] bind a local port to a physical IRQ;
+once more, a given pIRQ can be bound to at most one port per
+domain. Furthermore the calling domain must be sufficiently
+privileged.
+\item[\it bind\_interdomain:] construct an interdomain event 
+channel; in general, the target domain must have previously allocated 
+an unbound port for this channel, although this can be bypassed by 
+privileged domains during domain setup. 
+\item[\it close:] close an interdomain event channel. 
+\item[\it send:] send an event to the remote end of an 
+interdomain event channel. 
+\item[\it status:] determine the current status of a local port. 
+\end{description} 
+
+For more details see
+{\tt xen/include/public/event\_channel.h}. 
+
+\end{quote} 
+
+Event channels are the fundamental communication primitive between 
+Xen domains and seamlessly support SMP. However they provide little
+bandwidth for communication {\sl per se}, and hence are typically 
+married with a piece of shared memory to produce effective and 
+high-performance inter-domain communication. 
+
+Safe sharing of memory pages between guest OSes is carried out by
+granting access on a per page basis to individual domains. This is
+achieved by using the {\tt grant\_table\_op()} hypercall.
+
+\begin{quote}
+\hypercall{grant\_table\_op(unsigned int cmd, void *uop, unsigned int count)}
+
+Grant or remove access to a particular page to a particular domain. 
+
+\end{quote} 
+
+This is not currently widely in use by guest operating systems, but 
+we intend to integrate support more fully in the near future. 
+
+\section{PCI Configuration} 
+
+Domains with physical device access (i.e.\ driver domains) receive
+limited access to certain PCI devices (bus address space and
+interrupts). However many guest operating systems attempt to 
+determine the PCI configuration by directly accessing the PCI BIOS, 
+which cannot be allowed for safety. 
+
+Instead, Xen provides the following hypercall: 
+
+\begin{quote}
+\hypercall{physdev\_op(void *physdev\_op)}
+
+Perform a PCI configuration operation; depending on the value 
+of {\tt physdev\_op} this can be a PCI config read, a PCI config 
+write, or a small number of other queries. 
+
+\end{quote} 
+
+
+For examples of using {\tt physdev\_op()}, see the 
+Xen-specific PCI code in the linux sparse tree. 
+
+\section{Administrative Operations}
+\label{s:dom0ops}
+
+A large number of control operations are available to a sufficiently
+privileged domain (typically domain 0). These allow the creation and
+management of new domains, for example. A complete list is given 
+below: for more details on any or all of these, please see 
+{\tt xen/include/public/dom0\_ops.h} 
+
+
+\begin{quote}
+\hypercall{dom0\_op(dom0\_op\_t *op)} 
+
+Administrative domain operations for domain management. The options are:
+
+\begin{description} 
+\item [\it DOM0\_CREATEDOMAIN:] create a new domain
+
+\item [\it DOM0\_PAUSEDOMAIN:] remove a domain from the scheduler run 
+queue. 
+
+\item [\it DOM0\_UNPAUSEDOMAIN:] mark a paused domain as schedulable
+  once again. 
+
+\item [\it DOM0\_DESTROYDOMAIN:] deallocate all resources associated
+with a domain
+
+\item [\it DOM0\_GETMEMLIST:] get list of pages used by the domain
+
+\item [\it DOM0\_SCHEDCTL:]
+
+\item [\it DOM0\_ADJUSTDOM:] adjust scheduling priorities for domain
+
+\item [\it DOM0\_BUILDDOMAIN:] do final guest OS setup for domain
+
+\item [\it DOM0\_GETDOMAINFO:] get statistics about the domain
+
+\item [\it DOM0\_GETPAGEFRAMEINFO:] 
+
+\item [\it DOM0\_GETPAGEFRAMEINFO2:]
+
+\item [\it DOM0\_IOPL:] set I/O privilege level
+
+\item [\it DOM0\_MSR:] read or write model specific registers
+
+\item [\it DOM0\_DEBUG:] interactively invoke the debugger
+
+\item [\it DOM0\_SETTIME:] set system time
+
+\item [\it DOM0\_READCONSOLE:] read console content from hypervisor buffer ring
+
+\item [\it DOM0\_PINCPUDOMAIN:] pin domain to a particular CPU
+
+\item [\it DOM0\_GETTBUFS:] get information about the size and location of
+                      the trace buffers (only on trace-buffer enabled builds)
+
+\item [\it DOM0\_PHYSINFO:] get information about the host machine
+
+\item [\it DOM0\_PCIDEV\_ACCESS:] modify PCI device access permissions
+
+\item [\it DOM0\_SCHED\_ID:] get the ID of the current Xen scheduler
+
+\item [\it DOM0\_SHADOW\_CONTROL:] switch between shadow page-table modes
+
+\item [\it DOM0\_SETDOMAININITIALMEM:] set initial memory allocation of a domain
+
+\item [\it DOM0\_SETDOMAINMAXMEM:] set maximum memory allocation of a domain
+
+\item [\it DOM0\_SETDOMAINVMASSIST:] set domain VM assist options
+\end{description} 
+\end{quote} 
+
+Most of the above are best understood by looking at the code 
+implementing them (in {\tt xen/common/dom0\_ops.c}) and in 
+the user-space tools that use them (mostly in {\tt tools/libxc}). 
+
+\section{Debugging Hypercalls} 
+
+A few additional hypercalls are mainly useful for debugging: 
+
+\begin{quote} 
+\hypercall{console\_io(int cmd, int count, char *str)}
+
+Use Xen to interact with the console; operations are:
+
+{\it CONSOLEIO\_write}: Output count characters from buffer str.
+
+{\it CONSOLEIO\_read}: Input at most count characters into buffer str.
+\end{quote} 
+
+A pair of hypercalls allows access to the underlying debug registers: 
+\begin{quote}
+\hypercall{set\_debugreg(int reg, unsigned long value)}
+
+Set debug register {\tt reg} to {\tt value} 
+
+\hypercall{get\_debugreg(int reg)}
+
+Return the contents of the debug register {\tt reg}
+\end{quote}
+
+And finally: 
+\begin{quote}
+\hypercall{xen\_version(int cmd)}
+
+Request Xen version number.
+\end{quote} 
+
+This is useful to ensure that user-space tools are in sync 
+with the underlying hypervisor. 
+
+\section{Deprecated Hypercalls}
+
+Xen is under constant development and refinement; as such there 
+are plans to improve the way in which various pieces of functionality 
+are exposed to guest OSes. 
+
+\begin{quote} 
+\hypercall{vm\_assist(unsigned int cmd, unsigned int type)}
+
+Toggle various memory management modes (in particular writable page
+tables and superpage support). 
+
+\end{quote} 
+
+This is likely to be replaced with mode values in the shared 
+information page since this is more resilient for resumption 
+after migration or checkpoint. 
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/memory.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/memory.tex     Thu Sep 22 17:42:01 2005
@@ -0,0 +1,162 @@
+\chapter{Memory}
+\label{c:memory} 
+
+Xen is responsible for managing the allocation of physical memory to
+domains, and for ensuring safe use of the paging and segmentation
+hardware.
+
+
+\section{Memory Allocation}
+
+Xen resides within a small fixed portion of physical memory; it also
+reserves the top 64MB of every virtual address space. The remaining
+physical memory is available for allocation to domains at a page
+granularity.  Xen tracks the ownership and use of each page, which
+allows it to enforce secure partitioning between domains.
+
+Each domain has a maximum and current physical memory allocation.  A
+guest OS may run a `balloon driver' to dynamically adjust its current
+memory allocation up to its limit.
+
+
+%% XXX SMH: I use machine and physical in the next section (which is
+%% kinda required for consistency with code); wonder if this section
+%% should use same terms?
+%%
+%% Probably. 
+%%
+%% Merging this and below section at some point prob makes sense.
+
+\section{Pseudo-Physical Memory}
+
+Since physical memory is allocated and freed on a page granularity,
+there is no guarantee that a domain will receive a contiguous stretch
+of physical memory. However most operating systems do not have good
+support for operating in a fragmented physical address space. To aid
+porting such operating systems to run on top of Xen, we make a
+distinction between \emph{machine memory} and \emph{pseudo-physical
+  memory}.
+
+Put simply, machine memory refers to the entire amount of memory
+installed in the machine, including that reserved by Xen, in use by
+various domains, or currently unallocated. We consider machine memory
+to comprise a set of 4K \emph{machine page frames} numbered
+consecutively starting from 0. Machine frame numbers mean the same
+within Xen or any domain.
+
+Pseudo-physical memory, on the other hand, is a per-domain
+abstraction. It allows a guest operating system to consider its memory
+allocation to consist of a contiguous range of physical page frames
+starting at physical frame 0, despite the fact that the underlying
+machine page frames may be sparsely allocated and in any order.
+
+To achieve this, Xen maintains a globally readable {\it
+  machine-to-physical} table which records the mapping from machine
+page frames to pseudo-physical ones. In addition, each domain is
+supplied with a {\it physical-to-machine} table which performs the
+inverse mapping. Clearly the machine-to-physical table has size
+proportional to the amount of RAM installed in the machine, while each
+physical-to-machine table has size proportional to the memory
+allocation of the given domain.
+
+Architecture dependent code in guest operating systems can then use
+the two tables to provide the abstraction of pseudo-physical memory.
+In general, only certain specialized parts of the operating system
+(such as page table management) need to understand the difference
+between machine and pseudo-physical addresses.
+
+
+\section{Page Table Updates}
+
+In the default mode of operation, Xen enforces read-only access to
+page tables and requires guest operating systems to explicitly request
+any modifications.  Xen validates all such requests and only applies
+updates that it deems safe.  This is necessary to prevent domains from
+adding arbitrary mappings to their page tables.
+
+To aid validation, Xen associates a type and reference count with each
+memory page. A page has one of the following mutually-exclusive types
+at any point in time: page directory ({\sf PD}), page table ({\sf
+  PT}), local descriptor table ({\sf LDT}), global descriptor table
+({\sf GDT}), or writable ({\sf RW}). Note that a guest OS may always
+create readable mappings of its own memory regardless of its current
+type.
+
+%%% XXX: possibly explain more about ref count 'lifecycle' here?
+This mechanism is used to maintain the invariants required for safety;
+for example, a domain cannot have a writable mapping to any part of a
+page table as this would require the page concerned to simultaneously
+be of types {\sf PT} and {\sf RW}.
+
+
+% \section{Writable Page Tables}
+
+Xen also provides an alternative mode of operation in which guests
+have the illusion that their page tables are directly writable.  Of
+course this is not really the case, since Xen must still validate
+modifications to ensure secure partitioning. To this end, Xen traps
+any write attempt to a memory page of type {\sf PT} (i.e., that is
+currently part of a page table).  If such an access occurs, Xen
+temporarily allows write access to that page while at the same time
+\emph{disconnecting} it from the page table that is currently in use.
+This allows the guest to safely make updates to the page because the
+newly-updated entries cannot be used by the MMU until Xen revalidates
+and reconnects the page.  Reconnection occurs automatically in a
+number of situations: for example, when the guest modifies a different
+page-table page, when the domain is preempted, or whenever the guest
+uses Xen's explicit page-table update interfaces.
+
+Finally, Xen also supports a form of \emph{shadow page tables} in
+which the guest OS uses an independent copy of page tables which are
+unknown to the hardware (i.e.\ which are never pointed to by {\tt
+  cr3}). Instead Xen propagates changes made to the guest's tables to
+the real ones, and vice versa. This is useful for logging page writes
+(e.g.\ for live migration or checkpoint). A full version of the shadow
+page tables also allows guest OS porting with less effort.
+
+
+\section{Segment Descriptor Tables}
+
+On boot a guest is supplied with a default GDT, which does not reside
+within its own memory allocation.  If the guest wishes to use other
+than the default `flat' ring-1 and ring-3 segments that this GDT
+provides, it must register a custom GDT and/or LDT with Xen, allocated
+from its own memory. Note that a number of GDT entries are reserved by
+Xen -- any custom GDT must also include sufficient space for these
+entries.
+
+For example, the following hypercall is used to specify a new GDT:
+
+\begin{quote}
+  int {\bf set\_gdt}(unsigned long *{\em frame\_list}, int {\em
+    entries})
+
+  \emph{frame\_list}: An array of up to 16 machine page frames within
+  which the GDT resides.  Any frame registered as a GDT frame may only
+  be mapped read-only within the guest's address space (e.g., no
+  writable mappings, no use as a page-table page, and so on).
+
+  \emph{entries}: The number of descriptor-entry slots in the GDT.
+  Note that the table must be large enough to contain Xen's reserved
+  entries; thus we must have `{\em entries $>$
+    LAST\_RESERVED\_GDT\_ENTRY}\ '.  Note also that, after registering
+  the GDT, slots \emph{FIRST\_} through
+  \emph{LAST\_RESERVED\_GDT\_ENTRY} are no longer usable by the guest
+  and may be overwritten by Xen.
+\end{quote}
+
+The LDT is updated via the generic MMU update mechanism (i.e., via the
+{\tt mmu\_update()} hypercall).
+
+\section{Start of Day}
+
+The start-of-day environment for guest operating systems is rather
+different to that provided by the underlying hardware. In particular,
+the processor is already executing in protected mode with paging
+enabled.
+
+{\it Domain 0} is created and booted by Xen itself. For all subsequent
+domains, the analogue of the boot-loader is the {\it domain builder},
+user-space software running in {\it domain 0}. The domain builder is
+responsible for building the initial page tables for a domain and
+loading its kernel image at the appropriate virtual address.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/interface/scheduling.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/interface/scheduling.tex Thu Sep 22 17:42:01 2005
@@ -0,0 +1,268 @@
+\chapter{Scheduling API}  
+
+The scheduling API is used by both the schedulers described above and should
+also be used by any new schedulers.  It provides a generic interface and also
+implements much of the ``boilerplate'' code.
+
+Schedulers conforming to this API are described by the following
+structure:
+
+\begin{verbatim}
+struct scheduler
+{
+    char *name;             /* full name for this scheduler      */
+    char *opt_name;         /* option name for this scheduler    */
+    unsigned int sched_id;  /* ID for this scheduler             */
+
+    int          (*init_scheduler) ();
+    int          (*alloc_task)     (struct task_struct *);
+    void         (*add_task)       (struct task_struct *);
+    void         (*free_task)      (struct task_struct *);
+    void         (*rem_task)       (struct task_struct *);
+    void         (*wake_up)        (struct task_struct *);
+    void         (*do_block)       (struct task_struct *);
+    task_slice_t (*do_schedule)    (s_time_t);
+    int          (*control)        (struct sched_ctl_cmd *);
+    int          (*adjdom)         (struct task_struct *,
+                                    struct sched_adjdom_cmd *);
+    s32          (*reschedule)     (struct task_struct *);
+    void         (*dump_settings)  (void);
+    void         (*dump_cpu_state) (int);
+    void         (*dump_runq_el)   (struct task_struct *);
+};
+\end{verbatim}
+
+The only method that {\em must} be implemented is
+{\tt do\_schedule()}.  However, if there is not some implementation for the
+{\tt wake\_up()} method then waking tasks will not get put on the runqueue!
+
+The fields of the above structure are described in more detail below.
+
+\subsubsection{name}
+
+The name field should point to a descriptive ASCII string.
+
+\subsubsection{opt\_name}
+
+This field is the value of the {\tt sched=} boot-time option that will select
+this scheduler.
+
+\subsubsection{sched\_id}
+
+This is an integer that uniquely identifies this scheduler.  There should be a
+macro corresponding to this scheduler ID in {\tt <xen/sched-if.h>}.
+
+\subsubsection{init\_scheduler}
+
+\paragraph*{Purpose}
+
+This is a function for performing any scheduler-specific initialisation.  For
+instance, it might allocate memory for per-CPU scheduler data and initialise it
+appropriately.
+
+\paragraph*{Call environment}
+
+This function is called after the initialisation performed by the generic
+layer.  The function is called exactly once, for the scheduler that has been
+selected.
+
+\paragraph*{Return values}
+
+This should return negative on failure --- this will cause an
+immediate panic and the system will fail to boot.
+
+\subsubsection{alloc\_task}
+
+\paragraph*{Purpose}
+Called when a {\tt task\_struct} is allocated by the generic scheduler
+layer.  A particular scheduler implementation may use this method to
+allocate per-task data for this task.  It may use the {\tt
+sched\_priv} pointer in the {\tt task\_struct} to point to this data.
+
+\paragraph*{Call environment}
+The generic layer guarantees that the {\tt sched\_priv} field will
+remain intact from the time this method is called until the task is
+deallocated (so long as the scheduler implementation does not change
+it explicitly!).
+
+\paragraph*{Return values}
+Negative on failure.
+
+\subsubsection{add\_task}
+
+\paragraph*{Purpose}
+
+Called when a task is initially added by the generic layer.
+
+\paragraph*{Call environment}
+
+The fields in the {\tt task\_struct} are now filled out and available for use.
+Schedulers should implement appropriate initialisation of any per-task private
+information in this method.
+
+\subsubsection{free\_task}
+
+\paragraph*{Purpose}
+
+Schedulers should free the space used by any associated private data
+structures.
+
+\paragraph*{Call environment}
+
+This is called when a {\tt task\_struct} is about to be deallocated.
+The generic layer will have done generic task removal operations and
+(if implemented) called the scheduler's {\tt rem\_task} method before
+this method is called.
+
+\subsubsection{rem\_task}
+
+\paragraph*{Purpose}
+
+This is called when a task is being removed from scheduling (but is
+not yet being freed).
+
+\subsubsection{wake\_up}
+
+\paragraph*{Purpose}
+
+Called when a task is woken up, this method should put the task on the runqueue
+(or do the scheduler-specific equivalent action).
+
+\paragraph*{Call environment}
+
+The task is already set to state RUNNING.
+
+\subsubsection{do\_block}
+
+\paragraph*{Purpose}
+
+This function is called when a task is blocked.  This function should
+not remove the task from the runqueue.
+
+\paragraph*{Call environment}
+
+The EVENTS\_MASTER\_ENABLE\_BIT is already set and the task state changed to
+TASK\_INTERRUPTIBLE on entry to this method.  A call to the {\tt
+  do\_schedule} method will be made after this method returns, in
+order to select the next task to run.
+
+\subsubsection{do\_schedule}
+
+This method must be implemented.
+
+\paragraph*{Purpose}
+
+The method is called each time a new task must be chosen for scheduling on the
+current CPU.  The current time is passed as the single argument (the current
+task can be found using the {\tt current} macro).
+
+This method should select the next task to run on this CPU and set its minimum
+time to run as well as returning the data described below.
+
+This method should also take the appropriate action if the previous
+task has blocked, e.g. removing it from the runqueue.
+
+\paragraph*{Call environment}
+
+The other fields in the {\tt task\_struct} are updated by the generic layer,
+which also performs all Xen-specific tasks and performs the actual task switch
+(unless the previous task has been chosen again).
+
+This method is called with the {\tt schedule\_lock} held for the current CPU
+and local interrupts disabled.
+
+\paragraph*{Return values}
+
+Must return a {\tt struct task\_slice} describing what task to run and how long
+for (at maximum).
+
+\subsubsection{control}
+
+\paragraph*{Purpose}
+
+This method is called for global scheduler control operations.  It takes a
+pointer to a {\tt struct sched\_ctl\_cmd}, which it should either
+source data from or populate with data, depending on the value of the
+{\tt direction} field.
+
+\paragraph*{Call environment}
+
+The generic layer guarantees that when this method is called, the
+caller selected the correct scheduler ID, hence the scheduler's
+implementation does not need to sanity-check these parts of the call.
+
+\paragraph*{Return values}
+
+This function should return the value to be passed back to user space, hence it
+should either be 0 or an appropriate errno value.
+
+\subsubsection{adjdom}
+
+\paragraph*{Purpose}
+
+This method is called to adjust the scheduling parameters of a particular
+domain, or to query their current values.  The function should check
+the {\tt direction} field of the {\tt sched\_adjdom\_cmd} it receives in
+order to determine which of these operations is being performed.
+
+\paragraph*{Call environment}
+
+The generic layer guarantees that the caller has specified the correct
+control interface version and scheduler ID and that the supplied {\tt
+task\_struct} will not be deallocated during the call (hence it is not
+necessary to {\tt get\_task\_struct}).
+
+\paragraph*{Return values}
+
+This function should return the value to be passed back to user space, hence it
+should either be 0 or an appropriate errno value.
+
+\subsubsection{reschedule}
+
+\paragraph*{Purpose}
+
+This method is called to determine if a reschedule is required as a result of a
+particular task.
+
+\paragraph*{Call environment}
+The generic layer will cause a reschedule if the current domain is the idle
+task or it has exceeded its minimum time slice before a reschedule.  The
+generic layer guarantees that the task passed is not currently running but is
+on the runqueue.
+
+\paragraph*{Return values}
+
+Should return a mask of CPUs to cause a reschedule on.
+
+\subsubsection{dump\_settings}
+
+\paragraph*{Purpose}
+
+If implemented, this should dump any private global settings for this
+scheduler to the console.
+
+\paragraph*{Call environment}
+
+This function is called with interrupts enabled.
+
+\subsubsection{dump\_cpu\_state}
+
+\paragraph*{Purpose}
+
+This method should dump any private settings for the specified CPU.
+
+\paragraph*{Call environment}
+
+This function is called with interrupts disabled and the {\tt schedule\_lock}
+for the specified CPU held.
+
+\subsubsection{dump\_runq\_el}
+
+\paragraph*{Purpose}
+
+This method should dump any private settings for the specified task.
+
+\paragraph*{Call environment}
+
+This function is called with interrupts disabled and the {\tt schedule\_lock}
+for the task's CPU held.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/build.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/build.tex   Thu Sep 22 17:42:01 2005
@@ -0,0 +1,170 @@
+\chapter{Build, Boot and Debug Options} 
+
+This chapter describes the build- and boot-time options which may be
+used to tailor your Xen system.
+
+
+\section{Xen Build Options}
+
+Xen provides a number of build-time options which should be set as
+environment variables or passed on make's command-line.
+
+\begin{description}
+\item[verbose=y] Enable debugging messages when Xen detects an
+  unexpected condition.  Also enables console output from all domains.
+\item[debug=y] Enable debug assertions.  Implies {\bf verbose=y}.
+  (Primarily useful for tracing bugs in Xen).
+\item[debugger=y] Enable the in-Xen debugger. This can be used to
+  debug Xen, guest OSes, and applications.
+\item[perfc=y] Enable performance counters for significant events
+  within Xen. The counts can be reset or displayed on Xen's console
+  via console control keys.
+\item[trace=y] Enable per-cpu trace buffers which log a range of
+  events within Xen for collection by control software.
+\end{description}
+
+
+\section{Xen Boot Options}
+\label{s:xboot}
+
+These options are used to configure Xen's behaviour at runtime.  They
+should be appended to Xen's command line, either manually or by
+editing \path{grub.conf}.
+
+\begin{description}
+\item [ noreboot ] Don't reboot the machine automatically on errors.
+  This is useful to catch debug output if you aren't catching console
+  messages via the serial line.
+\item [ nosmp ] Disable SMP support.  This option is implied by
+  `ignorebiostables'.
+\item [ watchdog ] Enable NMI watchdog which can report certain
+  failures.
+\item [ noirqbalance ] Disable software IRQ balancing and affinity.
+  This can be used on systems such as Dell 1850/2850 that have
+  workarounds in hardware for IRQ-routing issues.
+\item [ badpage=$<$page number$>$,$<$page number$>$, \ldots ] Specify
+  a list of pages not to be allocated for use because they contain bad
+  bytes. For example, if your memory tester says that byte 0x12345678
+  is bad, you would place `badpage=0x12345' on Xen's command line.
+\item [ com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
+  com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\
+  Xen supports up to two 16550-compatible serial ports.  For example:
+  `com1=9600, 8n1, 0x408, 5' maps COM1 to a 9600-baud port, 8 data
+  bits, no parity, 1 stop bit, I/O port base 0x408, IRQ 5.  If some
+  configuration options are standard (e.g., I/O base and IRQ), then
+  only a prefix of the full configuration string need be specified. If
+  the baud rate is pre-configured (e.g., by the bootloader) then you
+  can specify `auto' in place of a numeric baud rate.
+\item [ console=$<$specifier list$>$ ] Specify the destination for Xen
+  console I/O.  This is a comma-separated list of, for example:
+  \begin{description}
+  \item[ vga ] Use VGA console and allow keyboard input.
+  \item[ com1 ] Use serial port com1.
+  \item[ com2H ] Use serial port com2. Transmitted chars will have the
+    MSB set. Received chars must have MSB set.
+  \item[ com2L] Use serial port com2. Transmitted chars will have the
+    MSB cleared. Received chars must have MSB cleared.
+  \end{description}
+  The latter two examples allow a single port to be shared by two
+  subsystems (e.g.\ console and debugger). Sharing is controlled by
+  MSB of each transmitted/received character.  [NB. Default for this
+  option is `com1,vga']
+\item [ sync\_console ] Force synchronous console output. This is
+  useful if your system fails unexpectedly before it has sent all
+  available output to the console. In most cases Xen will
+  automatically enter synchronous mode when an exceptional event
+  occurs, but this option provides a manual fallback.
+\item [ conswitch=$<$switch-char$><$auto-switch-char$>$ ] Specify how
+  to switch serial-console input between Xen and DOM0. The required
+  sequence is CTRL-$<$switch-char$>$ pressed three times. Specifying
+  the backtick character disables switching.  The
+  $<$auto-switch-char$>$ specifies whether Xen should auto-switch
+  input to DOM0 when it boots --- if it is `x' then auto-switching is
+  disabled.  Any other value, or omitting the character, enables
+  auto-switching.  [NB. Default switch-char is `a'.]
+\item [ nmi=xxx ]
+  Specify what to do with an NMI parity or I/O error. \\
+  `nmi=fatal':  Xen prints a diagnostic and then hangs. \\
+  `nmi=dom0':   Inform DOM0 of the NMI. \\
+  `nmi=ignore': Ignore the NMI.
+\item [ mem=xxx ] Set the physical RAM address limit. Any RAM
+  appearing beyond this physical address in the memory map will be
+  ignored. This parameter may be specified with a B, K, M or G suffix,
+  representing bytes, kilobytes, megabytes and gigabytes respectively.
+  The default unit, if no suffix is specified, is kilobytes.
+\item [ dom0\_mem=xxx ] Set the amount of memory to be allocated to
+  domain0. In Xen 3.x the parameter may be specified with a B, K, M or
+  G suffix, representing bytes, kilobytes, megabytes and gigabytes
+  respectively; if no suffix is specified, the parameter defaults to
+  kilobytes. In previous versions of Xen, suffixes were not supported
+  and the value is always interpreted as kilobytes.
+\item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in
+  pages (default 1).  Note that the trace buffers are only enabled in
+  debug builds.  Most users can ignore this feature completely.
+\item [ sched=xxx ] Select the CPU scheduler Xen should use.  The
+  current possibilities are `bvt' (default), `atropos' and `rrobin'.
+  For more information see Section~\ref{s:sched}.
+\item [ apic\_verbosity=debug,verbose ] Print more detailed
+  information about local APIC and IOAPIC configuration.
+\item [ lapic ] Force use of local APIC even when left disabled by
+  uniprocessor BIOS.
+\item [ nolapic ] Ignore local APIC in a uniprocessor system, even if
+  enabled by the BIOS.
+\item [ apic=bigsmp,default,es7000,summit ] Specify NUMA platform.
+  This can usually be probed automatically.
+\end{description}
+
+In addition, the following options may be specified on the Xen command
+line. Since domain 0 shares responsibility for booting the platform,
+Xen will automatically propagate these options to its command line.
+These options are taken from Linux's command-line syntax with
+unchanged semantics.
+
+\begin{description}
+\item [ acpi=off,force,strict,ht,noirq,\ldots ] Modify how Xen (and
+  domain 0) parses the BIOS ACPI tables.
+\item [ acpi\_skip\_timer\_override ] Instruct Xen (and domain~0) to
+  ignore timer-interrupt override instructions specified by the BIOS
+  ACPI tables.
+\item [ noapic ] Instruct Xen (and domain~0) to ignore any IOAPICs
+  that are present in the system, and instead continue to use the
+  legacy PIC.
+\end{description} 
+
+
+\section{XenLinux Boot Options}
+
+In addition to the standard Linux kernel boot options, we support:
+\begin{description}
+\item[ xencons=xxx ] Specify the device node to which the Xen virtual
+  console driver is attached. The following options are supported:
+  \begin{center}
+    \begin{tabular}{l}
+      `xencons=off': disable virtual console \\
+      `xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
+      `xencons=ttyS': attach console to /dev/ttyS0
+    \end{tabular}
+\end{center}
+The default is ttyS for dom0 and tty for all other domains.
+\end{description}
+
+
+\section{Debugging}
+\label{s:keys}
+
+Xen has a set of debugging features that can be useful to try and
+figure out what's going on. Hit `h' on the serial line (if you
+specified a baud rate on the Xen command line) or ScrollLock-h on the
+keyboard to get a list of supported commands.
+
+If you have a crash you'll likely get a crash dump containing an EIP
+(PC) which, along with an \path{objdump -d image}, can be useful in
+figuring out what's happened.  Debug a Xenlinux image just as you
+would any other Linux kernel.
+
+%% We supply a handy debug terminal program which you can find in
+%% \path{/usr/local/src/xen-2.0.bk/tools/misc/miniterm/} This should
+%% be built and executed on another machine that is connected via a
+%% null modem cable. Documentation is included.  Alternatively, if the
+%% Xen machine is connected to a serial-port server then we supply a
+%% dumb TCP terminal client, {\tt xencons}.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/control_software.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/control_software.tex        Thu Sep 22 17:42:01 2005
@@ -0,0 +1,115 @@
+\chapter{Control Software} 
+
+The Xen control software includes the \xend\ node control daemon
+(which must be running), the xm command line tools, and the prototype
+xensv web interface.
+
+\section{\Xend\ (node control daemon)}
+\label{s:xend}
+
+The Xen Daemon (\Xend) performs system management functions related to
+virtual machines.  It forms a central point of control for a machine
+and can be controlled using an HTTP-based protocol.  \Xend\ must be
+running in order to start and manage virtual machines.
+
+\Xend\ must be run as root because it needs access to privileged
+system management functions.  A small set of commands may be issued on
+the \xend\ command line:
+
+\begin{tabular}{ll}
+  \verb!# xend start! & start \xend, if not already running \\
+  \verb!# xend stop!  & stop \xend\ if already running       \\
+  \verb!# xend restart! & restart \xend\ if running, otherwise start it \\
+  % \verb!# xend trace_start! & start \xend, with very detailed debug logging \\
+  \verb!# xend status! & indicates \xend\ status by its return code
+\end{tabular}
+
+A SysV init script called {\tt xend} is provided to start \xend\ at
+boot time.  {\tt make install} installs this script in
+\path{/etc/init.d}.  To enable it, you have to make symbolic links in
+the appropriate runlevel directories or use the {\tt chkconfig} tool,
+where available.
+
+Once \xend\ is running, more sophisticated administration can be done
+using the xm tool (see Section~\ref{s:xm}) and the experimental Xensv
+web interface (see Section~\ref{s:xensv}).
+
+As \xend\ runs, events will be logged to \path{/var/log/xend.log} and,
+if the migration assistant daemon (\path{xfrd}) has been started,
+\path{/var/log/xfrd.log}. These may be of use for troubleshooting
+problems.
+
+\section{Xm (command line interface)}
+\label{s:xm}
+
+The xm tool is the primary tool for managing Xen from the console.
+The general format of an xm command line is:
+
+\begin{verbatim}
+# xm command [switches] [arguments] [variables]
+\end{verbatim}
+
+The available \emph{switches} and \emph{arguments} are dependent on
+the \emph{command} chosen.  The \emph{variables} may be set using
+declarations of the form {\tt variable=value} and command line
+declarations override any of the values in the configuration file
+being used, including the standard variables described above and any
+custom variables (for instance, the \path{xmdefconfig} file uses a
+{\tt vmid} variable).
+
+The available commands are as follows:
+
+\begin{description}
+\item[set-mem] Request a domain to adjust its memory footprint.
+\item[create] Create a new domain.
+\item[destroy] Kill a domain immediately.
+\item[list] List running domains.
+\item[shutdown] Ask a domain to shut down.
+\item[dmesg] Fetch the Xen (not Linux!) boot output.
+\item[consoles] Lists the available consoles.
+\item[console] Connect to the console for a domain.
+\item[help] Get help on xm commands.
+\item[save] Suspend a domain to disk.
+\item[restore] Restore a domain from disk.
+\item[pause] Pause a domain's execution.
+\item[unpause] Un-pause a domain.
+\item[pincpu] Pin a domain to a CPU.
+\item[bvt] Set BVT scheduler parameters for a domain.
+\item[bvt\_ctxallow] Set the BVT context switching allowance for the
+  system.
+\item[atropos] Set the atropos parameters for a domain.
+\item[rrobin] Set the round robin time slice for the system.
+\item[info] Get information about the Xen host.
+\item[call] Call a \xend\ HTTP API function directly.
+\end{description}
+
+For a detailed overview of switches, arguments and variables to each
+command try
+\begin{quote}
+\begin{verbatim}
+# xm help command
+\end{verbatim}
+\end{quote}
+
+\section{Xensv (web control interface)}
+\label{s:xensv}
+
+Xensv is the experimental web control interface for managing a Xen
+machine.  It can be used to perform some (but not yet all) of the
+management tasks that can be done using the xm tool.
+
+It can be started using:
+\begin{quote}
+  \verb_# xensv start_
+\end{quote}
+and stopped using:
+\begin{quote}
+  \verb_# xensv stop_
+\end{quote}
+
+By default, Xensv will serve out the web interface on port 8080.  This
+can be changed by editing
+\path{/usr/lib/python2.3/site-packages/xen/sv/params.py}.
+
+Once Xensv is running, the web interface can be used to create and
+manage running domains.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/debian.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/debian.tex  Thu Sep 22 17:42:01 2005
@@ -0,0 +1,154 @@
+\chapter{Installing Xen / XenLinux on Debian}
+
+The Debian project provides a tool called \path{debootstrap} which
+allows a base Debian system to be installed into a filesystem without
+requiring the host system to have any Debian-specific software (such
+as \path{apt}).
+
+The following steps describe how to install Debian 3.1 (Sarge) for an
+unprivileged Xen domain:
+
+\begin{enumerate}
+
+\item Set up Xen and test that it's working, as described earlier in
+  this manual.
+
+\item Create disk images for rootfs and swap. Alternatively, you might
+  create dedicated partitions, LVM logical volumes, etc.\ if that
+  suits your setup.
+\begin{verbatim}
+dd if=/dev/zero of=/path/diskimage bs=1024k count=size_in_mbytes
+dd if=/dev/zero of=/path/swapimage bs=1024k count=size_in_mbytes
+\end{verbatim}
+
+  If you're going to use this filesystem / disk image only as a
+  `template' for other vm disk images, something like 300 MB should be
+  enough (the exact size depends on which packages you are planning
+  to install in the template).
+
+\item Create the filesystem and initialise the swap image
+\begin{verbatim}
+mkfs.ext3 /path/diskimage
+mkswap /path/swapimage
+\end{verbatim}
+
+\item Mount the disk image for installation
+\begin{verbatim}
+mount -o loop /path/diskimage /mnt/disk
+\end{verbatim}
+
+\item Install \path{debootstrap}. Make sure you have debootstrap
+  installed on the host.  If you are running Debian Sarge (3.1 /
+  testing) or unstable you can install it by running \path{apt-get
+    install debootstrap}.  Otherwise, it can be downloaded from the
+  Debian project website.
+
+\item Install Debian base to the disk image:
+\begin{verbatim}
+debootstrap --arch i386 sarge /mnt/disk  \
+            http://ftp.<countrycode>.debian.org/debian
+\end{verbatim}
+
+  You can use any other Debian http/ftp mirror you want.
+
+\item When debootstrap completes successfully, modify settings:
+\begin{verbatim}
+chroot /mnt/disk /bin/bash
+\end{verbatim}
+
+Edit the following files using vi or nano and make needed changes:
+\begin{verbatim}
+/etc/hostname
+/etc/hosts
+/etc/resolv.conf
+/etc/network/interfaces
+/etc/networks
+\end{verbatim}
+
+Set up access to the services, edit:
+\begin{verbatim}
+/etc/hosts.deny
+/etc/hosts.allow
+/etc/inetd.conf
+\end{verbatim}
+
+Add Debian mirror to:   
+\begin{verbatim}
+/etc/apt/sources.list
+\end{verbatim}
+
+Create fstab like this:
+\begin{verbatim}
+/dev/sda1       /       ext3    errors=remount-ro       0       1
+/dev/sda2       none    swap    sw                      0       0
+proc            /proc   proc    defaults                0       0
+\end{verbatim}
+
+Logout
+
+\item Unmount the disk image
+\begin{verbatim}
+umount /mnt/disk
+\end{verbatim}
+
+\item Create a Xen 2.0 configuration file for the new domain. You can
+  use the example configurations that come with Xen as a template.
+
+  Make sure you have the following set up:
+\begin{verbatim}
+disk = [ 'file:/path/diskimage,sda1,w', 'file:/path/swapimage,sda2,w' ]
+root = "/dev/sda1 ro"
+\end{verbatim}
+
+\item Start the new domain
+\begin{verbatim}
+xm create -f domain_config_file
+\end{verbatim}
+
+Check that the new domain is running:
+\begin{verbatim}
+xm list
+\end{verbatim}
+
+\item Attach to the console of the new domain.  You should see
+  something like this when starting the new domain:
+
+\begin{verbatim}
+Started domain testdomain2, console on port 9626
+\end{verbatim}
+        
+  There you can see the ID of the console: 26. You can also list the
+  consoles with \path{xm consoles} (ID is the last two digits of the
+  port number.)
+
+  Attach to the console:
+
+\begin{verbatim}
+xm console 26
+\end{verbatim}
+
+  or by telnetting to the port 9626 of localhost (the xm console
+  program works better).
+
+\item Log in and run base-config
+
+  By default there is no password for root.
+
+  Check that everything looks OK, and the system started without
+  errors.  Check that the swap is active, and the network settings are
+  correct.
+
+  Run \path{/usr/sbin/base-config} to set up the Debian settings.
+
+  Set up the password for root using passwd.
+
+\item Done. You can exit the console by pressing {\path{Ctrl + ]}}
+
+\end{enumerate}
+
+
+If you need to create new domains, you can just copy the contents of
+the `template'-image to the new disk images, either by mounting the
+template and the new image, and using \path{cp -a} or \path{tar} or by
+simply copying the image file.  Once this is done, modify the
+image-specific settings (hostname, network settings, etc).
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/domain_configuration.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/domain_configuration.tex    Thu Sep 22 17:42:01 2005
@@ -0,0 +1,281 @@
+\chapter{Domain Configuration}
+\label{cha:config}
+
+The following contains the syntax of the domain configuration files
+and description of how to further specify networking, driver domain
+and general scheduling behavior.
+
+
+\section{Configuration Files}
+\label{s:cfiles}
+
+Xen configuration files contain the following standard variables.
+Unless otherwise stated, configuration items should be enclosed in
+quotes: see \path{/etc/xen/xmexample1} and \path{/etc/xen/xmexample2}
+for concrete examples of the syntax.
+
+\begin{description}
+\item[kernel] Path to the kernel image.
+\item[ramdisk] Path to a ramdisk image (optional).
+  % \item[builder] The name of the domain build function (e.g.
+  %   {\tt'linux'} or {\tt'netbsd'}.
+\item[memory] Memory size in megabytes.
+\item[cpu] CPU to run this domain on, or {\tt -1} for auto-allocation.
+\item[console] Port to export the domain console on (default 9600 +
+  domain ID).
+\item[nics] Number of virtual network interfaces.
+\item[vif] List of MAC addresses (random addresses are assigned if not
+  given) and bridges to use for the domain's network interfaces, e.g.\ 
+\begin{verbatim}
+vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0',
+        'bridge=xen-br1' ]
+\end{verbatim}
+  to assign a MAC address and bridge to the first interface and assign
+  a different bridge to the second interface, leaving \xend\ to choose
+  the MAC address.
+\item[disk] List of block devices to export to the domain, e.g.\ \\
+  \verb_disk = [ 'phy:hda1,sda1,r' ]_ \\
+  exports physical device \path{/dev/hda1} to the domain as
+  \path{/dev/sda1} with read-only access. Exporting a disk read-write
+  which is currently mounted is dangerous -- if you are \emph{certain}
+  you wish to do this, you can specify \path{w!} as the mode.
+\item[dhcp] Set to {\tt `dhcp'} if you want to use DHCP to configure
+  networking.
+\item[netmask] Manually configured IP netmask.
+\item[gateway] Manually configured IP gateway.
+\item[hostname] Set the hostname for the virtual machine.
+\item[root] Specify the root device parameter on the kernel command
+  line.
+\item[nfs\_server] IP address for the NFS server (if any).
+\item[nfs\_root] Path of the root filesystem on the NFS server (if
+  any).
+\item[extra] Extra string to append to the kernel command line (if
+  any).
+\item[restart] Three possible options:
+  \begin{description}
+  \item[always] Always restart the domain, no matter what its exit
+    code is.
+  \item[never] Never restart the domain.
+  \item[onreboot] Restart the domain iff it requests reboot.
+  \end{description}
+\end{description}
+
+For additional flexibility, it is also possible to include Python
+scripting commands in configuration files.  An example of this is the
+\path{xmexample2} file, which uses Python code to handle the
+\path{vmid} variable.
+
+
+%\part{Advanced Topics}
+
+
+\section{Network Configuration}
+
+For many users, the default installation should work ``out of the
+box''.  More complicated network setups, for instance with multiple
+Ethernet interfaces and/or existing bridging setups will require some
+special configuration.
+
+The purpose of this section is to describe the mechanisms provided by
+\xend\ to allow a flexible configuration for Xen's virtual networking.
+
+\subsection{Xen virtual network topology}
+
+Each domain network interface is connected to a virtual network
+interface in dom0 by a point to point link (effectively a ``virtual
+crossover cable'').  These devices are named {\tt
+  vif$<$domid$>$.$<$vifid$>$} (e.g.\ {\tt vif1.0} for the first
+interface in domain~1, {\tt vif3.1} for the second interface in
+domain~3).
+
+Traffic on these virtual interfaces is handled in domain~0 using
+standard Linux mechanisms for bridging, routing, rate limiting, etc.
+Xend calls on two shell scripts to perform initial configuration of
+the network and configuration of new virtual interfaces.  By default,
+these scripts configure a single bridge for all the virtual
+interfaces.  Arbitrary routing / bridging configurations can be
+configured by customizing the scripts, as described in the following
+section.
+
+\subsection{Xen networking scripts}
+
+Xen's virtual networking is configured by two shell scripts (by
+default \path{network} and \path{vif-bridge}).  These are called
+automatically by \xend\ when certain events occur, with arguments to
+the scripts providing further contextual information.  These scripts
+are found by default in \path{/etc/xen/scripts}.  The names and
+locations of the scripts can be configured in
+\path{/etc/xen/xend-config.sxp}.
+
+\begin{description}
+\item[network:] This script is called whenever \xend\ is started or
+  stopped to respectively initialize or tear down the Xen virtual
+  network. In the default configuration initialization creates the
+  bridge `xen-br0' and moves eth0 onto that bridge, modifying the
+  routing accordingly. When \xend\ exits, it deletes the Xen bridge
+  and removes eth0, restoring the normal IP and routing configuration.
+
+  %% In configurations where the bridge already exists, this script
+  %% could be replaced with a link to \path{/bin/true} (for instance).
+
+\item[vif-bridge:] This script is called for every domain virtual
+  interface and can configure firewalling rules and add the vif to the
+  appropriate bridge. By default, this adds and removes VIFs on the
+  default Xen bridge.
+\end{description}
+
+For more complex network setups (e.g.\ where routing is required or
+integration with existing bridges is needed) these scripts may be
+replaced with customized variants for your site's preferred configuration.
+
+%% There are two possible types of privileges: IO privileges and
+%% administration privileges.
+
+
+\section{Driver Domain Configuration}
+
+I/O privileges can be assigned to allow a domain to directly access
+PCI devices itself.  This is used to support driver domains.
+
+Setting back-end privileges is currently only supported in SXP format
+config files.  To allow a domain to function as a back-end for others,
+somewhere within the {\tt vm} element of its configuration file must
+be a {\tt back-end} element of the form {\tt (back-end ({\em type}))}
+where {\tt \em type} may be either {\tt netif} or {\tt blkif},
+according to the type of virtual device this domain will service.
+%% After this domain has been built, \xend will connect all new and
+%% existing {\em virtual} devices (of the appropriate type) to that
+%% back-end.
+
+Note that a block back-end cannot currently import virtual block
+devices from other domains, and a network back-end cannot import
+virtual network devices from other domains.  Thus (particularly in the
+case of block back-ends, which cannot import a virtual block device as
+their root filesystem), you may need to boot a back-end domain from a
+ramdisk or a network device.
+
+Access to PCI devices may be configured on a per-device basis.  Xen
+will assign the minimal set of hardware privileges to a domain that
+are required to control its devices.  This can be configured in either
+format of configuration file:
+
+\begin{itemize}
+\item SXP Format: Include device elements of the form: \\
+  \centerline{  {\tt (device (pci (bus {\em x}) (dev {\em y}) (func {\em z})))}} \\
+  inside the top-level {\tt vm} element.  Each one specifies the
+  address of a device this domain is allowed to access --- the numbers
+  \emph{x},\emph{y} and \emph{z} may be in either decimal or
+  hexadecimal format.
+\item Flat Format: Include a list of PCI device addresses of the
+  format: \\
+  \centerline{{\tt pci = ['x,y,z', \ldots]}} \\
+  where each element in the list is a string specifying the components
+  of the PCI device address, separated by commas.  The components
+  ({\tt \em x}, {\tt \em y} and {\tt \em z}) of the list may be
+  formatted as either decimal or hexadecimal.
+\end{itemize}
+
+%% \section{Administration Domains}
+
+%% Administration privileges allow a domain to use the `dom0
+%% operations' (so called because they are usually available only to
+%% domain 0).  A privileged domain can build other domains, set
+%% scheduling parameters, etc.
+
+% Support for other administrative domains is not yet available...
+% perhaps we should plumb it in some time
+
+
+\section{Scheduler Configuration}
+\label{s:sched}
+
+Xen offers a boot time choice between multiple schedulers.  To select
+a scheduler, pass the boot parameter \emph{sched=sched\_name} to Xen,
+substituting the appropriate scheduler name.  Details of the
+schedulers and their parameters are included below; future versions of
+the tools will provide a higher-level interface to these tools.
+
+It is expected that system administrators configure their system to
+use the scheduler most appropriate to their needs.  Currently, the BVT
+scheduler is the recommended choice.
+
+\subsection{Borrowed Virtual Time}
+
+{\tt sched=bvt} (the default) \\
+
+BVT provides proportional fair shares of the CPU time.  It has been
+observed to penalize domains that block frequently (e.g.\ I/O
+intensive domains), but this can be compensated for by using warping.
+
+\subsubsection{Global Parameters}
+
+\begin{description}
+\item[ctx\_allow] The context switch allowance is similar to the
+  ``quantum'' in traditional schedulers.  It is the minimum time that
+  a scheduled domain will be allowed to run before being preempted.
+\end{description}
+
+\subsubsection{Per-domain parameters}
+
+\begin{description}
+\item[mcuadv] The MCU (Minimum Charging Unit) advance determines the
+  proportional share of the CPU that a domain receives.  It is set
+  inversely proportionally to a domain's sharing weight.
+\item[warp] The amount of ``virtual time'' the domain is allowed to
+  warp backwards.
+\item[warpl] The warp limit is the maximum time a domain can run
+  warped for.
+\item[warpu] The unwarp requirement is the minimum time a domain must
+  run unwarped for before it can warp again.
+\end{description}
+
+\subsection{Atropos}
+
+{\tt sched=atropos} \\
+
+Atropos is a soft real time scheduler.  It provides guarantees about
+absolute shares of the CPU, with a facility for sharing slack CPU time
+on a best-effort basis. It can provide timeliness guarantees for
+latency-sensitive domains.
+
+Every domain has an associated period and slice.  The domain should
+receive `slice' nanoseconds every `period' nanoseconds.  This allows
+the administrator to configure both the absolute share of the CPU a
+domain receives and the frequency with which it is scheduled.
+
+%% When domains unblock, their period is reduced to the value of the
+%% latency hint (the slice is scaled accordingly so that they still
+%% get the same proportion of the CPU).  For each subsequent period,
+%% the slice and period times are doubled until they reach their
+%% original values.
+
+Note: don't over-commit the CPU when using Atropos (i.e.\ don't reserve
+more CPU than is available --- the utilization should be kept to
+slightly less than 100\% in order to ensure predictable behavior).
+
+\subsubsection{Per-domain parameters}
+
+\begin{description}
+\item[period] The regular time interval during which a domain is
+  guaranteed to receive its allocation of CPU time.
+\item[slice] The length of time per period that a domain is guaranteed
+  to run for (in the absence of voluntary yielding of the CPU).
+\item[latency] The latency hint is used to control how soon after
+  waking up a domain it should be scheduled.
+\item[xtratime] This is a boolean flag that specifies whether a domain
+  should be allowed a share of the system slack time.
+\end{description}
+
+\subsection{Round Robin}
+
+{\tt sched=rrobin} \\
+
+The round robin scheduler is included as a simple demonstration of
+Xen's internal scheduler API.  It is not intended for production use.
+
+\subsubsection{Global Parameters}
+
+\begin{description}
+\item[rr\_slice] The maximum time each domain runs before the next
+  scheduling decision is made.
+\end{description}
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/domain_filesystem.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/domain_filesystem.tex       Thu Sep 22 17:42:01 2005
@@ -0,0 +1,243 @@
+\chapter{Domain Filesystem Storage}
+
+It is possible to directly export any Linux block device in dom0 to
+another domain, or to export filesystems / devices to virtual machines
+using standard network protocols (e.g.\ NBD, iSCSI, NFS, etc.).  This
+chapter covers some of the possibilities.
+
+
+\section{Exporting Physical Devices as VBDs}
+\label{s:exporting-physical-devices-as-vbds}
+
+One of the simplest configurations is to directly export individual
+partitions from domain~0 to other domains. To achieve this use the
+\path{phy:} specifier in your domain configuration file. For example a
+line like
+\begin{quote}
+  \verb_disk = ['phy:hda3,sda1,w']_
+\end{quote}
+specifies that the partition \path{/dev/hda3} in domain~0 should be
+exported read-write to the new domain as \path{/dev/sda1}; one could
+equally well export it as \path{/dev/hda} or \path{/dev/sdb5} should
+one wish.
+
+In addition to local disks and partitions, it is possible to export
+any device that Linux considers to be ``a disk'' in the same manner.
+For example, if you have iSCSI disks or GNBD volumes imported into
+domain~0 you can export these to other domains using the \path{phy:}
+disk syntax. E.g.:
+\begin{quote}
+  \verb_disk = ['phy:vg/lvm1,sda2,w']_
+\end{quote}
+
+\begin{center}
+  \framebox{\bf Warning: Block device sharing}
+\end{center}
+\begin{quote}
+  Block devices should typically only be shared between domains in a
+  read-only fashion otherwise the Linux kernel's file systems will get
+  very confused as the file system structure may change underneath
+  them (having the same ext3 partition mounted \path{rw} twice is a
+  sure fire way to cause irreparable damage)!  \Xend\ will attempt to
+  prevent you from doing this by checking that the device is not
+  mounted read-write in domain~0, and hasn't already been exported
+  read-write to another domain.  If you want read-write sharing,
+  export the directory to other domains via NFS from domain~0 (or use
+  a cluster file system such as GFS or ocfs2).
+\end{quote}
+
+
+\section{Using File-backed VBDs}
+
+It is also possible to use a file in Domain~0 as the primary storage
+for a virtual machine.  As well as being convenient, this also has the
+advantage that the virtual block device will be \emph{sparse} ---
+space will only really be allocated as parts of the file are used.  So
+if a virtual machine uses only half of its disk space then the file
+really takes up half of the size allocated.
+
+For example, to create a 2GB sparse file-backed virtual block device
+(actually only consumes 1KB of disk):
+\begin{quote}
+  \verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=1_
+\end{quote}
+
+Make a file system in the disk file:
+\begin{quote}
+  \verb_# mkfs -t ext3 vm1disk_
+\end{quote}
+
+(when the tool asks for confirmation, answer `y')
+
+Populate the file system e.g.\ by copying from the current root:
+\begin{quote}
+\begin{verbatim}
+# mount -o loop vm1disk /mnt
+# cp -ax /{root,dev,var,etc,usr,bin,sbin,lib} /mnt
+# mkdir /mnt/{proc,sys,home,tmp}
+\end{verbatim}
+\end{quote}
+
+Tailor the file system by editing \path{/etc/fstab},
+\path{/etc/hostname}, etc.\ Don't forget to edit the files in the
+mounted file system, instead of your domain~0 filesystem, e.g.\ you
+would edit \path{/mnt/etc/fstab} instead of \path{/etc/fstab}.  For
+this example, set \path{/dev/sda1} as the root device in fstab.
+
+Now unmount (this is important!):
+\begin{quote}
+  \verb_# umount /mnt_
+\end{quote}
+
+In the configuration file set:
+\begin{quote}
+  \verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
+\end{quote}
+
+As the virtual machine writes to its `disk', the sparse file will be
+filled in and consume more space up to the original 2GB.
+
+{\bf Note that file-backed VBDs may not be appropriate for backing
+  I/O-intensive domains.}  File-backed VBDs are known to experience
+substantial slowdowns under heavy I/O workloads, due to the I/O
+handling by the loopback block device used to support file-backed VBDs
+in dom0.  Better I/O performance can be achieved by using either
+LVM-backed VBDs (Section~\ref{s:using-lvm-backed-vbds}) or physical
+devices as VBDs (Section~\ref{s:exporting-physical-devices-as-vbds}).
+
+Linux supports a maximum of eight file-backed VBDs across all domains
+by default.  This limit can be statically increased by using the
+\emph{max\_loop} module parameter if CONFIG\_BLK\_DEV\_LOOP is
+compiled as a module in the dom0 kernel, or by using the
+\emph{max\_loop=n} boot option if CONFIG\_BLK\_DEV\_LOOP is compiled
+directly into the dom0 kernel.
+
+
+\section{Using LVM-backed VBDs}
+\label{s:using-lvm-backed-vbds}
+
+A particularly appealing solution is to use LVM volumes as backing for
+domain file-systems since this allows dynamic growing/shrinking of
+volumes as well as snapshot and other features.
+
+To initialize a partition to support LVM volumes:
+\begin{quote}
+\begin{verbatim}
+# pvcreate /dev/sda10           
+\end{verbatim} 
+\end{quote}
+
+Create a volume group named `vg' on the physical partition:
+\begin{quote}
+\begin{verbatim}
+# vgcreate vg /dev/sda10
+\end{verbatim} 
+\end{quote}
+
+Create a logical volume of size 4GB named `myvmdisk1':
+\begin{quote}
+\begin{verbatim}
+# lvcreate -L4096M -n myvmdisk1 vg
+\end{verbatim}
+\end{quote}
+
+You should now see that you have a \path{/dev/vg/myvmdisk1} device.  Make a
+filesystem, mount it and populate it, e.g.:
+\begin{quote}
+\begin{verbatim}
+# mkfs -t ext3 /dev/vg/myvmdisk1
+# mount /dev/vg/myvmdisk1 /mnt
+# cp -ax / /mnt
+# umount /mnt
+\end{verbatim}
+\end{quote}
+
+Now configure your VM with the following disk configuration:
+\begin{quote}
+\begin{verbatim}
+ disk = [ 'phy:vg/myvmdisk1,sda1,w' ]
+\end{verbatim}
+\end{quote}
+
+LVM enables you to grow the size of logical volumes, but you'll need
+to resize the corresponding file system to make use of the new space.
+Some file systems (e.g.\ ext3) now support online resize.  See the LVM
+manuals for more details.
+
+You can also use LVM for creating copy-on-write (CoW) clones of LVM
+volumes (known as writable persistent snapshots in LVM terminology).
+This facility is new in Linux 2.6.8, so isn't as stable as one might
+hope.  In particular, using lots of CoW LVM disks consumes a lot of
+dom0 memory, and error conditions such as running out of disk space
+are not handled well. Hopefully this will improve in future.
+
+To create two copy-on-write clones of the above file system you would
+use the following commands:
+
+\begin{quote}
+\begin{verbatim}
+# lvcreate -s -L1024M -n myclonedisk1 /dev/vg/myvmdisk1
+# lvcreate -s -L1024M -n myclonedisk2 /dev/vg/myvmdisk1
+\end{verbatim}
+\end{quote}
+
+Each of these can grow to have 1GB of differences from the master
+volume. You can grow the amount of space for storing the differences
+using the lvextend command, e.g.:
+\begin{quote}
+\begin{verbatim}
+# lvextend -L+100M /dev/vg/myclonedisk1
+\end{verbatim}
+\end{quote}
+
+Don't let the `differences volume' ever fill up otherwise LVM gets
+rather confused. It may be possible to automate the growing process by
+using \path{dmsetup wait} to spot the volume getting full and then
+issue an \path{lvextend}.
+
+In principle, it is possible to continue writing to the volume that
+has been cloned (the changes will not be visible to the clones), but
+we wouldn't recommend this: have the cloned volume as a `pristine'
+file system install that isn't mounted directly by any of the virtual
+machines.
+
+
+\section{Using NFS Root}
+
+First, populate a root filesystem in a directory on the server
+machine. This can be on a distinct physical machine, or simply run
+within a virtual machine on the same node.
+
+Now configure the NFS server to export this filesystem over the
+network by adding a line to \path{/etc/exports}, for instance:
+
+\begin{quote}
+  \begin{small}
+\begin{verbatim}
+/export/vm1root      1.2.3.4/24(rw,sync,no_root_squash)
+\end{verbatim}
+  \end{small}
+\end{quote}
+
+Finally, configure the domain to use NFS root.  In addition to the
+normal variables, you should make sure to set the following values in
+the domain's configuration file:
+
+\begin{quote}
+  \begin{small}
+\begin{verbatim}
+root       = '/dev/nfs'
+nfs_server = '2.3.4.5'       # substitute IP address of server
+nfs_root   = '/path/to/root' # path to root FS on the server
+\end{verbatim}
+  \end{small}
+\end{quote}
+
+The domain will need network access at boot time, so either statically
+configure an IP address using the config variables \path{ip},
+\path{netmask}, \path{gateway}, \path{hostname}; or enable DHCP
+(\path{dhcp='dhcp'}).
+
+Note that the Linux NFS root implementation is known to have stability
+problems under high load (this is not a Xen-specific problem), so this
+configuration may not be appropriate for critical servers.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/domain_mgmt.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/domain_mgmt.tex     Thu Sep 22 17:42:01 2005
@@ -0,0 +1,203 @@
+\chapter{Domain Management Tools}
+
+The previous chapter described a simple example of how to configure
+and start a domain.  This chapter summarises the tools available to
+manage running domains.
+
+
+\section{Command-line Management}
+
+Command line management tasks are also performed using the \path{xm}
+tool.  For online help for the commands available, type:
+\begin{quote}
+  \verb_# xm help_
+\end{quote}
+
+You can also type \path{xm help $<$command$>$} for more information on
+a given command.
+
+\subsection{Basic Management Commands}
+
+The most important \path{xm} commands are:
+\begin{quote}
+  \verb_# xm list_: Lists all domains running.\\
+  \verb_# xm consoles_: Gives information about the domain consoles.\\
+  \verb_# xm console_: Opens a console to a domain (e.g.\
+  \verb_# xm console myVM_)
+\end{quote}
+
+\subsection{\tt xm list}
+
+The output of \path{xm list} is in rows of the following format:
+\begin{center} {\tt name domid memory cpu state cputime console}
+\end{center}
+
+\begin{quote}
+  \begin{description}
+  \item[name] The descriptive name of the virtual machine.
+  \item[domid] The number of the domain ID this virtual machine is
+    running in.
+  \item[memory] Memory size in megabytes.
+  \item[cpu] The CPU this domain is running on.
+  \item[state] Domain state consists of 5 fields:
+    \begin{description}
+    \item[r] running
+    \item[b] blocked
+    \item[p] paused
+    \item[s] shutdown
+    \item[c] crashed
+    \end{description}
+  \item[cputime] How much CPU time (in seconds) the domain has used so
+    far.
+  \item[console] TCP port accepting connections to the domain's
+    console.
+  \end{description}
+\end{quote}
+
+The \path{xm list} command also supports a long output format when the
+\path{-l} switch is used.  This outputs the full details of the
+running domains in \xend's SXP configuration format.
+
+For example, suppose the system is running the ttylinux domain as
+described earlier.  The list command should produce output somewhat
+like the following:
+\begin{verbatim}
+# xm list
+Name              Id  Mem(MB)  CPU  State  Time(s)  Console
+Domain-0           0      251    0  r----    172.2        
+ttylinux           5       63    0  -b---      3.0    9605
+\end{verbatim}
+
+Here we can see the details for the ttylinux domain, as well as for
+domain~0 (which, of course, is always running).  Note that the console
+port for the ttylinux domain is 9605.  This can be connected to by TCP
+using a terminal program (e.g.\ \path{telnet} or, better,
+\path{xencons}).  The simplest way to connect is to use the
+\path{xm~console} command, specifying the domain name or ID.  To
+connect to the console of the ttylinux domain, we could use any of the
+following:
+\begin{verbatim}
+# xm console ttylinux
+# xm console 5
+# xencons localhost 9605
+\end{verbatim}
+
+\section{Domain Save and Restore}
+
+The administrator of a Xen system may suspend a virtual machine's
+current state into a disk file in domain~0, allowing it to be resumed
+at a later time.
+
+The ttylinux domain described earlier can be suspended to disk using
+the command:
+\begin{verbatim}
+# xm save ttylinux ttylinux.xen
+\end{verbatim}
+
+This will stop the domain named `ttylinux' and save its current state
+into a file called \path{ttylinux.xen}.
+
+To resume execution of this domain, use the \path{xm restore} command:
+\begin{verbatim}
+# xm restore ttylinux.xen
+\end{verbatim}
+
+This will restore the state of the domain and restart it.  The domain
+will carry on as before and the console may be reconnected using the
+\path{xm console} command, as above.
+
+\section{Live Migration}
+
+Live migration is used to transfer a domain between physical hosts
+whilst that domain continues to perform its usual activities --- from
+the user's perspective, the migration should be imperceptible.
+
+To perform a live migration, both hosts must be running Xen / \xend\
+and the destination host must have sufficient resources (e.g.\ memory
+capacity) to accommodate the domain after the move. Furthermore we
+currently require both source and destination machines to be on the
+same L2 subnet.
+
+Currently, there is no support for providing automatic remote access
+to filesystems stored on local disk when a domain is migrated.
+Administrators should choose an appropriate storage solution (e.g.\
+SAN, NAS, etc.) to ensure that domain filesystems are also available
+on their destination node. GNBD is a good method for exporting a
+volume from one machine to another. iSCSI can do a similar job, but is
+more complex to set up.
+
+When a domain migrates, its MAC and IP addresses move with it, thus it
+is only possible to migrate VMs within the same layer-2 network and IP
+subnet. If the destination node is on a different subnet, the
+administrator would need to manually configure a suitable etherip or
+IP tunnel in the domain~0 of the remote node.
+
+A domain may be migrated using the \path{xm migrate} command.  To live
+migrate a domain to another machine, we would use the command:
+
+\begin{verbatim}
+# xm migrate --live mydomain destination.ournetwork.com
+\end{verbatim}
+
+Without the \path{--live} flag, \xend\ simply stops the domain and
+copies the memory image over to the new node and restarts it. Since
+domains can have large allocations this can be quite time consuming,
+even on a Gigabit network. With the \path{--live} flag \xend\ attempts
+to keep the domain running while the migration is in progress,
+resulting in typical `downtimes' of just 60--300ms.
+
+For now it will be necessary to reconnect to the domain's console on
+the new machine using the \path{xm console} command.  If a migrated
+domain has any open network connections then they will be preserved,
+so SSH connections do not have this limitation.
+
+
+\section{Managing Domain Memory}
+
+XenLinux domains have the ability to relinquish / reclaim machine
+memory at the request of the administrator or the user of the domain.
+
+\subsection{Setting memory footprints from dom0}
+
+The machine administrator can request that a domain alter its memory
+footprint using the \path{xm set-mem} command.  For instance, we can
+request that our example ttylinux domain reduce its memory footprint
+to 32 megabytes.
+
+\begin{verbatim}
+# xm set-mem ttylinux 32
+\end{verbatim}
+
+We can now see the result of this in the output of \path{xm list}:
+
+\begin{verbatim}
+# xm list
+Name              Id  Mem(MB)  CPU  State  Time(s)  Console
+Domain-0           0      251    0  r----    172.2        
+ttylinux           5       31    0  -b---      4.3    9605
+\end{verbatim}
+
+The domain has responded to the request by returning memory to Xen. We
+can restore the domain to its original size using the command line:
+
+\begin{verbatim}
+# xm set-mem ttylinux 64
+\end{verbatim}
+
+\subsection{Setting memory footprints from within a domain}
+
+The virtual file \path{/proc/xen/balloon} allows the owner of a domain
+to adjust their own memory footprint.  Reading the file (e.g.\
+\path{cat /proc/xen/balloon}) prints out the current memory footprint
+of the domain.  Writing the file (e.g.\ \path{echo new\_target >
+  /proc/xen/balloon}) requests that the kernel adjust the domain's
+memory footprint to a new value.
+
+\subsection{Setting memory limits}
+
+Xen associates a memory size limit with each domain.  By default, this
+is the amount of memory the domain is originally started with,
+preventing the domain from ever growing beyond this size.  To permit a
+domain to grow beyond its original allocation or to prevent a domain
+you've shrunk from reclaiming the memory it relinquished, use the
+\path{xm maxmem} command.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/glossary.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/glossary.tex        Thu Sep 22 17:42:01 2005
@@ -0,0 +1,79 @@
+\chapter{Glossary of Terms}
+
+\begin{description}
+
+\item[Atropos] One of the CPU schedulers provided by Xen.  Atropos
+  provides domains with absolute shares of the CPU, with timeliness
+  guarantees and a mechanism for sharing out `slack time'.
+
+\item[BVT] The BVT scheduler is used to give proportional fair shares
+  of the CPU to domains.
+
+\item[Exokernel] A minimal piece of privileged code, similar to a {\bf
+    microkernel} but providing a more `hardware-like' interface to the
+  tasks it manages.  This is similar to a paravirtualising VMM like
+  {\bf Xen} but was designed as a new operating system structure,
+  rather than specifically to run multiple conventional OSs.
+
+\item[Domain] A domain is the execution context that contains a
+  running {\bf virtual machine}.  The relationship between virtual
+  machines and domains on Xen is similar to that between programs and
+  processes in an operating system: a virtual machine is a persistent
+  entity that resides on disk (somewhat like a program).  When it is
+  loaded for execution, it runs in a domain.  Each domain has a {\bf
+    domain ID}.
+
+\item[Domain 0] The first domain to be started on a Xen machine.
+  Domain 0 is responsible for managing the system.
+
+\item[Domain ID] A unique identifier for a {\bf domain}, analogous to
+  a process ID in an operating system.
+
+\item[Full virtualisation] An approach to virtualisation which
+  requires no modifications to the hosted operating system, providing
+  the illusion of a complete system of real hardware devices.
+
+\item[Hypervisor] An alternative term for {\bf VMM}, used because it
+  means `beyond supervisor', since it is responsible for managing
+  multiple `supervisor' kernels.
+
+\item[Live migration] A technique for moving a running virtual machine
+  to another physical host, without stopping it or the services
+  running on it.
+
+\item[Microkernel] A small base of code running at the highest
+  hardware privilege level.  A microkernel is responsible for sharing
+  CPU and memory (and sometimes other devices) between less privileged
+  tasks running on the system.  This is similar to a VMM, particularly
+  a {\bf paravirtualising} VMM but typically addressing a different
+  problem space and providing a different kind of interface.
+
+\item[NetBSD/Xen] A port of NetBSD to the Xen architecture.
+
+\item[Paravirtualisation] An approach to virtualisation which requires
+  modifications to the operating system in order to run in a virtual
+  machine.  Xen uses paravirtualisation but preserves binary
+  compatibility for user space applications.
+
+\item[Shadow pagetables] A technique for hiding the layout of machine
+  memory from a virtual machine's operating system.  Used in some {\bf
+    VMMs} to provide the illusion of contiguous physical memory; in
+  Xen this is used during {\bf live migration}.
+
+\item[Virtual Machine] The environment in which a hosted operating
+  system runs, providing the abstraction of a dedicated machine.  A
+  virtual machine may be identical to the underlying hardware (as in
+  {\bf full virtualisation}), or it may differ (as in {\bf
+    paravirtualisation}).
+
+\item[VMM] Virtual Machine Monitor - the software that allows multiple
+  virtual machines to be multiplexed on a single physical machine.
+
+\item[Xen] Xen is a paravirtualising virtual machine monitor,
+  developed primarily by the Systems Research Group at the University
+  of Cambridge Computer Laboratory.
+
+\item[XenLinux] Official name for the port of the Linux kernel that
+  runs on Xen.
+
+\end{description}
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/installation.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/installation.tex    Thu Sep 22 17:42:01 2005
@@ -0,0 +1,394 @@
+\chapter{Installation}
+
+The Xen distribution includes three main components: Xen itself, ports
+of Linux 2.4 and 2.6 and NetBSD to run on Xen, and the userspace
+tools required to manage a Xen-based system.  This chapter describes
+how to install the Xen~2.0 distribution from source.  Alternatively,
+there may be pre-built packages available as part of your operating
+system distribution.
+
+
+\section{Prerequisites}
+\label{sec:prerequisites}
+
+The following is a full list of prerequisites.  Items marked `$\dag$'
+are required by the \xend\ control tools, and hence required if you
+want to run more than one virtual machine; items marked `$*$' are only
+required if you wish to build from source.
+\begin{itemize}
+\item A working Linux distribution using the GRUB bootloader and
+  running on a P6-class (or newer) CPU.
+\item [$\dag$] The \path{iproute2} package.
+\item [$\dag$] The Linux bridge-utils\footnote{Available from {\tt
+      http://bridge.sourceforge.net}} (e.g., \path{/sbin/brctl})
+\item [$\dag$] An installation of Twisted~v1.3 or
+  above\footnote{Available from {\tt http://www.twistedmatrix.com}}.
+  There may be a binary package available for your distribution;
+  alternatively it can be installed by running `{\sl make
+    install-twisted}' in the root of the Xen source tree.
+\item [$*$] Build tools (gcc v3.2.x or v3.3.x, binutils, GNU make).
+\item [$*$] Development installation of libcurl (e.g., libcurl-devel)
+\item [$*$] Development installation of zlib (e.g., zlib-dev).
+\item [$*$] Development installation of Python v2.2 or later (e.g.,
+  python-dev).
+\item [$*$] \LaTeX\ and transfig are required to build the
+  documentation.
+\end{itemize}
+
+Once you have satisfied the relevant prerequisites, you can now
+install either a binary or source distribution of Xen.
+
+
+\section{Installing from Binary Tarball}
+
+Pre-built tarballs are available for download from the Xen download
+page
+\begin{quote} {\tt http://xen.sf.net}
+\end{quote}
+
+Once you've downloaded the tarball, simply unpack and install:
+\begin{verbatim}
+# tar zxvf xen-2.0-install.tgz
+# cd xen-2.0-install
+# sh ./install.sh
+\end{verbatim}
+
+Once you've installed the binaries you need to configure your system
+as described in Section~\ref{s:configure}.
+
+
+\section{Installing from Source}
+
+This section describes how to obtain, build, and install Xen from
+source.
+
+\subsection{Obtaining the Source}
+
+The Xen source tree is available as either a compressed source tar
+ball or as a clone of our master BitKeeper repository.
+
+\begin{description}
+\item[Obtaining the Source Tarball]\mbox{} \\
+  Stable versions (and daily snapshots) of the Xen source tree are
+  available as compressed tarballs from the Xen download page
+  \begin{quote} {\tt http://xen.sf.net}
+  \end{quote}
+
+\item[Using BitKeeper]\mbox{} \\
+  If you wish to install Xen from a clone of our latest BitKeeper
+  repository then you will need to install the BitKeeper tools.
+  Download instructions for BitKeeper can be obtained by filling out
+  the form at:
+  \begin{quote} {\tt http://www.bitmover.com/cgi-bin/download.cgi}
+\end{quote}
+The public master BK repository for the 2.0 release lives at:
+\begin{quote} {\tt bk://xen.bkbits.net/xen-2.0.bk}
+\end{quote} 
+You can use BitKeeper to download it and keep it updated with the
+latest features and fixes.
+
+Change to the directory in which you want to put the source code, then
+run:
+\begin{verbatim}
+# bk clone bk://xen.bkbits.net/xen-2.0.bk
+\end{verbatim}
+
+Under your current directory, a new directory named \path{xen-2.0.bk}
+has been created, which contains all the source code for Xen, the OS
+ports, and the control tools. You can update your repository with the
+latest changes at any time by running:
+\begin{verbatim}
+# cd xen-2.0.bk # to change into the local repository
+# bk pull       # to update the repository
+\end{verbatim}
+\end{description}
+
+% \section{The distribution}
+%
+% The Xen source code repository is structured as follows:
+%
+% \begin{description}
+% \item[\path{tools/}] Xen node controller daemon (Xend), command line
+%   tools, control libraries
+% \item[\path{xen/}] The Xen VMM.
+% \item[\path{linux-*-xen-sparse/}] Xen support for Linux.
+% \item[\path{linux-*-patches/}] Experimental patches for Linux.
+% \item[\path{netbsd-*-xen-sparse/}] Xen support for NetBSD.
+% \item[\path{docs/}] Various documentation files for users and
+%   developers.
+% \item[\path{extras/}] Bonus extras.
+% \end{description}
+
+\subsection{Building from Source}
+
+The top-level Xen Makefile includes a target `world' that will do the
+following:
+
+\begin{itemize}
+\item Build Xen.
+\item Build the control tools, including \xend.
+\item Download (if necessary) and unpack the Linux 2.6 source code,
+  and patch it for use with Xen.
+\item Build a Linux kernel to use in domain 0 and a smaller
+  unprivileged kernel, which can optionally be used for unprivileged
+  virtual machines.
+\end{itemize}
+
+After the build has completed you should have a top-level directory
+called \path{dist/} in which all resulting targets will be placed; of
+particular interest are the two XenLinux kernel images, one
+with a `-xen0' extension which contains hardware device drivers and
+drivers for Xen's virtual devices, and one with a `-xenU' extension
+that just contains the virtual ones. These are found in
+\path{dist/install/boot/} along with the image for Xen itself and the
+configuration files used during the build.
+
+The NetBSD port can be built using:
+\begin{quote}
+\begin{verbatim}
+# make netbsd20
+\end{verbatim}
+\end{quote}
+The NetBSD port is built using a snapshot of the netbsd-2-0 CVS branch.
+The snapshot is downloaded as part of the build process, if it is not
+yet present in the \path{NETBSD\_SRC\_PATH} search path.  The build
+process also downloads a toolchain which includes all the tools
+necessary to build the NetBSD kernel under Linux.
+
+To customize further the set of kernels built you need to edit the
+top-level Makefile. Look for the line:
+
+\begin{quote}
+\begin{verbatim}
+KERNELS ?= mk.linux-2.6-xen0 mk.linux-2.6-xenU
+\end{verbatim}
+\end{quote}
+
+You can edit this line to include any set of operating system kernels
+which have configurations in the top-level \path{buildconfigs/}
+directory, for example \path{mk.linux-2.4-xenU} to build a Linux 2.4
+kernel containing only virtual device drivers.
+
+%% Inspect the Makefile if you want to see what goes on during a
+%% build.  Building Xen and the tools is straightforward, but XenLinux
+%% is more complicated.  The makefile needs a `pristine' Linux kernel
+%% tree to which it will then add the Xen architecture files.  You can
+%% tell the makefile the location of the appropriate Linux compressed
+%% tar file by
+%% setting the LINUX\_SRC environment variable, e.g. \\
+%% \verb!# LINUX_SRC=/tmp/linux-2.6.11.tar.bz2 make world! \\ or by
+%% placing the tar file somewhere in the search path of {\tt
+%%   LINUX\_SRC\_PATH} which defaults to `{\tt .:..}'.  If the
+%% makefile can't find a suitable kernel tar file it attempts to
+%% download it from kernel.org (this won't work if you're behind a
+%% firewall).
+
+%% After untaring the pristine kernel tree, the makefile uses the {\tt
+%%   mkbuildtree} script to add the Xen patches to the kernel.
+
+
+%% The procedure is similar to build the Linux 2.4 port: \\
+%% \verb!# LINUX_SRC=/path/to/linux2.4/source make linux24!
+
+
+%% \framebox{\parbox{5in}{
+%%     {\bf Distro specific:} \\
+%%     {\it Gentoo} --- if not using udev (most installations,
+%%     currently), you'll need to enable devfs and devfs mount at boot
+%%     time in the xen0 config.  }}
+
+\subsection{Custom XenLinux Builds}
+
+% If you have an SMP machine you may wish to give the {\tt '-j4'}
+% argument to make to get a parallel build.
+
+If you wish to build a customized XenLinux kernel (e.g. to support
+additional devices or enable distribution-required features), you can
+use the standard Linux configuration mechanisms, specifying that the
+architecture being built for is \path{xen}, e.g.:
+\begin{quote}
+\begin{verbatim}
+# cd linux-2.6.11-xen0
+# make ARCH=xen xconfig
+# cd ..
+# make
+\end{verbatim}
+\end{quote}
+
+You can also copy an existing Linux configuration (\path{.config})
+into \path{linux-2.6.11-xen0} and execute:
+\begin{quote}
+\begin{verbatim}
+# make ARCH=xen oldconfig
+\end{verbatim}
+\end{quote}
+
+You may be prompted with some Xen-specific options; we advise
+accepting the defaults for these options.
+
+Note that the only difference between the two types of Linux kernel
+that are built is the configuration file used for each.  The `U'
+suffixed (unprivileged) versions don't contain any of the physical
+hardware device drivers, leading to a 30\% reduction in size; hence
+you may prefer these for your non-privileged domains.  The `0'
+suffixed privileged versions can be used to boot the system, as well
+as in driver domains and unprivileged domains.
+
+\subsection{Installing the Binaries}
+
+The files produced by the build process are stored under the
+\path{dist/install/} directory. To install them in their default
+locations, do:
+\begin{quote}
+\begin{verbatim}
+# make install
+\end{verbatim}
+\end{quote}
+
+Alternatively, users with special installation requirements may wish
+to install them manually by copying the files to their appropriate
+destinations.
+
+%% Files in \path{install/boot/} include:
+%% \begin{itemize}
+%% \item \path{install/boot/xen-2.0.gz} Link to the Xen 'kernel'
+%% \item \path{install/boot/vmlinuz-2.6-xen0} Link to domain 0
+%%   XenLinux kernel
+%% \item \path{install/boot/vmlinuz-2.6-xenU} Link to unprivileged
+%%   XenLinux kernel
+%% \end{itemize}
+
+The \path{dist/install/boot} directory will also contain the config
+files used for building the XenLinux kernels, and also versions of Xen
+and XenLinux kernels that contain debug symbols (\path{xen-syms-2.0.6}
+and \path{vmlinux-syms-2.6.11.11-xen0}) which are essential for
+interpreting crash dumps.  Retain these files as the developers may
+wish to see them if you post on the mailing list.
+
+
+\section{Configuration}
+\label{s:configure}
+
+Once you have built and installed the Xen distribution, it is simple
+to prepare the machine for booting and running Xen.
+
+\subsection{GRUB Configuration}
+
+An entry should be added to \path{grub.conf} (often found under
+\path{/boot/} or \path{/boot/grub/}) to allow Xen / XenLinux to boot.
+This file is sometimes called \path{menu.lst}, depending on your
+distribution.  The entry should look something like the following:
+
+{\small
+\begin{verbatim}
+title Xen 2.0 / XenLinux 2.6
+  kernel /boot/xen-2.0.gz dom0_mem=131072
+  module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro console=tty0
+\end{verbatim}
+}
+
+The kernel line tells GRUB where to find Xen itself and what boot
+parameters should be passed to it (in this case, setting domain 0's
+memory allocation in kilobytes; settings for the serial port, if
+required, are also given on this line).
+For more details on the various Xen boot parameters see
+Section~\ref{s:xboot}.
+
+The module line of the configuration describes the location of the
+XenLinux kernel that Xen should start and the parameters that should
+be passed to it (these are standard Linux parameters, identifying the
+root device and specifying it be initially mounted read only and
+instructing that console output be sent to the screen).  Some
+distributions such as SuSE do not require the \path{ro} parameter.
+
+%% \framebox{\parbox{5in}{
+%%     {\bf Distro specific:} \\
+%%     {\it SuSE} --- Omit the {\tt ro} option from the XenLinux
+%%     kernel command line, since the partition won't be remounted rw
+%%     during boot.  }}
+
+
+If you want to use an initrd, just add another \path{module} line to
+the configuration, as usual:
+
+{\small
+\begin{verbatim}
+  module /boot/my_initrd.gz
+\end{verbatim}
+}
+
+As always when installing a new kernel, it is recommended that you do
+not delete existing menu options from \path{menu.lst} --- you may want
+to boot your old Linux kernel in future, particularly if you have
+problems.
+
+\subsection{Serial Console (optional)}
+
+%% kernel /boot/xen-2.0.gz dom0_mem=131072 com1=115200,8n1
+%% module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro
+
+
+In order to configure Xen serial console output, it is necessary to
+add a boot option to your GRUB config; e.g.\ replace the above kernel
+line with:
+\begin{quote}
+{\small
+\begin{verbatim}
+   kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1
+\end{verbatim}}
+\end{quote}
+
+This configures Xen to output on COM1 at 115,200 baud, 8 data bits, 1
+stop bit and no parity. Modify these parameters for your setup.
+
+One can also configure XenLinux to share the serial console; to
+achieve this append ``\path{console=ttyS0}'' to your module line.
+
+If you wish to be able to log in over the XenLinux serial console it
+is necessary to add a line into \path{/etc/inittab}, just as per
+regular Linux. Simply add the line:
+\begin{quote} {\small {\tt c:2345:respawn:/sbin/mingetty ttyS0}}
+\end{quote}
+
+and you should be able to log in. Note that to successfully log in as
+root over the serial line will require adding \path{ttyS0} to
+\path{/etc/securetty} in most modern distributions.
+
+\subsection{TLS Libraries}
+
+Users of the XenLinux 2.6 kernel should disable Thread Local Storage
+(e.g.\ by doing a \path{mv /lib/tls /lib/tls.disabled}) before
+attempting to run with a XenLinux kernel\footnote{If you boot without
+  first disabling TLS, you will get a warning message during the boot
+  process. In this case, simply perform the rename after the machine
+  is up and then run \texttt{/sbin/ldconfig} to make it take effect.}.
+You can always reenable it by restoring the directory to its original
+location (i.e.\ \path{mv /lib/tls.disabled /lib/tls}).
+
+The reason for this is that the current TLS implementation uses
+segmentation in a way that is not permissible under Xen.  If TLS is
+not disabled, an emulation mode is used within Xen which reduces
+performance substantially.
+
+We hope that this issue can be resolved by working with Linux
+distribution vendors to implement a minor backward-compatible change
+to the TLS library.
+
+
+\section{Booting Xen}
+
+It should now be possible to restart the system and use Xen.  Reboot
+as usual but choose the new Xen option when the Grub screen appears.
+
+What follows should look much like a conventional Linux boot.  The
+first portion of the output comes from Xen itself, supplying low level
+information about itself and the machine it is running on.  The
+following portion of the output comes from XenLinux.
+
+You may see some errors during the XenLinux boot.  These are not
+necessarily anything to worry about --- they may result from kernel
+configuration differences between your XenLinux kernel and the one you
+usually use.
+
+When the boot completes, you should be able to log into your system as
+usual.  If you are unable to log in to your system running Xen, you
+should still be able to reboot with your normal Linux kernel.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/introduction.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/introduction.tex    Thu Sep 22 17:42:01 2005
@@ -0,0 +1,143 @@
+\chapter{Introduction}
+
+
+Xen is a \emph{paravirtualising} virtual machine monitor (VMM), or
+`hypervisor', for the x86 processor architecture.  Xen can securely
+execute multiple virtual machines on a single physical system with
+close-to-native performance.  The virtual machine technology
+facilitates enterprise-grade functionality, including:
+
+\begin{itemize}
+\item Virtual machines with performance close to native hardware.
+\item Live migration of running virtual machines between physical
+  hosts.
+\item Excellent hardware support (supports most Linux device drivers).
+\item Sandboxed, re-startable device drivers.
+\end{itemize}
+
+Paravirtualisation permits very high performance virtualisation, even
+on architectures like x86 that are traditionally very hard to
+virtualise.
+
+The drawback of this approach is that it requires operating systems to
+be \emph{ported} to run on Xen.  Porting an OS to run on Xen is
+similar to supporting a new hardware platform; however, the process is
+simplified because the paravirtual machine architecture is very
+similar to the underlying native hardware. Even though operating
+system kernels must explicitly support Xen, a key feature is that user
+space applications and libraries \emph{do not} require modification.
+
+Xen support is available for increasingly many operating systems:
+right now, Linux 2.4, Linux 2.6 and NetBSD are available for Xen 2.0.
+A FreeBSD port is undergoing testing and will be incorporated into the
+release soon. Other OS ports, including Plan 9, are in progress.  We
+hope that the arch-xen patches will be incorporated into the
+mainstream releases of these operating systems in due course (as has
+already happened for NetBSD).
+
+Possible usage scenarios for Xen include:
+
+\begin{description}
+\item [Kernel development.] Test and debug kernel modifications in a
+  sandboxed virtual machine --- no need for a separate test machine.
+\item [Multiple OS configurations.] Run multiple operating systems
+  simultaneously, for instance for compatibility or QA purposes.
+\item [Server consolidation.] Move multiple servers onto a single
+  physical host with performance and fault isolation provided at
+  virtual machine boundaries.
+\item [Cluster computing.] Management at VM granularity provides more
+  flexibility than separately managing each physical host, but better
+  control and isolation than single-system image solutions,
+  particularly by using live migration for load balancing.
+\item [Hardware support for custom OSes.] Allow development of new
+  OSes while benefiting from the wide-ranging hardware support of
+  existing OSes such as Linux.
+\end{description}
+
+
+\section{Structure of a Xen-Based System}
+
+A Xen system has multiple layers, the lowest and most privileged of
+which is Xen itself. 
+
+Xen in turn may host multiple \emph{guest} operating systems, each of
+which is executed within a secure virtual machine (in Xen terminology,
+a \emph{domain}). Domains are scheduled by Xen to make effective use
+of the available physical CPUs.  Each guest OS manages its own
+applications, which includes responsibility for scheduling each
+application within the time allotted to the VM by Xen.
+
+The first domain, \emph{domain 0}, is created automatically when the
+system boots and has special management privileges. Domain 0 builds
+other domains and manages their virtual devices. It also performs
+administrative tasks such as suspending, resuming and migrating other
+virtual machines.
+
+Within domain 0, a process called \emph{xend} runs to manage the
+system.  \Xend is responsible for managing virtual machines and
+providing access to their consoles.  Commands are issued to \xend over
+an HTTP interface, either from a command-line tool or from a web
+browser.
+
+
+\section{Hardware Support}
+
+Xen currently runs only on the x86 architecture, requiring a `P6' or
+newer processor (e.g. Pentium Pro, Celeron, Pentium II, Pentium III,
+Pentium IV, Xeon, AMD Athlon, AMD Duron).  Multiprocessor machines are
+supported, and we also have basic support for HyperThreading (SMT),
+although this remains a topic for ongoing research. A port
+specifically for x86/64 is in progress, although Xen already runs on
+such systems in 32-bit legacy mode. In addition a port to the IA64
+architecture is approaching completion. We hope to add other
+architectures such as PPC and ARM in due course.
+
+Xen can currently use up to 4GB of memory.  It is possible for x86
+machines to address up to 64GB of physical memory but there are no
+current plans to support these systems: The x86/64 port is the planned
+route to supporting larger memory sizes.
+
+Xen offloads most of the hardware support issues to the guest OS
+running in Domain~0.  Xen itself contains only the code required to
+detect and start secondary processors, set up interrupt routing, and
+perform PCI bus enumeration.  Device drivers run within a privileged
+guest OS rather than within Xen itself. This approach provides
+compatibility with the majority of device hardware supported by Linux.
+The default XenLinux build contains support for relatively modern
+server-class network and disk hardware, but you can add support for
+other hardware by configuring your XenLinux kernel in the normal way.
+
+
+\section{History}
+
+Xen was originally developed by the Systems Research Group at the
+University of Cambridge Computer Laboratory as part of the XenoServers
+project, funded by the UK-EPSRC.
+
+XenoServers aim to provide a `public infrastructure for global
+distributed computing', and Xen plays a key part in that, allowing us
+to efficiently partition a single machine to enable multiple
+independent clients to run their operating systems and applications in
+an environment providing protection, resource isolation and
+accounting.  The project web page contains further information along
+with pointers to papers and technical reports:
+\path{http://www.cl.cam.ac.uk/xeno}
+
+Xen has since grown into a fully-fledged project in its own right,
+enabling us to investigate interesting research issues regarding the
+best techniques for virtualising resources such as the CPU, memory,
+disk and network.  The project has been bolstered by support from
+Intel Research Cambridge, and HP Labs, who are now working closely
+with us.
+
+Xen was first described in a paper presented at SOSP in
+2003\footnote{\tt
+  http://www.cl.cam.ac.uk/netos/papers/2003-xensosp.pdf}, and the
+first public release (1.0) was made that October.  Since then, Xen has
+significantly matured and is now used in production scenarios on many
+sites.
+
+Xen 2.0 features greatly enhanced hardware support, configuration
+flexibility, usability and a larger complement of supported operating
+systems. This latest release takes Xen a step closer to becoming the
+definitive open source solution for virtualisation.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/redhat.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/redhat.tex  Thu Sep 22 17:42:01 2005
@@ -0,0 +1,61 @@
+\chapter{Installing Xen / XenLinux on Red~Hat or Fedora Core}
+
+When using Xen / XenLinux on a standard Linux distribution there are a
+couple of things to watch out for:
+
+Note that, because domains greater than 0 don't have any privileged
+access at all, certain commands in the default boot sequence will fail
+e.g.\ attempts to update the hwclock, change the console font, update
+the keytable map, start apmd (power management), or gpm (mouse
+cursor).  Either ignore the errors (they should be harmless), or
+remove them from the startup scripts.  Deleting the following links
+is a good start: {\path{S24pcmcia}}, {\path{S09isdn}},
+{\path{S17keytable}}, {\path{S26apmd}}, {\path{S85gpm}}.
+
+If you want to use a single root file system that works cleanly for
+both domain~0 and unprivileged domains, a useful trick is to use
+different `init' run levels. For example, use run level 3 for
+domain~0, and run level 4 for other domains. This enables different
+startup scripts to be run depending on the run level number passed
+on the kernel command line.
+
+If using NFS root file systems mounted either from an external server
+or from domain0 there are a couple of other gotchas.  The default
+{\path{/etc/sysconfig/iptables}} rules block NFS, so part way through
+the boot sequence things will suddenly go dead.
+
+If you're planning on having a separate NFS {\path{/usr}} partition,
+the RH9 boot scripts don't make life easy - they attempt to mount NFS
+file systems way too late in the boot process. The easiest way I found
+to do this was to have a {\path{/linuxrc}} script run ahead of
+{\path{/sbin/init}} that mounts {\path{/usr}}:
+
+\begin{quote}
+  \begin{small}\begin{verbatim}
+ #!/bin/bash
+ /sbin/ifconfig lo 127.0.0.1
+ /sbin/portmap
+ /bin/mount /usr
+ exec /sbin/init "$@" <>/dev/console 2>&1
+\end{verbatim}\end{small}
+\end{quote}
+
+%% $ XXX SMH: font lock fix :-)
+
+The one slight complication with the above is that
+{\path{/sbin/portmap}} is dynamically linked against
+{\path{/usr/lib/libwrap.so.0}}. Since this is in {\path{/usr}}, it
+won't work. This can be solved by copying the file (and link) below
+the {\path{/usr}} mount point, and just let the file be `covered' when
+the mount happens.
+
+In some installations, where a shared read-only {\path{/usr}} is being
+used, it may be desirable to move other large directories over into
+the read-only {\path{/usr}}. For example, you might replace
+{\path{/bin}}, {\path{/lib}} and {\path{/sbin}} with links into
+{\path{/usr/root/bin}}, {\path{/usr/root/lib}} and
+{\path{/usr/root/sbin}} respectively. This creates other problems for
+running the {\path{/linuxrc}} script, requiring bash, portmap, mount,
+ifconfig, and a handful of other shared libraries to be copied below
+the mount point --- a simple statically-linked C program would solve
+this problem.
diff -r 97dbd9524a7e -r 06d84bf87159 docs/src/user/start_addl_dom.tex
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/docs/src/user/start_addl_dom.tex  Thu Sep 22 17:42:01 2005
@@ -0,0 +1,172 @@
+\chapter{Starting Additional Domains}
+
+The first step in creating a new domain is to prepare a root
+filesystem for it to boot from.  Typically, this might be stored in a
+normal partition, an LVM or other volume manager partition, a disk
+file or on an NFS server.  A simple way to do this is simply to boot
+from your standard OS install CD and install the distribution into
+another partition on your hard drive.
+
+To start the \xend\ control daemon, type
+\begin{quote}
+  \verb!# xend start!
+\end{quote}
+
+If you wish the daemon to start automatically, see the instructions in
+Section~\ref{s:xend}. Once the daemon is running, you can use the
+\path{xm} tool to monitor and maintain the domains running on your
+system. This chapter provides only a brief tutorial. We provide full
+details of the \path{xm} tool in the next chapter.
+
+% \section{From the web interface}
+%
+% Boot the Xen machine and start Xensv (see Chapter~\ref{cha:xensv}
+% for more details) using the command: \\
+% \verb_# xensv start_ \\
+% This will also start Xend (see Chapter~\ref{cha:xend} for more
+% information).
+%
+% The domain management interface will then be available at {\tt
+%   http://your\_machine:8080/}.  This provides a user friendly wizard
+% for starting domains and functions for managing running domains.
+%
+% \section{From the command line}
+
+
+\section{Creating a Domain Configuration File}
+
+Before you can start an additional domain, you must create a
+configuration file. We provide two example files which you can use as
+a starting point:
+\begin{itemize}
+\item \path{/etc/xen/xmexample1} is a simple template configuration
+  file for describing a single VM.
+
+\item \path{/etc/xen/xmexample2} is a template description that
+  is intended to be reused for multiple virtual machines.  Setting the
+  value of the \path{vmid} variable on the \path{xm} command line
+  fills in parts of this template.
+\end{itemize}
+
+Copy one of these files and edit it as appropriate.  Typical values
+you may wish to edit include:
+
+\begin{quote}
+\begin{description}
+\item[kernel] Set this to the path of the kernel you compiled for use
+  with Xen (e.g.\ \path{kernel = `/boot/vmlinuz-2.6-xenU'})
+\item[memory] Set this to the size of the domain's memory in megabytes
+  (e.g.\ \path{memory = 64})
+\item[disk] Set the first entry in this list to calculate the offset
+  of the domain's root partition, based on the domain ID.  Set the
+  second to the location of \path{/usr} if you are sharing it between
+  domains (e.g.\ \path{disk = [`phy:your\_hard\_drive\%d,sda1,w' \%
+    (base\_partition\_number + vmid),
+    `phy:your\_usr\_partition,sda6,r' ]})
+\item[dhcp] Uncomment the dhcp variable, so that the domain will
+  receive its IP address from a DHCP server (e.g.\ \path{dhcp=`dhcp'})
+\end{description}
+\end{quote}
+
+You may also want to edit the {\bf vif} variable in order to choose
+the MAC address of the virtual ethernet interface yourself.  For
+example:
+\begin{quote}
+\verb_vif = [`mac=00:06:AA:F6:BB:B3']_
+\end{quote}
+If you do not set this variable, \xend\ will automatically generate a
+random MAC address from an unused range.
+
+
+\section{Booting the Domain}
+
+The \path{xm} tool provides a variety of commands for managing
+domains.  Use the \path{create} command to start new domains. Assuming
+you've created a configuration file \path{myvmconf} based around
+\path{/etc/xen/xmexample2}, to start a domain with virtual machine
+ID~1 you should type:
+
+\begin{quote}
+\begin{verbatim}
+# xm create -c myvmconf vmid=1
+\end{verbatim}
+\end{quote}
+
+The \path{-c} switch causes \path{xm} to turn into the domain's
+console after creation.  The \path{vmid=1} sets the \path{vmid}
+variable used in the \path{myvmconf} file.
+
+You should see the console boot messages from the new domain appearing
+in the terminal in which you typed the command, culminating in a login
+prompt.
+
+
+\section{Example: ttylinux}
+
+Ttylinux is a very small Linux distribution, designed to require very
+few resources.  We will use it as a concrete example of how to start a
+Xen domain.  Most users will probably want to install a full-featured
+distribution once they have mastered the basics\footnote{ttylinux is
+  maintained by Pascal Schmidt. You can download source packages from
+  the distribution's home page: {\tt
+    http://www.minimalinux.org/ttylinux/}}.
+
+\begin{enumerate}
+\item Download and extract the ttylinux disk image from the Files
+  section of the project's SourceForge site (see
+  \path{http://sf.net/projects/xen/}).
+\item Create a configuration file like the following:
+\begin{verbatim}
+kernel = "/boot/vmlinuz-2.6-xenU"
+memory = 64
+name = "ttylinux"
+nics = 1
+ip = "1.2.3.4"
+disk = ['file:/path/to/ttylinux/rootfs,sda1,w']
+root = "/dev/sda1 ro"
+\end{verbatim}
+\item Now start the domain and connect to its console:
+\begin{verbatim}
+xm create configfile -c
+\end{verbatim}
+\item Log in as root, password root.
+\end{enumerate}
+
+
+\section{Starting / Stopping Domains Automatically}
+
+It is possible to have certain domains start automatically at boot
+time and to have dom0 wait for all running domains to shut down before
+it shuts down the system.
+
+To specify that a domain is to start at boot time, place its configuration
+file (or a link to it) under \path{/etc/xen/auto/}.
+
+A Sys-V style init script for Red Hat and LSB-compliant systems is
+provided and will be automatically copied to \path{/etc/init.d/}
+during install.  You can then enable it in the appropriate way for
+your distribution.
+
+For instance, on Red Hat:
+
+\begin{quote}
+  \verb_# chkconfig --add xendomains_
+\end{quote}
+
+By default, this will start the boot-time domains in runlevels 3, 4
+and 5.
+
+You can also use the \path{service} command to run this script
+manually, e.g.:
+
+\begin{quote}
+  \verb_# service xendomains start_
+
+  Starts all the domains with config files under /etc/xen/auto/.
+\end{quote}
+
+\begin{quote}
+  \verb_# service xendomains stop_
+
+  Shuts down ALL running Xen domains.
+\end{quote}
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/util.c
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/util.c   Thu Sep 22 17:42:01 2005
@@ -0,0 +1,73 @@
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/uaccess.h>
+#include <asm-xen/driver_util.h>
+
+static int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+       /* generic_page_range() does all the hard work. */
+       return 0;
+}
+
+struct vm_struct *alloc_vm_area(unsigned long size)
+{
+       struct vm_struct *area;
+
+       area = get_vm_area(size, VM_IOREMAP);
+       if (area == NULL)
+               return NULL;
+
+       /*
+        * This ensures that page tables are constructed for this region
+        * of kernel virtual address space and mapped into init_mm.
+        */
+       if (generic_page_range(&init_mm, (unsigned long)area->addr,
+                              area->size, f, NULL)) {
+               free_vm_area(area);
+               return NULL;
+       }
+
+       return area;
+}
+
+void free_vm_area(struct vm_struct *area)
+{
+       BUG_ON(remove_vm_area(area->addr) != area);
+       kfree(area);
+}
+
+void lock_vm_area(struct vm_struct *area)
+{
+       unsigned long i;
+       char c;
+
+       /*
+        * Prevent context switch to a lazy mm that doesn't have this area
+        * mapped into its page tables.
+        */
+       preempt_disable();
+
+       /*
+        * Ensure that the page tables are mapped into the current mm. The
+        * page-fault path will copy the page directory pointers from init_mm.
+        */
+       for (i = 0; i < area->size; i += PAGE_SIZE)
+               (void)__get_user(c, (char *)area->addr + i);
+}
+
+void unlock_vm_area(struct vm_struct *area)
+{
+       preempt_enable();
+}
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/include/asm-xen/driver_util.h
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/driver_util.h        Thu Sep 22 17:42:01 2005
@@ -0,0 +1,16 @@
+
+#ifndef __ASM_XEN_DRIVER_UTIL_H__
+#define __ASM_XEN_DRIVER_UTIL_H__
+
+#include <linux/config.h>
+#include <linux/vmalloc.h>
+
+/* Allocate/destroy a 'vmalloc' VM area. */
+extern struct vm_struct *alloc_vm_area(unsigned long size);
+extern void free_vm_area(struct vm_struct *area);
+
+/* Lock an area so that PTEs are accessible in the current address space. */
+extern void lock_vm_area(struct vm_struct *area);
+extern void unlock_vm_area(struct vm_struct *area);
+
+#endif /* __ASM_XEN_DRIVER_UTIL_H__ */
diff -r 97dbd9524a7e -r 06d84bf87159 patches/linux-2.6.12/tpm_partial_read.patch
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/patches/linux-2.6.12/tpm_partial_read.patch       Thu Sep 22 17:42:01 2005
@@ -0,0 +1,74 @@
+--- ref-linux-2.6.12/drivers/char/tpm/tpm.c    2005-06-17 15:48:29.000000000 
-0400
++++ linux-2.6-xen-sparse/drivers/char/tpm/tpm.c        2005-09-15 
14:56:05.000000000 -0400
+@@ -473,6 +401,7 @@ ssize_t tpm_write(struct file * file, co
+       out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE);
+ 
+       atomic_set(&chip->data_pending, out_size);
++      atomic_set(&chip->data_position, 0);
+       up(&chip->buffer_mutex);
+ 
+       /* Set a timeout by which the reader must come claim the result */
+@@ -494,29 +423,34 @@ ssize_t tpm_read(struct file * file, cha
+ {
+       struct tpm_chip *chip = file->private_data;
+       int ret_size = -ENODATA;
++      int pos, pending = 0;
+ 
+-      if (atomic_read(&chip->data_pending) != 0) {    /* Result available */
++      down(&chip->buffer_mutex);
++      ret_size = atomic_read(&chip->data_pending);
++      if ( ret_size > 0 ) {   /* Result available */
++              if (size < ret_size)
++                      ret_size = size;
++
++              pos = atomic_read(&chip->data_position);
++
++              if (copy_to_user((void __user *) buf,
++                               &chip->data_buffer[pos], ret_size)) {
++                      ret_size = -EFAULT;
++              } else {
++                      pending = atomic_read(&chip->data_pending) - ret_size;
++                      if ( pending ) {
++                              atomic_set( &chip->data_pending, pending );
++                              atomic_set( &chip->data_position, pos+ret_size 
);
++                      }
++              }
++      }
++      up(&chip->buffer_mutex);
++
++      if ( ret_size <= 0 || pending == 0 ) {
++              atomic_set( &chip->data_pending, 0 );
+               down(&chip->timer_manipulation_mutex);
+               del_singleshot_timer_sync(&chip->user_read_timer);
+               up(&chip->timer_manipulation_mutex);
+-
+-              down(&chip->buffer_mutex);
+-
+-              ret_size = atomic_read(&chip->data_pending);
+-              atomic_set(&chip->data_pending, 0);
+-
+-              if (ret_size == 0)      /* timeout just occurred */
+-                      ret_size = -ETIME;
+-              else if (ret_size > 0) {        /* relay data */
+-                      if (size < ret_size)
+-                              ret_size = size;
+-
+-                      if (copy_to_user((void __user *) buf,
+-                                       chip->data_buffer, ret_size)) {
+-                              ret_size = -EFAULT;
+-                      }
+-              }
+-              up(&chip->buffer_mutex);
+       }
+ 
+       return ret_size;
+--- ref-linux-2.6.12/drivers/char/tpm/tpm.h    2005-06-17 15:48:29.000000000 
-0400
++++ linux-2.6-xen-sparse/drivers/char/tpm/tpm.h        2005-09-15 
14:56:05.000000000 -0400
+@@ -54,6 +54,7 @@ struct tpm_chip {
+       /* Data passed to and from the tpm via the read/write calls */
+       u8 *data_buffer;
+       atomic_t data_pending;
++      atomic_t data_position;
+       struct semaphore buffer_mutex;
+ 
+       struct timer_list user_read_timer;      /* user needs to claim result */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/debugger/gdb/README
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/tools/debugger/gdb/README Thu Sep 22 17:42:01 2005
@@ -0,0 +1,29 @@
+
+DomU GDB server for 32-bit (PAE and non-PAE) systems
+----------------------------------------------------
+
+Lines marked below with [*] are optional; follow them only if you want full
+source-level debugging of your kernel image.
+
+To build the GDB server:
+ 1. Run ./gdbbuild from within this directory.
+ 2. Copy ./gdb-6.2.1-linux-i386-xen/gdb/gdbserver/gdbserver-xen
+    to your test machine.
+
+To build a debuggable guest kernel image:
+ 1. cd linux-2.6.12-xenU
+ 2. ARCH=xen make menuconfig
+ 3. From within the configurator, enable the following options:
+    # Kernel hacking -> Compile the kernel with debug info [*]
+                     -> Compile the kernel with frame pointers
+ 4. (Re)build and (re)install your xenU kernel image.
+
+To debug a running guest:
+ 1. Use 'xm list' to discover its domain id ($domid). 
+ 2. Run 'gdbserver-xen 127.0.0.1:9999 --attach $domid'
+ 3. Run 'gdb /path/to/vmlinux-syms-2.6.xx-xenU'
+ 4. From within the gdb client session:
+    # directory /path/to/linux-2.6.xx-xenU [*]
+    # target remote 127.0.0.1:9999
+    # bt
+    # disass
diff -r 97dbd9524a7e -r 06d84bf87159 tools/firmware/vmxassist/acpi_madt.c
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/tools/firmware/vmxassist/acpi_madt.c      Thu Sep 22 17:42:01 2005
@@ -0,0 +1,145 @@
+/*
+ * acpi_madt.c: Update ACPI MADT table for multiple processor guest.
+ *
+ * Yu Ke, ke.yu@xxxxxxxxx
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "../acpi/acpi2_0.h"
+#include "../acpi/acpi_madt.h"
+
+#define NULL ((void*)0)
+
+extern int puts(const char *s);
+
+#define VCPU_MAGIC 0x76637075 /* "vcpu" */
+
+/* xc_vmx_builder wrote vcpu block at 0x9F800. Return it. */
+static int 
+get_vcpus(void)
+{
+       unsigned long *vcpus;
+
+       vcpus = (unsigned long *)0x9F800;
+       if (vcpus[0] != VCPU_MAGIC) {
+               puts("Bad vcpus magic, set vcpu number=1\n");
+               return 1;
+       }
+
+       return vcpus[1];
+}
+
+static void *
+acpi_madt_get_madt(unsigned char *acpi_start)
+{
+       ACPI_2_0_RSDP *rsdp=NULL;
+       ACPI_2_0_RSDT *rsdt=NULL;
+       ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt;
+
+       rsdp = (ACPI_2_0_RSDP *)(acpi_start + sizeof(ACPI_2_0_FACS));
+       if (rsdp->Signature != ACPI_2_0_RSDP_SIGNATURE) {
+               puts("Bad RSDP signature\n");
+               return NULL;
+       }
+
+       rsdt= (ACPI_2_0_RSDT *)
+               (acpi_start + rsdp->RsdtAddress - ACPI_PHYSICAL_ADDRESS);
+       if (rsdt->Header.Signature != ACPI_2_0_RSDT_SIGNATURE) {
+               puts("Bad RSDT signature\n");
+               return NULL;
+       }
+
+       madt = (ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *)
+               ( acpi_start+ rsdt->Entry[1] - ACPI_PHYSICAL_ADDRESS);
+       if (madt->Header.Header.Signature !=
+           ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE_SIGNATURE) {
+               puts("Bad MADT signature \n");
+               return NULL;
+       }
+
+       return madt;
+}
+
+static void 
+set_checksum(void *start, int checksum_offset, int len)
+{
+       unsigned char sum = 0;  
+       unsigned char *ptr;
+
+       ptr = start;
+       ptr[checksum_offset] = 0;
+       while (len--)
+               sum += *ptr++;
+
+       ptr = start;
+       ptr[checksum_offset] = -sum;
+}
+
+static int 
+acpi_madt_set_local_apics(
+       int nr_vcpu, 
+       ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt)
+{
+       int i;
+
+       if ((nr_vcpu > MAX_VIRT_CPUS) || (nr_vcpu < 0) || !madt)
+               return -1;
+
+       for (i = 0; i < nr_vcpu; i++) {
+               madt->LocalApic[i].Type            = ACPI_PROCESSOR_LOCAL_APIC;
+               madt->LocalApic[i].Length          = sizeof 
(ACPI_LOCAL_APIC_STRUCTURE);
+               madt->LocalApic[i].AcpiProcessorId = i;
+               madt->LocalApic[i].ApicId          = i;
+               madt->LocalApic[i].Flags           = 1; 
+       }
+
+       madt->Header.Header.Length =
+               sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) - 
+               (MAX_VIRT_CPUS - nr_vcpu)* sizeof(ACPI_LOCAL_APIC_STRUCTURE);
+
+       return 0;                            
+}
+
+#define FIELD_OFFSET(TYPE,Field) ((unsigned int)(&(((TYPE *) 0)->Field)))
+
+int acpi_madt_update(unsigned char *acpi_start)
+{
+       int rc;
+       ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt;
+
+       madt = acpi_madt_get_madt(acpi_start);
+       if (!madt)
+               return -1;
+
+       rc = acpi_madt_set_local_apics(get_vcpus(), madt);
+       if (rc != 0)
+               return rc;
+
+       set_checksum(
+               madt, FIELD_OFFSET(ACPI_TABLE_HEADER, Checksum),
+               madt->Header.Header.Length);
+
+       return 0;              
+}
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/DevController.py
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/tools/python/xen/xend/server/DevController.py     Thu Sep 22 17:42:01 2005
@@ -0,0 +1,203 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
+#============================================================================
+
+
+from xen.xend import sxp
+from xen.xend.XendError import VmError
+from xen.xend.XendLogging import log
+from xen.xend.xenstore.xstransact import xstransact
+
+
+class DevController:
+    """Abstract base class for a device controller.  Device controllers create
+    appropriate entries in the store to trigger the creation, reconfiguration,
+    and destruction of devices in guest domains.  Each subclass of
+    DevController is responsible for a particular device-class, and
+    understands the details of configuration specific to that device-class.
+
+    DevController itself provides the functionality common to all device
+    creation tasks, as well as providing an interface to XendDomainInfo for
+    triggering those events themselves.
+    """
+
+    # Set when registered.
+    deviceClass = None
+
+
+    ## public:
+
+    def __init__(self, vm):
+        self.vm = vm
+
+
+    def createDevice(self, config):
+        """Trigger the creation of a device with the given configuration.
+
+        @return The ID for the newly created device.
+        """
+        (devid, back, front) = self.getDeviceDetails(config)
+
+        self.writeDetails(config, devid, back, front)
+
+        return devid
+
+
+    def reconfigureDevice(self, devid, config):
+        """Reconfigure the specified device.
+
+        The implementation here just raises VmError.  This may be overridden
+        by those subclasses that can reconfigure their devices.
+        """
+        raise VmError('%s devices may not be reconfigured' % self.deviceClass)
+
+
+    def destroyDevice(self, devid):
+        """Destroy the specified device.
+
+        The implementation here simply deletes the appropriate paths from
+        the store.  This may be overridden by subclasses who need to perform
+        other tasks on destruction.
+        """
+
+        frontpath = self.frontendPath(devid)
+        backpath = xstransact.Read("%s/backend" % frontpath)
+
+        xstransact.Remove(frontpath)
+        xstransact.Remove(backpath)
+
+
+    def sxpr(self, devid):
+        """@return an s-expression describing the specified device.
+        """
+        return [self.deviceClass, ['dom', self.vm.getDomid(),
+                                   'id', devid]]
+
+
+    ## protected:
+
+    def getDeviceDetails(self, config):
+        """Compute the details for creation of a device corresponding to the
+        given configuration.  These details consist of a tuple of (devID,
+        backDetails, frontDetails), where devID is the ID for the new device,
+        and backDetails and frontDetails are the device configuration
+        specifics for the backend and frontend respectively.
+
+        backDetails and frontDetails should be dictionaries, the keys and
+        values of which will be used as paths in the store.  There is no need
+        for these dictionaries to include the references from frontend to
+        backend, nor vice versa, as these will be handled by DevController.
+
+        Abstract; must be implemented by every subclass.
+
+        @return (devID, backDetails, frontDetails), as specified above.
+        """
+
+        raise NotImplementedError()
+
+
+    def getDomid(self):
+        """Stub to {@link XendDomainInfo.getDomid}, for use by our
+        subclasses.
+        """
+        return self.vm.getDomid()
+
+
+    def allocateDeviceID(self):
+        """Allocate a device ID, allocating them consecutively on a
+        per-domain, per-device-class basis, and using the store to record the
+        next available ID.
+
+        This method is available to our subclasses, though it is not
+        compulsory to use it; subclasses may prefer to allocate IDs based upon
+        the device configuration instead.
+        """
+        path = self.frontendMiscPath()
+        t = xstransact(path)
+        try:
+            result = t.read("nextDeviceID")
+            if result:
+                result = int(result)
+            else:
+                result = 1
+            t.write("nextDeviceID", str(result + 1))
+            t.commit()
+            return result
+        except:
+            t.abort()
+            raise
+
+
+    ## private:
+
+    def writeDetails(self, config, devid, backDetails, frontDetails):
+        """Write the details in the store to trigger creation of a device.
+        The backend domain ID is taken from the given config, paths for
+        frontend and backend are computed, and these are written to the store
+        appropriately, including references from frontend to backend and vice
+        versa.
+
+        @param config The configuration of the device, as given to
+        {@link #createDevice}.
+        @param devid        As returned by {@link #getDeviceDetails}.
+        @param backDetails  As returned by {@link #getDeviceDetails}.
+        @param frontDetails As returned by {@link #getDeviceDetails}.
+        """
+
+        import xen.xend.XendDomain
+        backdom = xen.xend.XendDomain.instance().domain_lookup_by_name(
+            sxp.child_value(config, 'backend', '0'))
+
+        frontpath = self.frontendPath(devid)
+        backpath  = self.backendPath(backdom, devid)
+        
+        frontDetails.update({
+            'backend' : backpath,
+            'backend-id' : "%i" % backdom.getDomid()
+            })
+
+
+        backDetails.update({
+            'domain' : self.vm.getName(),
+            'frontend' : frontpath,
+            'frontend-id' : "%i" % self.vm.getDomid()
+            })
+
+        log.debug('DevController: writing %s to %s.', str(frontDetails),
+                  frontpath)
+        log.debug('DevController: writing %s to %s.', str(backDetails),
+                  backpath)
+
+        xstransact.Write(frontpath, frontDetails)
+        xstransact.Write(backpath, backDetails)
+
+
+    def backendPath(self, backdom, devid):
+        """@param backdom [XendDomainInfo] The backend domain info."""
+
+        return "%s/backend/%s/%s/%d" % (backdom.getPath(),
+                                        self.deviceClass,
+                                        self.vm.getUuid(), devid)
+
+
+    def frontendPath(self, devid):
+        return "%s/device/%s/%d" % (self.vm.getPath(), self.deviceClass,
+                                    devid)
+
+
+    def frontendMiscPath(self):
+        return "%s/device-misc/%s" % (self.vm.getPath(), self.deviceClass)
diff -r 97dbd9524a7e -r 06d84bf87159 tools/vtpm/tpm_emulator-0.2b-x86_64.patch
--- /dev/null   Thu Sep 22 17:34:14 2005
+++ b/tools/vtpm/tpm_emulator-0.2b-x86_64.patch Thu Sep 22 17:42:01 2005
@@ -0,0 +1,499 @@
+diff -uprN tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c 
tpm_emulator-0.2-x86_64/crypto/gmp_kernel_wrapper.c
+--- tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c       2005-08-15 
00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/crypto/gmp_kernel_wrapper.c        2005-09-19 
14:10:29.000000000 -0700
+@@ -79,7 +79,7 @@ void __attribute__ ((regparm(0))) *kerne
+ {
+   void *ret  = (void*)kmalloc(size, GFP_KERNEL);
+   if (!ret) panic(KERN_CRIT TPM_MODULE_NAME 
+-    "GMP: cannot allocate memory (size=%u)\n", size);
++    "GMP: cannot allocate memory (size=%Zu)\n", size);
+   return ret;
+ }
+ 
+@@ -88,7 +88,7 @@ void __attribute__ ((regparm(0))) *kerne
+ {
+   void *ret = (void*)kmalloc(new_size, GFP_KERNEL);
+   if (!ret) panic(KERN_CRIT TPM_MODULE_NAME "GMP: Cannot reallocate memory "
+-    "(old_size=%u new_size=%u)\n", old_size, new_size);
++    "(old_size=%Zu new_size=%Zu)\n", old_size, new_size);
+   memcpy(ret, oldptr, old_size);
+   kfree(oldptr);
+   return ret;
+diff -uprN tpm_emulator-0.2/linux_module.c 
tpm_emulator-0.2-x86_64/linux_module.c
+--- tpm_emulator-0.2/linux_module.c    2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/linux_module.c     2005-09-19 14:10:29.000000000 
-0700
+@@ -66,7 +66,7 @@ static int tpm_release(struct inode *ino
+ 
+ static ssize_t tpm_read(struct file *file, char *buf, size_t count, loff_t 
*ppos)
+ {
+-  debug("%s(%d)", __FUNCTION__, count);
++  debug("%s(%Zu)", __FUNCTION__, count);
+   down(&tpm_mutex);
+   if (tpm_response.data != NULL) {
+     count = min(count, (size_t)tpm_response.size - (size_t)*ppos);
+@@ -81,7 +81,7 @@ static ssize_t tpm_read(struct file *fil
+ 
+ static ssize_t tpm_write(struct file *file, const char *buf, size_t count, 
loff_t *ppos)
+ {
+-  debug("%s(%d)", __FUNCTION__, count);
++  debug("%s(%Zu)", __FUNCTION__, count);
+   down(&tpm_mutex);
+   *ppos = 0;
+   if (tpm_response.data != NULL) kfree(tpm_response.data);
+diff -uprN tpm_emulator-0.2/linux_module.h 
tpm_emulator-0.2-x86_64/linux_module.h
+--- tpm_emulator-0.2/linux_module.h    2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/linux_module.h     2005-09-19 14:10:29.000000000 
-0700
+@@ -28,8 +28,10 @@
+ 
+ /* module settings */
+ 
++#ifndef STR
+ #define STR(s) __STR__(s)
+ #define __STR__(s) #s
++#endif
+ #include "tpm_version.h"
+ 
+ #define TPM_DEVICE_MINOR      224
+diff -uprN tpm_emulator-0.2/Makefile tpm_emulator-0.2-x86_64/Makefile
+--- tpm_emulator-0.2/Makefile  2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/Makefile   2005-09-19 14:10:29.000000000 -0700
+@@ -7,6 +7,7 @@
+ KERNEL_RELEASE := $(shell uname -r)
+ KERNEL_BUILD   := /lib/modules/$(KERNEL_RELEASE)/build
+ MOD_SUBDIR     := misc
++COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/)
+ 
+ # module settings
+ MODULE_NAME    := tpm_emulator
+@@ -17,8 +18,14 @@ VERSION_BUILD  := $(shell date +"%s")
+ # enable/disable DEBUG messages
+ EXTRA_CFLAGS   += -DDEBUG -g  
+ 
++ifeq ($(COMPILE_ARCH),x86_64)
++LIBDIR = lib64
++else
++LIBDIR = lib
++endif
++
+ # GNU MP configuration
+-GMP_LIB        := /usr/lib/libgmp.a
++GMP_LIB        := /usr/$(LIBDIR)/libgmp.a
+ GMP_HEADER     := /usr/include/gmp.h
+ 
+ # sources and objects
+diff -uprN tpm_emulator-0.2/README tpm_emulator-0.2-x86_64/README
+--- tpm_emulator-0.2/README    2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/README     2005-09-19 14:21:43.000000000 -0700
+@@ -45,6 +45,12 @@ Example:
+ GMP_LIB        := /usr/lib/libgmp.a
+ GMP_HEADER     := /usr/include/gmp.h
+ 
++GNU MP Library on 64 bit Systems
++--------------------------------------------------------------------------
++Some 64-bit kernels have problems with importing the user-space gmp 
++library (/usr/lib*/libgmp.a) into kernel space.  These kernels will require
++that the gmp library be recompiled for kernel space with -mcmodel=kernel.
++
+ Installation
+ --------------------------------------------------------------------------
+ The compilation and installation process uses the build environment for 
+diff -uprN tpm_emulator-0.2/tpm/tpm_credentials.c 
tpm_emulator-0.2-x86_64/tpm/tpm_credentials.c
+--- tpm_emulator-0.2/tpm/tpm_credentials.c     2005-08-15 00:58:57.000000000 
-0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_credentials.c      2005-09-19 
14:10:29.000000000 -0700
+@@ -47,16 +47,16 @@ int tpm_compute_pubkey_checksum(TPM_NONC
+ 
+ TPM_RESULT tpm_get_pubek(TPM_PUBKEY *pubEndorsementKey)
+ {
+-  UINT32 key_length;
++  size_t key_length;
+   if (!tpmData.permanent.data.endorsementKey.size) return TPM_NO_ENDORSEMENT;
+   /* setup TPM_PUBKEY structure */
+-  key_length = tpmData.permanent.data.endorsementKey.size;
+-  pubEndorsementKey->pubKey.keyLength = key_length >> 3;
++  pubEndorsementKey->pubKey.keyLength = 
tpmData.permanent.data.endorsementKey.size >> 3;
+   pubEndorsementKey->pubKey.key = 
tpm_malloc(pubEndorsementKey->pubKey.keyLength);
+   if (pubEndorsementKey->pubKey.key == NULL) return TPM_FAIL;
+   rsa_export_modulus(&tpmData.permanent.data.endorsementKey,
+-    pubEndorsementKey->pubKey.key,
+-    &pubEndorsementKey->pubKey.keyLength);
++                   pubEndorsementKey->pubKey.key,
++                   &key_length);
++  pubEndorsementKey->pubKey.keyLength = key_length;
+   pubEndorsementKey->algorithmParms.algorithmID = TPM_ALG_RSA;
+   pubEndorsementKey->algorithmParms.encScheme = TPM_ES_RSAESOAEP_SHA1_MGF1;
+   pubEndorsementKey->algorithmParms.sigScheme = TPM_SS_NONE;
+@@ -169,6 +169,7 @@ TPM_RESULT TPM_OwnerReadInternalPub(TPM_
+ {
+   TPM_RESULT res;
+   TPM_KEY_DATA *srk = &tpmData.permanent.data.srk;
++  size_t key_length;
+   info("TPM_OwnerReadInternalPub()");
+   /* verify authorization */
+   res = tpm_verify_auth(auth1, tpmData.permanent.data.ownerAuth, 
TPM_KH_OWNER);
+@@ -180,7 +181,8 @@ TPM_RESULT TPM_OwnerReadInternalPub(TPM_
+     publicPortion->pubKey.key = tpm_malloc(publicPortion->pubKey.keyLength);
+     if (publicPortion->pubKey.key == NULL) return TPM_FAIL;
+     rsa_export_modulus(&srk->key, publicPortion->pubKey.key, 
+-      &publicPortion->pubKey.keyLength);
++      &key_length);
++    publicPortion->pubKey.keyLength = key_length;
+     publicPortion->algorithmParms.algorithmID = TPM_ALG_RSA;
+     publicPortion->algorithmParms.encScheme = srk->encScheme;
+     publicPortion->algorithmParms.sigScheme = srk->sigScheme;
+diff -uprN tpm_emulator-0.2/tpm/tpm_crypto.c 
tpm_emulator-0.2-x86_64/tpm/tpm_crypto.c
+--- tpm_emulator-0.2/tpm/tpm_crypto.c  2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_crypto.c   2005-09-19 14:10:29.000000000 
-0700
+@@ -182,7 +182,8 @@ TPM_RESULT TPM_CertifyKey(TPM_KEY_HANDLE
+   TPM_KEY_DATA *cert, *key;
+   sha1_ctx_t sha1_ctx;
+   BYTE *buf, *p;
+-  UINT32 length;
++  UINT32 length32;
++  size_t length;
+   info("TPM_CertifyKey()");
+   /* get keys */
+   cert = tpm_get_key(certHandle);
+@@ -264,14 +265,15 @@ TPM_RESULT TPM_CertifyKey(TPM_KEY_HANDLE
+   /* compute the digest of the CERTIFY_INFO[2] structure and sign it */
+   length = sizeof_TPM_CERTIFY_INFO((*certifyInfo));
+   p = buf = tpm_malloc(length);
++  length32=(UINT32) length;
+   if (buf == NULL
+-      || tpm_marshal_TPM_CERTIFY_INFO(&p, &length, certifyInfo)) {
++      || tpm_marshal_TPM_CERTIFY_INFO(&p, &length32, certifyInfo)) {
+     free_TPM_KEY_PARMS(certifyInfo->algorithmParms);
+     return TPM_FAIL;
+   }
+   length = sizeof_TPM_CERTIFY_INFO((*certifyInfo));
+   sha1_init(&sha1_ctx);
+-  sha1_update(&sha1_ctx, buf, length);
++  sha1_update(&sha1_ctx, buf, (size_t) length);
+   sha1_final(&sha1_ctx, buf);
+   res = tpm_sign(cert, auth1, FALSE, buf, SHA1_DIGEST_LENGTH, outData, 
outDataSize);
+   tpm_free(buf);
+@@ -292,7 +294,8 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL
+   TPM_KEY_DATA *cert, *key;
+   sha1_ctx_t sha1_ctx;
+   BYTE *buf, *p;
+-  UINT32 length;
++  size_t length;
++  UINT32 length32;
+   info("TPM_CertifyKey2()");
+   /* get keys */
+   cert = tpm_get_key(certHandle);
+@@ -362,8 +365,9 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL
+   /* compute the digest of the CERTIFY_INFO[2] structure and sign it */
+   length = sizeof_TPM_CERTIFY_INFO((*certifyInfo));
+   p = buf = tpm_malloc(length);
++  length32 = (UINT32) length;
+   if (buf == NULL
+-      || tpm_marshal_TPM_CERTIFY_INFO(&p, &length, certifyInfo)) {
++      || tpm_marshal_TPM_CERTIFY_INFO(&p, &length32, certifyInfo)) {
+     free_TPM_KEY_PARMS(certifyInfo->algorithmParms);
+     return TPM_FAIL;
+   }
+diff -uprN tpm_emulator-0.2/tpm/tpm_data.c 
tpm_emulator-0.2-x86_64/tpm/tpm_data.c
+--- tpm_emulator-0.2/tpm/tpm_data.c    2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_data.c     2005-09-19 14:10:29.000000000 
-0700
+@@ -179,7 +179,7 @@ static int read_from_file(uint8_t **data
+ int tpm_store_permanent_data(void)
+ {
+   uint8_t *buf, *ptr;
+-  size_t buf_length, len;
++  UINT32 buf_length, len;
+ 
+   /* marshal data */
+   buf_length = len = sizeof_TPM_STCLEAR_FLAGS(tpmData.stclear.flags)
+@@ -207,13 +207,14 @@ int tpm_store_permanent_data(void)
+ int tpm_restore_permanent_data(void)
+ {
+   uint8_t *buf, *ptr;
+-  size_t buf_length, len;
++  size_t buf_length;
++  UINT32 len;
+   TPM_VERSION ver;
+ 
+   /* read data */
+   if (read_from_file(&buf, &buf_length)) return -1;
+   ptr = buf;
+-  len = buf_length;
++  len = (uint32_t) buf_length;
+   /* unmarshal data */
+   if (tpm_unmarshal_TPM_VERSION(&ptr, &len, &ver)
+       || memcmp(&ver, &tpmData.permanent.data.version, sizeof(TPM_VERSION))
+diff -uprN tpm_emulator-0.2/tpm/tpm_marshalling.c 
tpm_emulator-0.2-x86_64/tpm/tpm_marshalling.c
+--- tpm_emulator-0.2/tpm/tpm_marshalling.c     2005-08-15 00:58:57.000000000 
-0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_marshalling.c      2005-09-19 
14:10:29.000000000 -0700
+@@ -981,7 +981,7 @@ int tpm_unmarshal_TPM_STANY_FLAGS(BYTE *
+ 
+ int tpm_marshal_RSA(BYTE **ptr, UINT32 *length, rsa_private_key_t *v)
+ {
+-  UINT32 m_len, e_len, q_len;
++  size_t m_len, e_len, q_len;
+   if (*length < sizeof_RSA((*v))) return -1;
+   if (v->size > 0) {
+     rsa_export_modulus(v, &(*ptr)[6], &m_len);
+diff -uprN tpm_emulator-0.2/tpm/tpm_owner.c 
tpm_emulator-0.2-x86_64/tpm/tpm_owner.c
+--- tpm_emulator-0.2/tpm/tpm_owner.c   2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_owner.c    2005-09-19 14:10:29.000000000 
-0700
+@@ -108,7 +108,7 @@ TPM_RESULT TPM_TakeOwnership(TPM_PROTOCO
+   TPM_RESULT res;
+   rsa_private_key_t *ek = &tpmData.permanent.data.endorsementKey;
+   TPM_KEY_DATA *srk = &tpmData.permanent.data.srk;
+-  UINT32 buf_size = ek->size >> 3;
++  size_t buf_size = ek->size >> 3, key_length; 
+   BYTE buf[buf_size];
+ 
+   info("TPM_TakeOwnership()");
+@@ -172,7 +172,8 @@ TPM_RESULT TPM_TakeOwnership(TPM_PROTOCO
+     return TPM_FAIL;
+   }
+   rsa_export_modulus(&srk->key, srkPub->pubKey.key,
+-    &srkPub->pubKey.keyLength);
++                   &key_length);
++  srkPub->pubKey.keyLength = (UINT32) key_length;
+   /* setup tpmProof and set state to owned */
+   tpm_get_random_bytes(tpmData.permanent.data.tpmProof.nonce, 
+     sizeof(tpmData.permanent.data.tpmProof.nonce));
+diff -uprN tpm_emulator-0.2/tpm/tpm_storage.c 
tpm_emulator-0.2-x86_64/tpm/tpm_storage.c
+--- tpm_emulator-0.2/tpm/tpm_storage.c 2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/tpm/tpm_storage.c  2005-09-19 14:10:29.000000000 
-0700
+@@ -58,6 +58,7 @@ int encrypt_sealed_data(TPM_KEY_DATA *ke
+                         BYTE *enc, UINT32 *enc_size)
+ {
+   UINT32 len;
++  size_t enc_size32 = *enc_size;
+   BYTE *buf, *ptr;
+   rsa_public_key_t pub_key;
+   int scheme;
+@@ -72,7 +73,7 @@ int encrypt_sealed_data(TPM_KEY_DATA *ke
+   if (buf == NULL
+       || tpm_marshal_TPM_SEALED_DATA(&ptr, &len, seal)
+       || rsa_encrypt(&pub_key, scheme, buf, sizeof_TPM_SEALED_DATA((*seal)),
+-                     enc, enc_size)) {
++                     enc, &enc_size32)) {
+     tpm_free(buf);
+     rsa_release_public_key(&pub_key);
+     return -1;
+@@ -85,7 +86,8 @@ int encrypt_sealed_data(TPM_KEY_DATA *ke
+ int decrypt_sealed_data(TPM_KEY_DATA *key, BYTE *enc, UINT32 enc_size,
+                         TPM_SEALED_DATA *seal, BYTE **buf) 
+ {
+-  UINT32 len;
++  size_t len;
++  UINT32 len32;
+   BYTE *ptr;
+   int scheme;
+   switch (key->encScheme) {
+@@ -96,8 +98,12 @@ int decrypt_sealed_data(TPM_KEY_DATA *ke
+   len = enc_size;
+   *buf = ptr = tpm_malloc(len);
+   if (*buf == NULL
+-      || rsa_decrypt(&key->key, scheme, enc, enc_size, *buf, &len)
+-      || tpm_unmarshal_TPM_SEALED_DATA(&ptr, &len, seal)) {
++      || rsa_decrypt(&key->key, scheme, enc, enc_size, *buf, &len) ){
++    tpm_free(*buf);
++    return -1;
++  }
++  len32 = len;
++  if (tpm_unmarshal_TPM_SEALED_DATA(&ptr, &len32, seal)) {
+     tpm_free(*buf);
+     return -1;
+   }
+@@ -237,11 +243,12 @@ TPM_RESULT TPM_Unseal(TPM_KEY_HANDLE par
+ 
+ TPM_RESULT TPM_UnBind(TPM_KEY_HANDLE keyHandle, UINT32 inDataSize,
+                       BYTE *inData, TPM_AUTH *auth1, 
+-                      UINT32 *outDataSize, BYTE **outData)
++                      UINT32 *outDataSize32, BYTE **outData)
+ {
+   TPM_RESULT res;
+   TPM_KEY_DATA *key;
+   int scheme;
++  size_t outDataSize;
+   info("TPM_UnBind()");
+   /* get key */
+   key = tpm_get_key(keyHandle);
+@@ -258,8 +265,8 @@ TPM_RESULT TPM_UnBind(TPM_KEY_HANDLE key
+   /* the size of the input data muss be greater than zero */
+   if (inDataSize == 0) return TPM_BAD_PARAMETER;
+   /* decrypt data */
+-  *outDataSize = inDataSize;
+-  *outData = tpm_malloc(*outDataSize);
++  outDataSize = inDataSize;
++  *outData = tpm_malloc(outDataSize);
+   if (*outData == NULL) return TPM_FAIL;
+   switch (key->encScheme) {
+     case TPM_ES_RSAESOAEP_SHA1_MGF1: scheme = RSA_ES_OAEP_SHA1; break;
+@@ -267,20 +274,21 @@ TPM_RESULT TPM_UnBind(TPM_KEY_HANDLE key
+     default: tpm_free(*outData); return TPM_DECRYPT_ERROR;
+   }
+   if (rsa_decrypt(&key->key, scheme, inData, inDataSize, 
+-      *outData, outDataSize)) {
++                *outData, &outDataSize) ) { 
+     tpm_free(*outData);
+     return TPM_DECRYPT_ERROR;
+   }
+   /* verify data if it is of type TPM_BOUND_DATA */
+   if (key->encScheme == TPM_ES_RSAESOAEP_SHA1_MGF1 
+       || key->keyUsage != TPM_KEY_LEGACY) {
+-    if (*outDataSize < 5 || memcmp(*outData, "\x01\x01\00\x00\x02", 5) != 0) {
++    if (outDataSize < 5 || memcmp(*outData, "\x01\x01\00\x00\x02", 5) != 0) {
+       tpm_free(*outData);
+       return TPM_DECRYPT_ERROR;
+     }
+-    *outDataSize -= 5;
+-    memmove(*outData, &(*outData)[5], *outDataSize);   
+-  } 
++    outDataSize -= 5;
++    memmove(*outData, &(*outData)[5], outDataSize);   
++  }
++  *outDataSize32 = (UINT32) outDataSize; 
+   return TPM_SUCCESS;
+ }
+ 
+@@ -311,12 +319,13 @@ static int verify_key_digest(TPM_KEY *ke
+ }
+ 
+ int encrypt_private_key(TPM_KEY_DATA *key, TPM_STORE_ASYMKEY *store,
+-                        BYTE *enc, UINT32 *enc_size)
++                        BYTE *enc, UINT32 *enc_size32)
+ {
+   UINT32 len;
+   BYTE *buf, *ptr;
+   rsa_public_key_t pub_key;
+   int scheme;
++  size_t enc_size;
+   switch (key->encScheme) {
+     case TPM_ES_RSAESOAEP_SHA1_MGF1: scheme = RSA_ES_OAEP_SHA1; break;
+     case TPM_ES_RSAESPKCSv15: scheme = RSA_ES_PKCSV15; break;
+@@ -328,11 +337,12 @@ int encrypt_private_key(TPM_KEY_DATA *ke
+   if (buf == NULL
+       || tpm_marshal_TPM_STORE_ASYMKEY(&ptr, &len, store)
+       || rsa_encrypt(&pub_key, scheme, buf, 
sizeof_TPM_STORE_ASYMKEY((*store)),
+-                     enc, enc_size)) {
++                     enc, &enc_size)) {
+     tpm_free(buf);
+     rsa_release_public_key(&pub_key);
+     return -1;
+   }
++  *enc_size32 = (UINT32) enc_size;
+   tpm_free(buf);
+   rsa_release_public_key(&pub_key);
+   return 0;
+@@ -341,7 +351,8 @@ int encrypt_private_key(TPM_KEY_DATA *ke
+ int decrypt_private_key(TPM_KEY_DATA *key, BYTE *enc, UINT32 enc_size, 
+                         TPM_STORE_ASYMKEY *store, BYTE **buf) 
+ {
+-  UINT32 len;
++  UINT32 len32;
++  size_t len;
+   BYTE *ptr;
+   int scheme;
+   switch (key->encScheme) {
+@@ -352,11 +363,16 @@ int decrypt_private_key(TPM_KEY_DATA *ke
+   len = enc_size;
+   *buf = ptr = tpm_malloc(len);
+   if (*buf == NULL
+-      || rsa_decrypt(&key->key, scheme, enc, enc_size, *buf, &len)
+-      || tpm_unmarshal_TPM_STORE_ASYMKEY(&ptr, &len, store)) {
++      || rsa_decrypt(&key->key, scheme, enc, enc_size, *buf, &len) ) {
++    tpm_free(*buf);
++    return -1;
++  }
++  len32 = (UINT32) len;
++  if (tpm_unmarshal_TPM_STORE_ASYMKEY(&ptr, &len32, store)) {  
+     tpm_free(*buf);
+     return -1;
+   }
++
+   return 0;
+ }
+ 
+@@ -371,7 +387,7 @@ TPM_RESULT TPM_CreateWrapKey(TPM_KEY_HAN
+   TPM_SESSION_DATA *session;
+   TPM_STORE_ASYMKEY store;
+   rsa_private_key_t rsa;
+-  UINT32 key_length;
++  size_t key_length;
+ 
+   info("TPM_CreateWrapKey()");
+   /* get parent key */
+@@ -428,11 +444,11 @@ TPM_RESULT TPM_CreateWrapKey(TPM_KEY_HAN
+   }
+   if (compute_key_digest(wrappedKey, &store.pubDataDigest)) return TPM_FAIL;
+   /* generate key and store it */
+-  key_length = keyInfo->algorithmParms.parms.rsa.keyLength;
+-  if (rsa_generate_key(&rsa, key_length)) return TPM_FAIL;
+-  wrappedKey->pubKey.keyLength = key_length >> 3;
++  if (rsa_generate_key(&rsa, keyInfo->algorithmParms.parms.rsa.keyLength)) 
++    return TPM_FAIL;
++  wrappedKey->pubKey.keyLength = keyInfo->algorithmParms.parms.rsa.keyLength 
>> 3;
+   wrappedKey->pubKey.key = tpm_malloc(wrappedKey->pubKey.keyLength);
+-  store.privKey.keyLength = key_length >> 4;
++  store.privKey.keyLength = keyInfo->algorithmParms.parms.rsa.keyLength >> 4;
+   store.privKey.key = tpm_malloc(store.privKey.keyLength);
+   wrappedKey->encDataSize = parent->key.size >> 3;
+   wrappedKey->encData = tpm_malloc(wrappedKey->encDataSize);
+@@ -444,9 +460,11 @@ TPM_RESULT TPM_CreateWrapKey(TPM_KEY_HAN
+     tpm_free(wrappedKey->encData);
+     return TPM_FAIL;
+   }
+-  rsa_export_modulus(&rsa, wrappedKey->pubKey.key, 
+-    &wrappedKey->pubKey.keyLength);
+-  rsa_export_prime1(&rsa, store.privKey.key, &store.privKey.keyLength);
++  rsa_export_modulus(&rsa, wrappedKey->pubKey.key,
++                   &key_length);
++  wrappedKey->pubKey.keyLength = (UINT32) key_length;
++  rsa_export_prime1(&rsa, store.privKey.key, &key_length);
++  store.privKey.keyLength = (UINT32) key_length;
+   rsa_release_private_key(&rsa);
+   /* encrypt private key data */
+   if (encrypt_private_key(parent, &store, wrappedKey->encData, 
+@@ -560,6 +578,7 @@ TPM_RESULT TPM_LoadKey(TPM_KEY_HANDLE pa
+ 
+ int tpm_setup_key_parms(TPM_KEY_DATA *key, TPM_KEY_PARMS *parms)
+ {
++  size_t key_length;
+   parms->algorithmID = TPM_ALG_RSA;
+   parms->encScheme = key->encScheme;
+   parms->sigScheme = key->sigScheme;
+@@ -569,7 +588,8 @@ int tpm_setup_key_parms(TPM_KEY_DATA *ke
+   parms->parms.rsa.exponent = tpm_malloc(parms->parms.rsa.exponentSize);
+   if (parms->parms.rsa.exponent == NULL) return -1;
+   rsa_export_exponent(&key->key, parms->parms.rsa.exponent,
+-    &parms->parms.rsa.exponentSize);
++                      &key_length);  
++  parms->parms.rsa.exponentSize = (UINT32) key_length;
+   parms->parmSize = 12 + parms->parms.rsa.exponentSize;  
+   return 0;
+ }
+@@ -580,6 +600,7 @@ TPM_RESULT TPM_GetPubKey(TPM_KEY_HANDLE 
+   TPM_RESULT res;
+   TPM_KEY_DATA *key;
+   TPM_DIGEST digest;
++  size_t key_length;
+   info("TPM_GetPubKey()");
+   /* get key */
+   if (keyHandle == TPM_KH_SRK) return TPM_BAD_PARAMETER;
+@@ -607,8 +628,8 @@ TPM_RESULT TPM_GetPubKey(TPM_KEY_HANDLE 
+   pubKey->pubKey.keyLength = key->key.size >> 3;
+   pubKey->pubKey.key = tpm_malloc(pubKey->pubKey.keyLength);
+   if (pubKey->pubKey.key == NULL) return TPM_FAIL;
+-  rsa_export_modulus(&key->key, pubKey->pubKey.key, 
+-    &pubKey->pubKey.keyLength);
++  rsa_export_modulus(&key->key, pubKey->pubKey.key, &key_length); 
++  pubKey->pubKey.keyLength = (UINT32) key_length;
+   if (tpm_setup_key_parms(key, &pubKey->algorithmParms) != 0) {
+     tpm_free(pubKey->pubKey.key);
+     return TPM_FAIL;  
+diff -uprN tpm_emulator-0.2/tpm_version.h tpm_emulator-0.2-x86_64/tpm_version.h
+--- tpm_emulator-0.2/tpm_version.h     2005-08-15 00:58:57.000000000 -0700
++++ tpm_emulator-0.2-x86_64/tpm_version.h      1969-12-31 16:00:00.000000000 
-0800
+@@ -1,6 +0,0 @@
+-#ifndef _TPM_VERSION_H_
+-#define _TPM_VERSION_H_
+-#define VERSION_MAJOR 0
+-#define VERSION_MINOR 2
+-#define VERSION_BUILD 1123950310
+-#endif /* _TPM_VERSION_H_ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/char/tpm/tpm.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c       Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,627 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Authors:
- * Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
- * Dave Safford <safford@xxxxxxxxxxxxxx>
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Kylene Hall <kjhall@xxxxxxxxxx>
- *
- * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx>
- *
- * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
- *
- * Note, the TPM chip is not interrupt driven (only polling)
- * and can have very long timeouts (minutes!). Hence the unusual
- * calls to schedule_timeout.
- *
- */
-
-#include <linux/sched.h>
-#include <linux/poll.h>
-#include <linux/spinlock.h>
-#include "tpm.h"
-
-#define        TPM_MINOR                       224     /* officially assigned 
*/
-
-#define        TPM_BUFSIZE                     2048
-
-static LIST_HEAD(tpm_chip_list);
-static DEFINE_SPINLOCK(driver_lock);
-static int dev_mask[32];
-
-static void user_reader_timeout(unsigned long ptr)
-{
-       struct tpm_chip *chip = (struct tpm_chip *) ptr;
-
-       down(&chip->buffer_mutex);
-       atomic_set(&chip->data_pending, 0);
-       memset(chip->data_buffer, 0, TPM_BUFSIZE);
-       up(&chip->buffer_mutex);
-}
-
-void tpm_time_expired(unsigned long ptr)
-{
-       int *exp = (int *) ptr;
-       *exp = 1;
-}
-
-EXPORT_SYMBOL_GPL(tpm_time_expired);
-
-/*
- * Internal kernel interface to transmit TPM commands
- */
-static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
-                           size_t bufsiz)
-{
-       ssize_t len;
-       u32 count;
-       __be32 *native_size;
-
-       native_size = (__force __be32 *) (buf + 2);
-       count = be32_to_cpu(*native_size);
-
-       if (count == 0)
-               return -ENODATA;
-       if (count > bufsiz) {
-               dev_err(&chip->pci_dev->dev,
-                       "invalid count value %x %zx \n", count, bufsiz);
-               return -E2BIG;
-       }
-
-       down(&chip->tpm_mutex);
-
-       if ((len = chip->vendor->send(chip, (u8 *) buf, count)) < 0) {
-               dev_err(&chip->pci_dev->dev,
-                       "tpm_transmit: tpm_send: error %zd\n", len);
-               return len;
-       }
-
-       down(&chip->timer_manipulation_mutex);
-       chip->time_expired = 0;
-       init_timer(&chip->device_timer);
-       chip->device_timer.function = tpm_time_expired;
-       chip->device_timer.expires = jiffies + 2 * 60 * HZ;
-       chip->device_timer.data = (unsigned long) &chip->time_expired;
-       add_timer(&chip->device_timer);
-       up(&chip->timer_manipulation_mutex);
-
-       do {
-               u8 status = inb(chip->vendor->base + 1);
-               if ((status & chip->vendor->req_complete_mask) ==
-                   chip->vendor->req_complete_val) {
-                       down(&chip->timer_manipulation_mutex);
-                       del_singleshot_timer_sync(&chip->device_timer);
-                       up(&chip->timer_manipulation_mutex);
-                       goto out_recv;
-               }
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(TPM_TIMEOUT);
-               rmb();
-       } while (!chip->time_expired);
-
-
-       chip->vendor->cancel(chip);
-       dev_err(&chip->pci_dev->dev, "Time expired\n");
-       up(&chip->tpm_mutex);
-       return -EIO;
-
-out_recv:
-       len = chip->vendor->recv(chip, (u8 *) buf, bufsiz);
-       if (len < 0)
-               dev_err(&chip->pci_dev->dev,
-                       "tpm_transmit: tpm_recv: error %zd\n", len);
-       up(&chip->tpm_mutex);
-       return len;
-}
-
-#define TPM_DIGEST_SIZE 20
-#define CAP_PCR_RESULT_SIZE 18
-static u8 cap_pcr[] = {
-       0, 193,                 /* TPM_TAG_RQU_COMMAND */
-       0, 0, 0, 22,            /* length */
-       0, 0, 0, 101,           /* TPM_ORD_GetCapability */
-       0, 0, 0, 5,
-       0, 0, 0, 4,
-       0, 0, 1, 1
-};
-
-#define READ_PCR_RESULT_SIZE 30
-static u8 pcrread[] = {
-       0, 193,                 /* TPM_TAG_RQU_COMMAND */
-       0, 0, 0, 14,            /* length */
-       0, 0, 0, 21,            /* TPM_ORD_PcrRead */
-       0, 0, 0, 0              /* PCR index */
-};
-
-static ssize_t show_pcrs(struct device *dev, char *buf)
-{
-       u8 data[READ_PCR_RESULT_SIZE];
-       ssize_t len;
-       int i, j, index, num_pcrs;
-       char *str = buf;
-
-       struct tpm_chip *chip =
-           pci_get_drvdata(container_of(dev, struct pci_dev, dev));
-       if (chip == NULL)
-               return -ENODEV;
-
-       memcpy(data, cap_pcr, sizeof(cap_pcr));
-       if ((len = tpm_transmit(chip, data, sizeof(data)))
-           < CAP_PCR_RESULT_SIZE)
-               return len;
-
-       num_pcrs = be32_to_cpu(*((__force __be32 *) (data + 14)));
-
-       for (i = 0; i < num_pcrs; i++) {
-               memcpy(data, pcrread, sizeof(pcrread));
-               index = cpu_to_be32(i);
-               memcpy(data + 10, &index, 4);
-               if ((len = tpm_transmit(chip, data, sizeof(data)))
-                   < READ_PCR_RESULT_SIZE)
-                       return len;
-               str += sprintf(str, "PCR-%02d: ", i);
-               for (j = 0; j < TPM_DIGEST_SIZE; j++)
-                       str += sprintf(str, "%02X ", *(data + 10 + j));
-               str += sprintf(str, "\n");
-       }
-       return str - buf;
-}
-
-static DEVICE_ATTR(pcrs, S_IRUGO, show_pcrs, NULL);
-
-#define  READ_PUBEK_RESULT_SIZE 314
-static u8 readpubek[] = {
-       0, 193,                 /* TPM_TAG_RQU_COMMAND */
-       0, 0, 0, 30,            /* length */
-       0, 0, 0, 124,           /* TPM_ORD_ReadPubek */
-};
-
-static ssize_t show_pubek(struct device *dev, char *buf)
-{
-       u8 data[READ_PUBEK_RESULT_SIZE];
-       ssize_t len;
-       __be32 *native_val;
-       int i;
-       char *str = buf;
-
-       struct tpm_chip *chip =
-           pci_get_drvdata(container_of(dev, struct pci_dev, dev));
-       if (chip == NULL)
-               return -ENODEV;
-
-       memcpy(data, readpubek, sizeof(readpubek));
-       memset(data + sizeof(readpubek), 0, 20);        /* zero nonce */
-
-       if ((len = tpm_transmit(chip, data, sizeof(data))) <
-           READ_PUBEK_RESULT_SIZE)
-               return len;
-
-       /*
-          ignore header 10 bytes
-          algorithm 32 bits (1 == RSA )
-          encscheme 16 bits
-          sigscheme 16 bits
-          parameters (RSA 12->bytes: keybit, #primes, expbit)
-          keylenbytes 32 bits
-          256 byte modulus
-          ignore checksum 20 bytes
-        */
-
-       native_val = (__force __be32 *) (data + 34);
-
-       str +=
-           sprintf(str,
-                   "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n"
-                   "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X"
-                   " %02X %02X %02X %02X %02X %02X %02X %02X\n"
-                   "Modulus length: %d\nModulus: \n",
-                   data[10], data[11], data[12], data[13], data[14],
-                   data[15], data[16], data[17], data[22], data[23],
-                   data[24], data[25], data[26], data[27], data[28],
-                   data[29], data[30], data[31], data[32], data[33],
-                   be32_to_cpu(*native_val)
-           );
-
-       for (i = 0; i < 256; i++) {
-               str += sprintf(str, "%02X ", data[i + 39]);
-               if ((i + 1) % 16 == 0)
-                       str += sprintf(str, "\n");
-       }
-       return str - buf;
-}
-
-static DEVICE_ATTR(pubek, S_IRUGO, show_pubek, NULL);
-
-#define CAP_VER_RESULT_SIZE 18
-static u8 cap_version[] = {
-       0, 193,                 /* TPM_TAG_RQU_COMMAND */
-       0, 0, 0, 18,            /* length */
-       0, 0, 0, 101,           /* TPM_ORD_GetCapability */
-       0, 0, 0, 6,
-       0, 0, 0, 0
-};
-
-#define CAP_MANUFACTURER_RESULT_SIZE 18
-static u8 cap_manufacturer[] = {
-       0, 193,                 /* TPM_TAG_RQU_COMMAND */
-       0, 0, 0, 22,            /* length */
-       0, 0, 0, 101,           /* TPM_ORD_GetCapability */
-       0, 0, 0, 5,
-       0, 0, 0, 4,
-       0, 0, 1, 3
-};
-
-static ssize_t show_caps(struct device *dev, char *buf)
-{
-       u8 data[READ_PUBEK_RESULT_SIZE];
-       ssize_t len;
-       char *str = buf;
-
-       struct tpm_chip *chip =
-           pci_get_drvdata(container_of(dev, struct pci_dev, dev));
-       if (chip == NULL)
-               return -ENODEV;
-
-       memcpy(data, cap_manufacturer, sizeof(cap_manufacturer));
-
-       if ((len = tpm_transmit(chip, data, sizeof(data))) <
-           CAP_MANUFACTURER_RESULT_SIZE)
-               return len;
-
-       str += sprintf(str, "Manufacturer: 0x%x\n",
-                      be32_to_cpu(*(data + 14)));
-
-       memcpy(data, cap_version, sizeof(cap_version));
-
-       if ((len = tpm_transmit(chip, data, sizeof(data))) <
-           CAP_VER_RESULT_SIZE)
-               return len;
-
-       str +=
-           sprintf(str, "TCG version: %d.%d\nFirmware version: %d.%d\n",
-                   (int) data[14], (int) data[15], (int) data[16],
-                   (int) data[17]);
-
-       return str - buf;
-}
-
-static DEVICE_ATTR(caps, S_IRUGO, show_caps, NULL);
-
-/*
- * Device file system interface to the TPM
- */
-int tpm_open(struct inode *inode, struct file *file)
-{
-       int rc = 0, minor = iminor(inode);
-       struct tpm_chip *chip = NULL, *pos;
-
-       spin_lock(&driver_lock);
-
-       list_for_each_entry(pos, &tpm_chip_list, list) {
-               if (pos->vendor->miscdev.minor == minor) {
-                       chip = pos;
-                       break;
-               }
-       }
-
-       if (chip == NULL) {
-               rc = -ENODEV;
-               goto err_out;
-       }
-
-       if (chip->num_opens) {
-               dev_dbg(&chip->pci_dev->dev,
-                       "Another process owns this TPM\n");
-               rc = -EBUSY;
-               goto err_out;
-       }
-
-       chip->num_opens++;
-       pci_dev_get(chip->pci_dev);
-
-       spin_unlock(&driver_lock);
-
-       chip->data_buffer = kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL);
-       if (chip->data_buffer == NULL) {
-               chip->num_opens--;
-               pci_dev_put(chip->pci_dev);
-               return -ENOMEM;
-       }
-
-       atomic_set(&chip->data_pending, 0);
-
-       file->private_data = chip;
-       return 0;
-
-err_out:
-       spin_unlock(&driver_lock);
-       return rc;
-}
-
-EXPORT_SYMBOL_GPL(tpm_open);
-
-int tpm_release(struct inode *inode, struct file *file)
-{
-       struct tpm_chip *chip = file->private_data;
-
-       file->private_data = NULL;
-
-       spin_lock(&driver_lock);
-       chip->num_opens--;
-       spin_unlock(&driver_lock);
-
-       down(&chip->timer_manipulation_mutex);
-       if (timer_pending(&chip->user_read_timer))
-               del_singleshot_timer_sync(&chip->user_read_timer);
-       else if (timer_pending(&chip->device_timer))
-               del_singleshot_timer_sync(&chip->device_timer);
-       up(&chip->timer_manipulation_mutex);
-
-       kfree(chip->data_buffer);
-       atomic_set(&chip->data_pending, 0);
-
-       pci_dev_put(chip->pci_dev);
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_release);
-
-ssize_t tpm_write(struct file * file, const char __user * buf,
-                 size_t size, loff_t * off)
-{
-       struct tpm_chip *chip = file->private_data;
-       int in_size = size, out_size;
-
-       /* cannot perform a write until the read has cleared
-          either via tpm_read or a user_read_timer timeout */
-       while (atomic_read(&chip->data_pending) != 0) {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(TPM_TIMEOUT);
-       }
-
-       down(&chip->buffer_mutex);
-
-       if (in_size > TPM_BUFSIZE)
-               in_size = TPM_BUFSIZE;
-
-       if (copy_from_user
-           (chip->data_buffer, (void __user *) buf, in_size)) {
-               up(&chip->buffer_mutex);
-               return -EFAULT;
-       }
-
-       /* atomic tpm command send and result receive */
-       out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE);
-
-       atomic_set(&chip->data_pending, out_size);
-       atomic_set(&chip->data_position, 0);
-       up(&chip->buffer_mutex);
-
-       /* Set a timeout by which the reader must come claim the result */
-       down(&chip->timer_manipulation_mutex);
-       init_timer(&chip->user_read_timer);
-       chip->user_read_timer.function = user_reader_timeout;
-       chip->user_read_timer.data = (unsigned long) chip;
-       chip->user_read_timer.expires = jiffies + (60 * HZ);
-       add_timer(&chip->user_read_timer);
-       up(&chip->timer_manipulation_mutex);
-
-       return in_size;
-}
-
-EXPORT_SYMBOL_GPL(tpm_write);
-
-ssize_t tpm_read(struct file * file, char __user * buf,
-                size_t size, loff_t * off)
-{
-       struct tpm_chip *chip = file->private_data;
-       int ret_size = -ENODATA;
-       int pos, pending = 0;
-
-       down(&chip->buffer_mutex);
-       ret_size = atomic_read(&chip->data_pending);
-       if ( ret_size > 0 ) {   /* Result available */
-               if (size < ret_size)
-                       ret_size = size;
-
-               pos = atomic_read(&chip->data_position);
-
-               if (copy_to_user((void __user *) buf,
-                                &chip->data_buffer[pos], ret_size)) {
-                       ret_size = -EFAULT;
-               } else {
-                       pending = atomic_read(&chip->data_pending) - ret_size;
-                       if ( pending ) {
-                               atomic_set( &chip->data_pending, pending );
-                               atomic_set( &chip->data_position, pos+ret_size );
-                       }
-               }
-       }
-       up(&chip->buffer_mutex);
-
-       if ( ret_size <= 0 || pending == 0 ) {
-               atomic_set( &chip->data_pending, 0 );
-               down(&chip->timer_manipulation_mutex);
-               del_singleshot_timer_sync(&chip->user_read_timer);
-               up(&chip->timer_manipulation_mutex);
-       }
-
-       return ret_size;
-}
-
-EXPORT_SYMBOL_GPL(tpm_read);
-
-void __devexit tpm_remove(struct pci_dev *pci_dev)
-{
-       struct tpm_chip *chip = pci_get_drvdata(pci_dev);
-
-       if (chip == NULL) {
-               dev_err(&pci_dev->dev, "No device data found\n");
-               return;
-       }
-
-       spin_lock(&driver_lock);
-
-       list_del(&chip->list);
-
-       spin_unlock(&driver_lock);
-
-       pci_set_drvdata(pci_dev, NULL);
-       misc_deregister(&chip->vendor->miscdev);
-
-       device_remove_file(&pci_dev->dev, &dev_attr_pubek);
-       device_remove_file(&pci_dev->dev, &dev_attr_pcrs);
-       device_remove_file(&pci_dev->dev, &dev_attr_caps);
-
-       pci_disable_device(pci_dev);
-
-       dev_mask[chip->dev_num / 32] &= !(1 << (chip->dev_num % 32));
-
-       kfree(chip);
-
-       pci_dev_put(pci_dev);
-}
-
-EXPORT_SYMBOL_GPL(tpm_remove);
-
-static u8 savestate[] = {
-       0, 193,                 /* TPM_TAG_RQU_COMMAND */
-       0, 0, 0, 10,            /* blob length (in bytes) */
-       0, 0, 0, 152            /* TPM_ORD_SaveState */
-};
-
-/*
- * We are about to suspend. Save the TPM state
- * so that it can be restored.
- */
-int tpm_pm_suspend(struct pci_dev *pci_dev, pm_message_t pm_state)
-{
-       struct tpm_chip *chip = pci_get_drvdata(pci_dev);
-       if (chip == NULL)
-               return -ENODEV;
-
-       tpm_transmit(chip, savestate, sizeof(savestate));
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_pm_suspend);
-
-/*
- * Resume from a power safe. The BIOS already restored
- * the TPM state.
- */
-int tpm_pm_resume(struct pci_dev *pci_dev)
-{
-       struct tpm_chip *chip = pci_get_drvdata(pci_dev);
-
-       if (chip == NULL)
-               return -ENODEV;
-
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_pm_resume);
-
-/*
- * Called from tpm_<specific>.c probe function only for devices
- * the driver has determined it should claim.  Prior to calling
- * this function the specific probe function has called pci_enable_device
- * upon errant exit from this function specific probe function should call
- * pci_disable_device
- */
-int tpm_register_hardware(struct pci_dev *pci_dev,
-                         struct tpm_vendor_specific *entry)
-{
-       char devname[7];
-       struct tpm_chip *chip;
-       int i, j;
-
-       /* Driver specific per-device data */
-       chip = kmalloc(sizeof(*chip), GFP_KERNEL);
-       if (chip == NULL)
-               return -ENOMEM;
-
-       memset(chip, 0, sizeof(struct tpm_chip));
-
-       init_MUTEX(&chip->buffer_mutex);
-       init_MUTEX(&chip->tpm_mutex);
-       init_MUTEX(&chip->timer_manipulation_mutex);
-       INIT_LIST_HEAD(&chip->list);
-
-       chip->vendor = entry;
-
-       chip->dev_num = -1;
-
-       for (i = 0; i < 32; i++)
-               for (j = 0; j < 8; j++)
-                       if ((dev_mask[i] & (1 << j)) == 0) {
-                               chip->dev_num = i * 32 + j;
-                               dev_mask[i] |= 1 << j;
-                               goto dev_num_search_complete;
-                       }
-
-dev_num_search_complete:
-       if (chip->dev_num < 0) {
-               dev_err(&pci_dev->dev,
-                       "No available tpm device numbers\n");
-               kfree(chip);
-               return -ENODEV;
-       } else if (chip->dev_num == 0)
-               chip->vendor->miscdev.minor = TPM_MINOR;
-       else
-               chip->vendor->miscdev.minor = MISC_DYNAMIC_MINOR;
-
-       snprintf(devname, sizeof(devname), "%s%d", "tpm", chip->dev_num);
-       chip->vendor->miscdev.name = devname;
-
-       chip->vendor->miscdev.dev = &(pci_dev->dev);
-       chip->pci_dev = pci_dev_get(pci_dev);
-
-       if (misc_register(&chip->vendor->miscdev)) {
-               dev_err(&chip->pci_dev->dev,
-                       "unable to misc_register %s, minor %d\n",
-                       chip->vendor->miscdev.name,
-                       chip->vendor->miscdev.minor);
-               pci_dev_put(pci_dev);
-               kfree(chip);
-               dev_mask[i] &= !(1 << j);
-               return -ENODEV;
-       }
-
-       pci_set_drvdata(pci_dev, chip);
-
-       list_add(&chip->list, &tpm_chip_list);
-
-       device_create_file(&pci_dev->dev, &dev_attr_pubek);
-       device_create_file(&pci_dev->dev, &dev_attr_pcrs);
-       device_create_file(&pci_dev->dev, &dev_attr_caps);
-
-       return 0;
-}
-
-EXPORT_SYMBOL_GPL(tpm_register_hardware);
-
-static int __init init_tpm(void)
-{
-       return 0;
-}
-
-static void __exit cleanup_tpm(void)
-{
-
-}
-
-module_init(init_tpm);
-module_exit(cleanup_tpm);
-
-MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)");
-MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
-MODULE_LICENSE("GPL");
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/char/tpm/tpm.h
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h       Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,92 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Authors:
- * Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
- * Dave Safford <safford@xxxxxxxxxxxxxx>
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Kylene Hall <kjhall@xxxxxxxxxx>
- *
- * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx>
- *
- * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
- *
- */
-#include <linux/module.h>
-#include <linux/version.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
-
-#define TPM_TIMEOUT msecs_to_jiffies(5)
-
-/* TPM addresses */
-#define        TPM_ADDR                        0x4E
-#define        TPM_DATA                        0x4F
-
-struct tpm_chip;
-
-struct tpm_vendor_specific {
-       u8 req_complete_mask;
-       u8 req_complete_val;
-       u16 base;               /* TPM base address */
-
-       int (*recv) (struct tpm_chip *, u8 *, size_t);
-       int (*send) (struct tpm_chip *, u8 *, size_t);
-       void (*cancel) (struct tpm_chip *);
-       struct miscdevice miscdev;
-};
-
-struct tpm_chip {
-       struct pci_dev *pci_dev;        /* PCI device stuff */
-
-       int dev_num;            /* /dev/tpm# */
-       int num_opens;          /* only one allowed */
-       int time_expired;
-
-       /* Data passed to and from the tpm via the read/write calls */
-       u8 *data_buffer;
-       atomic_t data_pending;
-       atomic_t data_position;
-       struct semaphore buffer_mutex;
-
-       struct timer_list user_read_timer;      /* user needs to claim result */
-       struct semaphore tpm_mutex;     /* tpm is processing */
-       struct timer_list device_timer; /* tpm is processing */
-       struct semaphore timer_manipulation_mutex;
-
-       struct tpm_vendor_specific *vendor;
-
-       struct list_head list;
-};
-
-static inline int tpm_read_index(int index)
-{
-       outb(index, TPM_ADDR);
-       return inb(TPM_DATA) & 0xFF;
-}
-
-static inline void tpm_write_index(int index, int value)
-{
-       outb(index, TPM_ADDR);
-       outb(value & 0xFF, TPM_DATA);
-}
-
-extern void tpm_time_expired(unsigned long);
-extern int tpm_register_hardware(struct pci_dev *,
-                                struct tpm_vendor_specific *);
-extern int tpm_open(struct inode *, struct file *);
-extern int tpm_release(struct inode *, struct file *);
-extern ssize_t tpm_write(struct file *, const char __user *, size_t,
-                        loff_t *);
-extern ssize_t tpm_read(struct file *, char __user *, size_t, loff_t *);
-extern void __devexit tpm_remove(struct pci_dev *);
-extern int tpm_pm_suspend(struct pci_dev *, pm_message_t);
-extern int tpm_pm_resume(struct pci_dev *);
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,220 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Authors:
- * Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
- * Dave Safford <safford@xxxxxxxxxxxxxx>
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Kylene Hall <kjhall@xxxxxxxxxx>
- *
- * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx>
- *
- * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
- *
- */
-
-#include "tpm.h"
-
-/* Atmel definitions */
-enum tpm_atmel_addr {
-       TPM_ATMEL_BASE_ADDR_LO = 0x08,
-       TPM_ATMEL_BASE_ADDR_HI = 0x09
-};
-
-/* write status bits */
-#define        ATML_STATUS_ABORT               0x01
-#define        ATML_STATUS_LASTBYTE            0x04
-
-/* read status bits */
-#define        ATML_STATUS_BUSY                0x01
-#define        ATML_STATUS_DATA_AVAIL          0x02
-#define        ATML_STATUS_REWRITE             0x04
-
-
-static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count)
-{
-       u8 status, *hdr = buf;
-       u32 size;
-       int i;
-       __be32 *native_size;
-
-       /* start reading header */
-       if (count < 6)
-               return -EIO;
-
-       for (i = 0; i < 6; i++) {
-               status = inb(chip->vendor->base + 1);
-               if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-                       dev_err(&chip->pci_dev->dev,
-                               "error reading header\n");
-                       return -EIO;
-               }
-               *buf++ = inb(chip->vendor->base);
-       }
-
-       /* size of the data received */
-       native_size = (__force __be32 *) (hdr + 2);
-       size = be32_to_cpu(*native_size);
-
-       if (count < size) {
-               dev_err(&chip->pci_dev->dev,
-                       "Recv size(%d) less than available space\n", size);
-               for (; i < size; i++) { /* clear the waiting data anyway */
-                       status = inb(chip->vendor->base + 1);
-                       if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-                               dev_err(&chip->pci_dev->dev,
-                                       "error reading data\n");
-                               return -EIO;
-                       }
-               }
-               return -EIO;
-       }
-
-       /* read all the data available */
-       for (; i < size; i++) {
-               status = inb(chip->vendor->base + 1);
-               if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
-                       dev_err(&chip->pci_dev->dev,
-                               "error reading data\n");
-                       return -EIO;
-               }
-               *buf++ = inb(chip->vendor->base);
-       }
-
-       /* make sure data available is gone */
-       status = inb(chip->vendor->base + 1);
-       if (status & ATML_STATUS_DATA_AVAIL) {
-               dev_err(&chip->pci_dev->dev, "data available is stuck\n");
-               return -EIO;
-       }
-
-       return size;
-}
-
-static int tpm_atml_send(struct tpm_chip *chip, u8 * buf, size_t count)
-{
-       int i;
-
-       dev_dbg(&chip->pci_dev->dev, "tpm_atml_send: ");
-       for (i = 0; i < count; i++) {
-               dev_dbg(&chip->pci_dev->dev, "0x%x(%d) ", buf[i], buf[i]);
-               outb(buf[i], chip->vendor->base);
-       }
-
-       return count;
-}
-
-static void tpm_atml_cancel(struct tpm_chip *chip)
-{
-       outb(ATML_STATUS_ABORT, chip->vendor->base + 1);
-}
-
-static struct file_operations atmel_ops = {
-       .owner = THIS_MODULE,
-       .llseek = no_llseek,
-       .open = tpm_open,
-       .read = tpm_read,
-       .write = tpm_write,
-       .release = tpm_release,
-};
-
-static struct tpm_vendor_specific tpm_atmel = {
-       .recv = tpm_atml_recv,
-       .send = tpm_atml_send,
-       .cancel = tpm_atml_cancel,
-       .req_complete_mask = ATML_STATUS_BUSY | ATML_STATUS_DATA_AVAIL,
-       .req_complete_val = ATML_STATUS_DATA_AVAIL,
-       .miscdev = { .fops = &atmel_ops, },
-};
-
-static int __devinit tpm_atml_init(struct pci_dev *pci_dev,
-                                  const struct pci_device_id *pci_id)
-{
-       u8 version[4];
-       int rc = 0;
-       int lo, hi;
-
-       if (pci_enable_device(pci_dev))
-               return -EIO;
-
-       lo = tpm_read_index( TPM_ATMEL_BASE_ADDR_LO );
-       hi = tpm_read_index( TPM_ATMEL_BASE_ADDR_HI );
-
-       tpm_atmel.base = (hi<<8)|lo;
-       dev_dbg( &pci_dev->dev, "Operating with base: 0x%x\n", tpm_atmel.base);
-
-       /* verify that it is an Atmel part */
-       if (tpm_read_index(4) != 'A' || tpm_read_index(5) != 'T'
-           || tpm_read_index(6) != 'M' || tpm_read_index(7) != 'L') {
-               rc = -ENODEV;
-               goto out_err;
-       }
-
-       /* query chip for its version number */
-       if ((version[0] = tpm_read_index(0x00)) != 0xFF) {
-               version[1] = tpm_read_index(0x01);
-               version[2] = tpm_read_index(0x02);
-               version[3] = tpm_read_index(0x03);
-       } else {
-               dev_info(&pci_dev->dev, "version query failed\n");
-               rc = -ENODEV;
-               goto out_err;
-       }
-
-       if ((rc = tpm_register_hardware(pci_dev, &tpm_atmel)) < 0)
-               goto out_err;
-
-       dev_info(&pci_dev->dev,
-                "Atmel TPM version %d.%d.%d.%d\n", version[0], version[1],
-                version[2], version[3]);
-
-       return 0;
-out_err:
-       pci_disable_device(pci_dev);
-       return rc;
-}
-
-static struct pci_device_id tpm_pci_tbl[] __devinitdata = {
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0)},
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12)},
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0)},
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12)},
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0)},
-       {PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_LPC)},
-       {0,}
-};
-
-MODULE_DEVICE_TABLE(pci, tpm_pci_tbl);
-
-static struct pci_driver atmel_pci_driver = {
-       .name = "tpm_atmel",
-       .id_table = tpm_pci_tbl,
-       .probe = tpm_atml_init,
-       .remove = __devexit_p(tpm_remove),
-       .suspend = tpm_pm_suspend,
-       .resume = tpm_pm_resume,
-};
-
-static int __init init_atmel(void)
-{
-       return pci_register_driver(&atmel_pci_driver);
-}
-
-static void __exit cleanup_atmel(void)
-{
-       pci_unregister_driver(&atmel_pci_driver);
-}
-
-module_init(init_atmel);
-module_exit(cleanup_atmel);
-
-MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)");
-MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
-MODULE_LICENSE("GPL");
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/char/tpm/tpm_nsc.c
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_nsc.c   Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,377 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Authors:
- * Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
- * Dave Safford <safford@xxxxxxxxxxxxxx>
- * Reiner Sailer <sailer@xxxxxxxxxxxxxx>
- * Kylene Hall <kjhall@xxxxxxxxxx>
- *
- * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx>
- *
- * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, version 2 of the
- * License.
- *
- */
-
-#include "tpm.h"
-
-/* National definitions */
-#define        TPM_NSC_BASE                    0x360
-#define        TPM_NSC_IRQ                     0x07
-#define        TPM_NSC_BASE0_HI                0x60
-#define        TPM_NSC_BASE0_LO                0x61
-#define        TPM_NSC_BASE1_HI                0x62
-#define        TPM_NSC_BASE1_LO                0x63
-
-#define        NSC_LDN_INDEX                   0x07
-#define        NSC_SID_INDEX                   0x20
-#define        NSC_LDC_INDEX                   0x30
-#define        NSC_DIO_INDEX                   0x60
-#define        NSC_CIO_INDEX                   0x62
-#define        NSC_IRQ_INDEX                   0x70
-#define        NSC_ITS_INDEX                   0x71
-
-#define        NSC_STATUS                      0x01
-#define        NSC_COMMAND                     0x01
-#define        NSC_DATA                        0x00
-
-/* status bits */
-#define        NSC_STATUS_OBF                  0x01    /* output buffer full */
-#define        NSC_STATUS_IBF                  0x02    /* input buffer full */
-#define        NSC_STATUS_F0                   0x04    /* F0 */
-#define        NSC_STATUS_A2                   0x08    /* A2 */
-#define        NSC_STATUS_RDY                  0x10    /* ready to receive command */
-#define        NSC_STATUS_IBR                  0x20    /* ready to receive data */
-
-/* command bits */
-#define        NSC_COMMAND_NORMAL              0x01    /* normal mode */
-#define        NSC_COMMAND_EOC                 0x03
-#define        NSC_COMMAND_CANCEL              0x22
-
-/*
- * Wait for a certain status to appear
- */
-static int wait_for_stat(struct tpm_chip *chip, u8 mask, u8 val, u8 * data)
-{
-       int expired = 0;
-       struct timer_list status_timer =
-           TIMER_INITIALIZER(tpm_time_expired, jiffies + 10 * HZ,
-                             (unsigned long) &expired);
-
-       /* status immediately available check */
-       *data = inb(chip->vendor->base + NSC_STATUS);
-       if ((*data & mask) == val)
-               return 0;
-
-       /* wait for status */
-       add_timer(&status_timer);
-       do {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(TPM_TIMEOUT);
-               *data = inb(chip->vendor->base + 1);
-               if ((*data & mask) == val) {
-                       del_singleshot_timer_sync(&status_timer);
-                       return 0;
-               }
-       }
-       while (!expired);
-
-       return -EBUSY;
-}
-
-static int nsc_wait_for_ready(struct tpm_chip *chip)
-{
-       int status;
-       int expired = 0;
-       struct timer_list status_timer =
-           TIMER_INITIALIZER(tpm_time_expired, jiffies + 100,
-                             (unsigned long) &expired);
-
-       /* status immediately available check */
-       status = inb(chip->vendor->base + NSC_STATUS);
-       if (status & NSC_STATUS_OBF)
-               status = inb(chip->vendor->base + NSC_DATA);
-       if (status & NSC_STATUS_RDY)
-               return 0;
-
-       /* wait for status */
-       add_timer(&status_timer);
-       do {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(TPM_TIMEOUT);
-               status = inb(chip->vendor->base + NSC_STATUS);
-               if (status & NSC_STATUS_OBF)
-                       status = inb(chip->vendor->base + NSC_DATA);
-               if (status & NSC_STATUS_RDY) {
-                       del_singleshot_timer_sync(&status_timer);
-                       return 0;
-               }
-       }
-       while (!expired);
-
-       dev_info(&chip->pci_dev->dev, "wait for ready failed\n");
-       return -EBUSY;
-}
-
-
-static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count)
-{
-       u8 *buffer = buf;
-       u8 data, *p;
-       u32 size;
-       __be32 *native_size;
-
-       if (count < 6)
-               return -EIO;
-
-       if (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0) {
-               dev_err(&chip->pci_dev->dev, "F0 timeout\n");
-               return -EIO;
-       }
-       if ((data =
-            inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_NORMAL) {
-               dev_err(&chip->pci_dev->dev, "not in normal mode (0x%x)\n",
-                       data);
-               return -EIO;
-       }
-
-       /* read the whole packet */
-       for (p = buffer; p < &buffer[count]; p++) {
-               if (wait_for_stat
-                   (chip, NSC_STATUS_OBF, NSC_STATUS_OBF, &data) < 0) {
-                       dev_err(&chip->pci_dev->dev,
-                               "OBF timeout (while reading data)\n");
-                       return -EIO;
-               }
-               if (data & NSC_STATUS_F0)
-                       break;
-               *p = inb(chip->vendor->base + NSC_DATA);
-       }
-
-       if ((data & NSC_STATUS_F0) == 0) {
-               dev_err(&chip->pci_dev->dev, "F0 not set\n");
-               return -EIO;
-       }
-       if ((data = inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_EOC) {
-               dev_err(&chip->pci_dev->dev,
-                       "expected end of command(0x%x)\n", data);
-               return -EIO;
-       }
-
-       native_size = (__force __be32 *) (buf + 2);
-       size = be32_to_cpu(*native_size);
-
-       if (count < size)
-               return -EIO;
-
-       return size;
-}
-
-static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count)
-{
-       u8 data;
-       int i;
-
-       /*
-        * If we hit the chip with back to back commands it locks up
-        * and never set IBF. Hitting it with this "hammer" seems to
-        * fix it. Not sure why this is needed, we followed the flow
-        * chart in the manual to the letter.
-        */
-       outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND);
-
-       if (nsc_wait_for_ready(chip) != 0)
-               return -EIO;
-
-       if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) {
-               dev_err(&chip->pci_dev->dev, "IBF timeout\n");
-               return -EIO;
-       }
-
-       outb(NSC_COMMAND_NORMAL, chip->vendor->base + NSC_COMMAND);
-       if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) {
-               dev_err(&chip->pci_dev->dev, "IBR timeout\n");
-               return -EIO;
-       }
-
-       for (i = 0; i < count; i++) {
-               if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) {
-                       dev_err(&chip->pci_dev->dev,
-                               "IBF timeout (while writing data)\n");
-                       return -EIO;
-               }
-               outb(buf[i], chip->vendor->base + NSC_DATA);
-       }
-
-       if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) {
-               dev_err(&chip->pci_dev->dev, "IBF timeout\n");
-               return -EIO;
-       }
-       outb(NSC_COMMAND_EOC, chip->vendor->base + NSC_COMMAND);
-
-       return count;
-}
-
-static void tpm_nsc_cancel(struct tpm_chip *chip)
-{
-       outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND);
-}
-
-static struct file_operations nsc_ops = {
-       .owner = THIS_MODULE,
-       .llseek = no_llseek,
-       .open = tpm_open,
-       .read = tpm_read,
-       .write = tpm_write,
-       .release = tpm_release,
-};
-
-static struct tpm_vendor_specific tpm_nsc = {
-       .recv = tpm_nsc_recv,
-       .send = tpm_nsc_send,
-       .cancel = tpm_nsc_cancel,
-       .req_complete_mask = NSC_STATUS_OBF,
-       .req_complete_val = NSC_STATUS_OBF,
-       .miscdev = { .fops = &nsc_ops, },
-
-};
-
-static int __devinit tpm_nsc_init(struct pci_dev *pci_dev,
-                                 const struct pci_device_id *pci_id)
-{
-       int rc = 0;
-       int lo, hi;
-
-       hi = tpm_read_index(TPM_NSC_BASE0_HI);
-       lo = tpm_read_index(TPM_NSC_BASE0_LO);
-
-       tpm_nsc.base = (hi<<8) | lo;
-
-       if (pci_enable_device(pci_dev))
-               return -EIO;
-
-       /* verify that it is a National part (SID) */
-       if (tpm_read_index(NSC_SID_INDEX) != 0xEF) {
-               rc = -ENODEV;
-               goto out_err;
-       }
-
-       dev_dbg(&pci_dev->dev, "NSC TPM detected\n");
-       dev_dbg(&pci_dev->dev,
-               "NSC LDN 0x%x, SID 0x%x, SRID 0x%x\n",
-               tpm_read_index(0x07), tpm_read_index(0x20),
-               tpm_read_index(0x27));
-       dev_dbg(&pci_dev->dev,
-               "NSC SIOCF1 0x%x SIOCF5 0x%x SIOCF6 0x%x SIOCF8 0x%x\n",
-               tpm_read_index(0x21), tpm_read_index(0x25),
-               tpm_read_index(0x26), tpm_read_index(0x28));
-       dev_dbg(&pci_dev->dev, "NSC IO Base0 0x%x\n",
-               (tpm_read_index(0x60) << 8) | tpm_read_index(0x61));
-       dev_dbg(&pci_dev->dev, "NSC IO Base1 0x%x\n",
-               (tpm_read_index(0x62) << 8) | tpm_read_index(0x63));
-       dev_dbg(&pci_dev->dev, "NSC Interrupt number and wakeup 0x%x\n",
-               tpm_read_index(0x70));
-       dev_dbg(&pci_dev->dev, "NSC IRQ type select 0x%x\n",
-               tpm_read_index(0x71));
-       dev_dbg(&pci_dev->dev,
-               "NSC DMA channel select0 0x%x, select1 0x%x\n",
-               tpm_read_index(0x74), tpm_read_index(0x75));
-       dev_dbg(&pci_dev->dev,
-               "NSC Config "
-               "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
-               tpm_read_index(0xF0), tpm_read_index(0xF1),
-               tpm_read_index(0xF2), tpm_read_index(0xF3),
-               tpm_read_index(0xF4), tpm_read_index(0xF5),
-               tpm_read_index(0xF6), tpm_read_index(0xF7),
-               tpm_read_index(0xF8), tpm_read_index(0xF9));
-
-       dev_info(&pci_dev->dev,
-                "NSC PC21100 TPM revision %d\n",
-                tpm_read_index(0x27) & 0x1F);
-
-       if (tpm_read_index(NSC_LDC_INDEX) == 0)
-               dev_info(&pci_dev->dev, ": NSC TPM not active\n");
-
-       /* select PM channel 1 */
-       tpm_write_index(NSC_LDN_INDEX, 0x12);
-       tpm_read_index(NSC_LDN_INDEX);
-
-       /* disable the DPM module */
-       tpm_write_index(NSC_LDC_INDEX, 0);
-       tpm_read_index(NSC_LDC_INDEX);
-
-       /* set the data register base addresses */
-       tpm_write_index(NSC_DIO_INDEX, TPM_NSC_BASE >> 8);
-       tpm_write_index(NSC_DIO_INDEX + 1, TPM_NSC_BASE);
-       tpm_read_index(NSC_DIO_INDEX);
-       tpm_read_index(NSC_DIO_INDEX + 1);
-
-       /* set the command register base addresses */
-       tpm_write_index(NSC_CIO_INDEX, (TPM_NSC_BASE + 1) >> 8);
-       tpm_write_index(NSC_CIO_INDEX + 1, (TPM_NSC_BASE + 1));
-       tpm_read_index(NSC_DIO_INDEX);
-       tpm_read_index(NSC_DIO_INDEX + 1);
-
-       /* set the interrupt number to be used for the host interface */
-       tpm_write_index(NSC_IRQ_INDEX, TPM_NSC_IRQ);
-       tpm_write_index(NSC_ITS_INDEX, 0x00);
-       tpm_read_index(NSC_IRQ_INDEX);
-
-       /* enable the DPM module */
-       tpm_write_index(NSC_LDC_INDEX, 0x01);
-       tpm_read_index(NSC_LDC_INDEX);
-
-       if ((rc = tpm_register_hardware(pci_dev, &tpm_nsc)) < 0)
-               goto out_err;
-
-       return 0;
-
-out_err:
-       pci_disable_device(pci_dev);
-       return rc;
-}
-
-static struct pci_device_id tpm_pci_tbl[] __devinitdata = {
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0)},
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12)},
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0)},
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12)},
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0)},
-       {PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_LPC)},
-       {0,}
-};
-
-MODULE_DEVICE_TABLE(pci, tpm_pci_tbl);
-
-static struct pci_driver nsc_pci_driver = {
-       .name = "tpm_nsc",
-       .id_table = tpm_pci_tbl,
-       .probe = tpm_nsc_init,
-       .remove = __devexit_p(tpm_remove),
-       .suspend = tpm_pm_suspend,
-       .resume = tpm_pm_resume,
-};
-
-static int __init init_nsc(void)
-{
-       return pci_register_driver(&nsc_pci_driver);
-}
-
-static void __exit cleanup_nsc(void)
-{
-       pci_unregister_driver(&nsc_pci_driver);
-}
-
-module_init(init_nsc);
-module_exit(cleanup_nsc);
-
-MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)");
-MODULE_DESCRIPTION("TPM Driver");
-MODULE_VERSION("2.0");
-MODULE_LICENSE("GPL");
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h  Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,229 +0,0 @@
-/*
- * blktap.h
- * 
- * Interfaces for the Xen block tap driver.
- * 
- * (c) 2004, Andrew Warfield, University of Cambridge
- * 
- */
-
-#ifndef __BLKTAP_H__
-#define __BLKTAP_H__
-
-#include <linux/version.h>
-#include <linux/blkdev.h>
-#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-#include <asm/pgalloc.h>
-#include <asm/hypervisor.h>
-#include <asm-xen/xen-public/io/blkif.h>
-#include <asm-xen/xen-public/io/ring.h>
-
-/* Used to signal to the backend that this is a tap domain. */
-#define BLKTAP_COOKIE 0xbeadfeed
-
-/* -------[ debug / pretty printing ]--------------------------------- */
-
-#define PRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
-                           __FILE__ , __LINE__ , ## _a )
-#if 0
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
-                           __FILE__ , __LINE__ , ## _a )
-#else
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-#if 1
-#define ASSERT(_p) \
-    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
-
-#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
-
-
-/* -------[ state descriptors ]--------------------------------------- */
-
-#define BLKIF_STATE_CLOSED       0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED    2
-
-/* -------[ connection tracking ]------------------------------------- */
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
-
-extern spinlock_t blkif_io_lock;
-
-typedef struct blkif_st {
-    /* Unique identifier for this interface. */
-    domid_t             domid;
-    unsigned int        handle;
-    /* Physical parameters of the comms window. */
-    unsigned long       shmem_frame;
-    unsigned int        evtchn;
-    /* Comms information. */
-    blkif_back_ring_t   blk_ring;
-    
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    /*
-     * DISCONNECT response is deferred until pending requests are ack'ed.
-     * We therefore need to store the id from the original request.
-     */    
-    u8                  disconnect_rspid;
-    struct blkif_st    *hash_next;
-    struct list_head    blkdev_list;
-    spinlock_t          blk_ring_lock;
-    atomic_t            refcnt;
-    struct work_struct work;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    u16 shmem_handle;
-    unsigned long shmem_vaddr;
-    grant_ref_t shmem_ref;
-#endif
-} blkif_t;
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-void blkif_disconnect_complete(blkif_t *blkif);
-#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define blkif_put(_b)                             \
-    do {                                          \
-        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            blkif_disconnect_complete(_b);        \
-    } while (0)
-
-
-/* -------[ active request tracking ]--------------------------------- */
-
-typedef struct {
-    blkif_t       *blkif;
-    unsigned long  id;
-    int            nr_pages;
-    int            next_free;
-} active_req_t;
-
-typedef unsigned int ACTIVE_RING_IDX;
-
-active_req_t *lookup_active_req(ACTIVE_RING_IDX idx);
-
-extern inline unsigned int ID_TO_IDX(unsigned long id) 
-{ 
-    return ( id & 0x0000ffff );
-}
-
-extern inline domid_t ID_TO_DOM(unsigned long id) 
-{ 
-    return (id >> 16); 
-}
-
-void active_reqs_init(void);
-
-/* -------[ interposition -> character device interface ]------------- */
-
-/* /dev/xen/blktap resides at device number major=10, minor=200        */ 
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
-
-/* blktap IOCTLs:                                                      */
-#define BLKTAP_IOCTL_KICK_FE         1
-#define BLKTAP_IOCTL_KICK_BE         2
-#define BLKTAP_IOCTL_SETMODE         3
-#define BLKTAP_IOCTL_PRINT_IDXS      100  
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
-#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
-#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
-#define BLKTAP_MODE_COPY_FE          0x00000004
-#define BLKTAP_MODE_COPY_BE          0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
-           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
-           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
-           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
-        );
-}
-
-
-
-/* -------[ Mappings to User VMA ]------------------------------------ */
-#define BATCH_PER_DOMAIN 16
-
-/* -------[ Here be globals ]----------------------------------------- */
-extern unsigned long blktap_mode;
-
-/* Connection to a single backend domain. */
-extern blkif_front_ring_t blktap_be_ring;
-extern unsigned int blktap_be_evtchn;
-extern unsigned int blktap_be_state;
-
-/* User ring status. */
-extern unsigned long blktap_ring_ok;
-
-/* -------[ ...and function prototypes. ]----------------------------- */
-
-/* init function for character device interface.                       */
-int blktap_init(void);
-
-/* init function for the blkif cache. */
-void __init blkif_interface_init(void);
-void __init blkdev_schedule_init(void);
-void blkif_deschedule(blkif_t *blkif);
-
-/* interfaces to the char driver, passing messages to and from apps.   */
-void blktap_kick_user(void);
-
-/* user ring access functions: */
-int blktap_write_fe_ring(blkif_request_t *req);
-int blktap_write_be_ring(blkif_response_t *rsp);
-int blktap_write_ctrl_ring(ctrl_msg_t *msg);
-
-/* fe/be ring access functions: */
-int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp);
-int write_req_to_be_ring(blkif_request_t *req);
-
-/* event notification functions */
-void kick_fe_domain(blkif_t *blkif);
-void kick_be_domain(void);
-
-/* Interrupt handlers. */
-irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
-                                  struct pt_regs *ptregs);
-irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
-
-/* Control message receiver. */
-extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
-
-/* debug */
-void print_fe_ring_idxs(void);
-void print_be_ring_idxs(void);
-        
-#define __BLKINT_H__
-#endif
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/usbback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/common.h Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,83 +0,0 @@
-
-#ifndef __USBIF__BACKEND__COMMON_H__
-#define __USBIF__BACKEND__COMMON_H__
-
-#include <linux/config.h>
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/rbtree.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-#include <asm/pgalloc.h>
-#include <asm/hypervisor.h>
-
-#include <asm-xen/xen-public/io/usbif.h>
-
-#if 0
-#define ASSERT(_p) \
-    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
-                           __FILE__ , __LINE__ , ## _a )
-#else
-#define ASSERT(_p) ((void)0)
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-typedef struct usbif_priv_st usbif_priv_t;
-
-struct usbif_priv_st {
-    /* Unique identifier for this interface. */
-    domid_t          domid;
-    unsigned int     handle;
-    /* Physical parameters of the comms window. */
-    unsigned long    shmem_frame;
-    unsigned int     evtchn;
-    /* Comms Information */
-    usbif_back_ring_t usb_ring;
-    /* Private fields. */
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    /*
-     * DISCONNECT response is deferred until pending requests are ack'ed.
-     * We therefore need to store the id from the original request.
-     */
-    u8                   disconnect_rspid;
-    usbif_priv_t        *hash_next;
-    struct list_head     usbif_list;
-    spinlock_t           usb_ring_lock;
-    atomic_t             refcnt;
-
-    struct work_struct work;
-};
-
-void usbif_create(usbif_be_create_t *create);
-void usbif_destroy(usbif_be_destroy_t *destroy);
-void usbif_connect(usbif_be_connect_t *connect);
-int  usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id);
-void usbif_disconnect_complete(usbif_priv_t *up);
-
-void usbif_release_port(usbif_be_release_port_t *msg);
-int usbif_claim_port(usbif_be_claim_port_t *msg);
-void usbif_release_ports(usbif_priv_t *up);
-
-usbif_priv_t *usbif_find(domid_t domid);
-#define usbif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define usbif_put(_b)                             \
-    do {                                          \
-        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            usbif_disconnect_complete(_b);        \
-    } while (0)
-
-
-void usbif_interface_init(void);
-void usbif_ctrlif_init(void);
-
-void usbif_deschedule(usbif_priv_t *up);
-void remove_from_usbif_list(usbif_priv_t *up);
-
-irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs);
-
-#endif /* __USBIF__BACKEND__COMMON_H__ */
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/usbback/control.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/control.c        Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,61 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/control.c
- * 
- * Routines for interfacing with the control plane.
- * 
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    DPRINTK("Received usbif backend message, subtype=%d\n", msg->subtype);
-    
-    switch ( msg->subtype )
-    {
-    case CMSG_USBIF_BE_CREATE:
-        usbif_create((usbif_be_create_t *)&msg->msg[0]);
-        break;        
-    case CMSG_USBIF_BE_DESTROY:
-        usbif_destroy((usbif_be_destroy_t *)&msg->msg[0]);
-        break;        
-    case CMSG_USBIF_BE_CONNECT:
-        usbif_connect((usbif_be_connect_t *)&msg->msg[0]);
-        break;        
-    case CMSG_USBIF_BE_DISCONNECT:
-        if ( !usbif_disconnect((usbif_be_disconnect_t *)&msg->msg[0],msg->id) )
-            return; /* Sending the response is deferred until later. */
-        break;        
-    case CMSG_USBIF_BE_CLAIM_PORT:
-       usbif_claim_port((usbif_be_claim_port_t *)&msg->msg[0]);
-        break;
-    case CMSG_USBIF_BE_RELEASE_PORT:
-        usbif_release_port((usbif_be_release_port_t *)&msg->msg[0]);
-        break;
-    default:
-        DPRINTK("Parse error while reading message subtype %d, len %d\n",
-                msg->subtype, msg->length);
-        msg->length = 0;
-        break;
-    }
-
-    ctrl_if_send_response(msg);
-}
-
-void usbif_ctrlif_init(void)
-{
-    ctrl_msg_t                       cmsg;
-    usbif_be_driver_status_changed_t st;
-
-    (void)ctrl_if_register_receiver(CMSG_USBIF_BE, usbif_ctrlif_rx, 
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-
-    /* Send a driver-UP notification to the domain controller. */
-    cmsg.type      = CMSG_USBIF_BE;
-    cmsg.subtype   = CMSG_USBIF_BE_DRIVER_STATUS_CHANGED;
-    cmsg.length    = sizeof(usbif_be_driver_status_changed_t);
-    st.status      = USBIF_DRIVER_STATUS_UP;
-    memcpy(cmsg.msg, &st, sizeof(st));
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/usbback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/interface.c      Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,242 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/interface.c
- * 
- * USB device interface management.
- * 
- * by Mark Williamson, Copyright (c) 2004
- */
-
-#include "common.h"
-
-#define USBIF_HASHSZ 1024
-#define USBIF_HASH(_d) (((int)(_d))&(USBIF_HASHSZ-1))
-
-static kmem_cache_t      *usbif_priv_cachep;
-static usbif_priv_t      *usbif_priv_hash[USBIF_HASHSZ];
-
-usbif_priv_t *usbif_find(domid_t domid)
-{
-    usbif_priv_t *up = usbif_priv_hash[USBIF_HASH(domid)];
-    while ( (up != NULL ) && ( up->domid != domid ) )
-        up = up->hash_next;
-    return up;
-}
-
-static void __usbif_disconnect_complete(void *arg)
-{
-    usbif_priv_t         *usbif = (usbif_priv_t *)arg;
-    ctrl_msg_t            cmsg;
-    usbif_be_disconnect_t disc;
-
-    /*
-     * These can't be done in usbif_disconnect() because at that point there
-     * may be outstanding requests at the device whose asynchronous responses
-     * must still be notified to the remote driver.
-     */
-    vfree(usbif->usb_ring.sring);
-
-    /* Construct the deferred response message. */
-    cmsg.type         = CMSG_USBIF_BE;
-    cmsg.subtype      = CMSG_USBIF_BE_DISCONNECT;
-    cmsg.id           = usbif->disconnect_rspid;
-    cmsg.length       = sizeof(usbif_be_disconnect_t);
-    disc.domid        = usbif->domid;
-    disc.status       = USBIF_BE_STATUS_OKAY;
-    memcpy(cmsg.msg, &disc, sizeof(disc));
-
-    /*
-     * Make sure message is constructed /before/ status change, because
-     * after the status change the 'usbif' structure could be deallocated at
-     * any time. Also make sure we send the response /after/ status change,
-     * as otherwise a subsequent CONNECT request could spuriously fail if
-     * another CPU doesn't see the status change yet.
-     */
-    mb();
-    if ( usbif->status != DISCONNECTING )
-        BUG();
-    usbif->status = DISCONNECTED;
-    mb();
-
-    /* Send the successful response. */
-    ctrl_if_send_response(&cmsg);
-}
-
-void usbif_disconnect_complete(usbif_priv_t *up)
-{
-    INIT_WORK(&up->work, __usbif_disconnect_complete, (void *)up);
-    schedule_work(&up->work);
-}
-
-void usbif_create(usbif_be_create_t *create)
-{
-    domid_t       domid  = create->domid;
-    usbif_priv_t **pup, *up;
-
-    if ( (up = kmem_cache_alloc(usbif_priv_cachep, GFP_KERNEL)) == NULL )
-    {
-        DPRINTK("Could not create usbif: out of memory\n");
-        create->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    memset(up, 0, sizeof(*up));
-    up->domid  = domid;
-    up->status = DISCONNECTED;
-    spin_lock_init(&up->usb_ring_lock);
-    atomic_set(&up->refcnt, 0);
-
-    pup = &usbif_priv_hash[USBIF_HASH(domid)];
-    while ( *pup != NULL )
-    {
-        if ( (*pup)->domid == domid )
-        {
-            create->status = USBIF_BE_STATUS_INTERFACE_EXISTS;
-            kmem_cache_free(usbif_priv_cachep, up);
-            return;
-        }
-        pup = &(*pup)->hash_next;
-    }
-
-    up->hash_next = *pup;
-    *pup = up;
-
-    create->status = USBIF_BE_STATUS_OKAY;
-}
-
-void usbif_destroy(usbif_be_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    usbif_priv_t  **pup, *up;
-
-    pup = &usbif_priv_hash[USBIF_HASH(domid)];
-    while ( (up = *pup) != NULL )
-    {
-        if ( up->domid == domid )
-        {
-            if ( up->status != DISCONNECTED )
-                goto still_connected;
-            goto destroy;
-        }
-        pup = &up->hash_next;
-    }
-
-    destroy->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- still_connected:
-    destroy->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
-    return;
-
- destroy:
-    *pup = up->hash_next;
-    usbif_release_ports(up);
-    kmem_cache_free(usbif_priv_cachep, up);
-    destroy->status = USBIF_BE_STATUS_OKAY;
-}
-
-void usbif_connect(usbif_be_connect_t *connect)
-{
-    domid_t       domid  = connect->domid;
-    unsigned int  evtchn = connect->evtchn;
-    unsigned long shmem_frame = connect->shmem_frame;
-    struct vm_struct *vma;
-    pgprot_t      prot;
-    int           error;
-    usbif_priv_t *up;
-    usbif_sring_t *sring;
-
-    up = usbif_find(domid);
-    if ( unlikely(up == NULL) )
-    {
-        DPRINTK("usbif_connect attempted for non-existent usbif (%u)\n", 
-                connect->domid); 
-        connect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
-    {
-        connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    prot = __pgprot(_KERNPG_TABLE);
-    error = direct_remap_pfn_range(&init_mm, VMALLOC_VMADDR(vma->addr),
-                                    shmem_frame, PAGE_SIZE,
-                                    prot, domid);
-    if ( error != 0 )
-    {
-        if ( error == -ENOMEM )
-            connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
-        else if ( error == -EFAULT )
-            connect->status = USBIF_BE_STATUS_MAPPING_ERROR;
-        else
-            connect->status = USBIF_BE_STATUS_ERROR;
-        vfree(vma->addr);
-        return;
-    }
-
-    if ( up->status != DISCONNECTED )
-    {
-        connect->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
-        vfree(vma->addr);
-        return;
-    }
-
-    sring = (usbif_sring_t *)vma->addr;
-    SHARED_RING_INIT(sring);
-    BACK_RING_INIT(&up->usb_ring, sring, PAGE_SIZE);
-
-    up->evtchn        = evtchn;
-    up->shmem_frame   = shmem_frame;
-    up->status        = CONNECTED;
-    usbif_get(up);
-
-    (void)bind_evtchn_to_irqhandler(
-        evtchn, usbif_be_int, 0, "usbif-backend", up);
-
-    connect->status = USBIF_BE_STATUS_OKAY;
-}
-
-/* Remove URBs for this interface before destroying it. */
-void usbif_deschedule(usbif_priv_t *up)
-{
-    remove_from_usbif_list(up);
-}
-
-int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id)
-{
-    domid_t       domid  = disconnect->domid;
-    usbif_priv_t *up;
-
-    up = usbif_find(domid);
-    if ( unlikely(up == NULL) )
-    {
-        DPRINTK("usbif_disconnect attempted for non-existent usbif"
-                " (%u)\n", disconnect->domid); 
-        disconnect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return 1; /* Caller will send response error message. */
-    }
-
-    if ( up->status == CONNECTED )
-    {
-        up->status = DISCONNECTING;
-        up->disconnect_rspid = rsp_id;
-        wmb(); /* Let other CPUs see the status change. */
-        unbind_evtchn_from_irqhandler(up->evtchn, up);
-       usbif_deschedule(up);
-        usbif_put(up);
-        return 0; /* Caller should not send response message. */
-    }
-
-    disconnect->status = USBIF_BE_STATUS_OKAY;
-    return 1;
-}
-
-void __init usbif_interface_init(void)
-{
-    usbif_priv_cachep = kmem_cache_create("usbif_priv_cache",
-                                         sizeof(usbif_priv_t), 
-                                         0, 0, NULL, NULL);
-    memset(usbif_priv_hash, 0, sizeof(usbif_priv_hash));
-}
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c        Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,1068 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/main.c
- * 
- * Backend for the Xen virtual USB driver - provides an abstraction of a
- * USB host controller to the corresponding frontend driver.
- *
- * by Mark Williamson
- * Copyright (c) 2004 Intel Research Cambridge
- * Copyright (c) 2004, 2005 Mark Williamson
- *
- * Based on arch/xen/drivers/blkif/backend/main.c
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- */
-
-#include "common.h"
-
-
-#include <linux/list.h>
-#include <linux/usb.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/tqueue.h>
-
-/*
- * This is rather arbitrary.
- */
-#define MAX_PENDING_REQS 4
-#define BATCH_PER_DOMAIN 1
-
-static unsigned long mmap_vstart;
-
-/* Needs to be sufficiently large that we can map the (large) buffers
- * the USB mass storage driver wants. */
-#define MMAP_PAGES_PER_REQUEST \
-    (128)
-#define MMAP_PAGES             \
-    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-
-#define MMAP_VADDR(_req,_seg)                        \
-    (mmap_vstart +                                   \
-     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
-     ((_seg) * PAGE_SIZE))
-
-
-static spinlock_t owned_ports_lock;
-LIST_HEAD(owned_ports);
-
-/* A list of these structures is used to track ownership of physical USB
- * ports. */
-typedef struct 
-{
-    usbif_priv_t     *usbif_priv;
-    char             path[16];
-    int               guest_port;
-    int enabled;
-    struct list_head  list;
-    unsigned long guest_address; /* The USB device address that has been
-                                  * assigned by the guest. */
-    int               dev_present; /* Is there a device present? */
-    struct usb_device * dev;
-    unsigned long ifaces;  /* What interfaces are present on this device? */
-} owned_port_t;
-
-
-/*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it.  The request is complete, the specified
- * domain has a response queued for it, with the saved 'id' passed back.
- */
-typedef struct {
-    usbif_priv_t       *usbif_priv;
-    unsigned long      id;
-    int                nr_pages;
-    unsigned short     operation;
-    int                status;
-} pending_req_t;
-
-/*
- * We can't allocate pending_req's in order, since they may complete out of 
- * order. We therefore maintain an allocation ring. This ring also indicates 
- * when enough work has been passed down -- at that point the allocation ring 
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock;
-
-/* NB. We use a different index type to differentiate from shared usb rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static int do_usb_io_op(usbif_priv_t *usbif, int max_to_do);
-static void make_response(usbif_priv_t *usbif, unsigned long id, 
-                          unsigned short op, int st, int inband,
-                         unsigned long actual_length);
-static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long port);
-static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req);    
-static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid);
-static owned_port_t *usbif_find_port(char *);
-
-/******************************************************************
- * PRIVATE DEBUG FUNCTIONS
- */
-
-#undef DEBUG
-#ifdef DEBUG
-
-static void dump_port(owned_port_t *p)
-{
-    printk(KERN_DEBUG "owned_port_t @ %p\n"
-          "  usbif_priv @ %p\n"
-          "  path: %s\n"
-          "  guest_port: %d\n"
-          "  guest_address: %ld\n"
-          "  dev_present: %d\n"
-          "  dev @ %p\n"
-          "  ifaces: 0x%lx\n",
-          p, p->usbif_priv, p->path, p->guest_port, p->guest_address,
-          p->dev_present, p->dev, p->ifaces);
-}
-
-
-static void dump_request(usbif_request_t *req)
-{    
-    printk(KERN_DEBUG "id = 0x%lx\n"
-          "devnum %d\n"
-          "endpoint 0x%x\n"
-          "direction %d\n"
-          "speed %d\n"
-          "pipe_type 0x%x\n"
-          "transfer_buffer 0x%lx\n"
-          "length 0x%lx\n"
-          "transfer_flags 0x%lx\n"
-          "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n"
-          "iso_schedule = 0x%lx\n"
-          "num_iso %ld\n",
-          req->id, req->devnum, req->endpoint, req->direction, req->speed,
-          req->pipe_type, req->transfer_buffer, req->length,
-          req->transfer_flags, req->setup[0], req->setup[1], req->setup[2],
-          req->setup[3], req->setup[4], req->setup[5], req->setup[6],
-          req->setup[7], req->iso_schedule, req->num_iso);
-}
-
-static void dump_urb(struct urb *urb)
-{
-    printk(KERN_DEBUG "dumping urb @ %p\n", urb);
-
-#define DUMP_URB_FIELD(name, format) \
-    printk(KERN_DEBUG "  " # name " " format "\n", urb-> name)
-    
-    DUMP_URB_FIELD(pipe, "0x%x");
-    DUMP_URB_FIELD(status, "%d");
-    DUMP_URB_FIELD(transfer_flags, "0x%x");    
-    DUMP_URB_FIELD(transfer_buffer, "%p");
-    DUMP_URB_FIELD(transfer_buffer_length, "%d");
-    DUMP_URB_FIELD(actual_length, "%d");
-}
-
-static void dump_response(usbif_response_t *resp)
-{
-    printk(KERN_DEBUG "usbback: Sending response:\n"
-          "         id = 0x%x\n"
-          "         op = %d\n"
-          "         status = %d\n"
-          "         data = %d\n"
-          "         length = %d\n",
-          resp->id, resp->op, resp->status, resp->data, resp->length);
-}
-
-#else /* DEBUG */
-
-#define dump_port(blah)     ((void)0)
-#define dump_request(blah)   ((void)0)
-#define dump_urb(blah)      ((void)0)
-#define dump_response(blah) ((void)0)
-
-#endif /* DEBUG */
-
-/******************************************************************
- * MEMORY MANAGEMENT
- */
-
-static void fast_flush_area(int idx, int nr_pages)
-{
-    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
-    int               i;
-
-    for ( i = 0; i < nr_pages; i++ )
-    {
-       MULTI_update_va_mapping(mcl+i, MMAP_VADDR(idx, i),
-                               __pte(0), 0);
-    }
-
-    mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
-        BUG();
-}
-
-
-/******************************************************************
- * USB INTERFACE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head usbio_schedule_list;
-static spinlock_t usbio_schedule_list_lock;
-
-static int __on_usbif_list(usbif_priv_t *up)
-{
-    return up->usbif_list.next != NULL;
-}
-
-void remove_from_usbif_list(usbif_priv_t *up)
-{
-    unsigned long flags;
-    if ( !__on_usbif_list(up) ) return;
-    spin_lock_irqsave(&usbio_schedule_list_lock, flags);
-    if ( __on_usbif_list(up) )
-    {
-        list_del(&up->usbif_list);
-        up->usbif_list.next = NULL;
-        usbif_put(up);
-    }
-    spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
-}
-
-static void add_to_usbif_list_tail(usbif_priv_t *up)
-{
-    unsigned long flags;
-    if ( __on_usbif_list(up) ) return;
-    spin_lock_irqsave(&usbio_schedule_list_lock, flags);
-    if ( !__on_usbif_list(up) && (up->status == CONNECTED) )
-    {
-        list_add_tail(&up->usbif_list, &usbio_schedule_list);
-        usbif_get(up);
-    }
-    spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
-}
-
-void free_pending(int pending_idx)
-{
-    unsigned long flags;
-
-    /* Free the pending request. */
-    spin_lock_irqsave(&pend_prod_lock, flags);
-    pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-    spin_unlock_irqrestore(&pend_prod_lock, flags);
-}
-
-/******************************************************************
- * COMPLETION CALLBACK -- Called as urb->complete()
- */
-
-static void maybe_trigger_usbio_schedule(void);
-
-static void __end_usb_io_op(struct urb *purb)
-{
-    pending_req_t *pending_req;
-    int pending_idx;
-
-    pending_req = purb->context;
-
-    pending_idx = pending_req - pending_reqs;
-
-    ASSERT(purb->actual_length <= purb->transfer_buffer_length);
-    ASSERT(purb->actual_length <= pending_req->nr_pages * PAGE_SIZE);
-    
-    /* An error fails the entire request. */
-    if ( purb->status )
-    {
-        printk(KERN_WARNING "URB @ %p failed. Status %d\n", purb, purb->status);
-    }
-
-    if ( usb_pipetype(purb->pipe) == 0 )
-    {
-        int i;
-        usbif_iso_t *sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, pending_req->nr_pages - 1);
-
-        /* If we're dealing with an iso pipe, we need to copy back the schedule. */
-        for ( i = 0; i < purb->number_of_packets; i++ )
-        {
-            sched[i].length = purb->iso_frame_desc[i].actual_length;
-            ASSERT(sched[i].buffer_offset ==
-                   purb->iso_frame_desc[i].offset);
-            sched[i].status = purb->iso_frame_desc[i].status;
-        }
-    }
-    
-    fast_flush_area(pending_req - pending_reqs, pending_req->nr_pages);
-
-    kfree(purb->setup_packet);
-
-    make_response(pending_req->usbif_priv, pending_req->id,
-                 pending_req->operation, pending_req->status, 0, purb->actual_length);
-    usbif_put(pending_req->usbif_priv);
-
-    usb_free_urb(purb);
-
-    free_pending(pending_idx);
-    
-    rmb();
-
-    /* Check for anything still waiting in the rings, having freed a request... */
-    maybe_trigger_usbio_schedule();
-}
-
-/******************************************************************
- * SCHEDULER FUNCTIONS
- */
-
-static DECLARE_WAIT_QUEUE_HEAD(usbio_schedule_wait);
-
-static int usbio_schedule(void *arg)
-{
-    DECLARE_WAITQUEUE(wq, current);
-
-    usbif_priv_t          *up;
-    struct list_head *ent;
-
-    daemonize();
-
-    for ( ; ; )
-    {
-        /* Wait for work to do. */
-        add_wait_queue(&usbio_schedule_wait, &wq);
-        set_current_state(TASK_INTERRUPTIBLE);
-        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
-             list_empty(&usbio_schedule_list) )
-            schedule();
-        __set_current_state(TASK_RUNNING);
-        remove_wait_queue(&usbio_schedule_wait, &wq);
-
-        /* Queue up a batch of requests. */
-        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
-                !list_empty(&usbio_schedule_list) )
-        {
-            ent = usbio_schedule_list.next;
-            up = list_entry(ent, usbif_priv_t, usbif_list);
-            usbif_get(up);
-            remove_from_usbif_list(up);
-            if ( do_usb_io_op(up, BATCH_PER_DOMAIN) )
-                add_to_usbif_list_tail(up);
-            usbif_put(up);
-        }
-    }
-}
-
-static void maybe_trigger_usbio_schedule(void)
-{
-    /*
-     * Needed so that two processes, who together make the following predicate
-     * true, don't both read stale values and evaluate the predicate
-     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
-     */
-    smp_mb();
-
-    if ( !list_empty(&usbio_schedule_list) )
-        wake_up(&usbio_schedule_wait);
-}
-
-
-/******************************************************************************
- * NOTIFICATION FROM GUEST OS.
- */
-
-irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs)
-{
-    usbif_priv_t *up = dev_id;
-
-    smp_mb();
-
-    add_to_usbif_list_tail(up); 
-
-    /* Will in fact /always/ trigger an io schedule in this case. */
-    maybe_trigger_usbio_schedule();
-
-    return IRQ_HANDLED;
-}
-
-
-
-/******************************************************************
- * DOWNWARD CALLS -- These interface with the usb-device layer proper.
- */
-
-static int do_usb_io_op(usbif_priv_t *up, int max_to_do)
-{
-    usbif_back_ring_t *usb_ring = &up->usb_ring;
-    usbif_request_t *req;
-    RING_IDX i, rp;
-    int more_to_do = 0;
-
-    rp = usb_ring->sring->req_prod;
-    rmb(); /* Ensure we see queued requests up to 'rp'. */
-    
-    /* Take items off the comms ring, taking care not to overflow. */
-    for ( i = usb_ring->req_cons; 
-          (i != rp) && !RING_REQUEST_CONS_OVERFLOW(usb_ring, i);
-          i++ )
-    {
-        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
-        {
-            more_to_do = 1;
-            break;
-        }
-
-        req = RING_GET_REQUEST(usb_ring, i);
-        
-        switch ( req->operation )
-        {
-        case USBIF_OP_PROBE:
-            dispatch_usb_probe(up, req->id, req->port);
-            break;
-
-        case USBIF_OP_IO:
-         /* Assemble an appropriate URB. */
-         dispatch_usb_io(up, req);
-          break;
-
-       case USBIF_OP_RESET:
-         dispatch_usb_reset(up, req->port);
-          break;
-
-        default:
-            DPRINTK("error: unknown USB io operation [%d]\n",
-                    req->operation);
-            make_response(up, req->id, req->operation, -EINVAL, 0, 0);
-            break;
-        }
-    }
-
-    usb_ring->req_cons = i;
-
-    return more_to_do;
-}
-
-static owned_port_t *find_guest_port(usbif_priv_t *up, int port)
-{
-    unsigned long flags;
-    struct list_head *l;
-
-    spin_lock_irqsave(&owned_ports_lock, flags);
-    list_for_each(l, &owned_ports)
-    {
-        owned_port_t *p = list_entry(l, owned_port_t, list);
-        if(p->usbif_priv == up && p->guest_port == port)
-        {
-            spin_unlock_irqrestore(&owned_ports_lock, flags);
-            return p;
-        }
-    }
-    spin_unlock_irqrestore(&owned_ports_lock, flags);
-
-    return NULL;
-}
-
-static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid)
-{
-    owned_port_t *port = find_guest_port(up, portid);
-    int ret = 0;
-
-
-    /* Allowing the guest to actually reset the device causes more problems
-     * than it's worth.  We just fake it out in software but we will do a real
-     * reset when the interface is destroyed. */
-
-    dump_port(port);
-
-    port->guest_address = 0;
-    /* If there's an attached device then the port is now enabled. */
-    if ( port->dev_present )
-        port->enabled = 1;
-    else
-        port->enabled = 0;
-
-    make_response(up, 0, USBIF_OP_RESET, ret, 0, 0);
-}
-
-static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long portid)
-{
-    owned_port_t *port = find_guest_port(up, portid);
-    int ret;
- 
-    if ( port != NULL )
-        ret = port->dev_present;
-    else
-    {
-        ret = -EINVAL;
-        printk(KERN_INFO "dispatch_usb_probe(): invalid port probe request "
-              "(port %ld)\n", portid);
-    }
-
-    /* Probe result is sent back in-band.  Probes don't have an associated id
-     * right now... */
-    make_response(up, id, USBIF_OP_PROBE, ret, portid, 0);
-}
-
-/**
- * check_iso_schedule - safety check the isochronous schedule for an URB
- * @purb : the URB in question
- */
-static int check_iso_schedule(struct urb *purb)
-{
-    int i;
-    unsigned long total_length = 0;
-    
-    for ( i = 0; i < purb->number_of_packets; i++ )
-    {
-        struct usb_iso_packet_descriptor *desc = &purb->iso_frame_desc[i];
-        
-        if ( desc->offset >= purb->transfer_buffer_length
-            || ( desc->offset + desc->length) > purb->transfer_buffer_length )
-            return -EINVAL;
-
-        total_length += desc->length;
-
-        if ( total_length > purb->transfer_buffer_length )
-            return -EINVAL;
-    }
-    
-    return 0;
-}
-
-owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req);
-
-static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req)
-{
-    unsigned long buffer_mach;
-    int i = 0, offset = 0,
-        pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-    pending_req_t *pending_req;
-    unsigned long  remap_prot;
-    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
-    struct urb *purb = NULL;
-    owned_port_t *port;
-    unsigned char *setup;    
-
-    dump_request(req);
-
-    if ( NR_PENDING_REQS == MAX_PENDING_REQS )
-    {
-        printk(KERN_WARNING "usbback: Max requests already queued. "
-              "Giving up!\n");
-        
-        return;
-    }
-
-    port = find_port_for_request(up, req);
-
-    if ( port == NULL )
-    {
-       printk(KERN_WARNING "No such device! (%d)\n", req->devnum);
-       dump_request(req);
-
-        make_response(up, req->id, req->operation, -ENODEV, 0, 0);
-       return;
-    }
-    else if ( !port->dev_present )
-    {
-        /* In normal operation, we'll only get here if a device is unplugged
-         * and the frontend hasn't noticed yet. */
-        make_response(up, req->id, req->operation, -ENODEV, 0, 0);
-       return;
-    }
-        
-
-    setup = kmalloc(8, GFP_KERNEL);
-
-    if ( setup == NULL )
-        goto no_mem;
-   
-    /* Copy request out for safety. */
-    memcpy(setup, req->setup, 8);
-
-    if( setup[0] == 0x0 && setup[1] == 0x5)
-    {
-        /* To virtualise the USB address space, we need to intercept
-         * set_address messages and emulate.  From the USB specification:
-         * bmRequestType = 0x0;
-         * Brequest = SET_ADDRESS (i.e. 0x5)
-         * wValue = device address
-         * wIndex = 0
-         * wLength = 0
-         * data = None
-         */
-        /* Store into the guest transfer buffer using cpu_to_le16 */
-        port->guest_address = le16_to_cpu(*(u16 *)(setup + 2));
-        /* Make a successful response.  That was easy! */
-
-        make_response(up, req->id, req->operation, 0, 0, 0);
-
-       kfree(setup);
-        return;
-    }
-    else if ( setup[0] == 0x0 && setup[1] == 0x9 )
-    {
-        /* The host kernel needs to know what device configuration is in use
-         * because various error checks get confused otherwise.  We just do
-         * configuration settings here, under controlled conditions.
-         */
-
-      /* Ignore configuration setting and hope that the host kernel
-        did it right. */
-        /* usb_set_configuration(port->dev, setup[2]); */
-
-        make_response(up, req->id, req->operation, 0, 0, 0);
-
-        kfree(setup);
-        return;
-    }
-    else if ( setup[0] == 0x1 && setup[1] == 0xB )
-    {
-        /* The host kernel needs to know what device interface is in use
-         * because various error checks get confused otherwise.  We just do
-         * configuration settings here, under controlled conditions.
-         */
-        usb_set_interface(port->dev, (setup[4] | setup[5] << 8),
-                          (setup[2] | setup[3] << 8) );
-
-        make_response(up, req->id, req->operation, 0, 0, 0);
-
-        kfree(setup);
-        return;
-    }
-
-    if ( ( req->transfer_buffer - (req->transfer_buffer & PAGE_MASK)
-          + req->length )
-        > MMAP_PAGES_PER_REQUEST * PAGE_SIZE )
-    {
-        printk(KERN_WARNING "usbback: request of %lu bytes too large\n",
-              req->length);
-        make_response(up, req->id, req->operation, -EINVAL, 0, 0);
-        kfree(setup);
-        return;
-    }
-    
-    buffer_mach = req->transfer_buffer;
-
-    if( buffer_mach == 0 )
-       goto no_remap;
-
-    ASSERT((req->length >> PAGE_SHIFT) <= MMAP_PAGES_PER_REQUEST);
-    ASSERT(buffer_mach);
-
-    /* Always map writeable for now. */
-    remap_prot = _KERNPG_TABLE;
-
-    for ( i = 0, offset = 0; offset < req->length;
-          i++, offset += PAGE_SIZE )
-    {
-       MULTI_update_va_mapping_otherdomain(
-           mcl+i, MMAP_VADDR(pending_idx, i),
-           pfn_pte_ma((buffer_mach + offset) >> PAGE_SHIFT, remap_prot),
-           0, up->domid);
-        
-        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
-            FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
-
-        ASSERT(virt_to_mfn(MMAP_VADDR(pending_idx, i))
-               == ((buffer_mach >> PAGE_SHIFT) + i));
-    }
-
-    if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
-    {
-        /* Map in ISO schedule, if necessary. */
-       MULTI_update_va_mapping_otherdomain(
-           mcl+i, MMAP_VADDR(pending_idx, i),
-           pfn_pte_ma(req->iso_schedule >> PAGE_SHIFT, remap_prot),
-           0, up->domid);
-
-        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
-            FOREIGN_FRAME(req->iso_schedule >> PAGE_SHIFT);
-    
-        i++;
-    }
-
-    if ( unlikely(HYPERVISOR_multicall(mcl, i) != 0) )
-        BUG();
-    
-    {
-        int j;
-        for ( j = 0; j < i; j++ )
-        {
-            if ( unlikely(mcl[j].result != 0) )
-            {
-                printk(KERN_WARNING
-                      "invalid buffer %d -- could not remap it\n", j);
-                fast_flush_area(pending_idx, i);
-                goto bad_descriptor;
-            }
-       }
-    }
-    
- no_remap:
-
-    ASSERT(i <= MMAP_PAGES_PER_REQUEST);
-    ASSERT(i * PAGE_SIZE >= req->length);
-
-    /* We have to do this because some things might complete out of order. */
-    pending_req = &pending_reqs[pending_idx];
-    pending_req->usbif_priv= up;
-    pending_req->id        = req->id;
-    pending_req->operation = req->operation;
-    pending_req->nr_pages  = i;
-
-    pending_cons++;
-
-    usbif_get(up);
-    
-    /* Fill out an actual request for the USB layer. */
-    purb = usb_alloc_urb(req->num_iso);
-
-    if ( purb == NULL )
-    {
-        usbif_put(up);
-        free_pending(pending_idx);
-        goto no_mem;
-    }
-
-    purb->dev = port->dev;
-    purb->context = pending_req;
-    purb->transfer_buffer =
-        (void *)(MMAP_VADDR(pending_idx, 0) + (buffer_mach & ~PAGE_MASK));
-    if(buffer_mach == 0)
-      purb->transfer_buffer = NULL;
-    purb->complete = __end_usb_io_op;
-    purb->transfer_buffer_length = req->length;
-    purb->transfer_flags = req->transfer_flags;
-
-    purb->pipe = 0;
-    purb->pipe |= req->direction << 7;
-    purb->pipe |= port->dev->devnum << 8;
-    purb->pipe |= req->speed << 26;
-    purb->pipe |= req->pipe_type << 30;
-    purb->pipe |= req->endpoint << 15;
-
-    purb->number_of_packets = req->num_iso;
-
-    if ( purb->number_of_packets * sizeof(usbif_iso_t) > PAGE_SIZE )
-        goto urb_error;
-
-    /* Make sure there's always some kind of timeout. */
-    purb->timeout = ( req->timeout > 0 ) ? (req->timeout * HZ) / 1000
-                    :  1000;
-
-    purb->setup_packet = setup;
-
-    if ( req->pipe_type == 0 ) /* ISO */
-    {
-        int j;
-        usbif_iso_t *iso_sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, i - 1);
-
-        /* If we're dealing with an iso pipe, we need to copy in a schedule. */
-        for ( j = 0; j < purb->number_of_packets; j++ )
-        {
-            purb->iso_frame_desc[j].length = iso_sched[j].length;
-            purb->iso_frame_desc[j].offset = iso_sched[j].buffer_offset;
-            iso_sched[j].status = 0;
-        }
-    }
-
-    if ( check_iso_schedule(purb) != 0 )
-        goto urb_error;
-
-    if ( usb_submit_urb(purb) != 0 )
-        goto urb_error;
-
-    return;
-
- urb_error:
-    dump_urb(purb);    
-    usbif_put(up);
-    free_pending(pending_idx);
-
- bad_descriptor:
-    kfree ( setup );
-    if ( purb != NULL )
-        usb_free_urb(purb);
-    make_response(up, req->id, req->operation, -EINVAL, 0, 0);
-    return;
-    
- no_mem:
-    if ( setup != NULL )
-        kfree(setup);
-    make_response(up, req->id, req->operation, -ENOMEM, 0, 0);
-    return;
-} 
-
-
-
-/******************************************************************
- * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
- */
-
-
-static void make_response(usbif_priv_t *up, unsigned long id,
-                          unsigned short op, int st, int inband,
-                         unsigned long length)
-{
-    usbif_response_t *resp;
-    unsigned long     flags;
-    usbif_back_ring_t *usb_ring = &up->usb_ring;
-
-    /* Place on the response ring for the relevant domain. */ 
-    spin_lock_irqsave(&up->usb_ring_lock, flags);
-    resp = RING_GET_RESPONSE(usb_ring, usb_ring->rsp_prod_pvt);
-    resp->id        = id;
-    resp->operation = op;
-    resp->status    = st;
-    resp->data      = inband;
-    resp->length = length;
-    wmb(); /* Ensure other side can see the response fields. */
-
-    dump_response(resp);
-
-    usb_ring->rsp_prod_pvt++;
-    RING_PUSH_RESPONSES(usb_ring);
-    spin_unlock_irqrestore(&up->usb_ring_lock, flags);
-
-    /* Kick the relevant domain. */
-    notify_via_evtchn(up->evtchn);
-}
-
-/**
- * usbif_claim_port - claim devices on a port on behalf of guest
- *
- * Once completed, this will ensure that any device attached to that
- * port is claimed by this driver for use by the guest.
- */
-int usbif_claim_port(usbif_be_claim_port_t *msg)
-{
-    owned_port_t *o_p;
-    
-    /* Sanity... */
-    if ( usbif_find_port(msg->path) != NULL )
-    {
-        printk(KERN_WARNING "usbback: Attempted to claim USB port "
-               "we already own!\n");
-        return -EINVAL;
-    }
-
-    /* No need for a slab cache - this should be infrequent. */
-    o_p = kmalloc(sizeof(owned_port_t), GFP_KERNEL);
-
-    if ( o_p == NULL )
-        return -ENOMEM;
-
-    o_p->enabled = 0;
-    o_p->usbif_priv = usbif_find(msg->domid);
-    o_p->guest_port = msg->usbif_port;
-    o_p->dev_present = 0;
-    o_p->guest_address = 0; /* Default address. */
-
-    strcpy(o_p->path, msg->path);
-
-    spin_lock_irq(&owned_ports_lock);
-    
-    list_add(&o_p->list, &owned_ports);
-
-    spin_unlock_irq(&owned_ports_lock);
-
-    printk(KERN_INFO "usbback: Claimed USB port (%s) for %d.%d\n", o_p->path,
-          msg->domid, msg->usbif_port);
-
-    /* Force a reprobe for unclaimed devices. */
-    usb_scan_devices();
-
-    return 0;
-}
-
-owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req)
-{
-    unsigned long flags;
-    struct list_head *port;
-
-    /* I'm assuming this is not called from IRQ context - correct?  I think
-     * it's probably only called in response to control messages or plug events
-     * in the USB hub kernel thread, so should be OK. */
-    spin_lock_irqsave(&owned_ports_lock, flags);
-    list_for_each(port, &owned_ports)
-    {
-        owned_port_t *p = list_entry(port, owned_port_t, list);
-        if(p->usbif_priv == up && p->guest_address == req->devnum && p->enabled )
-         {
-              dump_port(p);
-
-             spin_unlock_irqrestore(&owned_ports_lock, flags);
-              return p;
-         }
-    }
-    spin_unlock_irqrestore(&owned_ports_lock, flags);
-
-    return NULL;    
-}
-
-owned_port_t *__usbif_find_port(char *path)
-{
-    struct list_head *port;
-
-    list_for_each(port, &owned_ports)
-    {
-        owned_port_t *p = list_entry(port, owned_port_t, list);
-        if(!strcmp(path, p->path))
-        {
-            return p;
-        }
-    }
-
-    return NULL;
-}
-
-owned_port_t *usbif_find_port(char *path)
-{
-    owned_port_t *ret;
-    unsigned long flags;
-
-    spin_lock_irqsave(&owned_ports_lock, flags);
-    ret = __usbif_find_port(path);    
-    spin_unlock_irqrestore(&owned_ports_lock, flags);
-
-    return ret;
-}
-
-
-static void *probe(struct usb_device *dev, unsigned iface,
-                   const struct usb_device_id *id)
-{
-    owned_port_t *p;
-
-    /* We don't care what the device is - if we own the port, we want it.  We
-     * don't deal with device-specifics in this driver, so we don't care what
-     * the device actually is ;-) */
-    if ( ( p = usbif_find_port(dev->devpath) ) != NULL )
-    {
-        printk(KERN_INFO "usbback: claimed device attached to owned port\n");
-
-        p->dev_present = 1;
-        p->dev = dev;
-        set_bit(iface, &p->ifaces);
-        
-        return p->usbif_priv;
-    }
-    else
-        printk(KERN_INFO "usbback: hotplug for non-owned port (%s), ignoring\n",
-              dev->devpath);
-   
-
-    return NULL;
-}
-
-static void disconnect(struct usb_device *dev, void *usbif)
-{
-    /* Note the device is removed so we can tell the guest when it probes. */
-    owned_port_t *port = usbif_find_port(dev->devpath);
-    port->dev_present = 0;
-    port->dev = NULL;
-    port->ifaces = 0;
-}
-
-
-struct usb_driver driver =
-{
-    .owner      = THIS_MODULE,
-    .name       = "Xen USB Backend",
-    .probe      = probe,
-    .disconnect = disconnect,
-    .id_table   = NULL,
-};
-
-/* __usbif_release_port - internal mechanics for releasing a port */
-void __usbif_release_port(owned_port_t *p)
-{
-    int i;
-
-    for ( i = 0; p->ifaces != 0; i++)
-        if ( p->ifaces & 1 << i )
-        {
-            usb_driver_release_interface(&driver, usb_ifnum_to_if(p->dev, i));
-            clear_bit(i, &p->ifaces);
-        }
-    list_del(&p->list);
-
-    /* Reset the real device.  We don't simulate disconnect / probe for other
-     * drivers in this kernel because we assume the device is completely under
-     * the control of ourselves (i.e. the guest!).  This should ensure that the
-     * device is in a sane state for the next customer ;-) */
-
-    /* MAW NB: we're not resetting the real device here.  This looks perfectly
-     * valid to me but it causes memory corruption.  We seem to get away with not
-     * resetting for now, although it'd be nice to have this tracked down. */
-/*     if ( p->dev != NULL) */
-/*         usb_reset_device(p->dev); */
-
-    kfree(p);
-}
-
-
-/**
- * usbif_release_port - stop claiming devices on a port on behalf of guest
- */
-void usbif_release_port(usbif_be_release_port_t *msg)
-{
-    owned_port_t *p;
-
-    spin_lock_irq(&owned_ports_lock);
-    p = __usbif_find_port(msg->path);
-    __usbif_release_port(p);
-    spin_unlock_irq(&owned_ports_lock);
-}
-
-void usbif_release_ports(usbif_priv_t *up)
-{
-    struct list_head *port, *tmp;
-    unsigned long flags;
-    
-    spin_lock_irqsave(&owned_ports_lock, flags);
-    list_for_each_safe(port, tmp, &owned_ports)
-    {
-        owned_port_t *p = list_entry(port, owned_port_t, list);
-        if ( p->usbif_priv == up )
-            __usbif_release_port(p);
-    }
-    spin_unlock_irqrestore(&owned_ports_lock, flags);
-}
-
-static int __init usbif_init(void)
-{
-    int i;
-    struct page *page;
-
-    if ( !(xen_start_info->flags & SIF_INITDOMAIN) &&
-         !(xen_start_info->flags & SIF_USB_BE_DOMAIN) )
-        return 0;
-
-    page = balloon_alloc_empty_page_range(MMAP_PAGES);
-    BUG_ON(page == NULL);
-    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
-
-    pending_cons = 0;
-    pending_prod = MAX_PENDING_REQS;
-    memset(pending_reqs, 0, sizeof(pending_reqs));
-    for ( i = 0; i < MAX_PENDING_REQS; i++ )
-        pending_ring[i] = i;
-
-    spin_lock_init(&pend_prod_lock);
-
-    spin_lock_init(&owned_ports_lock);
-    INIT_LIST_HEAD(&owned_ports);
-
-    spin_lock_init(&usbio_schedule_list_lock);
-    INIT_LIST_HEAD(&usbio_schedule_list);
-
-    if ( kernel_thread(usbio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
-        BUG();
-    
-    usbif_interface_init();
-
-    usbif_ctrlif_init();
-
-    usb_register(&driver);
-
-    printk(KERN_INFO "Xen USB Backend Initialised");
-
-    return 0;
-}
-
-__initcall(usbif_init);
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c      Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,1735 +0,0 @@
-/*
- * Xen Virtual USB Frontend Driver 
- *
- * This file contains the first version of the Xen virtual USB hub
- * that I've managed not to delete by mistake (3rd time lucky!).
- *
- * Based on Linux's uhci.c, original copyright notices are displayed
- * below.  Portions also (c) 2004 Intel Research Cambridge
- * and (c) 2004, 2005 Mark Williamson
- *
- * Contact <mark.williamson@xxxxxxxxxxxx> or
- * <xen-devel@xxxxxxxxxxxxxxxxxxxxx> regarding this code.
- *
- * Still to be (maybe) implemented:
- * - migration / backend restart support?
- * - support for building / using as a module
- */
-
-/*
- * Universal Host Controller Interface driver for USB.
- *
- * Maintainer: Johannes Erdfelt <johannes@xxxxxxxxxxx>
- *
- * (C) Copyright 1999 Linus Torvalds
- * (C) Copyright 1999-2002 Johannes Erdfelt, johannes@xxxxxxxxxxx
- * (C) Copyright 1999 Randy Dunlap
- * (C) Copyright 1999 Georg Acher, acher@xxxxxxxxx
- * (C) Copyright 1999 Deti Fliegl, deti@xxxxxxxxx
- * (C) Copyright 1999 Thomas Sailer, sailer@xxxxxxxxxxxxxx
- * (C) Copyright 1999 Roman Weissgaerber, weissg@xxxxxxxxx
- * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface
- *               support from usb-ohci.c by Adam Richter, adam@xxxxxxxxxxxxx).
- * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c)
- *
- * Intel documents this fairly well, and as far as I know there
- * are no royalties or anything like that, but even so there are
- * people who decided that they want to do the same thing in a
- * completely different way.
- *
- * WARNING! The USB documentation is downright evil. Most of it
- * is just crap, written by a committee. You're better off ignoring
- * most of it, the important stuff is:
- *  - the low-level protocol (fairly simple but lots of small details)
- *  - working around the horridness of the rest
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/smp_lock.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#ifdef CONFIG_USB_DEBUG
-#define DEBUG
-#else
-#undef DEBUG
-#endif
-#include <linux/usb.h>
-
-#include <asm/irq.h>
-#include <asm/system.h>
-
-#include "xhci.h"
-
-#include "../../../../../drivers/usb/hcd.h"
-
-#include <asm-xen/xen-public/io/usbif.h>
-#include <asm/xen-public/io/domain_controller.h>
-
-/*
- * Version Information
- */
-#define DRIVER_VERSION "v1.0"
-#define DRIVER_AUTHOR "Linus 'Frodo Rabbit' Torvalds, Johannes Erdfelt, " \
-                      "Randy Dunlap, Georg Acher, Deti Fliegl, " \
-                      "Thomas Sailer, Roman Weissgaerber, Mark Williamson"
-#define DRIVER_DESC "Xen Virtual USB Host Controller Interface"
-
-/*
- * debug = 0, no debugging messages
- * debug = 1, dump failed URB's except for stalls
- * debug = 2, dump all failed URB's (including stalls)
- */
-#ifdef DEBUG
-static int debug = 1;
-#else
-static int debug = 0;
-#endif
-MODULE_PARM(debug, "i");
-MODULE_PARM_DESC(debug, "Debug level");
-static char *errbuf;
-#define ERRBUF_LEN    (PAGE_SIZE * 8)
-
-static int rh_submit_urb(struct urb *urb);
-static int rh_unlink_urb(struct urb *urb);
-static int xhci_unlink_urb(struct urb *urb);
-static void xhci_call_completion(struct urb *urb);
-static void xhci_drain_ring(void);
-static void xhci_transfer_result(struct xhci *xhci, struct urb *urb);
-static void xhci_finish_completion(void);
-
-#define MAX_URB_LOOP   2048            /* Maximum number of linked URB's */
-
-static kmem_cache_t *xhci_up_cachep;   /* urb_priv cache */
-static struct xhci *xhci;               /* XHCI structure for the interface */
-
-/******************************************************************************
- * DEBUGGING
- */
-
-#ifdef DEBUG
-
-static void dump_urb(struct urb *urb)
-{
-    printk(KERN_DEBUG "dumping urb @ %p\n"
-           "  hcpriv = %p\n"
-           "  next = %p\n"
-           "  dev = %p\n"
-           "  pipe = 0x%lx\n"
-           "  status = %d\n"
-           "  transfer_flags = 0x%lx\n"
-           "  transfer_buffer = %p\n"
-           "  transfer_buffer_length = %d\n"
-           "  actual_length = %d\n"
-           "  bandwidth = %d\n"
-           "  setup_packet = %p\n",
-           urb, urb->hcpriv, urb->next, urb->dev, urb->pipe, urb->status,
-           urb->transfer_flags, urb->transfer_buffer,
-           urb->transfer_buffer_length, urb->actual_length, urb->bandwidth,
-           urb->setup_packet);
-    if ( urb->setup_packet != NULL )
-        printk(KERN_DEBUG
-               "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n",
-               urb->setup_packet[0], urb->setup_packet[1],
-               urb->setup_packet[2], urb->setup_packet[3],
-               urb->setup_packet[4], urb->setup_packet[5],
-               urb->setup_packet[6], urb->setup_packet[7]);
-    printk(KERN_DEBUG "complete = %p\n"
-           "interval = %d\n", urb->complete, urb->interval);
-        
-}
-
-static void xhci_show_resp(usbif_response_t *r)
-{
-        printk(KERN_DEBUG "dumping response @ %p\n"
-               "  id=0x%lx\n"
-               "  op=0x%x\n"
-               "  data=0x%x\n"
-               "  status=0x%x\n"
-               "  length=0x%lx\n",
-               r->id, r->operation, r->data, r->status, r->length);
-}
-
-#define DPRINK(...) printk(KERN_DEBUG __VA_ARGS__)
-
-#else /* DEBUG */
-
-#define dump_urb(blah) ((void)0)
-#define xhci_show_resp(blah) ((void)0)
-#define DPRINTK(blah,...) ((void)0)
-
-#endif /* DEBUG */
-
-/******************************************************************************
- * RING REQUEST HANDLING
- */
-
-#define RING_PLUGGED(_hc) ( RING_FULL(&_hc->usb_ring) || _hc->recovery )
-
-/**
- * xhci_construct_isoc - add isochronous information to a request
- */
-static int xhci_construct_isoc(usbif_request_t *req, struct urb *urb)
-{
-        usbif_iso_t *schedule;
-        int i;
-        struct urb_priv *urb_priv = urb->hcpriv;
-        
-        req->num_iso = urb->number_of_packets;
-        schedule = (usbif_iso_t *)__get_free_page(GFP_KERNEL);
-
-        if ( schedule == NULL )
-            return -ENOMEM;
-
-        for ( i = 0; i < req->num_iso; i++ )
-        {
-                schedule[i].buffer_offset = urb->iso_frame_desc[i].offset;
-                schedule[i].length = urb->iso_frame_desc[i].length;
-        }
-
-        urb_priv->schedule = schedule;
-       req->iso_schedule = virt_to_mfn(schedule) << PAGE_SHIFT;
-
-        return 0;
-}
-
-/**
- * xhci_queue_req - construct and queue request for an URB
- */
-static int xhci_queue_req(struct urb *urb)
-{
-        unsigned long flags;
-        usbif_request_t *req;
-        usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-#if DEBUG
-        printk(KERN_DEBUG
-               "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n",
-               usbif, usbif->req_prod, virt_to_mfn(&usbif->req_prod),
-               usbif->resp_prod, xhci->usb_resp_cons);
-#endif
-        
-        spin_lock_irqsave(&xhci->ring_lock, flags);
-
-        if ( RING_PLUGGED(xhci) )
-        {
-                printk(KERN_WARNING
-                       "xhci_queue_req(): USB ring plugged, not queuing request\n");
-                spin_unlock_irqrestore(&xhci->ring_lock, flags);
-                return -ENOBUFS;
-        }
-
-        /* Stick something in the shared communications ring. */
-       req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
-        req->operation       = USBIF_OP_IO;
-        req->port            = 0; /* We don't care what the port is. */
-        req->id              = (unsigned long) urb->hcpriv;
-        req->transfer_buffer = virt_to_mfn(urb->transfer_buffer) << PAGE_SHIFT;
-       req->devnum          = usb_pipedevice(urb->pipe);
-        req->direction       = usb_pipein(urb->pipe);
-       req->speed           = usb_pipeslow(urb->pipe);
-        req->pipe_type       = usb_pipetype(urb->pipe);
-        req->length          = urb->transfer_buffer_length;
-        req->transfer_flags  = urb->transfer_flags;
-       req->endpoint        = usb_pipeendpoint(urb->pipe);
-       req->speed           = usb_pipeslow(urb->pipe);
-       req->timeout         = urb->timeout * (1000 / HZ);
-
-        if ( usb_pipetype(urb->pipe) == 0 ) /* ISO */
-        {
-            int ret = xhci_construct_isoc(req, urb);
-            if ( ret != 0 )
-                return ret;
-        }
-
-       if(urb->setup_packet != NULL)
-                memcpy(req->setup, urb->setup_packet, 8);
-        else
-                memset(req->setup, 0, 8);
-        
-        usb_ring->req_prod_pvt++;
-        RING_PUSH_REQUESTS(usb_ring);
-
-        spin_unlock_irqrestore(&xhci->ring_lock, flags);
-
-       notify_via_evtchn(xhci->evtchn);
-
-        DPRINTK("Queued request for an URB.\n");
-        dump_urb(urb);
-
-        return -EINPROGRESS;
-}
-
-/**
- * xhci_queue_probe - queue a probe request for a particular port
- */
-static inline usbif_request_t *xhci_queue_probe(usbif_vdev_t port)
-{
-        usbif_request_t *req;
-        usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-#if DEBUG
-       printk(KERN_DEBUG
-               "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
-               "resp_cons = %d\n", usbif->req_prod,
-               virt_to_mfn(&usbif->req_prod),
-              usbif->resp_prod, xhci->usb_resp_cons);
-#endif
- 
-        /* This is always called from the timer interrupt. */
-        spin_lock(&xhci->ring_lock);
-       
-        if ( RING_PLUGGED(xhci) )
-        {
-                printk(KERN_WARNING
-                       "xhci_queue_probe(): ring full, not queuing request\n");
-                spin_unlock(&xhci->ring_lock);
-                return NULL;
-        }
-
-        /* Stick something in the shared communications ring. */
-        req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
-        memset(req, 0, sizeof(*req));
-
-        req->operation       = USBIF_OP_PROBE;
-        req->port            = port;
-
-        usb_ring->req_prod_pvt++;
-        RING_PUSH_REQUESTS(usb_ring);
-
-        spin_unlock(&xhci->ring_lock);
-
-       notify_via_evtchn(xhci->evtchn);
-
-        return req;
-}
-
-/**
- * xhci_port_reset - queue a reset request for a particular port
- */
-static int xhci_port_reset(usbif_vdev_t port)
-{
-        usbif_request_t *req;
-        usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-        /* Only ever happens from process context (hub thread). */
-        spin_lock_irq(&xhci->ring_lock);
-
-        if ( RING_PLUGGED(xhci) )
-        {
-                printk(KERN_WARNING
-                       "xhci_port_reset(): ring plugged, not queuing request\n");
-                spin_unlock_irq(&xhci->ring_lock);
-                return -ENOBUFS;
-        }
-
-        /* We only reset one port at a time, so we only need one variable per
-         * hub. */
-        xhci->awaiting_reset = 1;
-        
-        /* Stick something in the shared communications ring. */
-       req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
-        memset(req, 0, sizeof(*req));
-
-        req->operation       = USBIF_OP_RESET;
-        req->port            = port;
-        
-        usb_ring->req_prod_pvt++;
-       RING_PUSH_REQUESTS(usb_ring);
-
-        spin_unlock_irq(&xhci->ring_lock);
-
-       notify_via_evtchn(xhci->evtchn);
-
-        while ( xhci->awaiting_reset > 0 )
-        {
-                mdelay(1);
-                xhci_drain_ring();
-        }
-
-       xhci->rh.ports[port].pe = 1;
-       xhci->rh.ports[port].pe_chg = 1;
-
-        return xhci->awaiting_reset;
-}
-
-
-/******************************************************************************
- * RING RESPONSE HANDLING
- */
-
-static void receive_usb_reset(usbif_response_t *resp)
-{
-    xhci->awaiting_reset = resp->status;
-    rmb();
-    
-}
-
-static void receive_usb_probe(usbif_response_t *resp)
-{
-    spin_lock(&xhci->rh.port_state_lock);
-
-    if ( resp->status >= 0 )
-    {
-        if ( resp->status == 1 )
-        {
-            /* If theres a device there and there wasn't one before there must
-             * have been a connection status change. */
-            if( xhci->rh.ports[resp->data].cs == 0 )
-           {
-                xhci->rh.ports[resp->data].cs = 1;
-                xhci->rh.ports[resp->data].cs_chg = 1;
-           }
-        }
-        else if ( resp->status == 0 )
-        {
-            if(xhci->rh.ports[resp->data].cs == 1 )
-            {
-                xhci->rh.ports[resp->data].cs  = 0;
-                xhci->rh.ports[resp->data].cs_chg = 1;
-               xhci->rh.ports[resp->data].pe = 0;
-               /* According to USB Spec v2.0, 11.24.2.7.2.2, we don't need
-                * to set pe_chg since an error has not occurred. */
-            }
-        }
-        else
-            printk(KERN_WARNING "receive_usb_probe(): unexpected status %d "
-                   "for port %d\n", resp->status, resp->data);
-    }
-    else if ( resp->status < 0)
-        printk(KERN_WARNING "receive_usb_probe(): got error status %d\n",
-               resp->status);
-
-    spin_unlock(&xhci->rh.port_state_lock);
-}
-
-static void receive_usb_io(usbif_response_t *resp)
-{
-        struct urb_priv *urbp = (struct urb_priv *)resp->id;
-        struct urb *urb = urbp->urb;
-
-        urb->actual_length = resp->length;
-        urbp->in_progress = 0;
-
-        if( usb_pipetype(urb->pipe) == 0 ) /* ISO */
-        {
-                int i;
-              
-                /* Copy ISO schedule results back in. */
-                for ( i = 0; i < urb->number_of_packets; i++ )
-                {
-                        urb->iso_frame_desc[i].status
-                                = urbp->schedule[i].status;
-                        urb->iso_frame_desc[i].actual_length
-                                = urbp->schedule[i].length;
-                }
-                free_page((unsigned long)urbp->schedule);
-        }
-
-        /* Only set status if it's not been changed since submission.  It might
-         * have been changed if the URB has been unlinked asynchronously, for
-         * instance. */
-       if ( urb->status == -EINPROGRESS )
-                urbp->status = urb->status = resp->status;
-}
-
-/**
- * xhci_drain_ring - drain responses from the ring, calling handlers
- *
- * This may be called from interrupt context when an event is received from the
- * backend domain, or sometimes in process context whilst waiting for a port
- * reset or URB completion.
- */
-static void xhci_drain_ring(void)
-{
-       struct list_head *tmp, *head;
-       usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-       usbif_response_t *resp;
-        RING_IDX i, rp;
-
-        /* Walk the ring here to get responses, updating URBs to show what
-         * completed. */
-        
-        rp = usb_ring->sring->rsp_prod;
-        rmb(); /* Ensure we see queued requests up to 'rp'. */
-
-        /* Take items off the comms ring, taking care not to overflow. */
-        for ( i = usb_ring->rsp_cons; i != rp; i++ )
-        {
-            resp = RING_GET_RESPONSE(usb_ring, i);
-            
-            /* May need to deal with batching and with putting a ceiling on
-               the number dispatched for performance and anti-dos reasons */
-
-            xhci_show_resp(resp);
-
-            switch ( resp->operation )
-            {
-            case USBIF_OP_PROBE:
-                receive_usb_probe(resp);
-                break;
-                
-            case USBIF_OP_IO:
-                receive_usb_io(resp);
-                break;
-
-            case USBIF_OP_RESET:
-                receive_usb_reset(resp);
-                break;
-
-            default:
-                printk(KERN_WARNING
-                       "error: unknown USB io operation response [%d]\n",
-                       resp->operation);
-                break;
-            }
-        }
-
-        usb_ring->rsp_cons = i;
-
-       /* Walk the list of pending URB's to see which ones completed and do
-         * callbacks, etc. */
-       spin_lock(&xhci->urb_list_lock);
-       head = &xhci->urb_list;
-       tmp = head->next;
-       while (tmp != head) {
-               struct urb *urb = list_entry(tmp, struct urb, urb_list);
-
-               tmp = tmp->next;
-
-               /* Checks the status and does all of the magic necessary */
-               xhci_transfer_result(xhci, urb);
-       }
-       spin_unlock(&xhci->urb_list_lock);
-
-       xhci_finish_completion();
-}
-
-
-static void xhci_interrupt(int irq, void *__xhci, struct pt_regs *regs)
-{
-        xhci_drain_ring();
-}
-
-/******************************************************************************
- * HOST CONTROLLER FUNCTIONALITY
- */
-
-/**
- * no-op implementation of private device alloc / free routines
- */
-static int xhci_do_nothing_dev(struct usb_device *dev)
-{
-       return 0;
-}
-
-static inline void xhci_add_complete(struct urb *urb)
-{
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
-       unsigned long flags;
-
-       spin_lock_irqsave(&xhci->complete_list_lock, flags);
-       list_add_tail(&urbp->complete_list, &xhci->complete_list);
-       spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-}
-
-/* When this returns, the owner of the URB may free its
- * storage.
- *
- * We spin and wait for the URB to complete before returning.
- *
- * Call with urb->lock acquired.
- */
-static void xhci_delete_urb(struct urb *urb)
-{
-        struct urb_priv *urbp;
-
-       urbp = urb->hcpriv;
-
-        /* If there's no urb_priv structure for this URB then it can't have
-         * been submitted at all. */
-       if ( urbp == NULL )
-               return;
-
-       /* For now we just spin until the URB completes.  It shouldn't take too
-         * long and we don't expect to have to do this very often. */
-       while ( urb->status == -EINPROGRESS )
-        {
-            xhci_drain_ring();
-            mdelay(1);
-        }
-
-       /* Now we know that further transfers to the buffer won't
-        * occur, so we can safely return. */
-}
-
-static struct urb_priv *xhci_alloc_urb_priv(struct urb *urb)
-{
-       struct urb_priv *urbp;
-
-       urbp = kmem_cache_alloc(xhci_up_cachep, SLAB_ATOMIC);
-       if (!urbp) {
-               err("xhci_alloc_urb_priv: couldn't allocate memory for urb_priv\n");
-               return NULL;
-       }
-
-       memset((void *)urbp, 0, sizeof(*urbp));
-
-       urbp->inserttime = jiffies;
-       urbp->urb = urb;
-       urbp->dev = urb->dev;
-       
-       INIT_LIST_HEAD(&urbp->complete_list);
-
-       urb->hcpriv = urbp;
-
-       return urbp;
-}
-
-/*
- * MUST be called with urb->lock acquired
- */
-/* When is this called?  Do we need to stop the transfer (as we
- * currently do)? */
-static void xhci_destroy_urb_priv(struct urb *urb)
-{
-    struct urb_priv *urbp;
-    
-    urbp = (struct urb_priv *)urb->hcpriv;
-    if (!urbp)
-        return;
-
-    if (!list_empty(&urb->urb_list))
-        warn("xhci_destroy_urb_priv: urb %p still on xhci->urb_list", urb);
-    
-    if (!list_empty(&urbp->complete_list))
-        warn("xhci_destroy_urb_priv: urb %p still on xhci->complete_list", urb);
-    
-    kmem_cache_free(xhci_up_cachep, urb->hcpriv);
-
-    urb->hcpriv = NULL;
-}
-
-/**
- * Try to find URBs in progress on the same pipe to the same device.
- *
- * MUST be called with xhci->urb_list_lock acquired
- */
-static struct urb *xhci_find_urb_ep(struct xhci *xhci, struct urb *urb)
-{
-       struct list_head *tmp, *head;
-
-       /* We don't match Isoc transfers since they are special */
-       if (usb_pipeisoc(urb->pipe))
-               return NULL;
-
-       head = &xhci->urb_list;
-       tmp = head->next;
-       while (tmp != head) {
-               struct urb *u = list_entry(tmp, struct urb, urb_list);
-
-               tmp = tmp->next;
-
-               if (u->dev == urb->dev && u->pipe == urb->pipe &&
-                   u->status == -EINPROGRESS)
-                       return u;
-       }
-
-       return NULL;
-}
-
-static int xhci_submit_urb(struct urb *urb)
-{
-       int ret = -EINVAL;
-       unsigned long flags;
-       struct urb *eurb;
-       int bustime;
-
-        DPRINTK("URB submitted to XHCI driver.\n");
-        dump_urb(urb);
-
-       if (!urb)
-               return -EINVAL;
-
-       if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv) {
-               warn("xhci_submit_urb: urb %p belongs to disconnected device or bus?", urb);
-               return -ENODEV;
-       }
-
-        if ( urb->dev->devpath == NULL )
-                BUG();
-
-       usb_inc_dev_use(urb->dev);
-
-       spin_lock_irqsave(&xhci->urb_list_lock, flags);
-       spin_lock(&urb->lock);
-
-       if (urb->status == -EINPROGRESS || urb->status == -ECONNRESET ||
-           urb->status == -ECONNABORTED) {
-               dbg("xhci_submit_urb: urb not available to submit (status = %d)", urb->status);
-               /* Since we can have problems on the out path */
-               spin_unlock(&urb->lock);
-               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-               usb_dec_dev_use(urb->dev);
-
-               return ret;
-       }
-
-       INIT_LIST_HEAD(&urb->urb_list);
-       if (!xhci_alloc_urb_priv(urb)) {
-               ret = -ENOMEM;
-
-               goto out;
-       }
-
-        ( (struct urb_priv *)urb->hcpriv )->in_progress = 1;
-
-       eurb = xhci_find_urb_ep(xhci, urb);
-       if (eurb && !(urb->transfer_flags & USB_QUEUE_BULK)) {
-               ret = -ENXIO;
-
-               goto out;
-       }
-
-       /* Short circuit the virtual root hub */
-       if (urb->dev == xhci->rh.dev) {
-               ret = rh_submit_urb(urb);
-
-               goto out;
-       }
-
-       switch (usb_pipetype(urb->pipe)) {
-       case PIPE_CONTROL:
-       case PIPE_BULK:
-               ret = xhci_queue_req(urb);
-               break;
-
-       case PIPE_INTERRUPT:
-               if (urb->bandwidth == 0) {      /* not yet checked/allocated */
-                       bustime = usb_check_bandwidth(urb->dev, urb);
-                       if (bustime < 0)
-                               ret = bustime;
-                       else {
-                               ret = xhci_queue_req(urb);
-                               if (ret == -EINPROGRESS)
-                                       usb_claim_bandwidth(urb->dev, urb,
-                                                            bustime, 0);
-                       }
-               } else          /* bandwidth is already set */
-                       ret = xhci_queue_req(urb);
-               break;
-
-       case PIPE_ISOCHRONOUS:
-               if (urb->bandwidth == 0) {      /* not yet checked/allocated */
-                       if (urb->number_of_packets <= 0) {
-                               ret = -EINVAL;
-                               break;
-                       }
-                       bustime = usb_check_bandwidth(urb->dev, urb);
-                       if (bustime < 0) {
-                               ret = bustime;
-                               break;
-                       }
-
-                       ret = xhci_queue_req(urb);
-                       if (ret == -EINPROGRESS)
-                               usb_claim_bandwidth(urb->dev, urb, bustime, 1);
-               } else          /* bandwidth is already set */
-                       ret = xhci_queue_req(urb);
-               break;
-       }
-out:
-       urb->status = ret;
-
-       if (ret == -EINPROGRESS) {
-               /* We use _tail to make find_urb_ep more efficient */
-               list_add_tail(&urb->urb_list, &xhci->urb_list);
-
-               spin_unlock(&urb->lock);
-               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
-               return 0;
-       }
-
-       xhci_delete_urb(urb);
-
-       spin_unlock(&urb->lock);
-       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
-       /* Only call completion if it was successful */
-       if (!ret)
-               xhci_call_completion(urb);
-
-       return ret;
-}
-
-/*
- * Return the result of a transfer
- *
- * MUST be called with urb_list_lock acquired
- */
-static void xhci_transfer_result(struct xhci *xhci, struct urb *urb)
-{
-       int ret = 0;
-       unsigned long flags;
-       struct urb_priv *urbp;
-
-       /* The root hub is special */
-       if (urb->dev == xhci->rh.dev)
-               return;
-
-       spin_lock_irqsave(&urb->lock, flags);
-
-       urbp = (struct urb_priv *)urb->hcpriv;
-
-        if ( ( (struct urb_priv *)urb->hcpriv )->in_progress )
-                ret = -EINPROGRESS;
-
-        if (urb->actual_length < urb->transfer_buffer_length) {
-                if (urb->transfer_flags & USB_DISABLE_SPD) {
-                        ret = -EREMOTEIO;
-                }
-        }
-
-       if (urb->status == -EPIPE)
-        {
-                ret = urb->status;
-               /* endpoint has stalled - mark it halted */
-               usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe),
-                                  usb_pipeout(urb->pipe));
-        }
-
-       if ((debug == 1 && ret != 0 && ret != -EPIPE) ||
-            (ret != 0 && debug > 1)) {
-               /* Some debugging code */
-               dbg("xhci_result_interrupt/bulk() failed with status %x",
-                       status);
-       }
-
-       if (ret == -EINPROGRESS)
-               goto out;
-
-       switch (usb_pipetype(urb->pipe)) {
-       case PIPE_CONTROL:
-       case PIPE_BULK:
-       case PIPE_ISOCHRONOUS:
-               /* Release bandwidth for Interrupt or Isoc. transfers */
-               /* Spinlock needed ? */
-               if (urb->bandwidth)
-                       usb_release_bandwidth(urb->dev, urb, 1);
-               xhci_delete_urb(urb);
-               break;
-       case PIPE_INTERRUPT:
-               /* Interrupts are an exception */
-               if (urb->interval)
-                       goto out_complete;
-
-               /* Release bandwidth for Interrupt or Isoc. transfers */
-               /* Spinlock needed ? */
-               if (urb->bandwidth)
-                       usb_release_bandwidth(urb->dev, urb, 0);
-               xhci_delete_urb(urb);
-               break;
-       default:
-               info("xhci_transfer_result: unknown pipe type %d for urb %p\n",
-                     usb_pipetype(urb->pipe), urb);
-       }
-
-       /* Remove it from xhci->urb_list */
-       list_del_init(&urb->urb_list);
-
-out_complete:
-       xhci_add_complete(urb);
-
-out:
-       spin_unlock_irqrestore(&urb->lock, flags);
-}
-
-static int xhci_unlink_urb(struct urb *urb)
-{
-       unsigned long flags;
-       struct urb_priv *urbp = urb->hcpriv;
-
-       if (!urb)
-               return -EINVAL;
-
-       if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv)
-               return -ENODEV;
-
-       spin_lock_irqsave(&xhci->urb_list_lock, flags);
-       spin_lock(&urb->lock);
-
-       /* Release bandwidth for Interrupt or Isoc. transfers */
-       /* Spinlock needed ? */
-       if (urb->bandwidth) {
-               switch (usb_pipetype(urb->pipe)) {
-               case PIPE_INTERRUPT:
-                       usb_release_bandwidth(urb->dev, urb, 0);
-                       break;
-               case PIPE_ISOCHRONOUS:
-                       usb_release_bandwidth(urb->dev, urb, 1);
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       if (urb->status != -EINPROGRESS) {
-               spin_unlock(&urb->lock);
-               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-               return 0;
-       }
-
-       list_del_init(&urb->urb_list);
-
-       /* Short circuit the virtual root hub */
-       if (urb->dev == xhci->rh.dev) {
-               rh_unlink_urb(urb);
-
-               spin_unlock(&urb->lock);
-               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
-               xhci_call_completion(urb);
-       } else {
-               if (urb->transfer_flags & USB_ASYNC_UNLINK) {
-                        /* We currently don't currently attempt to cancel URBs
-                         * that have been queued in the ring.  We handle async
-                         * unlinked URBs when they complete. */
-                       urbp->status = urb->status = -ECONNABORTED;
-                       spin_unlock(&urb->lock);
-                       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-               } else {
-                       urb->status = -ENOENT;
-
-                       spin_unlock(&urb->lock);
-                       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
-                       if (in_interrupt()) {   /* wait at least 1 frame */
-                               static int errorcount = 10;
-
-                               if (errorcount--)
-                                       dbg("xhci_unlink_urb called from interrupt for urb %p", urb);
-                               udelay(1000);
-                       } else
-                               schedule_timeout(1+1*HZ/1000); 
-
-                        xhci_delete_urb(urb);
-
-                       xhci_call_completion(urb);
-               }
-       }
-
-       return 0;
-}
-
-static void xhci_call_completion(struct urb *urb)
-{
-       struct urb_priv *urbp;
-       struct usb_device *dev = urb->dev;
-       int is_ring = 0, killed, resubmit_interrupt, status;
-       struct urb *nurb;
-       unsigned long flags;
-
-       spin_lock_irqsave(&urb->lock, flags);
-
-       urbp = (struct urb_priv *)urb->hcpriv;
-       if (!urbp || !urb->dev) {
-               spin_unlock_irqrestore(&urb->lock, flags);
-               return;
-       }
-
-       killed = (urb->status == -ENOENT || urb->status == -ECONNABORTED ||
-                       urb->status == -ECONNRESET);
-       resubmit_interrupt = (usb_pipetype(urb->pipe) == PIPE_INTERRUPT &&
-                       urb->interval);
-
-       nurb = urb->next;
-       if (nurb && !killed) {
-               int count = 0;
-
-               while (nurb && nurb != urb && count < MAX_URB_LOOP) {
-                       if (nurb->status == -ENOENT ||
-                           nurb->status == -ECONNABORTED ||
-                           nurb->status == -ECONNRESET) {
-                               killed = 1;
-                               break;
-                       }
-
-                       nurb = nurb->next;
-                       count++;
-               }
-
-               if (count == MAX_URB_LOOP)
-                       err("xhci_call_completion: too many linked URB's, loop? (first loop)");
-
-               /* Check to see if chain is a ring */
-               is_ring = (nurb == urb);
-       }
-
-       status = urbp->status;
-       if (!resubmit_interrupt || killed)
-               /* We don't need urb_priv anymore */
-               xhci_destroy_urb_priv(urb);
-
-       if (!killed)
-               urb->status = status;
-
-       spin_unlock_irqrestore(&urb->lock, flags);
-
-       if (urb->complete)
-               urb->complete(urb);
-
-       if (resubmit_interrupt)
-               /* Recheck the status. The completion handler may have */
-               /*  unlinked the resubmitting interrupt URB */
-               killed = (urb->status == -ENOENT ||
-                         urb->status == -ECONNABORTED ||
-                         urb->status == -ECONNRESET);
-
-       if (resubmit_interrupt && !killed) {
-                if ( urb->dev != xhci->rh.dev )
-                        xhci_queue_req(urb); /* XXX What if this fails? */
-                /* Don't need to resubmit URBs for the virtual root dev. */
-       } else {
-               if (is_ring && !killed) {
-                       urb->dev = dev;
-                       xhci_submit_urb(urb);
-               } else {
-                       /* We decrement the usage count after we're done */
-                       /*  with everything */
-                       usb_dec_dev_use(dev);
-               }
-       }
-}
-
-static void xhci_finish_completion(void)
-{
-       struct list_head *tmp, *head;
-       unsigned long flags;
-
-       spin_lock_irqsave(&xhci->complete_list_lock, flags);
-       head = &xhci->complete_list;
-       tmp = head->next;
-       while (tmp != head) {
-               struct urb_priv *urbp = list_entry(tmp, struct urb_priv,
-                                                   complete_list);
-               struct urb *urb = urbp->urb;
-
-               list_del_init(&urbp->complete_list);
-               spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-
-               xhci_call_completion(urb);
-
-               spin_lock_irqsave(&xhci->complete_list_lock, flags);
-               head = &xhci->complete_list;
-               tmp = head->next;
-       }
-       spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-}
-
-static struct usb_operations xhci_device_operations = {
-       .allocate = xhci_do_nothing_dev,
-       .deallocate = xhci_do_nothing_dev,
-        /* It doesn't look like any drivers actually care what the frame number
-        * is at the moment!  If necessary, we could approximate the current
-        * frame nubmer by passing it from the backend in response messages. */
-       .get_frame_number = NULL,
-       .submit_urb = xhci_submit_urb,
-       .unlink_urb = xhci_unlink_urb
-};
-
-/******************************************************************************
- * VIRTUAL ROOT HUB EMULATION
- */
-
-static __u8 root_hub_dev_des[] =
-{
-       0x12,                   /*  __u8  bLength; */
-       0x01,                   /*  __u8  bDescriptorType; Device */
-       0x00,                   /*  __u16 bcdUSB; v1.0 */
-       0x01,
-       0x09,                   /*  __u8  bDeviceClass; HUB_CLASSCODE */
-       0x00,                   /*  __u8  bDeviceSubClass; */
-       0x00,                   /*  __u8  bDeviceProtocol; */
-       0x08,                   /*  __u8  bMaxPacketSize0; 8 Bytes */
-       0x00,                   /*  __u16 idVendor; */
-       0x00,
-       0x00,                   /*  __u16 idProduct; */
-       0x00,
-       0x00,                   /*  __u16 bcdDevice; */
-       0x00,
-       0x00,                   /*  __u8  iManufacturer; */
-       0x02,                   /*  __u8  iProduct; */
-       0x01,                   /*  __u8  iSerialNumber; */
-       0x01                    /*  __u8  bNumConfigurations; */
-};
-
-
-/* Configuration descriptor */
-static __u8 root_hub_config_des[] =
-{
-       0x09,                   /*  __u8  bLength; */
-       0x02,                   /*  __u8  bDescriptorType; Configuration */
-       0x19,                   /*  __u16 wTotalLength; */
-       0x00,
-       0x01,                   /*  __u8  bNumInterfaces; */
-       0x01,                   /*  __u8  bConfigurationValue; */
-       0x00,                   /*  __u8  iConfiguration; */
-       0x40,                   /*  __u8  bmAttributes;
-                                       Bit 7: Bus-powered, 6: Self-powered,
-                                       Bit 5 Remote-wakeup, 4..0: resvd */
-       0x00,                   /*  __u8  MaxPower; */
-
-       /* interface */
-       0x09,                   /*  __u8  if_bLength; */
-       0x04,                   /*  __u8  if_bDescriptorType; Interface */
-       0x00,                   /*  __u8  if_bInterfaceNumber; */
-       0x00,                   /*  __u8  if_bAlternateSetting; */
-       0x01,                   /*  __u8  if_bNumEndpoints; */
-       0x09,                   /*  __u8  if_bInterfaceClass; HUB_CLASSCODE */
-       0x00,                   /*  __u8  if_bInterfaceSubClass; */
-       0x00,                   /*  __u8  if_bInterfaceProtocol; */
-       0x00,                   /*  __u8  if_iInterface; */
-
-       /* endpoint */
-       0x07,                   /*  __u8  ep_bLength; */
-       0x05,                   /*  __u8  ep_bDescriptorType; Endpoint */
-       0x81,                   /*  __u8  ep_bEndpointAddress; IN Endpoint 1 */
-       0x03,                   /*  __u8  ep_bmAttributes; Interrupt */
-       0x08,                   /*  __u16 ep_wMaxPacketSize; 8 Bytes */
-       0x00,
-       0xff                    /*  __u8  ep_bInterval; 255 ms */
-};
-
-static __u8 root_hub_hub_des[] =
-{
-       0x09,                   /*  __u8  bLength; */
-       0x29,                   /*  __u8  bDescriptorType; Hub-descriptor */
-       0x02,                   /*  __u8  bNbrPorts; */
-       0x00,                   /* __u16  wHubCharacteristics; */
-       0x00,
-       0x01,                   /*  __u8  bPwrOn2pwrGood; 2ms */
-       0x00,                   /*  __u8  bHubContrCurrent; 0 mA */
-       0x00,                   /*  __u8  DeviceRemovable; *** 7 Ports max *** */
-       0xff                    /*  __u8  PortPwrCtrlMask; *** 7 ports max *** */
-};
-
-/* prepare Interrupt pipe transaction data; HUB INTERRUPT ENDPOINT */
-static int rh_send_irq(struct urb *urb)
-{
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
-        xhci_port_t *ports = xhci->rh.ports;
-       unsigned long flags;
-       int i, len = 1;
-       __u16 data = 0;
-
-       spin_lock_irqsave(&urb->lock, flags);
-       for (i = 0; i < xhci->rh.numports; i++) {
-                /* Set a bit if anything at all has changed on the port, as per
-                * USB spec 11.12 */
-               data |= (ports[i].cs_chg || ports[i].pe_chg )
-                        ? (1 << (i + 1))
-                        : 0;
-
-               len = (i + 1) / 8 + 1;
-       }
-
-       *(__u16 *) urb->transfer_buffer = cpu_to_le16(data);
-       urb->actual_length = len;
-       urbp->status = 0;
-
-       spin_unlock_irqrestore(&urb->lock, flags);
-
-       if ((data > 0) && (xhci->rh.send != 0)) {
-               dbg("root-hub INT complete: data: %x", data);
-               xhci_call_completion(urb);
-       }
-
-       return 0;
-}
-
-/* Virtual Root Hub INTs are polled by this timer every "interval" ms */
-static int rh_init_int_timer(struct urb *urb);
-
-static void rh_int_timer_do(unsigned long ptr)
-{
-       struct urb *urb = (struct urb *)ptr;
-       struct list_head list, *tmp, *head;
-       unsigned long flags;
-       int i;
-
-       for ( i = 0; i < xhci->rh.numports; i++)
-                xhci_queue_probe(i);
-
-       if (xhci->rh.send)
-               rh_send_irq(urb);
-
-       INIT_LIST_HEAD(&list);
-
-       spin_lock_irqsave(&xhci->urb_list_lock, flags);
-       head = &xhci->urb_list;
-       tmp = head->next;
-       while (tmp != head) {
-               struct urb *u = list_entry(tmp, struct urb, urb_list);
-               struct urb_priv *up = (struct urb_priv *)u->hcpriv;
-
-               tmp = tmp->next;
-
-               spin_lock(&u->lock);
-
-               /* Check if the URB timed out */
-               if (u->timeout && time_after_eq(jiffies,
-                                                up->inserttime + u->timeout)) {
-                       list_del(&u->urb_list);
-                       list_add_tail(&u->urb_list, &list);
-               }
-
-               spin_unlock(&u->lock);
-       }
-       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
-       head = &list;
-       tmp = head->next;
-       while (tmp != head) {
-               struct urb *u = list_entry(tmp, struct urb, urb_list);
-
-               tmp = tmp->next;
-
-               u->transfer_flags |= USB_ASYNC_UNLINK | USB_TIMEOUT_KILLED;
-               xhci_unlink_urb(u);
-       }
-
-       rh_init_int_timer(urb);
-}
-
-/* Root Hub INTs are polled by this timer */
-static int rh_init_int_timer(struct urb *urb)
-{
-       xhci->rh.interval = urb->interval;
-       init_timer(&xhci->rh.rh_int_timer);
-       xhci->rh.rh_int_timer.function = rh_int_timer_do;
-       xhci->rh.rh_int_timer.data = (unsigned long)urb;
-       xhci->rh.rh_int_timer.expires = jiffies
-                + (HZ * (urb->interval < 30 ? 30 : urb->interval)) / 1000;
-       add_timer(&xhci->rh.rh_int_timer);
-
-       return 0;
-}
-
-#define OK(x)                  len = (x); break
-
-/* Root Hub Control Pipe */
-static int rh_submit_urb(struct urb *urb)
-{
-       unsigned int pipe = urb->pipe;
-       struct usb_ctrlrequest *cmd =
-                (struct usb_ctrlrequest *)urb->setup_packet;
-       void *data = urb->transfer_buffer;
-       int leni = urb->transfer_buffer_length;
-       int len = 0;
-       xhci_port_t *status;
-       int stat = 0;
-       int i;
-       int retstatus;
-        unsigned long flags;
-        
-       __u16 cstatus;
-       __u16 bmRType_bReq;
-       __u16 wValue;
-       __u16 wIndex;
-       __u16 wLength;
-
-       if (usb_pipetype(pipe) == PIPE_INTERRUPT) {
-               xhci->rh.urb = urb;
-               xhci->rh.send = 1;
-               xhci->rh.interval = urb->interval;
-               rh_init_int_timer(urb);
-
-               return -EINPROGRESS;
-       }
-
-       bmRType_bReq = cmd->bRequestType | cmd->bRequest << 8;
-       wValue = le16_to_cpu(cmd->wValue);
-       wIndex = le16_to_cpu(cmd->wIndex);
-       wLength = le16_to_cpu(cmd->wLength);
-
-       for (i = 0; i < 8; i++)
-               xhci->rh.c_p_r[i] = 0;
-
-        status = &xhci->rh.ports[wIndex - 1];
-
-        spin_lock_irqsave(&xhci->rh.port_state_lock, flags);
-
-       switch (bmRType_bReq) {
-               /* Request Destination:
-                  without flags: Device,
-                  RH_INTERFACE: interface,
-                  RH_ENDPOINT: endpoint,
-                  RH_CLASS means HUB here,
-                  RH_OTHER | RH_CLASS  almost ever means HUB_PORT here
-               */
-
-       case RH_GET_STATUS:
-               *(__u16 *)data = cpu_to_le16(1);
-               OK(2);
-       case RH_GET_STATUS | RH_INTERFACE:
-               *(__u16 *)data = cpu_to_le16(0);
-               OK(2);
-       case RH_GET_STATUS | RH_ENDPOINT:
-               *(__u16 *)data = cpu_to_le16(0);
-               OK(2);
-       case RH_GET_STATUS | RH_CLASS:
-               *(__u32 *)data = cpu_to_le32(0);
-               OK(4);          /* hub power */
-       case RH_GET_STATUS | RH_OTHER | RH_CLASS:
-               cstatus = (status->cs_chg) |
-                       (status->pe_chg << 1) |
-                       (xhci->rh.c_p_r[wIndex - 1] << 4);
-               retstatus = (status->cs) |
-                       (status->pe << 1) |
-                       (status->susp << 2) |
-                       (1 << 8) |      /* power on */
-                       (status->lsda << 9);
-               *(__u16 *)data = cpu_to_le16(retstatus);
-               *(__u16 *)(data + 2) = cpu_to_le16(cstatus);
-               OK(4);
-       case RH_CLEAR_FEATURE | RH_ENDPOINT:
-               switch (wValue) {
-               case RH_ENDPOINT_STALL:
-                       OK(0);
-               }
-               break;
-       case RH_CLEAR_FEATURE | RH_CLASS:
-               switch (wValue) {
-               case RH_C_HUB_OVER_CURRENT:
-                       OK(0);  /* hub power over current */
-               }
-               break;
-       case RH_CLEAR_FEATURE | RH_OTHER | RH_CLASS:
-               switch (wValue) {
-               case RH_PORT_ENABLE:
-                        status->pe     = 0;
-                       OK(0);
-               case RH_PORT_SUSPEND:
-                        status->susp   = 0;
-                       OK(0);
-               case RH_PORT_POWER:
-                       OK(0);  /* port power */
-               case RH_C_PORT_CONNECTION:
-                        status->cs_chg = 0;
-                       OK(0);
-               case RH_C_PORT_ENABLE:
-                        status->pe_chg = 0;
-                       OK(0);
-               case RH_C_PORT_SUSPEND:
-                       /*** WR_RH_PORTSTAT(RH_PS_PSSC); */
-                       OK(0);
-               case RH_C_PORT_OVER_CURRENT:
-                       OK(0);  /* port power over current */
-               case RH_C_PORT_RESET:
-                       xhci->rh.c_p_r[wIndex - 1] = 0;
-                       OK(0);
-               }
-               break;
-       case RH_SET_FEATURE | RH_OTHER | RH_CLASS:
-               switch (wValue) {
-               case RH_PORT_SUSPEND:
-                        status->susp = 1;      
-                       OK(0);
-               case RH_PORT_RESET:
-                {
-                        int ret;
-                        xhci->rh.c_p_r[wIndex - 1] = 1;
-                        status->pr = 0;
-                        status->pe = 1;
-                        ret = xhci_port_reset(wIndex - 1);
-                        /* XXX MAW: should probably cancel queued transfers during reset... *\/ */
-                        if ( ret == 0 ) { OK(0); }
-                        else { return ret; }
-                }
-                break;
-               case RH_PORT_POWER:
-                       OK(0); /* port power ** */
-               case RH_PORT_ENABLE:
-                        status->pe = 1;
-                       OK(0);
-               }
-               break;
-       case RH_SET_ADDRESS:
-               xhci->rh.devnum = wValue;
-               OK(0);
-       case RH_GET_DESCRIPTOR:
-               switch ((wValue & 0xff00) >> 8) {
-               case 0x01:      /* device descriptor */
-                       len = min_t(unsigned int, leni,
-                                 min_t(unsigned int,
-                                     sizeof(root_hub_dev_des), wLength));
-                       memcpy(data, root_hub_dev_des, len);
-                       OK(len);
-               case 0x02:      /* configuration descriptor */
-                       len = min_t(unsigned int, leni,
-                                 min_t(unsigned int,
-                                     sizeof(root_hub_config_des), wLength));
-                       memcpy (data, root_hub_config_des, len);
-                       OK(len);
-               case 0x03:      /* string descriptors */
-                       len = usb_root_hub_string (wValue & 0xff,
-                               0, "XHCI-alt",
-                               data, wLength);
-                       if (len > 0) {
-                               OK(min_t(int, leni, len));
-                       } else 
-                               stat = -EPIPE;
-               }
-               break;
-       case RH_GET_DESCRIPTOR | RH_CLASS:
-               root_hub_hub_des[2] = xhci->rh.numports;
-               len = min_t(unsigned int, leni,
-                         min_t(unsigned int, sizeof(root_hub_hub_des), wLength));
-               memcpy(data, root_hub_hub_des, len);
-               OK(len);
-       case RH_GET_CONFIGURATION:
-               *(__u8 *)data = 0x01;
-               OK(1);
-       case RH_SET_CONFIGURATION:
-               OK(0);
-       case RH_GET_INTERFACE | RH_INTERFACE:
-               *(__u8 *)data = 0x00;
-               OK(1);
-       case RH_SET_INTERFACE | RH_INTERFACE:
-               OK(0);
-       default:
-               stat = -EPIPE;
-       }
-
-        spin_unlock_irqrestore(&xhci->rh.port_state_lock, flags);
-
-       urb->actual_length = len;
-
-       return stat;
-}
-
-/*
- * MUST be called with urb->lock acquired
- */
-static int rh_unlink_urb(struct urb *urb)
-{
-       if (xhci->rh.urb == urb) {
-               urb->status = -ENOENT;
-               xhci->rh.send = 0;
-               xhci->rh.urb = NULL;
-               del_timer(&xhci->rh.rh_int_timer);
-       }
-       return 0;
-}
-
-/******************************************************************************
- * CONTROL PLANE FUNCTIONALITY
- */
-
-/**
- * alloc_xhci - initialise a new virtual root hub for a new USB device channel
- */
-static int alloc_xhci(void)
-{
-       int retval;
-       struct usb_bus *bus;
-
-       retval = -EBUSY;
-
-       xhci = kmalloc(sizeof(*xhci), GFP_KERNEL);
-       if (!xhci) {
-               err("couldn't allocate xhci structure");
-               retval = -ENOMEM;
-               goto err_alloc_xhci;
-       }
-
-       xhci->state = USBIF_STATE_CLOSED;
-
-       spin_lock_init(&xhci->urb_list_lock);
-       INIT_LIST_HEAD(&xhci->urb_list);
-
-       spin_lock_init(&xhci->complete_list_lock);
-       INIT_LIST_HEAD(&xhci->complete_list);
-
-       spin_lock_init(&xhci->frame_list_lock);
-
-       bus = usb_alloc_bus(&xhci_device_operations);
-
-       if (!bus) {
-               err("unable to allocate bus");
-               goto err_alloc_bus;
-       }
-
-       xhci->bus = bus;
-       bus->bus_name = "XHCI";
-       bus->hcpriv = xhci;
-
-       usb_register_bus(xhci->bus);
-
-       /* Initialize the root hub */
-
-       xhci->rh.numports = 0;
-
-       xhci->bus->root_hub = xhci->rh.dev = usb_alloc_dev(NULL, xhci->bus);
-       if (!xhci->rh.dev) {
-               err("unable to allocate root hub");
-               goto err_alloc_root_hub;
-       }
-
-       xhci->state = 0;
-
-       return 0;
-
-/*
- * error exits:
- */
-err_alloc_root_hub:
-        usb_deregister_bus(xhci->bus);
-       usb_free_bus(xhci->bus);
-       xhci->bus = NULL;
-
-err_alloc_bus:
-       kfree(xhci);
-
-err_alloc_xhci:
-       return retval;
-}
-
-/**
- * usbif_status_change - deal with an incoming USB_INTERFACE_STATUS_ message
- */
-static void usbif_status_change(usbif_fe_interface_status_changed_t *status)
-{
-    ctrl_msg_t                   cmsg;
-    usbif_fe_interface_connect_t up;
-    long rc;
-    usbif_sring_t *sring;
-
-    switch ( status->status )
-    {
-    case USBIF_INTERFACE_STATUS_DESTROYED:
-        printk(KERN_WARNING "Unexpected usbif-DESTROYED message in state %d\n",
-               xhci->state);
-        break;
-
-    case USBIF_INTERFACE_STATUS_DISCONNECTED:
-        if ( xhci->state != USBIF_STATE_CLOSED )
-        {
-            printk(KERN_WARNING "Unexpected usbif-DISCONNECTED message"
-                   " in state %d\n", xhci->state);
-            break;
-            /* Not bothering to do recovery here for now.  Keep things
-             * simple. */
-
-            spin_lock_irq(&xhci->ring_lock);
-            
-            /* Clean up resources. */
-            free_page((unsigned long)xhci->usb_ring.sring);
-            unbind_evtchn_from_irqhandler(xhci->evtchn, xhci);
-
-            /* Plug the ring. */
-            xhci->recovery = 1;
-            wmb();
-            
-            spin_unlock_irq(&xhci->ring_lock);
-        }
-
-        /* Move from CLOSED to DISCONNECTED state. */
-        sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL);
-        SHARED_RING_INIT(sring);
-        FRONT_RING_INIT(&xhci->usb_ring, sring, PAGE_SIZE);
-        xhci->state  = USBIF_STATE_DISCONNECTED;
-
-        /* Construct an interface-CONNECT message for the domain controller. */
-        cmsg.type      = CMSG_USBIF_FE;
-        cmsg.subtype   = CMSG_USBIF_FE_INTERFACE_CONNECT;
-        cmsg.length    = sizeof(usbif_fe_interface_connect_t);
-        up.shmem_frame = virt_to_mfn(sring);
-        memcpy(cmsg.msg, &up, sizeof(up));
-        
-        /* Tell the controller to bring up the interface. */
-        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-        break;
-
-    case USBIF_INTERFACE_STATUS_CONNECTED:
-        if ( xhci->state == USBIF_STATE_CLOSED )
-        {
-            printk(KERN_WARNING "Unexpected usbif-CONNECTED message"
-                   " in state %d\n", xhci->state);
-            break;
-        }
-
-        xhci->evtchn = status->evtchn;
-       xhci->bandwidth = status->bandwidth;
-       xhci->rh.numports = status->num_ports;
-
-        xhci->rh.ports = kmalloc (sizeof(xhci_port_t) * xhci->rh.numports, GFP_KERNEL);
-       
-       if ( xhci->rh.ports == NULL )
-            goto alloc_ports_nomem;
-       
-        memset(xhci->rh.ports, 0, sizeof(xhci_port_t) * xhci->rh.numports);
-
-       usb_connect(xhci->rh.dev);
-
-       if (usb_new_device(xhci->rh.dev) != 0) {
-               err("unable to start root hub");
-       }
-
-       /* Allocate the appropriate USB bandwidth here...  Need to
-         * somehow know what the total available is thought to be so we
-         * can calculate the reservation correctly. */
-       usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb,
-                           1000 - xhci->bandwidth, 0);
-
-        if ( (rc = bind_evtchn_to_irqhandler(xhci->evtchn, xhci_interrupt, 
-                               SA_SAMPLE_RANDOM, "usbif", xhci)) )
-                printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc);
-
-       DPRINTK(KERN_INFO __FILE__
-                ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d\n",
-                xhci->usb_ring.sring, virt_to_mfn(xhci->usbif),
-                xhci->evtchn);
-
-        xhci->state = USBIF_STATE_CONNECTED;
-
-        break;
-
-    default:
-        printk(KERN_WARNING "Status change to unknown value %d\n", 
-               status->status);
-        break;
-    }
-
-    return;
-
- alloc_ports_nomem:
-    printk(KERN_WARNING "Failed to allocate port memory, XHCI failed to connect.\n");
-    return;
-}
-
-/**
- * usbif_ctrlif_rx - demux control messages by subtype
- */
-static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    switch ( msg->subtype )
-    {
-    case CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED:
-        usbif_status_change((usbif_fe_interface_status_changed_t *)
-                            &msg->msg[0]);
-        break;
-
-        /* New interface...? */
-    default:
-        msg->length = 0;
-        break;
-    }
-
-    ctrl_if_send_response(msg);
-}
-
-static void send_driver_up(void)
-{
-        control_msg_t cmsg;
-        usbif_fe_interface_status_changed_t st;
-
-        /* Send a driver-UP notification to the domain controller. */
-        cmsg.type      = CMSG_USBIF_FE;
-        cmsg.subtype   = CMSG_USBIF_FE_DRIVER_STATUS_CHANGED;
-        cmsg.length    = sizeof(usbif_fe_driver_status_changed_t);
-        st.status      = USBIF_DRIVER_STATUS_UP;
-        memcpy(cmsg.msg, &st, sizeof(st));
-        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-void usbif_resume(void)
-{
-        int i;
-        
-        /* Fake disconnection on all virtual USB ports (suspending / migrating
-         * will destroy hard state associated will the USB devices anyhow). */
-        /* No need to lock here. */
-        for ( i = 0; i < xhci->rh.numports; i++ )
-        {
-                xhci->rh.ports[i].cs = 0;
-                xhci->rh.ports[i].cs_chg = 1;
-               xhci->rh.ports[i].pe = 0;
-        }
-        
-        send_driver_up();
-}
-
-static int __init xhci_hcd_init(void)
-{
-       int retval = -ENOMEM, i;
-
-       if ( (xen_start_info->flags & SIF_INITDOMAIN) ||
-            (xen_start_info->flags & SIF_USB_BE_DOMAIN) )
-                return 0;
-
-       info(DRIVER_DESC " " DRIVER_VERSION);
-
-       if (debug) {
-               errbuf = kmalloc(ERRBUF_LEN, GFP_KERNEL);
-               if (!errbuf)
-                       goto errbuf_failed;
-       }
-
-       xhci_up_cachep = kmem_cache_create("xhci_urb_priv",
-               sizeof(struct urb_priv), 0, 0, NULL, NULL);
-       if (!xhci_up_cachep)
-               goto up_failed;
-
-        /* Let the domain controller know we're here.  For now we wait until
-         * connection, as for the block and net drivers.  This is only strictly
-         * necessary if we're going to boot off a USB device. */
-        printk(KERN_INFO "Initialising Xen virtual USB hub\n");
-    
-        (void)ctrl_if_register_receiver(CMSG_USBIF_FE, usbif_ctrlif_rx,
-                                        CALLBACK_IN_BLOCKING_CONTEXT);
-        
-       alloc_xhci();
-
-        send_driver_up();
-
-        /*
-         * We should read 'nr_interfaces' from response message and wait
-         * for notifications before proceeding. For now we assume that we
-         * will be notified of exactly one interface.
-         */
-        for ( i=0; (xhci->state != USBIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
-        {
-            set_current_state(TASK_INTERRUPTIBLE);
-            schedule_timeout(1);
-        }
-        
-        if (xhci->state != USBIF_STATE_CONNECTED)
-            printk(KERN_WARNING "Timeout connecting USB frontend driver!\n");
-       
-       return 0;
-
-up_failed:
-       if (errbuf)
-               kfree(errbuf);
-
-errbuf_failed:
-       return retval;
-}
-
-module_init(xhci_hcd_init);
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
-
diff -r 97dbd9524a7e -r 06d84bf87159 linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/xhci.h  Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,182 +0,0 @@
-/******************************************************************************
- * xhci.h
- *
- * Private definitions for the Xen Virtual USB Controller.  Based on
- * drivers/usb/host/uhci.h from Linux.  Copyright for the imported content is
- * retained by the original authors.
- *
- * Modifications are:
- * Copyright (C) 2004 Intel Research Cambridge
- * Copyright (C) 2004, 2005 Mark Williamson
- */
-
-#ifndef __LINUX_XHCI_H
-#define __LINUX_XHCI_H
-
-#include <linux/list.h>
-#include <linux/usb.h>
-#include <asm-xen/xen-public/io/usbif.h>
-#include <linux/spinlock.h>
-
-/* xhci_port_t - current known state of a virtual hub ports */
-typedef struct {
-        unsigned int cs     :1; /* Connection status.         */
-        unsigned int cs_chg :1; /* Connection status change.  */
-        unsigned int pe     :1; /* Port enable.               */
-        unsigned int pe_chg :1; /* Port enable change.        */
-        unsigned int susp   :1; /* Suspended.                 */
-        unsigned int lsda   :1; /* Low speed device attached. */
-        unsigned int pr     :1; /* Port reset.                */
-} xhci_port_t;
-
-/* struct virt_root_hub - state related to the virtual root hub */
-struct virt_root_hub {
-       struct usb_device *dev;
-       int devnum;             /* Address of Root Hub endpoint */
-       struct urb *urb;
-       void *int_addr;
-       int send;
-       int interval;
-       int numports;
-       int c_p_r[8];
-       struct timer_list rh_int_timer;
-        spinlock_t port_state_lock;
-        xhci_port_t *ports;
-};
-
-/* struct xhci - contains the state associated with a single USB interface */
-struct xhci {
-
-#ifdef CONFIG_PROC_FS
-       /* procfs */
-       int num;
-       struct proc_dir_entry *proc_entry;
-#endif
-
-        int evtchn;                        /* Interdom channel to backend */
-        enum { 
-                USBIF_STATE_CONNECTED    = 2,
-                USBIF_STATE_DISCONNECTED = 1,
-                USBIF_STATE_CLOSED       = 0
-        } state; /* State of this USB interface */
-        unsigned long recovery; /* boolean recovery in progress flag */
-        
-        unsigned long bandwidth;
-
-       struct usb_bus *bus;
-
-       /* Main list of URB's currently controlled by this HC */
-       spinlock_t urb_list_lock;
-       struct list_head urb_list;              /* P: xhci->urb_list_lock */
-
-       /* List of URB's awaiting completion callback */
-       spinlock_t complete_list_lock;
-       struct list_head complete_list;         /* P: xhci->complete_list_lock */
-
-       struct virt_root_hub rh;        /* private data of the virtual root hub */
-
-        spinlock_t ring_lock;
-        usbif_front_ring_t usb_ring;
-
-        int awaiting_reset;
-};
-
-/* per-URB private data structure for the host controller */
-struct urb_priv {
-       struct urb *urb;
-        usbif_iso_t *schedule;
-       struct usb_device *dev;
-
-        int in_progress : 1;           /* QH was queued (not linked in) */
-       int short_control_packet : 1;   /* If we get a short packet during */
-                                       /*  a control transfer, retrigger */
-                                       /*  the status phase */
-
-       int status;                     /* Final status */
-
-       unsigned long inserttime;       /* In jiffies */
-
-       struct list_head complete_list; /* P: xhci->complete_list_lock */
-};
-
-/*
- * Locking in xhci.c
- *
- * spinlocks are used extensively to protect the many lists and data
- * structures we have. It's not that pretty, but it's necessary. We
- * need to be done with all of the locks (except complete_list_lock) when
- * we call urb->complete. I've tried to make it simple enough so I don't
- * have to spend hours racking my brain trying to figure out if the
- * locking is safe.
- *
- * Here's the safe locking order to prevent deadlocks:
- *
- * #1 xhci->urb_list_lock
- * #2 urb->lock
- * #3 xhci->urb_remove_list_lock
- * #4 xhci->complete_list_lock
- *
- * If you're going to grab 2 or more locks at once, ALWAYS grab the lock
- * at the lowest level FIRST and NEVER grab locks at the same level at the
- * same time.
- * 
- * So, if you need xhci->urb_list_lock, grab it before you grab urb->lock
- */
-
-/* -------------------------------------------------------------------------
-   Virtual Root HUB
-   ------------------------------------------------------------------------- */
-/* destination of request */
-#define RH_DEVICE              0x00
-#define RH_INTERFACE           0x01
-#define RH_ENDPOINT            0x02
-#define RH_OTHER               0x03
-
-#define RH_CLASS               0x20
-#define RH_VENDOR              0x40
-
-/* Requests: bRequest << 8 | bmRequestType */
-#define RH_GET_STATUS          0x0080
-#define RH_CLEAR_FEATURE       0x0100
-#define RH_SET_FEATURE         0x0300
-#define RH_SET_ADDRESS         0x0500
-#define RH_GET_DESCRIPTOR      0x0680
-#define RH_SET_DESCRIPTOR      0x0700
-#define RH_GET_CONFIGURATION   0x0880
-#define RH_SET_CONFIGURATION   0x0900
-#define RH_GET_STATE           0x0280
-#define RH_GET_INTERFACE       0x0A80
-#define RH_SET_INTERFACE       0x0B00
-#define RH_SYNC_FRAME          0x0C80
-/* Our Vendor Specific Request */
-#define RH_SET_EP              0x2000
-
-/* Hub port features */
-#define RH_PORT_CONNECTION     0x00
-#define RH_PORT_ENABLE         0x01
-#define RH_PORT_SUSPEND                0x02
-#define RH_PORT_OVER_CURRENT   0x03
-#define RH_PORT_RESET          0x04
-#define RH_PORT_POWER          0x08
-#define RH_PORT_LOW_SPEED      0x09
-#define RH_C_PORT_CONNECTION   0x10
-#define RH_C_PORT_ENABLE       0x11
-#define RH_C_PORT_SUSPEND      0x12
-#define RH_C_PORT_OVER_CURRENT 0x13
-#define RH_C_PORT_RESET                0x14
-
-/* Hub features */
-#define RH_C_HUB_LOCAL_POWER   0x00
-#define RH_C_HUB_OVER_CURRENT  0x01
-#define RH_DEVICE_REMOTE_WAKEUP        0x00
-#define RH_ENDPOINT_STALL      0x01
-
-/* Our Vendor Specific feature */
-#define RH_REMOVE_EP           0x00
-
-#define RH_ACK                 0x01
-#define RH_REQ_ERR             -1
-#define RH_NACK                        0x00
-
-#endif
-
diff -r 97dbd9524a7e -r 06d84bf87159 tools/python/xen/xend/server/controller.py
--- a/tools/python/xen/xend/server/controller.py        Thu Sep 22 17:34:14 2005
+++ /dev/null   Thu Sep 22 17:42:01 2005
@@ -1,423 +0,0 @@
-#============================================================================
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of version 2.1 of the GNU Lesser General Public
-# License as published by the Free Software Foundation.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-#============================================================================
-# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
-#============================================================================
-
-"""General support for controllers, which handle devices
-for a domain.
-"""
-
-from xen.xend.XendError import XendError
-from xen.xend.xenstore import DBVar
-
-DEBUG = 0
-
-class DevControllerTable:
-    """Table of device controller classes, indexed by type name.
-    """
-
-    def __init__(self):
-        self.controllerClasses = {}
-
-    def getDevControllerClass(self, type):
-        return self.controllerClasses.get(type)
-
-    def addDevControllerClass(self, cls):
-        self.controllerClasses[cls.getType()] = cls
-
-    def delDevControllerClass(self, type):
-        if type in self.controllerClasses:
-            del self.controllerClasses[type]
-
-    def createDevController(self, type, vm, recreate=False):
-        cls = self.getDevControllerClass(type)
-        if not cls:
-            raise XendError("unknown device type: " + str(type))
-        return cls.createDevController(vm, recreate=recreate)
-
-def getDevControllerTable():
-    """Singleton constructor for the controller table.
-    """
-    global devControllerTable
-    try:
-        devControllerTable
-    except:
-        devControllerTable = DevControllerTable()
-    return devControllerTable
-
-def addDevControllerClass(name, cls):
-    """Add a device controller class to the controller table.
-    """
-    cls.type = name
-    getDevControllerTable().addDevControllerClass(cls)
-
-
-def isDevControllerClass(name):
-    """@return True if a device controller class has been registered with
-    the controller table under the given name."""
-    return name in getDevControllerTable().controllerClasses
-
-
-def createDevController(name, vm, recreate=False):
-    return getDevControllerTable().createDevController(name, vm, recreate=recreate)
-
-class DevController:
-    """Abstract class for a device controller attached to a domain.
-    A device controller manages all the devices of a given type for a domain.
-    There is exactly one device controller for each device type for
-    a domain.
-
-    """
-
-    # State:
-    # controller/<type> : for controller
-    # device/<type>/<id>   : for each device
-
-    def createDevController(cls, vm, recreate=False):
-        """Class method to create a dev controller.
-        """
-        ctrl = cls(vm, recreate=recreate)
-        ctrl.initController(recreate=recreate)
-        ctrl.exportToDB()
-        return ctrl
-
-    createDevController = classmethod(createDevController)
-
-    def getType(cls):
-        return cls.type
-
-    getType = classmethod(getType)
-
-    __exports__ = [
-        DBVar('type',      'str'),
-        DBVar('destroyed', 'bool'),
-        ]
-
-    # Set when registered.
-    type = None
-
-    def __init__(self, vm, recreate=False):
-        self.destroyed = False
-        self.vm = vm
-        self.db = self.getDB()
-        self.deviceId = 0
-        self.devices = {}
-        self.device_order = []
-
-    def getDB(self):
-        """Get the db node to use for a controller.
-        """
-        return self.vm.db.addChild("/controller/%s" % self.getType())
-
-    def getDevDB(self, id):
-        """Get the db node to use for a device.
-        """
-        return self.vm.db.addChild("/device/%s/%s" % (self.getType(), id))
-
-    def exportToDB(self, save=False):
-        self.db.exportToDB(self, fields=self.__exports__, save=save)
-
-    def importFromDB(self):
-        self.db.importFromDB(self, fields=self.__exports__)
-
-    def getDevControllerType(self):
-        return self.dctype
-
-    def getDomain(self):
-        return self.vm.getDomain()
-
-    def getDomainName(self):
-        return self.vm.getName()
-
-    def getDomainInfo(self):
-        return self.vm
-
-    #----------------------------------------------------------------------------
-    # Subclass interface.
-    # Subclasses should define the unimplemented methods..
-    # Redefinitions must have the same arguments.
-
-    def initController(self, recreate=False, reboot=False):
-        """Initialise the controller. Called when the controller is
-        first created, and again after the domain is rebooted (with reboot True).
-        If called with recreate True (and reboot False) the controller is being
-        recreated after a xend restart.
-
-        As this can be a re-init (after reboot) any controller state should
-        be reset. For example the destroyed flag.
-        """
-        self.destroyed = False
-        if reboot:
-            self.rebootDevices()
-
-    def newDevice(self, id, config, recreate=False):
-        """Create a device with the given config.
-        Must be defined in subclass.
-        Called with recreate True when the device is being recreated after a
-        xend restart.
-
-        @return device
-        """
-        raise NotImplementedError()
-
-    def createDevice(self, config, recreate=False, change=False):
-        """Create a device and attach to its front- and back-ends.
-        If recreate is true the device is being recreated after a xend restart.
-        If change is true the device is a change to an existing domain,
-        i.e. it is being added at runtime rather than when the domain is created.
-        """
-        dev = self.newDevice(self.nextDeviceId(), config, recreate=recreate)
-        if self.vm.recreate:
-            dev.importFromDB()
-        dev.init(recreate=recreate)
-        self.addDevice(dev)
-        if not recreate:
-            dev.exportToDB()
-        dev.attach(recreate=recreate, change=change)
-        dev.exportToDB()
-
-        return dev
-
-    def configureDevice(self, id, config, change=False):
-        """Reconfigure an existing device.
-        May be defined in subclass."""
-        dev = self.getDevice(id, error=True)
-        dev.configure(config, change=change)
-
-    def destroyDevice(self, id, change=False, reboot=False):
-        """Destroy a device.
-        May be defined in subclass.
-
-        If reboot is true the device is being destroyed for a domain reboot.
-
-        The device is not deleted, since it may be recreated later.
-        """
-        dev = self.getDevice(id, error=True)
-        dev.destroy(change=change, reboot=reboot)
-        return dev
-
-    def deleteDevice(self, id, change=True):
-        """Destroy a device and delete it.
-        Normally called to remove a device from a domain at runtime.
-        """
-        dev = self.destroyDevice(id, change=change)
-        self.removeDevice(dev)
-
-    def destroyController(self, reboot=False):
-        """Destroy all devices and clean up.
-        May be defined in subclass.
-        If reboot is true the controller is being destroyed for a domain reboot.
-        Called at domain shutdown.
-        """
-        self.destroyed = True
-        self.destroyDevices(reboot=reboot)
-
-    #----------------------------------------------------------------------------
-    
-    def isDestroyed(self):
-        return self.destroyed
-
-    def getDevice(self, id, error=False):
-        dev = self.devices.get(int(id))
-        if error and not dev:
-            raise XendError("invalid device id: " + str(id))
-        return dev
-
-    def getDeviceIds(self):
-        return [ dev.getId() for dev in self.device_order ]
-
-    def getDevices(self):
-        return self.device_order
-
-    def getDeviceConfig(self, id):
-        return self.getDevice(id).getConfig()
-
-    def getDeviceConfigs(self):
-        return [ dev.getConfig() for dev in self.device_order ]
-
-    def getDeviceSxprs(self):
-        return [ dev.sxpr() for dev in self.device_order ]
-
-    def addDevice(self, dev):
-        self.devices[dev.getId()] = dev
-        self.device_order.append(dev)
-        return dev
-
-    def removeDevice(self, dev):
-        if dev.getId() in self.devices:
-            del self.devices[dev.getId()]
-        if dev in self.device_order:
-            self.device_order.remove(dev)
-
-    def rebootDevices(self):
-        for dev in self.getDevices():
-            dev.reboot()
-
-    def destroyDevices(self, reboot=False):
-        """Destroy all devices.
-        """
-        for dev in self.getDevices():
-            dev.destroy(reboot=reboot)
-
-    def getMaxDeviceId(self):
-        maxid = 0
-        for id in self.devices:
-            if id > maxid:
-                maxid = id
-        return maxid
-
-    def nextDeviceId(self):
-        id = self.deviceId
-        self.deviceId += 1
-        return id
-
-    def getDeviceCount(self):
-        return len(self.devices)
-
-class Dev:
-    """Abstract class for a device attached to a device controller.
-
-    @ivar id:        identifier
-    @type id:        int
-    @ivar controller: device controller
-    @type controller: DevController
-    """
-    
-    # ./status       : need 2: actual and requested?
-    # down-down: initial.
-    # up-up: fully up.
-    # down-up: down requested, still up. Watch front and back, when both
-    # down go to down-down. But what if one (or both) is not connected?
-    # Still have front/back trees with status? Watch front/status, back/status?
-    # up-down: up requested, still down.
-    # Back-end watches ./status, front/status
-    # Front-end watches ./status, back/status
-    # i.e. each watches the other 2.
-    # Each is status/request status/actual?
-    #
-    # backend?
-    # frontend?
-
-    __exports__ = [
-        DBVar('id',        ty='int'),
-        DBVar('type',      ty='str'),
-        DBVar('config',    ty='sxpr'),
-        DBVar('destroyed', ty='bool'),
-        ]
-
-    def __init__(self, controller, id, config, recreate=False):
-        self.controller = controller
-        self.id = id
-        self.config = config
-        self.destroyed = False
-        self.type = self.getType()
-
-        self.db = controller.getDevDB(id)
-
-    def exportToDB(self, save=False):
-        self.db.exportToDB(self, fields=self.__exports__, save=save)
-
-    def importFromDB(self):
-        self.db.importFromDB(self, fields=self.__exports__)
-
-    def getDomain(self):
-        return self.controller.getDomain()
-
-    def getDomainName(self):
-        return self.controller.getDomainName()
-
-    def getDomainInfo(self):
-        return self.controller.getDomainInfo()
-    
-    def getController(self):
-        return self.controller
-
-    def getType(self):
-        return self.controller.getType()
-
-    def getId(self):
-        return self.id
-
-    def getConfig(self):
-        return self.config
-
-    def isDestroyed(self):
-        return self.destroyed
-
-    #----------------------------------------------------------------------------
-    # Subclass interface.
-    # Define methods in subclass as needed.
-    # Redefinitions must have the same arguments.
-
-    def init(self, recreate=False, reboot=False):
-        """Initialization. Called on initial create (when reboot is False)
-        and on reboot (when reboot is True). When xend is restarting is
-        called with recreate True. Define in subclass if needed.
-
-        Device instance variables must be defined in the class constructor,
-        but given null or default values. The real values should be initialised
-        in this method. This allows devices to be re-initialised.
-
-        Since this can be called to re-initialise a device any state flags
-        should be reset.
-        """
-        self.destroyed = False
-
-    def attach(self, recreate=False, change=False):
-        """Attach the device to its front and back ends.
-        Define in subclass if needed.
-        """
-        pass
-
-    def reboot(self):
-        """Reconnect the device when the domain is rebooted.
-        """
-        self.init(reboot=True)
-        self.attach()
-
-    def sxpr(self):
-        """Get the s-expression for the deivice.
-        Implement in a subclass if needed.
-
-        @return: sxpr
-        """
-        return self.getConfig()
-
-    def configure(self, config, change=False):
-        """Reconfigure the device.
-
-        Implement in subclass.
-        """
-        raise NotImplementedError()
-
-    def refresh(self):
-        """Refresh the device..
-        Default no-op. Define in subclass if needed.
-        """
-        pass
-
-    def destroy(self, change=False, reboot=False):
-        """Destroy the device.
-        If change is True notify destruction (runtime change).
-        If reboot is True the device is being destroyed for a reboot.
-        Redefine in subclass if needed.
-
-        Called at domain shutdown and when a device is deleted from
-        a running domain (with change True).
-        """
-        self.destroyed = True
-        pass
-    
-    #----------------------------------------------------------------------------

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>