xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 30 Aug 2006 22:11:03 +0000
Delivery-date: Wed, 30 Aug 2006 15:17:16 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User awilliam@xxxxxxxxxxx
# Node ID 684fdcfb251a443fa885c142b427d253ec033212
# Parent  896fcdd49c7ff59f7d28b6402fd4453e60c38232
# Parent  f790546ecfda03193a4b8983f7bb6b0f65924603
merge with xen-unstable.hg
---
 xen/arch/x86/shadow2-common.c                      | 3407 ---------------
 xen/arch/x86/shadow2.c                             | 4492 ---------------------
 xen/include/asm-x86/page-guest32.h                 |  105 
 xen/include/asm-x86/shadow2-multi.h                |  116 
 xen/include/asm-x86/shadow2-private.h              |  593 --
 xen/include/asm-x86/shadow2-types.h                |  692 ---
 xen/include/asm-x86/shadow2.h                      |  626 --
 docs/man/xend-config.sxp.pod.5                     |    2 
 docs/misc/xend.tex                                 |    4 
 docs/src/user.tex                                  |    4 
 tools/Makefile                                     |    1 
 tools/console/daemon/io.c                          |   18 
 tools/examples/vif-route                           |    6 
 tools/examples/xen-hotplug-common.sh               |    2 
 tools/examples/xen-network-common.sh               |   40 
 tools/examples/xend-config.sxp                     |    2 
 tools/firmware/hvmloader/smbios.c                  |   12 
 tools/firmware/hvmloader/util.c                    |   54 
 tools/firmware/hvmloader/util.h                    |   10 
 tools/ioemu/Makefile                               |    2 
 tools/ioemu/patches/qemu-logging                   |    1 
 tools/ioemu/patches/xen-build                      |   14 
 tools/ioemu/vl.c                                   |    2 
 tools/libxc/xc_hvm_build.c                         |    2 
 tools/misc/xend                                    |    2 
 tools/python/xen/util/bugtool.py                   |    4 
 tools/python/xen/xend/XendRoot.py                  |    2 
 tools/python/xen/xend/server/params.py             |    4 
 tools/security/python/xensec_gen/main.py           |    2 
 unmodified_drivers/linux-2.6/platform-pci/evtchn.c |    2 
 xen/arch/x86/Makefile                              |   21 
 xen/arch/x86/domain.c                              |   46 
 xen/arch/x86/domain_build.c                        |    8 
 xen/arch/x86/domctl.c                              |    2 
 xen/arch/x86/hvm/hvm.c                             |    6 
 xen/arch/x86/hvm/platform.c                        |    4 
 xen/arch/x86/hvm/svm/svm.c                         |  183 
 xen/arch/x86/hvm/svm/vmcb.c                        |    2 
 xen/arch/x86/hvm/vmx/vmcs.c                        |    4 
 xen/arch/x86/hvm/vmx/vmx.c                         |   20 
 xen/arch/x86/mm.c                                  |  142 
 xen/arch/x86/mm/Makefile                           |    1 
 xen/arch/x86/mm/shadow/Makefile                    |   15 
 xen/arch/x86/mm/shadow/common.c                    | 3407 +++++++++++++++
 xen/arch/x86/mm/shadow/multi.c                     | 4492 +++++++++++++++++++++
 xen/arch/x86/mm/shadow/multi.h                     |  116 
 xen/arch/x86/mm/shadow/page-guest32.h              |  105 
 xen/arch/x86/mm/shadow/private.h                   |  593 ++
 xen/arch/x86/mm/shadow/types.h                     |  692 +++
 xen/arch/x86/traps.c                               |    8 
 xen/include/asm-x86/domain.h                       |   18 
 xen/include/asm-x86/hvm/svm/vmcb.h                 |   45 
 xen/include/asm-x86/mm.h                           |   82 
 xen/include/asm-x86/perfc_defn.h                   |  102 
 xen/include/asm-x86/shadow.h                       |  614 ++
 55 files changed, 10488 insertions(+), 10463 deletions(-)

diff -r 896fcdd49c7f -r 684fdcfb251a docs/man/xend-config.sxp.pod.5
--- a/docs/man/xend-config.sxp.pod.5    Mon Aug 28 16:16:07 2006 -0600
+++ b/docs/man/xend-config.sxp.pod.5    Mon Aug 28 16:26:37 2006 -0600
@@ -23,7 +23,7 @@ The following lists the daemon configura
 =item I<logfile>
 
 The location of the file to record runtime log messages.  Defaults to
-I</var/log/xend.log>.
+I</var/log/xen/xend.log>.
 
 =item I<loglevel>
 
diff -r 896fcdd49c7f -r 684fdcfb251a docs/misc/xend.tex
--- a/docs/misc/xend.tex        Mon Aug 28 16:16:07 2006 -0600
+++ b/docs/misc/xend.tex        Mon Aug 28 16:26:37 2006 -0600
@@ -214,7 +214,7 @@ Configuration scripts ({\it e.g.} for ne
 Configuration scripts ({\it e.g.} for network-script) are looked for in {\tt /etc/xen}
 unless their name begins with '/'.
 
-Xend sends its log output to {\tt /var/log/xend.log}. This is a rotating logfile,
+Xend sends its log output to {\tt /var/log/xen/xend.log}. This is a rotating logfile,
 and logs are moved onto {\tt xend.log.1} {\it etc.} as they get large. Old logs may
 be deleted.
 
@@ -411,7 +411,7 @@ allows access to some debugging function
 \end{itemize}
 
 When tracing is on xend logs all functions calls and exceptions to
-{\tt /var/log/xend.trace}.
+{\tt /var/log/xen/xend.trace}.
 
 \begin{thebibliography}{99}
 
diff -r 896fcdd49c7f -r 684fdcfb251a docs/src/user.tex
--- a/docs/src/user.tex Mon Aug 28 16:16:07 2006 -0600
+++ b/docs/src/user.tex Mon Aug 28 16:26:37 2006 -0600
@@ -973,8 +973,8 @@ using the \texttt{xm} tool.
 
 \subsection{Logging}
 
-As \xend\ runs, events will be logged to \path{/var/log/xend.log} and
-(less frequently) to \path{/var/log/xend-debug.log}. These, along with
+As \xend\ runs, events will be logged to \path{/var/log/xen/xend.log} and
+(less frequently) to \path{/var/log/xen/xend-debug.log}. These, along with
 the standard syslog files, are useful when troubleshooting problems.
 
 \subsection{Configuring \Xend\ }
diff -r 896fcdd49c7f -r 684fdcfb251a tools/Makefile
--- a/tools/Makefile    Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/Makefile    Mon Aug 28 16:26:37 2006 -0600
@@ -39,6 +39,7 @@ install: check
        done
        $(MAKE) ioemuinstall
        $(INSTALL_DIR) -p $(DESTDIR)/var/xen/dump
+       $(INSTALL_DIR) -p $(DESTDIR)/var/log/xen
 
 .PHONY: clean
 clean: check_clean
diff -r 896fcdd49c7f -r 684fdcfb251a tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/console/daemon/io.c Mon Aug 28 16:26:37 2006 -0600
@@ -584,16 +584,14 @@ void handle_io(void)
                            FD_ISSET(xc_evtchn_fd(d->xce_handle), &readfds))
                                handle_ring_read(d);
 
-                       if (d->tty_fd != -1) {
-                               if (FD_ISSET(d->tty_fd, &readfds))
-                                       handle_tty_read(d);
-
-                               if (FD_ISSET(d->tty_fd, &writefds))
-                                       handle_tty_write(d);
-
-                               if (d->is_dead)
-                                       cleanup_domain(d);
-                       }
+                       if (d->tty_fd != -1 && FD_ISSET(d->tty_fd, &readfds))
+                               handle_tty_read(d);
+
+                       if (d->tty_fd != -1 && FD_ISSET(d->tty_fd, &writefds))
+                               handle_tty_write(d);
+
+                       if (d->is_dead)
+                               cleanup_domain(d);
                }
        } while (ret > -1);
 }
diff -r 896fcdd49c7f -r 684fdcfb251a tools/examples/vif-route
--- a/tools/examples/vif-route  Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/examples/vif-route  Mon Aug 28 16:26:37 2006 -0600
@@ -30,10 +30,12 @@ case "$command" in
         ifconfig ${vif} ${main_ip} netmask 255.255.255.255 up
         echo 1 >/proc/sys/net/ipv4/conf/${vif}/proxy_arp
         ipcmd='add'
+        cmdprefix=''
         ;;
     offline)
-        ifdown ${vif}
+        do_without_error ifdown ${vif}
         ipcmd='del'
+        cmdprefix='do_without_error'
         ;;
 esac
 
@@ -41,7 +43,7 @@ if [ "${ip}" ] ; then
     # If we've been given a list of IP addresses, then add routes from dom0 to
     # the guest using those addresses.
     for addr in ${ip} ; do
-      ip route ${ipcmd} ${addr} dev ${vif} src ${main_ip}
+      ${cmdprefix} ip route ${ipcmd} ${addr} dev ${vif} src ${main_ip}
     done 
 fi
 
diff -r 896fcdd49c7f -r 684fdcfb251a tools/examples/xen-hotplug-common.sh
--- a/tools/examples/xen-hotplug-common.sh      Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/examples/xen-hotplug-common.sh      Mon Aug 28 16:26:37 2006 -0600
@@ -21,7 +21,7 @@ dir=$(dirname "$0")
 . "$dir/xen-script-common.sh"
 . "$dir/locking.sh"
 
-exec 2>>/var/log/xen-hotplug.log
+exec 2>>/var/log/xen/xen-hotplug.log
 
 export PATH="/sbin:/bin:/usr/bin:/usr/sbin:$PATH"
 export LANG="POSIX"
diff -r 896fcdd49c7f -r 684fdcfb251a tools/examples/xen-network-common.sh
--- a/tools/examples/xen-network-common.sh      Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/examples/xen-network-common.sh      Mon Aug 28 16:26:37 2006 -0600
@@ -44,34 +44,18 @@ then
   }
 elif ! which ifup >/dev/null 2>/dev/null
 then
-  if [ -e /etc/conf.d/net ]
-  then
-    preiftransfer()
-    {
-      true
-    }
-    ifup()
-    {
-      /etc/init.d/net.$1 start
-    }
-    ifdown()
-    {
-      /etc/init.d/net.$1 stop
-    }
-  else
-    preiftransfer()
-    {
-      true
-    }
-    ifup()
-    {
-      false
-    }
-    ifdown()
-    {
-      false
-    }
-  fi
+  preiftransfer()
+  {
+    true
+  }
+  ifup()
+  {
+    false
+  }
+  ifdown()
+  {
+    false
+  }
 else
   preiftransfer()
   {
diff -r 896fcdd49c7f -r 684fdcfb251a tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp    Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/examples/xend-config.sxp    Mon Aug 28 16:26:37 2006 -0600
@@ -11,7 +11,7 @@
 # Commented out entries show the default for that entry, unless otherwise
 # specified.
 
-#(logfile /var/log/xend.log)
+#(logfile /var/log/xen/xend.log)
 #(loglevel DEBUG)
 
 #(xend-http-server no)
diff -r 896fcdd49c7f -r 684fdcfb251a tools/firmware/hvmloader/smbios.c
--- a/tools/firmware/hvmloader/smbios.c Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/firmware/hvmloader/smbios.c Mon Aug 28 16:26:37 2006 -0600
@@ -116,8 +116,10 @@ smbios_table_size(uint32_t vcpus, const 
 
        /* type 0: "Xen", xen_version, and release_date */
        size += strlen("Xen") + strlen(xen_version) + 2;
-       /* type 1: "Xen", xen_version, "HVM domU" */
-       size += strlen("Xen") + strlen("HVM domU") + strlen(xen_version) + 3;
+       /* type 1: "Xen", xen_version, "HVM domU", UUID as string for 
+                   serial number */
+       size += strlen("Xen") + strlen("HVM domU") + strlen(xen_version) +
+                       36 + 4;
        /* type 3: "Xen" */
        size += strlen("Xen") + 1;
        /* type 4: socket designation ("CPU n"), processor_manufacturer */
@@ -371,6 +373,7 @@ smbios_type_1_init(void *start, const ch
 smbios_type_1_init(void *start, const char *xen_version, 
                   uint8_t uuid[16])
 {
+       char uuid_str[37];
        struct smbios_type_1 *p = (struct smbios_type_1 *)start;
        p->header.type = 1;
        p->header.length = sizeof(struct smbios_type_1);
@@ -379,7 +382,7 @@ smbios_type_1_init(void *start, const ch
        p->manufacturer_str = 1;
        p->product_name_str = 2;
        p->version_str = 3;
-       p->serial_number_str = 0;
+       p->serial_number_str = 4;
     
        memcpy(p->uuid, uuid, 16);
 
@@ -395,6 +398,9 @@ smbios_type_1_init(void *start, const ch
        start += strlen("HVM domU") + 1;
        strcpy((char *)start, xen_version);
        start += strlen(xen_version) + 1;
+       uuid_to_string(uuid_str, uuid); 
+       strcpy((char *)start, uuid_str);
+       start += strlen(uuid_str) + 1;
        *((uint8_t *)start) = 0;
     
        return start+1; 
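
The size bump above simply accounts for the extra string: the type 1 string set grows from three NUL-terminated strings ("Xen", xen_version, "HVM domU") to four, the new one being the UUID rendered as a 36-character serial number. A small standalone illustration of that arithmetic (not part of the changeset; the xen_version value below is made up):

#include <stdio.h>
#include <string.h>

int main(void)
{
    const char *xen_version = "3.0-unstable";       /* made-up value */
    size_t old_size = strlen("Xen") + strlen("HVM domU")
                    + strlen(xen_version) + 3;      /* three NUL terminators */
    size_t new_size = strlen("Xen") + strlen("HVM domU")
                    + strlen(xen_version) + 36 + 4; /* + 36-char UUID string
                                                       and a fourth NUL */
    printf("type 1 string set: %zu -> %zu bytes\n", old_size, new_size);
    return 0;
}
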
diff -r 896fcdd49c7f -r 684fdcfb251a tools/firmware/hvmloader/util.c
--- a/tools/firmware/hvmloader/util.c   Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/firmware/hvmloader/util.c   Mon Aug 28 16:26:37 2006 -0600
@@ -174,3 +174,57 @@ cpuid(uint32_t idx, uint32_t *eax, uint3
                : "0" (idx) );
 }
 
+/* Write a two-character hex representation of 'byte' to digits[].
+   Pre-condition: sizeof(digits) >= 2 */
+void
+byte_to_hex(char *digits, uint8_t byte)
+{
+       uint8_t nybbel = byte >> 4;
+
+       if (nybbel > 9)
+               digits[0] = 'a' + nybbel-10;
+       else
+               digits[0] = '0' + nybbel;
+
+       nybbel = byte & 0x0f;
+       if (nybbel > 9)
+               digits[1] = 'a' + nybbel-10;
+       else
+               digits[1] = '0' + nybbel;
+}
+
+/* Convert an array of 16 unsigned bytes to a DCE/OSF formatted UUID
+   string.
+
+   Pre-condition: sizeof(dest) >= 37 */
+void
+uuid_to_string(char *dest, uint8_t *uuid)
+{
+       int i = 0;
+       char *p = dest;
+
+       for (i = 0; i < 4; ++i) {
+               byte_to_hex(p, uuid[i]);
+               p += 2;
+       }
+       *p++ = '-';
+       for (i = 4; i < 6; ++i) {
+               byte_to_hex(p, uuid[i]);
+               p += 2;
+       }
+       *p++ = '-';
+       for (i = 6; i < 8; ++i) {
+               byte_to_hex(p, uuid[i]);
+               p += 2;
+       }
+       *p++ = '-';
+       for (i = 8; i < 10; ++i) {
+               byte_to_hex(p, uuid[i]);
+               p += 2;
+       }
+       *p++ = '-';
+       for (i = 10; i < 16; ++i) {
+               byte_to_hex(p, uuid[i]);
+               p += 2;
+       }
+}
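
For reference, a minimal standalone sketch (not part of this changeset) of the contract of the two helpers added above: byte_to_hex() emits two lowercase hex digits with no terminating NUL, and uuid_to_string() renders 16 bytes in the 36-character 8-4-4-4-12 DCE/OSF layout, which is why callers are asked for a buffer of at least 37 bytes. The re-implementation below, the sample UUID bytes and main() are illustrative assumptions only.

/* Standalone sketch mirroring the helpers' documented contract. */
#include <stdio.h>
#include <stdint.h>

static void byte_to_hex(char *digits, uint8_t byte)
{
    static const char hex[] = "0123456789abcdef";
    digits[0] = hex[byte >> 4];      /* high nibble, lowercase */
    digits[1] = hex[byte & 0x0f];    /* low nibble, no NUL written */
}

static void uuid_to_string(char *dest, const uint8_t *uuid)
{
    /* Dashes go after bytes 4, 6, 8 and 10: the 8-4-4-4-12 layout. */
    int i;
    char *p = dest;
    for (i = 0; i < 16; i++) {
        if (i == 4 || i == 6 || i == 8 || i == 10)
            *p++ = '-';
        byte_to_hex(p, uuid[i]);
        p += 2;
    }
    *p = '\0';   /* this sketch terminates: 36 chars + NUL = 37 bytes */
}

int main(void)
{
    uint8_t uuid[16] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
                         0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff };
    char buf[37];
    uuid_to_string(buf, uuid);
    printf("%s\n", buf);   /* prints 00112233-4455-6677-8899-aabbccddeeff */
    return 0;
}
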
diff -r 896fcdd49c7f -r 684fdcfb251a tools/firmware/hvmloader/util.h
--- a/tools/firmware/hvmloader/util.h   Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/firmware/hvmloader/util.h   Mon Aug 28 16:26:37 2006 -0600
@@ -25,6 +25,16 @@ void *memset(void *s, int c, unsigned n)
 void *memset(void *s, int c, unsigned n);
 char *itoa(char *a, unsigned int i);
 
+/* convert a byte to two lowercase hex digits, with no terminating NUL 
+   character.  digits[] must have at least two elements. */
+void byte_to_hex(char *digits, uint8_t byte);
+
+/* Convert an array of 16 unsigned bytes to a DCE/OSF formatted UUID
+   string.
+
+   Pre-condition: sizeof(dest) >= 37 */
+void uuid_to_string(char *dest, uint8_t *uuid);
+
 /* Debug output */
 void puts(const char *s);
 
diff -r 896fcdd49c7f -r 684fdcfb251a tools/ioemu/Makefile
--- a/tools/ioemu/Makefile      Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/ioemu/Makefile      Mon Aug 28 16:26:37 2006 -0600
@@ -94,7 +94,7 @@ test speed test2: all
        $(MAKE) -C tests $@
 
 TAGS: 
-       etags *.[ch] tests/*.[ch]
+       etags *.[ch] target-i386-dm/*.[ch] hw/*.[ch]
 
 cscope:
        rm -f ./cscope.*
diff -r 896fcdd49c7f -r 684fdcfb251a tools/ioemu/patches/qemu-logging
--- a/tools/ioemu/patches/qemu-logging  Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/ioemu/patches/qemu-logging  Mon Aug 28 16:26:37 2006 -0600
@@ -43,7 +43,7 @@ Index: ioemu/vl.c
      /* default mac address of the first network interface */
      
 +    /* init debug */
-+    sprintf(qemu_dm_logfilename, "/var/log/qemu-dm.%d.log", getpid());
++    sprintf(qemu_dm_logfilename, "/var/log/xen/qemu-dm.%d.log", getpid());
 +    cpu_set_log_filename(qemu_dm_logfilename);
 +    cpu_set_log(0);
 +    
diff -r 896fcdd49c7f -r 684fdcfb251a tools/ioemu/patches/xen-build
--- a/tools/ioemu/patches/xen-build     Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/ioemu/patches/xen-build     Mon Aug 28 16:26:37 2006 -0600
@@ -1,7 +1,7 @@ Index: ioemu/Makefile
 Index: ioemu/Makefile
 ===================================================================
---- ioemu.orig/Makefile        2006-08-06 02:03:44.915543858 +0100
-+++ ioemu/Makefile     2006-08-06 02:11:33.461331417 +0100
+--- ioemu.orig/Makefile        2006-08-28 20:19:23.000000000 +0100
++++ ioemu/Makefile     2006-08-28 20:20:08.000000000 +0100
 @@ -1,11 +1,14 @@
  # Makefile for QEMU.
  
@@ -60,6 +60,15 @@ Index: ioemu/Makefile
  ifndef CONFIG_WIN32
        mkdir -p "$(DESTDIR)$(datadir)/keymaps"
        for x in $(KEYMAPS); do \
+@@ -89,7 +94,7 @@
+       $(MAKE) -C tests $@
+ 
+ TAGS: 
+-      etags *.[ch] tests/*.[ch]
++      etags *.[ch] target-i386-dm/*.[ch] hw/*.[ch]
+ 
+ cscope:
+       rm -f ./cscope.*
 @@ -107,11 +112,11 @@
        texi2dvi $<
  
@@ -76,8 +85,8 @@ Index: ioemu/Makefile
  info: qemu-doc.info qemu-tech.info
 Index: ioemu/Makefile.target
 ===================================================================
---- ioemu.orig/Makefile.target 2006-08-06 02:03:44.922543079 +0100
-+++ ioemu/Makefile.target      2006-08-06 02:09:22.320951557 +0100
+--- ioemu.orig/Makefile.target 2006-08-28 20:19:23.000000000 +0100
++++ ioemu/Makefile.target      2006-08-28 20:19:47.000000000 +0100
 @@ -1,5 +1,8 @@
  include config.mak
  
@@ -149,8 +158,8 @@ Index: ioemu/Makefile.target
  include .depend
 Index: ioemu/configure
 ===================================================================
---- ioemu.orig/configure       2006-08-06 02:03:45.783447220 +0100
-+++ ioemu/configure    2006-08-06 02:09:41.076860544 +0100
+--- ioemu.orig/configure       2006-08-28 20:19:23.000000000 +0100
++++ ioemu/configure    2006-08-28 20:19:47.000000000 +0100
 @@ -18,8 +18,8 @@
  
  # default parameters
diff -r 896fcdd49c7f -r 684fdcfb251a tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/ioemu/vl.c  Mon Aug 28 16:26:37 2006 -0600
@@ -5924,7 +5924,7 @@ int main(int argc, char **argv)
     /* default mac address of the first network interface */
     
     /* init debug */
-    sprintf(qemu_dm_logfilename, "/var/log/qemu-dm.%d.log", getpid());
+    sprintf(qemu_dm_logfilename, "/var/log/xen/qemu-dm.%d.log", getpid());
     cpu_set_log_filename(qemu_dm_logfilename);
     cpu_set_log(0);
     
diff -r 896fcdd49c7f -r 684fdcfb251a tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/libxc/xc_hvm_build.c        Mon Aug 28 16:26:37 2006 -0600
@@ -441,7 +441,7 @@ static int xc_hvm_build_internal(int xc_
         goto error_out;
     }
 
-    /* HVM domains must be put into shadow2 mode at the start of day */
+    /* HVM domains must be put into shadow mode at the start of day */
     if ( xc_shadow_control(xc_handle, domid, XEN_DOMCTL_SHADOW_OP_ENABLE,
                            NULL, 0, NULL, 
                            XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT  |
diff -r 896fcdd49c7f -r 684fdcfb251a tools/misc/xend
--- a/tools/misc/xend   Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/misc/xend   Mon Aug 28 16:26:37 2006 -0600
@@ -86,7 +86,7 @@ def start_xenstored():
     XENSTORED_TRACE = os.getenv("XENSTORED_TRACE")
     cmd = "xenstored --pid-file /var/run/xenstore.pid"
     if XENSTORED_TRACE:
-        cmd += " -T /var/log/xenstored-trace.log"
+        cmd += " -T /var/log/xen/xenstored-trace.log"
     s,o = commands.getstatusoutput(cmd)
 
 def start_consoled():
diff -r 896fcdd49c7f -r 684fdcfb251a tools/python/xen/util/bugtool.py
--- a/tools/python/xen/util/bugtool.py  Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/python/xen/util/bugtool.py  Mon Aug 28 16:26:37 2006 -0600
@@ -43,8 +43,8 @@ TITLE_RE = re.compile(r'<title>(.*)</tit
 
 FILES_TO_SEND = [ '/var/log/' + x for x in 
                   [ 'syslog', 'messages', 'debug',
-                    'xend.log', 'xend-debug.log', 'xenstored-trace.log',
-                    'xen-hotplug.log' ] ]
+                    'xen/xend.log', 'xen/xend-debug.log', 'xen/xenstored-trace.log',
+                    'xen/xen-hotplug.log' ] ]
 #FILES_TO_SEND = [  ]
 
 
diff -r 896fcdd49c7f -r 684fdcfb251a tools/python/xen/xend/XendRoot.py
--- a/tools/python/xen/xend/XendRoot.py Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/python/xen/xend/XendRoot.py Mon Aug 28 16:26:37 2006 -0600
@@ -52,7 +52,7 @@ class XendRoot:
     block_script_dir = "/etc/xen/scripts"
 
     """Default path to the log file. """
-    logfile_default = "/var/log/xend.log"
+    logfile_default = "/var/log/xen/xend.log"
 
     """Default level of information to be logged."""
     loglevel_default = 'DEBUG'
diff -r 896fcdd49c7f -r 684fdcfb251a tools/python/xen/xend/server/params.py
--- a/tools/python/xen/xend/server/params.py    Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/python/xen/xend/server/params.py    Mon Aug 28 16:26:37 2006 -0600
@@ -39,8 +39,8 @@ def getenv(var, val, conv=None):
 
 # The following parameters could be placed in a configuration file.
 XEND_PID_FILE      = '/var/run/xend.pid'
-XEND_TRACE_FILE    = '/var/log/xend.trace'
-XEND_DEBUG_LOG     = '/var/log/xend-debug.log'
+XEND_TRACE_FILE    = '/var/log/xen/xend.trace'
+XEND_DEBUG_LOG     = '/var/log/xen/xend-debug.log'
 XEND_USER          = 'root'
 XEND_DEBUG         = getenv("XEND_DEBUG",     0, conv=int)
 XEND_DAEMONIZE     = getenv("XEND_DAEMONIZE", not XEND_DEBUG, conv=int)
diff -r 896fcdd49c7f -r 684fdcfb251a tools/security/python/xensec_gen/main.py
--- a/tools/security/python/xensec_gen/main.py  Mon Aug 28 16:16:07 2006 -0600
+++ b/tools/security/python/xensec_gen/main.py  Mon Aug 28 16:26:37 2006 -0600
@@ -34,7 +34,7 @@ import CGIHTTPServer
 
 gHttpPort = 7777
 gHttpDir  = '/var/lib/xensec_gen'
-gLogFile  = '/var/log/xensec_gen.log'
+gLogFile  = '/var/log/xen/xensec_gen.log'
 gUser     = 'nobody'
 gGroup    = 'nobody'
 
diff -r 896fcdd49c7f -r 684fdcfb251a unmodified_drivers/linux-2.6/platform-pci/evtchn.c
--- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c        Mon Aug 28 16:16:07 2006 -0600
+++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c        Mon Aug 28 16:26:37 2006 -0600
@@ -4,7 +4,7 @@
  * A simplified event channel for para-drivers in unmodified linux
  *
  * Copyright (c) 2002-2005, K A Fraser
- * Copyright (c) 2005, <xiaofeng.ling@xxxxxxxxx>
+ * Copyright (c) 2005, Intel Corporation <xiaofeng.ling@xxxxxxxxx>
  *
  * This file may be distributed separately from the Linux kernel, or
  * incorporated into other software packages, subject to the following license:
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/Makefile     Mon Aug 28 16:26:37 2006 -0600
@@ -2,6 +2,7 @@ subdir-y += cpu
 subdir-y += cpu
 subdir-y += genapic
 subdir-y += hvm
+subdir-y += mm
 subdir-y += oprofile
 
 subdir-$(x86_32) += x86_32
@@ -41,23 +42,6 @@ obj-y += usercopy.o
 obj-y += usercopy.o
 obj-y += x86_emulate.o
 
-ifneq ($(pae),n)
-obj-$(x86_32) += shadow2-common.o shadow2_g2_on_s3.o shadow2_g3_on_s3.o
-else
-obj-$(x86_32) += shadow2-common.o shadow2_g2_on_s2.o
-endif
-
-obj-$(x86_64) += shadow2-common.o shadow2_g4_on_s4.o shadow2_g3_on_s3.o \
-                 shadow2_g2_on_s3.o
-
-guest_levels  = $(subst g,,$(filter g%,$(subst ., ,$(subst _, ,$(subst shadow2_,,$(1))))))
-shadow_levels = $(subst s,,$(filter s%,$(subst ., ,$(subst _, ,$(subst shadow2_,,$(1))))))
-shadow2_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1)) \
-                -DSHADOW_PAGING_LEVELS=$(call shadow_levels,$(1))
-
-shadow2_%.o: shadow2.c $(HDRS) Makefile
-       $(CC) $(CFLAGS) $(call shadow2_defns,$(@F)) -c $< -o $@
-
 obj-$(crash_debug) += gdbstub.o
 
 $(TARGET): $(TARGET)-syms boot/mkelf32
@@ -86,9 +70,6 @@ boot/mkelf32: boot/mkelf32.c
 boot/mkelf32: boot/mkelf32.c
        $(HOSTCC) $(HOSTCFLAGS) -o $@ $<
 
-shadow_guest32.o: shadow.c
-shadow_guest32pae.o: shadow.c
-
 .PHONY: clean
 clean::
        rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/domain.c     Mon Aug 28 16:26:37 2006 -0600
@@ -200,12 +200,12 @@ int arch_domain_create(struct domain *d)
 
 #endif /* __x86_64__ */
 
-    shadow2_lock_init(d);
-    for ( i = 0; i <= SHADOW2_MAX_ORDER; i++ )
-        INIT_LIST_HEAD(&d->arch.shadow2.freelists[i]);
-    INIT_LIST_HEAD(&d->arch.shadow2.p2m_freelist);
-    INIT_LIST_HEAD(&d->arch.shadow2.p2m_inuse);
-    INIT_LIST_HEAD(&d->arch.shadow2.toplevel_shadows);
+    shadow_lock_init(d);
+    for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
+        INIT_LIST_HEAD(&d->arch.shadow.freelists[i]);
+    INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist);
+    INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse);
+    INIT_LIST_HEAD(&d->arch.shadow.toplevel_shadows);
 
     if ( !is_idle_domain(d) )
     {
@@ -236,7 +236,7 @@ int arch_domain_create(struct domain *d)
 
 void arch_domain_destroy(struct domain *d)
 {
-    shadow2_final_teardown(d);
+    shadow_final_teardown(d);
 
     free_xenheap_pages(
         d->arch.mm_perdomain_pt,
@@ -342,10 +342,10 @@ int arch_set_info_guest(
         }
     }    
 
-    /* Shadow2: make sure the domain has enough shadow memory to
+    /* Shadow: make sure the domain has enough shadow memory to
      * boot another vcpu */
-    if ( shadow2_mode_enabled(d) 
-         && d->arch.shadow2.total_pages < shadow2_min_acceptable_pages(d) )
+    if ( shadow_mode_enabled(d) 
+         && d->arch.shadow.total_pages < shadow_min_acceptable_pages(d) )
     {
         destroy_gdt(v);
         return -ENOMEM;
@@ -357,8 +357,8 @@ int arch_set_info_guest(
     /* Don't redo final setup */
     set_bit(_VCPUF_initialised, &v->vcpu_flags);
 
-    if ( shadow2_mode_enabled(d) )
-        shadow2_update_paging_modes(v);
+    if ( shadow_mode_enabled(d) )
+        shadow_update_paging_modes(v);
 
     update_cr3(v);
 
@@ -936,11 +936,11 @@ void domain_relinquish_resources(struct 
     for_each_vcpu ( d, v )
     {
         /* Drop ref to guest_table (from new_guest_cr3(), svm/vmx cr3 handling,
-         * or sh2_update_paging_modes()) */
+         * or sh_update_paging_modes()) */
         pfn = pagetable_get_pfn(v->arch.guest_table);
         if ( pfn != 0 )
         {
-            if ( shadow2_mode_refcounts(d) )
+            if ( shadow_mode_refcounts(d) )
                 put_page(mfn_to_page(pfn));
             else
                 put_page_and_type(mfn_to_page(pfn));
@@ -962,7 +962,7 @@ void domain_relinquish_resources(struct 
         hvm_relinquish_guest_resources(d);
 
     /* Tear down shadow mode stuff. */
-    shadow2_teardown(d);
+    shadow_teardown(d);
 
     /*
      * Relinquish GDT mappings. No need for explicit unmapping of the LDT as
@@ -981,18 +981,18 @@ void domain_relinquish_resources(struct 
 
 void arch_dump_domain_info(struct domain *d)
 {
-    if ( shadow2_mode_enabled(d) )
-    {
-        printk("    shadow2 mode: ");
-        if ( d->arch.shadow2.mode & SHM2_enable )
+    if ( shadow_mode_enabled(d) )
+    {
+        printk("    shadow mode: ");
+        if ( d->arch.shadow.mode & SHM2_enable )
             printk("enabled ");
-        if ( shadow2_mode_refcounts(d) )
+        if ( shadow_mode_refcounts(d) )
             printk("refcounts ");
-        if ( shadow2_mode_log_dirty(d) )
+        if ( shadow_mode_log_dirty(d) )
             printk("log_dirty ");
-        if ( shadow2_mode_translate(d) )
+        if ( shadow_mode_translate(d) )
             printk("translate ");
-        if ( shadow2_mode_external(d) )
+        if ( shadow_mode_external(d) )
             printk("external ");
         printk("\n");
     }
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/domain_build.c       Mon Aug 28 16:26:37 2006 -0600
@@ -679,8 +679,8 @@ int construct_dom0(struct domain *d,
         (void)alloc_vcpu(d, i, i);
 
     /* Set up CR3 value for write_ptbase */
-    if ( shadow2_mode_enabled(v->domain) )
-        shadow2_update_paging_modes(v);
+    if ( shadow_mode_enabled(v->domain) )
+        shadow_update_paging_modes(v);
     else
         update_cr3(v);
 
@@ -791,8 +791,8 @@ int construct_dom0(struct domain *d,
     new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start);
 
     if ( opt_dom0_shadow )
-        if ( shadow2_test_enable(d) == 0 ) 
-            shadow2_update_paging_modes(v);
+        if ( shadow_test_enable(d) == 0 ) 
+            shadow_update_paging_modes(v);
 
     if ( supervisor_mode_kernel )
     {
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/domctl.c     Mon Aug 28 16:26:37 2006 -0600
@@ -39,7 +39,7 @@ long arch_do_domctl(
         d = find_domain_by_id(domctl->domain);
         if ( d != NULL )
         {
-            ret = shadow2_domctl(d, &domctl->u.shadow_op, u_domctl);
+            ret = shadow_domctl(d, &domctl->u.shadow_op, u_domctl);
             put_domain(d);
             copy_to_guest(u_domctl, domctl, 1);
         } 
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Mon Aug 28 16:26:37 2006 -0600
@@ -384,8 +384,8 @@ int hvm_copy(void *buf, unsigned long va
         if (count > size)
             count = size;
 
-        gfn = shadow2_gva_to_gfn(v, vaddr);
-        mfn = mfn_x(sh2_vcpu_gfn_to_mfn(v, gfn));
+        gfn = shadow_gva_to_gfn(v, vaddr);
+        mfn = mfn_x(sh_vcpu_gfn_to_mfn(v, gfn));
 
         if (mfn == INVALID_MFN)
             return 0;
@@ -539,7 +539,7 @@ void hvm_do_hypercall(struct cpu_user_re
         return;
     }
 
-    if ( current->arch.shadow2.mode->guest_levels == 4 )
+    if ( current->arch.shadow.mode->guest_levels == 4 )
     {
         pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi,
                                                        pregs->rsi,
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/hvm/platform.c       Mon Aug 28 16:26:37 2006 -0600
@@ -721,7 +721,7 @@ void send_pio_req(struct cpu_user_regs *
 
     if (pvalid) {
         if (hvm_paging_enabled(current))
-            p->u.data = shadow2_gva_to_gpa(current, value);
+            p->u.data = shadow_gva_to_gpa(current, value);
         else
             p->u.pdata = (void *) value; /* guest VA == guest PA */
     } else
@@ -771,7 +771,7 @@ void send_mmio_req(
 
     if (pvalid) {
         if (hvm_paging_enabled(v))
-            p->u.data = shadow2_gva_to_gpa(v, value);
+            p->u.data = shadow_gva_to_gpa(v, value);
         else
             p->u.pdata = (void *) value; /* guest VA == guest PA */
     } else
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Aug 28 16:26:37 2006 -0600
@@ -29,7 +29,7 @@
 #include <xen/domain_page.h>
 #include <asm/current.h>
 #include <asm/io.h>
-#include <asm/shadow2.h>
+#include <asm/shadow.h>
 #include <asm/regs.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
@@ -402,6 +402,50 @@ static inline int long_mode_do_msr_write
     }
     return 1;
 }
+
+
+#define loaddebug(_v,_reg) \
+    __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
+#define savedebug(_v,_reg) \
+    __asm__ __volatile__ ("mov %%db" #_reg ",%0" : : "r" ((_v)->debugreg[_reg]))
+
+
+static inline void svm_save_dr(struct vcpu *v)
+{
+    if (v->arch.hvm_vcpu.flag_dr_dirty)
+    {
+        /* clear the DR dirty flag and re-enable intercepts for DR accesses */ 
+        v->arch.hvm_vcpu.flag_dr_dirty = 0;
+        v->arch.hvm_svm.vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
+
+        savedebug(&v->arch.guest_context, 0);    
+        savedebug(&v->arch.guest_context, 1);    
+        savedebug(&v->arch.guest_context, 2);    
+        savedebug(&v->arch.guest_context, 3);    
+    }
+}
+
+
+static inline void __restore_debug_registers(struct vcpu *v)
+{
+    loaddebug(&v->arch.guest_context, 0);
+    loaddebug(&v->arch.guest_context, 1);
+    loaddebug(&v->arch.guest_context, 2);
+    loaddebug(&v->arch.guest_context, 3);
+}
+
+
+static inline void svm_restore_dr(struct vcpu *v)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    if (!vmcb)
+        return;
+
+    if (unlikely(vmcb->dr7 & 0xFF))
+        __restore_debug_registers(v);
+}
+
 
 static int svm_realmode(struct vcpu *v)
 {
@@ -717,6 +761,7 @@ static void svm_ctxt_switch_from(struct 
 static void svm_ctxt_switch_from(struct vcpu *v)
 {
     svm_freeze_time(v);
+    svm_save_dr(v);
 }
 
 static void svm_ctxt_switch_to(struct vcpu *v)
@@ -732,6 +777,7 @@ static void svm_ctxt_switch_to(struct vc
     set_segment_register(es, 0);
     set_segment_register(ss, 0);
 #endif
+    svm_restore_dr(v);
 }
 
 
@@ -746,10 +792,10 @@ static void svm_final_setup_guest(struct
     if ( v != d->vcpu[0] )
         return;
 
-    if ( !shadow2_mode_external(d) )
+    if ( !shadow_mode_external(d) )
     {
         DPRINTK("Can't init HVM for dom %u vcpu %u: "
-                "not in shadow2 external mode\n", d->domain_id, v->vcpu_id);
+                "not in shadow external mode\n", d->domain_id, v->vcpu_id);
         domain_crash(d);
     }
 
@@ -914,7 +960,7 @@ static int svm_do_page_fault(unsigned lo
                 va, eip, (unsigned long)regs->error_code);
 //#endif
 
-    result = shadow2_fault(va, regs); 
+    result = shadow_fault(va, regs); 
 
     if( result ) {
         /* Let's make sure that the Guest TLB is flushed */
@@ -1183,55 +1229,16 @@ static inline void set_reg(unsigned int 
 }
                            
 
-static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type,
-                           struct cpu_user_regs *regs)
-{
-    unsigned long *reg_p = 0;
-    unsigned int gpreg = 0;
-    unsigned long eip;
-    int inst_len;
-    int index;
-    struct vmcb_struct *vmcb;
-    u8 buffer[MAX_INST_LEN];
-    u8 prefix = 0;
-
-    vmcb = v->arch.hvm_svm.vmcb;
-    
-    ASSERT(vmcb);
-
-    eip = vmcb->rip;
-    inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
-    index = skip_prefix_bytes(buffer, sizeof(buffer));
-    
-    ASSERT(buffer[index+0] == 0x0f && (buffer[index+1] & 0xFD) == 0x21);
-
-    if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
-        prefix = buffer[index-1];
-
-    gpreg = decode_src_reg(prefix, buffer[index + 2]);
-    ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));
-
-    HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
-                eip, reg, gpreg);
-
-    reg_p = get_reg_p(gpreg, regs, vmcb);
-        
-    switch (type) 
-    {
-    case TYPE_MOV_TO_DR: 
-        inst_len = __get_instruction_length(vmcb, INSTR_MOV2DR, buffer);
-        v->arch.guest_context.debugreg[reg] = *reg_p;
-        break;
-    case TYPE_MOV_FROM_DR:
-        inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer);
-        *reg_p = v->arch.guest_context.debugreg[reg];
-        break;
-    default:
-        __hvm_bug(regs);
-        break;
-    }
-    ASSERT(inst_len > 0);
-    __update_guest_eip(vmcb, inst_len);
+static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    v->arch.hvm_vcpu.flag_dr_dirty = 1;
+
+    __restore_debug_registers(v);
+
+    /* allow the guest full access to the debug registers */
+    vmcb->dr_intercepts = 0;
 }
 
 
@@ -1562,7 +1569,7 @@ static int svm_set_cr0(unsigned long val
         v->arch.guest_table = pagetable_from_pfn(mfn);
         if ( old_base_mfn )
             put_page(mfn_to_page(old_base_mfn));
-        shadow2_update_paging_modes(v);
+        shadow_update_paging_modes(v);
 
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
                     (unsigned long) (mfn << PAGE_SHIFT));
@@ -1588,14 +1595,14 @@ static int svm_set_cr0(unsigned long val
             svm_inject_exception(v, TRAP_gp_fault, 1, 0);
             return 0;
         }
-        shadow2_update_paging_modes(v);
+        shadow_update_paging_modes(v);
         vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
     }
     else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
     {
         /* we should take care of this kind of situation */
-        shadow2_update_paging_modes(v);
+        shadow_update_paging_modes(v);
         vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
         set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
     }
@@ -1706,7 +1713,7 @@ static int mov_to_cr(int gpreg, int cr, 
             mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
             if (mfn != pagetable_get_pfn(v->arch.guest_table))
                 __hvm_bug(regs);
-            shadow2_update_cr3(v);
+            shadow_update_cr3(v);
         }
         else 
         {
@@ -1771,7 +1778,7 @@ static int mov_to_cr(int gpreg, int cr, 
                 v->arch.guest_table = pagetable_from_pfn(mfn);
                 if ( old_base_mfn )
                     put_page(mfn_to_page(old_base_mfn));
-                shadow2_update_paging_modes(v);
+                shadow_update_paging_modes(v);
 
                 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
                             (unsigned long) (mfn << PAGE_SHIFT));
@@ -1808,7 +1815,7 @@ static int mov_to_cr(int gpreg, int cr, 
         if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
         {
             set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
-            shadow2_update_paging_modes(v);
+            shadow_update_paging_modes(v);
         }
         break;
     }
@@ -2149,7 +2156,7 @@ void svm_handle_invlpg(const short invlp
 
     /* Overkill, we may not this */
     set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
-    shadow2_invlpg(v, g_vaddr);
+    shadow_invlpg(v, g_vaddr);
 }
 
 
@@ -2520,7 +2527,7 @@ void walk_shadow_and_guest_pt(unsigned l
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     unsigned long gpa;
 
-    gpa = shadow2_gva_to_gpa(current, gva);
+    gpa = shadow_gva_to_gpa(current, gva);
     printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
     if( !svm_paging_enabled(v) || mmio_space(gpa) )
         return;
@@ -2591,7 +2598,7 @@ asmlinkage void svm_vmexit_handler(struc
         if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) 
         {
             if (svm_paging_enabled(v) && 
-                !mmio_space(shadow2_gva_to_gpa(current, vmcb->exitinfo2)))
+                !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2)))
             {
                 printk("I%08ld,ExC=%s(%d),IP=%x:%llx,"
                        "I1=%llx,I2=%llx,INT=%llx, "
@@ -2601,7 +2608,7 @@ asmlinkage void svm_vmexit_handler(struc
                        (unsigned long long) vmcb->exitinfo1,
                        (unsigned long long) vmcb->exitinfo2,
                        (unsigned long long) vmcb->exitintinfo.bytes,
-                       (unsigned long long) shadow2_gva_to_gpa(current, vmcb->exitinfo2));
+                       (unsigned long long) shadow_gva_to_gpa(current, vmcb->exitinfo2));
             }
             else 
             {
@@ -2862,53 +2869,9 @@ asmlinkage void svm_vmexit_handler(struc
     case VMEXIT_CR8_WRITE:
         svm_cr_access(v, 8, TYPE_MOV_TO_CR, &regs);
         break;
-
-    case VMEXIT_DR0_READ:
-        svm_dr_access(v, 0, TYPE_MOV_FROM_DR, &regs);
-        break;
-
-    case VMEXIT_DR1_READ:
-        svm_dr_access(v, 1, TYPE_MOV_FROM_DR, &regs);
-        break;
-
-    case VMEXIT_DR2_READ:
-        svm_dr_access(v, 2, TYPE_MOV_FROM_DR, &regs);
-        break;
-
-    case VMEXIT_DR3_READ:
-        svm_dr_access(v, 3, TYPE_MOV_FROM_DR, &regs);
-        break;
-
-    case VMEXIT_DR6_READ:
-        svm_dr_access(v, 6, TYPE_MOV_FROM_DR, &regs);
-        break;
-
-    case VMEXIT_DR7_READ:
-        svm_dr_access(v, 7, TYPE_MOV_FROM_DR, &regs);
-        break;
-
-    case VMEXIT_DR0_WRITE:
-        svm_dr_access(v, 0, TYPE_MOV_TO_DR, &regs);
-        break;
-
-    case VMEXIT_DR1_WRITE:
-        svm_dr_access(v, 1, TYPE_MOV_TO_DR, &regs);
-        break;
-
-    case VMEXIT_DR2_WRITE:
-        svm_dr_access(v, 2, TYPE_MOV_TO_DR, &regs);
-        break;
-
-    case VMEXIT_DR3_WRITE:
-        svm_dr_access(v, 3, TYPE_MOV_TO_DR, &regs);
-        break;
-
-    case VMEXIT_DR6_WRITE:
-        svm_dr_access(v, 6, TYPE_MOV_TO_DR, &regs);
-        break;
-
-    case VMEXIT_DR7_WRITE:
-        svm_dr_access(v, 7, TYPE_MOV_TO_DR, &regs);
+       
+    case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
+        svm_dr_access(v, &regs);
         break;
 
     case VMEXIT_IOIO:
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Mon Aug 28 16:26:37 2006 -0600
@@ -121,7 +121,7 @@ static int construct_vmcb_controls(struc
         GENERAL2_INTERCEPT_SKINIT | GENERAL2_INTERCEPT_RDTSCP;
 
     /* read or write all debug registers 0 - 15 */
-    vmcb->dr_intercepts = 0;
+    vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
 
     /* RD/WR all control registers 0 - 15, but not read CR2 */
     vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ | CR_INTERCEPT_CR2_WRITE);
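
Taken together, the svm.c and vmcb.c changes above implement lazy debug-register handling: DR accesses are intercepted by default, the first guest access restores the guest's DR0-DR3 and drops the intercepts, and the context-switch-out path saves the registers and re-arms the intercepts only if the guest actually touched them. A simplified, hedged sketch of that pattern follows; vcpu_t, hw_dr[], read_dr(), write_dr() and DR_INTERCEPT_ALL are placeholders for illustration, not the real Xen/SVM interfaces, and details such as intercepting only writes or re-checking DR7 on switch-in are omitted.

#include <stdio.h>
#include <stdint.h>

#define DR_INTERCEPT_ALL 0xffffffffu        /* placeholder intercept mask */

static uint64_t hw_dr[4];                   /* stand-in for physical DR0-DR3 */
static uint64_t read_dr(int r)              { return hw_dr[r]; }
static void     write_dr(int r, uint64_t v) { hw_dr[r] = v; }

typedef struct {
    uint64_t guest_dr[4];     /* saved guest DR0-DR3 */
    uint32_t dr_intercepts;   /* stand-in for vmcb->dr_intercepts */
    int      dr_dirty;        /* guest has touched its debug registers */
} vcpu_t;

/* VMEXIT for a debug-register access: hand the guest its registers and
 * stop intercepting until the next context switch (cf. svm_dr_access). */
static void on_dr_access(vcpu_t *v)
{
    for (int i = 0; i < 4; i++)
        write_dr(i, v->guest_dr[i]);
    v->dr_dirty = 1;
    v->dr_intercepts = 0;
}

/* Context switch away from the vcpu: pay for the save only if the guest
 * actually used the registers, then re-arm the intercepts
 * (cf. svm_save_dr, called from svm_ctxt_switch_from). */
static void ctxt_switch_from(vcpu_t *v)
{
    if (!v->dr_dirty)
        return;
    for (int i = 0; i < 4; i++)
        v->guest_dr[i] = read_dr(i);
    v->dr_dirty = 0;
    v->dr_intercepts = DR_INTERCEPT_ALL;
}

int main(void)
{
    vcpu_t v = { .dr_intercepts = DR_INTERCEPT_ALL };
    on_dr_access(&v);         /* first DR access: intercepts dropped      */
    write_dr(0, 0x1234);      /* guest programs DR0 without a VMEXIT      */
    ctxt_switch_from(&v);     /* DR0 value captured, intercepts re-armed  */
    printf("saved DR0 = %#llx\n", (unsigned long long)v.guest_dr[0]);
    return 0;
}
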
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Mon Aug 28 16:26:37 2006 -0600
@@ -35,7 +35,7 @@
 #include <xen/event.h>
 #include <xen/kernel.h>
 #include <xen/keyhandler.h>
-#include <asm/shadow2.h>
+#include <asm/shadow.h>
 
 static int vmcs_size;
 static int vmcs_order;
@@ -272,7 +272,7 @@ static void vmx_do_launch(struct vcpu *v
     error |= __vmwrite(GUEST_TR_BASE, 0);
     error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
 
-    shadow2_update_paging_modes(v);
+    shadow_update_paging_modes(v);
     printk("%s(): GUEST_CR3<=%08lx, HOST_CR3<=%08lx\n",
            __func__, v->arch.hvm_vcpu.hw_cr3, v->arch.cr3);
     __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Aug 28 16:26:37 2006 -0600
@@ -40,7 +40,7 @@
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/hvm/vmx/cpu.h>
-#include <asm/shadow2.h>
+#include <asm/shadow.h>
 #include <public/sched.h>
 #include <public/hvm/ioreq.h>
 #include <asm/hvm/vpic.h>
@@ -66,10 +66,10 @@ static int vmx_initialize_guest_resource
     if ( v->vcpu_id != 0 )
         return 1;
 
-    if ( !shadow2_mode_external(d) )
+    if ( !shadow_mode_external(d) )
     {
         DPRINTK("Can't init HVM for dom %u vcpu %u: "
-                "not in shadow2 external mode\n", 
+                "not in shadow external mode\n", 
                 d->domain_id, v->vcpu_id);
         domain_crash(d);
     }
@@ -865,7 +865,7 @@ static int vmx_do_page_fault(unsigned lo
     }
 #endif
 
-    result = shadow2_fault(va, regs);
+    result = shadow_fault(va, regs);
 
     TRACE_VMEXIT (2,result);
 #if 0
@@ -1039,7 +1039,7 @@ static void vmx_vmexit_do_invlpg(unsigne
      * We do the safest things first, then try to update the shadow
      * copying from guest
      */
-    shadow2_invlpg(v, va);
+    shadow_invlpg(v, va);
 }
 
 
@@ -1301,7 +1301,7 @@ vmx_world_restore(struct vcpu *v, struct
 
  skip_cr3:
 
-    shadow2_update_paging_modes(v);
+    shadow_update_paging_modes(v);
     if (!vmx_paging_enabled(v))
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
     else
@@ -1504,7 +1504,7 @@ static int vmx_set_cr0(unsigned long val
         v->arch.guest_table = pagetable_from_pfn(mfn);
         if (old_base_mfn)
             put_page(mfn_to_page(old_base_mfn));
-        shadow2_update_paging_modes(v);
+        shadow_update_paging_modes(v);
 
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
                     (unsigned long) (mfn << PAGE_SHIFT));
@@ -1577,7 +1577,7 @@ static int vmx_set_cr0(unsigned long val
     else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
     {
         __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
-        shadow2_update_paging_modes(v);
+        shadow_update_paging_modes(v);
     }
 
     return 1;
@@ -1662,7 +1662,7 @@ static int mov_to_cr(int gp, int cr, str
             mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
             if (mfn != pagetable_get_pfn(v->arch.guest_table))
                 __hvm_bug(regs);
-            shadow2_update_cr3(v);
+            shadow_update_cr3(v);
         } else {
             /*
              * If different, make a shadow. Check if the PDBR is valid
@@ -1755,7 +1755,7 @@ static int mov_to_cr(int gp, int cr, str
          * all TLB entries except global entries.
          */
         if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
-            shadow2_update_paging_modes(v);
+            shadow_update_paging_modes(v);
         break;
     }
     default:
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/mm.c Mon Aug 28 16:26:37 2006 -0600
@@ -454,12 +454,12 @@ int map_ldt_shadow_page(unsigned int off
 
     res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
 
-    if ( !res && unlikely(shadow2_mode_refcounts(d)) )
-    {
-        shadow2_lock(d);
-        shadow2_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0);
+    if ( !res && unlikely(shadow_mode_refcounts(d)) )
+    {
+        shadow_lock(d);
+        shadow_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0);
         res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
-        shadow2_unlock(d);
+        shadow_unlock(d);
     }
 
     if ( unlikely(!res) )
@@ -527,7 +527,7 @@ get_linear_pagetable(
     struct page_info *page;
     unsigned long pfn;
 
-    ASSERT( !shadow2_mode_refcounts(d) );
+    ASSERT( !shadow_mode_refcounts(d) );
 
     if ( (root_get_flags(re) & _PAGE_RW) )
     {
@@ -602,12 +602,12 @@ get_page_from_l1e(
         d = dom_io;
     }
 
-    /* Foreign mappings into guests in shadow2 external mode don't
+    /* Foreign mappings into guests in shadow external mode don't
      * contribute to writeable mapping refcounts.  (This allows the
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
     okay = (((l1e_get_flags(l1e) & _PAGE_RW) && 
-             !(unlikely(shadow2_mode_external(d) && (d != current->domain))))
+             !(unlikely(shadow_mode_external(d) && (d != current->domain))))
             ? get_page_and_type(page, d, PGT_writable_page)
             : get_page(page, d));
     if ( !okay )
@@ -771,9 +771,9 @@ void put_page_from_l1e(l1_pgentry_t l1e,
     }
 
     /* Remember we didn't take a type-count of foreign writable mappings
-     * to shadow2 external domains */
+     * to shadow external domains */
     if ( (l1e_get_flags(l1e) & _PAGE_RW) && 
-         !(unlikely((e != d) && shadow2_mode_external(e))) )
+         !(unlikely((e != d) && shadow_mode_external(e))) )
     {
         put_page_and_type(page);
     }
@@ -830,7 +830,7 @@ static int alloc_l1_table(struct page_in
     l1_pgentry_t  *pl1e;
     int            i;
 
-    ASSERT(!shadow2_mode_refcounts(d));
+    ASSERT(!shadow_mode_refcounts(d));
 
     pl1e = map_domain_page(pfn);
 
@@ -883,7 +883,7 @@ static int create_pae_xen_mappings(l3_pg
      *     a. alloc_l3_table() calls this function and this check will fail
      *     b. mod_l3_entry() disallows updates to slot 3 in an existing table
      *
-     * XXX -- this needs revisiting for shadow2_mode_refcount()==true...
+     * XXX -- this needs revisiting for shadow_mode_refcount()==true...
      */
     page = l3e_get_page(l3e3);
     BUG_ON(page->u.inuse.type_info & PGT_pinned);
@@ -1007,7 +1007,7 @@ static int alloc_l2_table(struct page_in
     l2_pgentry_t  *pl2e;
     int            i;
 
-    ASSERT(!shadow2_mode_refcounts(d));
+    ASSERT(!shadow_mode_refcounts(d));
     
     pl2e = map_domain_page(pfn);
 
@@ -1059,7 +1059,7 @@ static int alloc_l3_table(struct page_in
     l3_pgentry_t  *pl3e;
     int            i;
 
-    ASSERT(!shadow2_mode_refcounts(d));
+    ASSERT(!shadow_mode_refcounts(d));
 
 #ifdef CONFIG_X86_PAE
     /*
@@ -1120,7 +1120,7 @@ static int alloc_l4_table(struct page_in
     unsigned long vaddr;
     int            i;
 
-    ASSERT(!shadow2_mode_refcounts(d));
+    ASSERT(!shadow_mode_refcounts(d));
 
     for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
     {
@@ -1234,8 +1234,8 @@ static inline int update_l1e(l1_pgentry_
                              struct vcpu *v)
 {
     int rv = 1;
-    if ( unlikely(shadow2_mode_enabled(v->domain)) )
-        shadow2_lock(v->domain);
+    if ( unlikely(shadow_mode_enabled(v->domain)) )
+        shadow_lock(v->domain);
 #ifndef PTE_UPDATE_WITH_CMPXCHG
     rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e)));
 #else
@@ -1266,10 +1266,10 @@ static inline int update_l1e(l1_pgentry_
         }
     }
 #endif
-    if ( unlikely(shadow2_mode_enabled(v->domain)) )
-    {
-        shadow2_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
-        shadow2_unlock(v->domain);    
+    if ( unlikely(shadow_mode_enabled(v->domain)) )
+    {
+        shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e);
+        shadow_unlock(v->domain);    
     }
     return rv;
 }
@@ -1339,13 +1339,13 @@ static int mod_l1_entry(l1_pgentry_t *pl
 #endif
 #define UPDATE_ENTRY(_t,_p,_o,_n,_m)  ({                            \
     int rv;                                                         \
-    if ( unlikely(shadow2_mode_enabled(current->domain)) )          \
-        shadow2_lock(current->domain);                              \
+    if ( unlikely(shadow_mode_enabled(current->domain)) )          \
+        shadow_lock(current->domain);                              \
     rv = _UPDATE_ENTRY(_t, _p, _o, _n);                             \
-    if ( unlikely(shadow2_mode_enabled(current->domain)) )          \
+    if ( unlikely(shadow_mode_enabled(current->domain)) )          \
     {                                                               \
-        shadow2_validate_guest_entry(current, _mfn(_m), (_p));      \
-        shadow2_unlock(current->domain);                            \
+        shadow_validate_guest_entry(current, _mfn(_m), (_p));      \
+        shadow_unlock(current->domain);                            \
     }                                                               \
     rv;                                                             \
 })
@@ -1581,21 +1581,21 @@ void free_page_type(struct page_info *pa
          */
         this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
 
-        if ( unlikely(shadow2_mode_enabled(owner)
-                 && !shadow2_lock_is_acquired(owner)) )
+        if ( unlikely(shadow_mode_enabled(owner)
+                 && !shadow_lock_is_acquired(owner)) )
         {
             /* Raw page tables are rewritten during save/restore. */
-            if ( !shadow2_mode_translate(owner) )
+            if ( !shadow_mode_translate(owner) )
                 mark_dirty(owner, page_to_mfn(page));
 
-            if ( shadow2_mode_refcounts(owner) )
+            if ( shadow_mode_refcounts(owner) )
                 return;
 
             gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
             ASSERT(VALID_M2P(gmfn));
-            shadow2_lock(owner);
-            shadow2_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
-            shadow2_unlock(owner);
+            shadow_lock(owner);
+            shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
+            shadow_unlock(owner);
         }
     }
 
@@ -1760,7 +1760,7 @@ int get_page_type(struct page_info *page
 #endif
                     /* Fixme: add code to propagate va_unknown to subtables. */
                     if ( ((type & PGT_type_mask) >= PGT_l2_page_table) &&
-                         !shadow2_mode_refcounts(page_get_owner(page)) )
+                         !shadow_mode_refcounts(page_get_owner(page)) )
                         return 0;
                     /* This table is possibly mapped at multiple locations. */
                     nx &= ~PGT_va_mask;
@@ -1810,7 +1810,7 @@ int new_guest_cr3(unsigned long mfn)
     if ( hvm_guest(v) && !hvm_paging_enabled(v) )
         domain_crash_synchronous();
 
-    if ( shadow2_mode_refcounts(d) )
+    if ( shadow_mode_refcounts(d) )
     {
         okay = get_page_from_pagenr(mfn, d);
         if ( unlikely(!okay) )
@@ -1858,7 +1858,7 @@ int new_guest_cr3(unsigned long mfn)
 
     if ( likely(old_base_mfn != 0) )
     {
-        if ( shadow2_mode_refcounts(d) )
+        if ( shadow_mode_refcounts(d) )
             put_page(mfn_to_page(old_base_mfn));
         else
             put_page_and_type(mfn_to_page(old_base_mfn));
@@ -2043,7 +2043,7 @@ int do_mmuext_op(
             type = PGT_root_page_table;
 
         pin_page:
-            if ( shadow2_mode_refcounts(FOREIGNDOM) )
+            if ( shadow_mode_refcounts(FOREIGNDOM) )
                 break;
 
             okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
@@ -2065,7 +2065,7 @@ int do_mmuext_op(
             break;
 
         case MMUEXT_UNPIN_TABLE:
-            if ( shadow2_mode_refcounts(d) )
+            if ( shadow_mode_refcounts(d) )
                 break;
 
             if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
@@ -2078,11 +2078,11 @@ int do_mmuext_op(
             {
                 put_page_and_type(page);
                 put_page(page);
-                if ( shadow2_mode_enabled(d) )
+                if ( shadow_mode_enabled(d) )
                 {
-                    shadow2_lock(d);
-                    shadow2_remove_all_shadows(v, _mfn(mfn));
-                    shadow2_unlock(d);
+                    shadow_lock(d);
+                    shadow_remove_all_shadows(v, _mfn(mfn));
+                    shadow_unlock(d);
                 }
             }
             else
@@ -2125,8 +2125,8 @@ int do_mmuext_op(
             break;
     
         case MMUEXT_INVLPG_LOCAL:
-            if ( !shadow2_mode_enabled(d) 
-                 || shadow2_invlpg(v, op.arg1.linear_addr) != 0 )
+            if ( !shadow_mode_enabled(d) 
+                 || shadow_invlpg(v, op.arg1.linear_addr) != 0 )
                 local_flush_tlb_one(op.arg1.linear_addr);
             break;
 
@@ -2173,7 +2173,7 @@ int do_mmuext_op(
             unsigned long ptr  = op.arg1.linear_addr;
             unsigned long ents = op.arg2.nr_ents;
 
-            if ( shadow2_mode_external(d) )
+            if ( shadow_mode_external(d) )
             {
                 MEM_LOG("ignoring SET_LDT hypercall from external "
                         "domain %u", d->domain_id);
@@ -2319,7 +2319,7 @@ int do_mmu_update(
             case PGT_l3_page_table:
             case PGT_l4_page_table:
             {
-                if ( shadow2_mode_refcounts(d) )
+                if ( shadow_mode_refcounts(d) )
                 {
                     DPRINTK("mmu update on shadow-refcounted domain!");
                     break;
@@ -2372,16 +2372,16 @@ int do_mmu_update(
                 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
                     break;
 
-                if ( unlikely(shadow2_mode_enabled(d)) )
-                    shadow2_lock(d);
+                if ( unlikely(shadow_mode_enabled(d)) )
+                    shadow_lock(d);
 
                 *(intpte_t *)va = req.val;
                 okay = 1;
 
-                if ( unlikely(shadow2_mode_enabled(d)) )
+                if ( unlikely(shadow_mode_enabled(d)) )
                 {
-                    shadow2_validate_guest_entry(v, _mfn(mfn), va);
-                    shadow2_unlock(d);
+                    shadow_validate_guest_entry(v, _mfn(mfn), va);
+                    shadow_unlock(d);
                 }
 
                 put_page_type(page);
@@ -2405,8 +2405,8 @@ int do_mmu_update(
                 break;
             }
 
-            if ( shadow2_mode_translate(FOREIGNDOM) )
-                shadow2_guest_physmap_add_page(FOREIGNDOM, gpfn, mfn);
+            if ( shadow_mode_translate(FOREIGNDOM) )
+                shadow_guest_physmap_add_page(FOREIGNDOM, gpfn, mfn);
             else 
                 set_gpfn_from_mfn(mfn, gpfn);
             okay = 1;
@@ -2492,7 +2492,7 @@ static int create_grant_pte_mapping(
         goto failed;
     } 
 
-    if ( !shadow2_mode_refcounts(d) )
+    if ( !shadow_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
 
     put_page_type(page);
@@ -2590,7 +2590,7 @@ static int create_grant_va_mapping(
                     l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) )
         return GNTST_general_error;
 
-    if ( !shadow2_mode_refcounts(d) )
+    if ( !shadow_mode_refcounts(d) )
         put_page_from_l1e(ol1e, d);
 
     return GNTST_okay;
@@ -2714,10 +2714,10 @@ int do_update_va_mapping(unsigned long v
 
     perfc_incrc(calls_to_update_va);
 
-    if ( unlikely(!__addr_ok(va) && !shadow2_mode_external(d)) )
+    if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) )
         return -EINVAL;
 
-    if ( unlikely(shadow2_mode_refcounts(d)) )
+    if ( unlikely(shadow_mode_refcounts(d)) )
     {
         DPRINTK("Grant op on a shadow-refcounted domain\n");
         return -EINVAL; 
@@ -2725,11 +2725,11 @@ int do_update_va_mapping(unsigned long v
 
     LOCK_BIGLOCK(d);
 
-    if ( likely(rc == 0) && unlikely(shadow2_mode_enabled(d)) )
+    if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
     {
         if ( unlikely(this_cpu(percpu_mm_info).foreign &&
-                      (shadow2_mode_translate(d) ||
-                       shadow2_mode_translate(
+                      (shadow_mode_translate(d) ||
+                       shadow_mode_translate(
                            this_cpu(percpu_mm_info).foreign))) )
         {
             /*
@@ -2770,8 +2770,8 @@ int do_update_va_mapping(unsigned long v
         switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
         {
         case UVMF_LOCAL:
-            if ( !shadow2_mode_enabled(d) 
-                 || (shadow2_invlpg(current, va) != 0) ) 
+            if ( !shadow_mode_enabled(d) 
+                 || (shadow_invlpg(current, va) != 0) ) 
                 local_flush_tlb_one(va);
             break;
         case UVMF_ALL:
@@ -3006,7 +3006,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
             break;
         }
 
-        if ( !shadow2_mode_translate(d) || (mfn == 0) )
+        if ( !shadow_mode_translate(d) || (mfn == 0) )
         {
             put_domain(d);
             return -EINVAL;
@@ -3196,21 +3196,21 @@ static int ptwr_emulated_update(
     pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
     if ( do_cmpxchg )
     {
-        if ( shadow2_mode_enabled(d) )
-            shadow2_lock(d);
+        if ( shadow_mode_enabled(d) )
+            shadow_lock(d);
         ol1e = l1e_from_intpte(old);
         if ( cmpxchg((intpte_t *)pl1e, old, val) != old )
         {
-            if ( shadow2_mode_enabled(d) )
-                shadow2_unlock(d);
+            if ( shadow_mode_enabled(d) )
+                shadow_unlock(d);
             unmap_domain_page(pl1e);
             put_page_from_l1e(nl1e, d);
             return X86EMUL_CMPXCHG_FAILED;
         }
-        if ( unlikely(shadow2_mode_enabled(v->domain)) )
-        {
-            shadow2_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
-            shadow2_unlock(v->domain);    
+        if ( unlikely(shadow_mode_enabled(v->domain)) )
+        {
+            shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e);
+            shadow_unlock(v->domain);    
         }
     }
     else
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/arch/x86/traps.c      Mon Aug 28 16:26:37 2006 -0600
@@ -870,8 +870,8 @@ static int fixup_page_fault(unsigned lon
 
     if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
     {
-        if ( shadow2_mode_external(d) && guest_mode(regs) )
-            return shadow2_fault(addr, regs);
+        if ( shadow_mode_external(d) && guest_mode(regs) )
+            return shadow_fault(addr, regs);
         if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
             return handle_gdt_ldt_mapping_fault(
                 addr - GDT_LDT_VIRT_START, regs);
@@ -890,8 +890,8 @@ static int fixup_page_fault(unsigned lon
          ptwr_do_page_fault(d, addr, regs) )
         return EXCRET_fault_fixed;
 
-    if ( shadow2_mode_enabled(d) )
-        return shadow2_fault(addr, regs);
+    if ( shadow_mode_enabled(d) )
+        return shadow_fault(addr, regs);
 
     return 0;
 }
diff -r 896fcdd49c7f -r 684fdcfb251a xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/include/asm-x86/domain.h      Mon Aug 28 16:26:37 2006 -0600
@@ -59,10 +59,10 @@ extern void hypercall_page_initialise(st
 
 struct shadow_domain {
     u32               mode;  /* flags to control shadow operation */
-    spinlock_t        lock;  /* shadow2 domain lock */
+    spinlock_t        lock;  /* shadow domain lock */
     int               locker; /* processor which holds the lock */
     const char       *locker_function; /* Func that took it */
-    struct list_head  freelists[SHADOW2_MAX_ORDER + 1]; 
+    struct list_head  freelists[SHADOW_MAX_ORDER + 1]; 
     struct list_head  p2m_freelist;
     struct list_head  p2m_inuse;
     struct list_head  toplevel_shadows;
@@ -70,10 +70,10 @@ struct shadow_domain {
     unsigned int      free_pages;   /* number of pages on freelists */
     unsigned int      p2m_pages;    /* number of pages in p2m map */
 
-    /* Shadow2 hashtable */
-    struct shadow2_hash_entry *hash_table;
-    struct shadow2_hash_entry *hash_freelist;
-    struct shadow2_hash_entry *hash_allocations;
+    /* Shadow hashtable */
+    struct shadow_hash_entry *hash_table;
+    struct shadow_hash_entry *hash_freelist;
+    struct shadow_hash_entry *hash_allocations;
     int hash_walking;  /* Some function is walking the hash table */
 
     /* Shadow log-dirty bitmap */
@@ -107,7 +107,7 @@ struct arch_domain
     /* Shadow-translated guest: Pseudophys base address of reserved area. */
     unsigned long first_reserved_pfn;
 
-    struct shadow_domain shadow2;
+    struct shadow_domain shadow;
 
     /* Shadow translated domain: P2M mapping */
     pagetable_t phys_table;
@@ -135,7 +135,7 @@ struct pae_l3_cache { };
 
 struct shadow_vcpu {
     /* Pointers to mode-specific entry points. */
-    struct shadow2_paging_mode *mode;
+    struct shadow_paging_mode *mode;
     /* Last MFN that we emulated a write to. */
     unsigned long last_emulated_mfn;
     /* HVM guest: paging enabled (CR0.PG)?  */
@@ -201,7 +201,7 @@ struct arch_vcpu
     /* Current LDT details. */
     unsigned long shadow_ldt_mapcnt;
 
-    struct shadow_vcpu shadow2;
+    struct shadow_vcpu shadow;
 } __cacheline_aligned;
 
 /* shorthands to improve code legibility */
diff -r 896fcdd49c7f -r 684fdcfb251a xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h        Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h        Mon Aug 28 16:26:37 2006 -0600
@@ -113,6 +113,51 @@ enum CRInterceptBits
     CR_INTERCEPT_CR14_WRITE = 1 << 30,
     CR_INTERCEPT_CR15_WRITE = 1 << 31,
 };
+
+
+/* debug register intercepts */
+enum DRInterceptBits
+{
+    DR_INTERCEPT_DR0_READ   = 1 << 0,
+    DR_INTERCEPT_DR1_READ   = 1 << 1,
+    DR_INTERCEPT_DR2_READ   = 1 << 2,
+    DR_INTERCEPT_DR3_READ   = 1 << 3,
+    DR_INTERCEPT_DR4_READ   = 1 << 4,
+    DR_INTERCEPT_DR5_READ   = 1 << 5,
+    DR_INTERCEPT_DR6_READ   = 1 << 6,
+    DR_INTERCEPT_DR7_READ   = 1 << 7,
+    DR_INTERCEPT_DR8_READ   = 1 << 8,
+    DR_INTERCEPT_DR9_READ   = 1 << 9,
+    DR_INTERCEPT_DR10_READ  = 1 << 10,
+    DR_INTERCEPT_DR11_READ  = 1 << 11,
+    DR_INTERCEPT_DR12_READ  = 1 << 12,
+    DR_INTERCEPT_DR13_READ  = 1 << 13,
+    DR_INTERCEPT_DR14_READ  = 1 << 14,
+    DR_INTERCEPT_DR15_READ  = 1 << 15,
+    DR_INTERCEPT_DR0_WRITE  = 1 << 16,
+    DR_INTERCEPT_DR1_WRITE  = 1 << 17,
+    DR_INTERCEPT_DR2_WRITE  = 1 << 18,
+    DR_INTERCEPT_DR3_WRITE  = 1 << 19,
+    DR_INTERCEPT_DR4_WRITE  = 1 << 20,
+    DR_INTERCEPT_DR5_WRITE  = 1 << 21,
+    DR_INTERCEPT_DR6_WRITE  = 1 << 22,
+    DR_INTERCEPT_DR7_WRITE  = 1 << 23,
+    DR_INTERCEPT_DR8_WRITE  = 1 << 24,
+    DR_INTERCEPT_DR9_WRITE  = 1 << 25,
+    DR_INTERCEPT_DR10_WRITE = 1 << 26,
+    DR_INTERCEPT_DR11_WRITE = 1 << 27,
+    DR_INTERCEPT_DR12_WRITE = 1 << 28,
+    DR_INTERCEPT_DR13_WRITE = 1 << 29,
+    DR_INTERCEPT_DR14_WRITE = 1 << 30,
+    DR_INTERCEPT_DR15_WRITE = 1 << 31,
+};
+
+/* for lazy save/restore we'd like to intercept all DR writes */
+#define DR_INTERCEPT_ALL_WRITES \
+    (DR_INTERCEPT_DR0_WRITE|DR_INTERCEPT_DR1_WRITE|DR_INTERCEPT_DR2_WRITE \
+    |DR_INTERCEPT_DR3_WRITE|DR_INTERCEPT_DR4_WRITE|DR_INTERCEPT_DR5_WRITE \
+    |DR_INTERCEPT_DR6_WRITE|DR_INTERCEPT_DR7_WRITE) 
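By analogy with the write mask just defined, a caller that also wants to trap reads of the architectural debug registers DR0-DR3 could OR in the corresponding read bits. The helper below is only an illustrative sketch built from the enum values in this hunk; it is not part of the patch.

    /* Illustrative sketch: intercept all DR writes plus reads of DR0-DR3. */
    static inline u32 build_dr_intercepts(void)
    {
        u32 mask = DR_INTERCEPT_ALL_WRITES;
        mask |= DR_INTERCEPT_DR0_READ | DR_INTERCEPT_DR1_READ
              | DR_INTERCEPT_DR2_READ | DR_INTERCEPT_DR3_READ;
        return mask;
    }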
+
 
 enum VMEXIT_EXITCODE
 {
diff -r 896fcdd49c7f -r 684fdcfb251a xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/include/asm-x86/mm.h  Mon Aug 28 16:26:37 2006 -0600
@@ -22,7 +22,7 @@ struct page_info
     /* Each frame can be threaded onto a doubly-linked list. */
     union {
         struct list_head list;
-        /* Shadow2 uses this field as an up-pointer in lower-level shadows */
+        /* Shadow uses this field as an up-pointer in lower-level shadows */
         paddr_t up;
     };
 
@@ -59,7 +59,7 @@ struct page_info
         /* Only used on guest pages with a shadow.
          * Guest pages with a shadow must have a non-zero type count, so this
          * does not conflict with the tlbflush timestamp. */
-        u32 shadow2_flags;
+        u32 shadow_flags;
 
         // XXX -- we expect to add another field here, to be used for min/max
         // purposes, which is only used for shadow pages.
@@ -76,7 +76,7 @@ struct page_info
 #define PGT_ldt_page        (6U<<29) /* using this page in an LDT? */
 #define PGT_writable_page   (7U<<29) /* has writable mappings of this page? */
 
-#ifndef SHADOW2
+#ifndef SHADOW
 #define PGT_l1_shadow       PGT_l1_page_table
 #define PGT_l2_shadow       PGT_l2_page_table
 #define PGT_l3_shadow       PGT_l3_page_table
@@ -117,7 +117,7 @@ struct page_info
  /* 16-bit count of uses of this frame as its current type. */
 #define PGT_count_mask      ((1U<<16)-1)
 
-#ifndef SHADOW2
+#ifndef SHADOW
 #ifdef __x86_64__
 #define PGT_high_mfn_shift  52
 #define PGT_high_mfn_mask   (0xfffUL << PGT_high_mfn_shift)
@@ -132,7 +132,7 @@ struct page_info
 #define PGT_score_shift     23
 #define PGT_score_mask      (((1U<<4)-1)<<PGT_score_shift)
 #endif
-#endif /* SHADOW2 */
+#endif /* SHADOW */
 
  /* Cleared when the owning guest 'frees' this page. */
 #define _PGC_allocated      31
@@ -146,38 +146,38 @@ struct page_info
  /* 29-bit count of references to this frame. */
 #define PGC_count_mask      ((1U<<29)-1)
 
-/* shadow2 uses the count_info on shadow pages somewhat differently */
-/* NB: please coordinate any changes here with the SH2F's in shadow2.h */
-#define PGC_SH2_none           (0U<<28) /* on the shadow2 free list */
-#define PGC_SH2_min_shadow     (1U<<28)
-#define PGC_SH2_l1_32_shadow   (1U<<28) /* shadowing a 32-bit L1 guest page */
-#define PGC_SH2_fl1_32_shadow  (2U<<28) /* L1 shadow for a 32b 4M superpage */
-#define PGC_SH2_l2_32_shadow   (3U<<28) /* shadowing a 32-bit L2 guest page */
-#define PGC_SH2_l1_pae_shadow  (4U<<28) /* shadowing a pae L1 page */
-#define PGC_SH2_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
-#define PGC_SH2_l2_pae_shadow  (6U<<28) /* shadowing a pae L2-low page */
-#define PGC_SH2_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
-#define PGC_SH2_l3_pae_shadow  (8U<<28) /* shadowing a pae L3 page */
-#define PGC_SH2_l1_64_shadow   (9U<<28) /* shadowing a 64-bit L1 page */
-#define PGC_SH2_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
-#define PGC_SH2_l2_64_shadow  (11U<<28) /* shadowing a 64-bit L2 page */
-#define PGC_SH2_l3_64_shadow  (12U<<28) /* shadowing a 64-bit L3 page */
-#define PGC_SH2_l4_64_shadow  (13U<<28) /* shadowing a 64-bit L4 page */
-#define PGC_SH2_max_shadow    (13U<<28)
-#define PGC_SH2_p2m_table     (14U<<28) /* in use as the p2m table */
-#define PGC_SH2_monitor_table (15U<<28) /* in use as a monitor table */
-#define PGC_SH2_unused        (15U<<28)
-
-#define PGC_SH2_type_mask     (15U<<28)
-#define PGC_SH2_type_shift          28
-
-#define PGC_SH2_pinned         (1U<<27)
-
-#define _PGC_SH2_log_dirty          26
-#define PGC_SH2_log_dirty      (1U<<26)
+/* shadow uses the count_info on shadow pages somewhat differently */
+/* NB: please coordinate any changes here with the SHF's in shadow.h */
+#define PGC_SH_none           (0U<<28) /* on the shadow free list */
+#define PGC_SH_min_shadow     (1U<<28)
+#define PGC_SH_l1_32_shadow   (1U<<28) /* shadowing a 32-bit L1 guest page */
+#define PGC_SH_fl1_32_shadow  (2U<<28) /* L1 shadow for a 32b 4M superpage */
+#define PGC_SH_l2_32_shadow   (3U<<28) /* shadowing a 32-bit L2 guest page */
+#define PGC_SH_l1_pae_shadow  (4U<<28) /* shadowing a pae L1 page */
+#define PGC_SH_fl1_pae_shadow (5U<<28) /* L1 shadow for pae 2M superpg */
+#define PGC_SH_l2_pae_shadow  (6U<<28) /* shadowing a pae L2-low page */
+#define PGC_SH_l2h_pae_shadow (7U<<28) /* shadowing a pae L2-high page */
+#define PGC_SH_l3_pae_shadow  (8U<<28) /* shadowing a pae L3 page */
+#define PGC_SH_l1_64_shadow   (9U<<28) /* shadowing a 64-bit L1 page */
+#define PGC_SH_fl1_64_shadow (10U<<28) /* L1 shadow for 64-bit 2M superpg */
+#define PGC_SH_l2_64_shadow  (11U<<28) /* shadowing a 64-bit L2 page */
+#define PGC_SH_l3_64_shadow  (12U<<28) /* shadowing a 64-bit L3 page */
+#define PGC_SH_l4_64_shadow  (13U<<28) /* shadowing a 64-bit L4 page */
+#define PGC_SH_max_shadow    (13U<<28)
+#define PGC_SH_p2m_table     (14U<<28) /* in use as the p2m table */
+#define PGC_SH_monitor_table (15U<<28) /* in use as a monitor table */
+#define PGC_SH_unused        (15U<<28)
+
+#define PGC_SH_type_mask     (15U<<28)
+#define PGC_SH_type_shift          28
+
+#define PGC_SH_pinned         (1U<<27)
+
+#define _PGC_SH_log_dirty          26
+#define PGC_SH_log_dirty      (1U<<26)
 
 /* 26 bit ref count for shadow pages */
-#define PGC_SH2_count_mask    ((1U<<26) - 1)
+#define PGC_SH_count_mask    ((1U<<26) - 1)
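As a reading aid, the PGC_SH_* fields above pack a 4-bit shadow type, a pinned flag, a log-dirty flag, and a 26-bit reference count into count_info. A minimal decoder (illustrative only, not part of the patch) would be:

    /* Illustrative only: decode the shadow-specific layout of count_info. */
    static inline u32 sh_type_of(const struct page_info *pg)
    {
        return (pg->count_info & PGC_SH_type_mask) >> PGC_SH_type_shift;
    }
    static inline u32 sh_refcount_of(const struct page_info *pg)
    {
        return pg->count_info & PGC_SH_count_mask;
    }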
 
 /* We trust the slab allocator in slab.c, and our use of it. */
 #define PageSlab(page)     (1)
@@ -201,9 +201,9 @@ static inline u32 pickle_domptr(struct d
 
 /* The order of the largest allocation unit we use for shadow pages */
 #if CONFIG_PAGING_LEVELS == 2
-#define SHADOW2_MAX_ORDER 0 /* Only ever need 4k allocations */
+#define SHADOW_MAX_ORDER 0 /* Only ever need 4k allocations */
 #else  
-#define SHADOW2_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
+#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
 #endif
 
 #define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
@@ -227,7 +227,7 @@ extern int shadow_remove_all_write_acces
 extern int shadow_remove_all_write_access(
     struct domain *d, unsigned long gmfn, unsigned long mfn);
 extern u32 shadow_remove_all_access( struct domain *d, unsigned long gmfn);
-extern int _shadow2_mode_refcounts(struct domain *d);
+extern int _shadow_mode_refcounts(struct domain *d);
 
 static inline void put_page(struct page_info *page)
 {
@@ -259,7 +259,7 @@ static inline int get_page(struct page_i
              unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
              unlikely(d != _domain) )                /* Wrong owner? */
         {
-            if ( !_shadow2_mode_refcounts(domain) )
+            if ( !_shadow_mode_refcounts(domain) )
                 DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" 
                         PRtype_info "\n",
                         page_to_mfn(page), domain, unpickle_domptr(d),
@@ -345,11 +345,11 @@ int check_descriptor(struct desc_struct 
 
 
 #define mfn_to_gmfn(_d, mfn)                            \
-    ( (shadow2_mode_translate(_d))                      \
+    ( (shadow_mode_translate(_d))                      \
       ? get_gpfn_from_mfn(mfn)                          \
       : (mfn) )
 
-#define gmfn_to_mfn(_d, gpfn)  mfn_x(sh2_gfn_to_mfn(_d, gpfn))
+#define gmfn_to_mfn(_d, gpfn)  mfn_x(sh_gfn_to_mfn(_d, gpfn))
 
 
 /*
diff -r 896fcdd49c7f -r 684fdcfb251a xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h  Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/include/asm-x86/perfc_defn.h  Mon Aug 28 16:26:37 2006 -0600
@@ -30,59 +30,59 @@ PERFCOUNTER_CPU(exception_fixed,        
 PERFCOUNTER_CPU(exception_fixed,        "pre-exception fixed")
 
 
-/* Shadow2 counters */
-PERFCOUNTER_CPU(shadow2_alloc,          "calls to shadow2_alloc")
-PERFCOUNTER_CPU(shadow2_alloc_tlbflush, "shadow2_alloc flushed TLBs")
+/* Shadow counters */
+PERFCOUNTER_CPU(shadow_alloc,          "calls to shadow_alloc")
+PERFCOUNTER_CPU(shadow_alloc_tlbflush, "shadow_alloc flushed TLBs")
 
 /* STATUS counters do not reset when 'P' is hit */
-PERFSTATUS(shadow2_alloc_count,         "number of shadow pages in use")
-PERFCOUNTER_CPU(shadow2_free,           "calls to shadow2_free")
-PERFCOUNTER_CPU(shadow2_prealloc_1,     "shadow2 recycles old shadows")
-PERFCOUNTER_CPU(shadow2_prealloc_2,     "shadow2 recycles in-use shadows")
-PERFCOUNTER_CPU(shadow2_linear_map_failed, "shadow2 hit read-only linear map")
-PERFCOUNTER_CPU(shadow2_a_update,       "shadow2 A bit update")
-PERFCOUNTER_CPU(shadow2_ad_update,      "shadow2 A&D bit update")
-PERFCOUNTER_CPU(shadow2_fault,          "calls to shadow2_fault")
-PERFCOUNTER_CPU(shadow2_fault_bail_bad_gfn, "shadow2_fault guest bad gfn")
-PERFCOUNTER_CPU(shadow2_fault_bail_not_present, 
-                                        "shadow2_fault guest not-present")
-PERFCOUNTER_CPU(shadow2_fault_bail_nx,  "shadow2_fault guest NX fault")
-PERFCOUNTER_CPU(shadow2_fault_bail_ro_mapping, "shadow2_fault guest R/W fault")
-PERFCOUNTER_CPU(shadow2_fault_bail_user_supervisor, 
-                                        "shadow2_fault guest U/S fault")
-PERFCOUNTER_CPU(shadow2_fault_emulate_read, "shadow2_fault emulates a read")
-PERFCOUNTER_CPU(shadow2_fault_emulate_write, "shadow2_fault emulates a write")
-PERFCOUNTER_CPU(shadow2_fault_emulate_failed, "shadow2_fault emulator fails")
-PERFCOUNTER_CPU(shadow2_fault_mmio,     "shadow2_fault handled as mmio")
-PERFCOUNTER_CPU(shadow2_fault_fixed,    "shadow2_fault fixed fault")
-PERFCOUNTER_CPU(shadow2_ptwr_emulate,   "shadow2 causes ptwr to emulate")
-PERFCOUNTER_CPU(shadow2_validate_gl1e_calls, "calls to shadow2_validate_gl1e")
-PERFCOUNTER_CPU(shadow2_validate_gl2e_calls, "calls to shadow2_validate_gl2e")
-PERFCOUNTER_CPU(shadow2_validate_gl3e_calls, "calls to shadow2_validate_gl3e")
-PERFCOUNTER_CPU(shadow2_validate_gl4e_calls, "calls to shadow2_validate_gl4e")
-PERFCOUNTER_CPU(shadow2_hash_lookups,   "calls to shadow2_hash_lookup")
-PERFCOUNTER_CPU(shadow2_hash_lookup_head, "shadow2 hash hit in bucket head")
-PERFCOUNTER_CPU(shadow2_hash_lookup_miss, "shadow2 hash misses")
-PERFCOUNTER_CPU(shadow2_get_shadow_status, "calls to get_shadow_status")
-PERFCOUNTER_CPU(shadow2_hash_inserts,   "calls to shadow2_hash_insert")
-PERFCOUNTER_CPU(shadow2_hash_deletes,   "calls to shadow2_hash_delete")
-PERFCOUNTER_CPU(shadow2_writeable,      "shadow2 removes write access")
-PERFCOUNTER_CPU(shadow2_writeable_h_1,  "shadow2 writeable: 32b w2k3")
-PERFCOUNTER_CPU(shadow2_writeable_h_2,  "shadow2 writeable: 32pae w2k3")
-PERFCOUNTER_CPU(shadow2_writeable_h_3,  "shadow2 writeable: 64b w2k3")
-PERFCOUNTER_CPU(shadow2_writeable_h_4,  "shadow2 writeable: 32b linux low")
-PERFCOUNTER_CPU(shadow2_writeable_bf,   "shadow2 writeable brute-force")
-PERFCOUNTER_CPU(shadow2_mappings,       "shadow2 removes all mappings")
-PERFCOUNTER_CPU(shadow2_mappings_bf,    "shadow2 rm-mappings brute-force")
-PERFCOUNTER_CPU(shadow2_early_unshadow, "shadow2 unshadows for fork/exit")
-PERFCOUNTER_CPU(shadow2_early_unshadow_top, "shadow2 unhooks for fork/exit")
-PERFCOUNTER_CPU(shadow2_unshadow,       "shadow2 unshadows a page")
-PERFCOUNTER_CPU(shadow2_up_pointer,     "shadow2 unshadow by up-pointer")
-PERFCOUNTER_CPU(shadow2_unshadow_bf,    "shadow2 unshadow brute-force")
-PERFCOUNTER_CPU(shadow2_get_page_fail,  "shadow2_get_page_from_l1e failed")
-PERFCOUNTER_CPU(shadow2_guest_walk,     "shadow2 walks guest tables")
-PERFCOUNTER_CPU(shadow2_walk_cache_hit, "shadow2 walk-cache hits")
-PERFCOUNTER_CPU(shadow2_walk_cache_miss, "shadow2 walk-cache misses")
+PERFSTATUS(shadow_alloc_count,         "number of shadow pages in use")
+PERFCOUNTER_CPU(shadow_free,           "calls to shadow_free")
+PERFCOUNTER_CPU(shadow_prealloc_1,     "shadow recycles old shadows")
+PERFCOUNTER_CPU(shadow_prealloc_2,     "shadow recycles in-use shadows")
+PERFCOUNTER_CPU(shadow_linear_map_failed, "shadow hit read-only linear map")
+PERFCOUNTER_CPU(shadow_a_update,       "shadow A bit update")
+PERFCOUNTER_CPU(shadow_ad_update,      "shadow A&D bit update")
+PERFCOUNTER_CPU(shadow_fault,          "calls to shadow_fault")
+PERFCOUNTER_CPU(shadow_fault_bail_bad_gfn, "shadow_fault guest bad gfn")
+PERFCOUNTER_CPU(shadow_fault_bail_not_present, 
+                                        "shadow_fault guest not-present")
+PERFCOUNTER_CPU(shadow_fault_bail_nx,  "shadow_fault guest NX fault")
+PERFCOUNTER_CPU(shadow_fault_bail_ro_mapping, "shadow_fault guest R/W fault")
+PERFCOUNTER_CPU(shadow_fault_bail_user_supervisor, 
+                                        "shadow_fault guest U/S fault")
+PERFCOUNTER_CPU(shadow_fault_emulate_read, "shadow_fault emulates a read")
+PERFCOUNTER_CPU(shadow_fault_emulate_write, "shadow_fault emulates a write")
+PERFCOUNTER_CPU(shadow_fault_emulate_failed, "shadow_fault emulator fails")
+PERFCOUNTER_CPU(shadow_fault_mmio,     "shadow_fault handled as mmio")
+PERFCOUNTER_CPU(shadow_fault_fixed,    "shadow_fault fixed fault")
+PERFCOUNTER_CPU(shadow_ptwr_emulate,   "shadow causes ptwr to emulate")
+PERFCOUNTER_CPU(shadow_validate_gl1e_calls, "calls to shadow_validate_gl1e")
+PERFCOUNTER_CPU(shadow_validate_gl2e_calls, "calls to shadow_validate_gl2e")
+PERFCOUNTER_CPU(shadow_validate_gl3e_calls, "calls to shadow_validate_gl3e")
+PERFCOUNTER_CPU(shadow_validate_gl4e_calls, "calls to shadow_validate_gl4e")
+PERFCOUNTER_CPU(shadow_hash_lookups,   "calls to shadow_hash_lookup")
+PERFCOUNTER_CPU(shadow_hash_lookup_head, "shadow hash hit in bucket head")
+PERFCOUNTER_CPU(shadow_hash_lookup_miss, "shadow hash misses")
+PERFCOUNTER_CPU(shadow_get_shadow_status, "calls to get_shadow_status")
+PERFCOUNTER_CPU(shadow_hash_inserts,   "calls to shadow_hash_insert")
+PERFCOUNTER_CPU(shadow_hash_deletes,   "calls to shadow_hash_delete")
+PERFCOUNTER_CPU(shadow_writeable,      "shadow removes write access")
+PERFCOUNTER_CPU(shadow_writeable_h_1,  "shadow writeable: 32b w2k3")
+PERFCOUNTER_CPU(shadow_writeable_h_2,  "shadow writeable: 32pae w2k3")
+PERFCOUNTER_CPU(shadow_writeable_h_3,  "shadow writeable: 64b w2k3")
+PERFCOUNTER_CPU(shadow_writeable_h_4,  "shadow writeable: 32b linux low")
+PERFCOUNTER_CPU(shadow_writeable_bf,   "shadow writeable brute-force")
+PERFCOUNTER_CPU(shadow_mappings,       "shadow removes all mappings")
+PERFCOUNTER_CPU(shadow_mappings_bf,    "shadow rm-mappings brute-force")
+PERFCOUNTER_CPU(shadow_early_unshadow, "shadow unshadows for fork/exit")
+PERFCOUNTER_CPU(shadow_early_unshadow_top, "shadow unhooks for fork/exit")
+PERFCOUNTER_CPU(shadow_unshadow,       "shadow unshadows a page")
+PERFCOUNTER_CPU(shadow_up_pointer,     "shadow unshadow by up-pointer")
+PERFCOUNTER_CPU(shadow_unshadow_bf,    "shadow unshadow brute-force")
+PERFCOUNTER_CPU(shadow_get_page_fail,  "shadow_get_page_from_l1e failed")
+PERFCOUNTER_CPU(shadow_guest_walk,     "shadow walks guest tables")
+PERFCOUNTER_CPU(shadow_walk_cache_hit, "shadow walk-cache hits")
+PERFCOUNTER_CPU(shadow_walk_cache_miss, "shadow walk-cache misses")
 
 
 /*#endif*/ /* __XEN_PERFC_DEFN_H__ */
diff -r 896fcdd49c7f -r 684fdcfb251a xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Mon Aug 28 16:16:07 2006 -0600
+++ b/xen/include/asm-x86/shadow.h      Mon Aug 28 16:26:37 2006 -0600
@@ -1,7 +1,9 @@
 /******************************************************************************
  * include/asm-x86/shadow.h
  * 
- * Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -21,26 +23,608 @@
 #ifndef _XEN_SHADOW_H
 #define _XEN_SHADOW_H
 
-/* This file is just a wrapper around the new Shadow2 header,
- * providing names that must be defined in any shadow implementation. */
-
-#include <asm/shadow2.h>
+#include <public/domctl.h> 
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <asm/flushtlb.h>
 
 /* How to make sure a page is not referred to in a shadow PT */
 /* This will need to be a for_each_vcpu if we go to per-vcpu shadows */ 
 #define shadow_drop_references(_d, _p)                      \
-    shadow2_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
+    shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
 #define shadow_sync_and_drop_references(_d, _p)             \
-    shadow2_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
-
-/* Whether we are translating the domain's frame numbers for it */
-#define shadow_mode_translate(d)  shadow2_mode_translate(d)
-
-/* ...and  if so, how to add and remove entries in the mapping */
+    shadow_remove_all_mappings((_d)->vcpu[0], _mfn(page_to_mfn(_p)))
+
+/* How to add and remove entries in the p2m mapping. */
 #define guest_physmap_add_page(_d, _p, _m)                  \
-    shadow2_guest_physmap_add_page((_d), (_p), (_m))
+    shadow_guest_physmap_add_page((_d), (_p), (_m))
 #define guest_physmap_remove_page(_d, _p, _m   )            \
-    shadow2_guest_physmap_remove_page((_d), (_p), (_m))
+    shadow_guest_physmap_remove_page((_d), (_p), (_m))
+
+/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
+
+#define SHM2_shift 10
+/* We're in one of the shadow modes */
+#define SHM2_enable    (1U << SHM2_shift)
+/* Refcounts based on shadow tables instead of guest tables */
+#define SHM2_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << SHM2_shift)
+/* Enable log dirty mode */
+#define SHM2_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << SHM2_shift)
+/* Xen does p2m translation, not guest */
+#define SHM2_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << SHM2_shift)
+/* Xen does not steal address space from the domain for its own bookkeeping;
+ * requires VT or similar mechanisms */
+#define SHM2_external  (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << SHM2_shift)
+
+#define shadow_mode_enabled(_d)   ((_d)->arch.shadow.mode)
+#define shadow_mode_refcounts(_d) ((_d)->arch.shadow.mode & SHM2_refcounts)
+#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow.mode & SHM2_log_dirty)
+#define shadow_mode_translate(_d) ((_d)->arch.shadow.mode & SHM2_translate)
+#define shadow_mode_external(_d)  ((_d)->arch.shadow.mode & SHM2_external)
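These predicates are what the mm.c hunks earlier in this patch branch on; for example, reference handling on guest pagetable frames follows the pattern sketched below (a sketch of the existing call sites, not new behaviour):

    /* Sketch of the caller pattern from the mm.c hunks above: a domain in
     * SHM2_refcounts mode takes a plain reference on a pagetable frame,
     * anything else takes a typed reference as well. */
    static inline int sh_example_take_ref(struct domain *d, unsigned long mfn)
    {
        if ( shadow_mode_refcounts(d) )
            return get_page_from_pagenr(mfn, d);
        return get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
    }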
+
+/* Xen traps & emulates all reads of all page table pages:
+ * not yet supported
+ */
+#define shadow_mode_trap_reads(_d) ({ (void)(_d); 0; })
+
+// flags used in the return value of the shadow_set_lXe() functions...
+#define SHADOW_SET_CHANGED            0x1
+#define SHADOW_SET_FLUSH              0x2
+#define SHADOW_SET_ERROR              0x4
+#define SHADOW_SET_L3PAE_RECOPY       0x8
+
+// How do we tell that we have a 32-bit PV guest in a 64-bit Xen?
+#ifdef __x86_64__
+#define pv_32bit_guest(_v) 0 // not yet supported
+#else
+#define pv_32bit_guest(_v) (!hvm_guest(_v))
+#endif
+
+/* The shadow lock.
+ *
+ * This lock is per-domain.  It is intended to allow us to make atomic
+ * updates to the software TLB that the shadow tables provide.
+ * 
+ * Specifically, it protects:
+ *   - all changes to shadow page table pages
+ *   - the shadow hash table
+ *   - the shadow page allocator 
+ *   - all changes to guest page table pages; if/when the notion of
+ *     out-of-sync pages is added to this code, then the shadow lock is
+ *     protecting all guest page table pages which are not currently
+ *     listed as both guest-writable and out-of-sync...
+ *     XXX -- need to think about this relative to writable page tables.
+ *   - all changes to the page_info->tlbflush_timestamp
+ *   - the page_info->count fields on shadow pages
+ *   - the shadow dirty bit array and count
+ *   - XXX
+ */
+#ifndef CONFIG_SMP
+#error shadow.h currently requires CONFIG_SMP
+#endif
+
+#define shadow_lock_init(_d)                                   \
+    do {                                                        \
+        spin_lock_init(&(_d)->arch.shadow.lock);               \
+        (_d)->arch.shadow.locker = -1;                         \
+        (_d)->arch.shadow.locker_function = "nobody";          \
+    } while (0)
+
+#define shadow_lock_is_acquired(_d)                            \
+    (current->processor == (_d)->arch.shadow.locker)
+
+#define shadow_lock(_d)                                                 \
+    do {                                                                 \
+        if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \
+        {                                                                \
+            printk("Error: shadow lock held by %s\n",                   \
+                   (_d)->arch.shadow.locker_function);                  \
+            BUG();                                                       \
+        }                                                                \
+        spin_lock(&(_d)->arch.shadow.lock);                             \
+        ASSERT((_d)->arch.shadow.locker == -1);                         \
+        (_d)->arch.shadow.locker = current->processor;                  \
+        (_d)->arch.shadow.locker_function = __func__;                   \
+    } while (0)
+
+#define shadow_unlock(_d)                                              \
+    do {                                                                \
+        ASSERT((_d)->arch.shadow.locker == current->processor);        \
+        (_d)->arch.shadow.locker = -1;                                 \
+        (_d)->arch.shadow.locker_function = "nobody";                  \
+        spin_unlock(&(_d)->arch.shadow.lock);                          \
+    } while (0)
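As a usage illustration, the do_mmu_update() hunk earlier in this patch brackets a direct write to a guest pagetable word with exactly this lock, revalidating the entry through the shadows before releasing it:

    if ( unlikely(shadow_mode_enabled(d)) )
        shadow_lock(d);

    *(intpte_t *)va = req.val;     /* the guest-visible pagetable update */

    if ( unlikely(shadow_mode_enabled(d)) )
    {
        shadow_validate_guest_entry(v, _mfn(mfn), va);
        shadow_unlock(d);
    }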
+
+/* 
+ * Levels of self-test and paranoia
+ * XXX should go in config files somewhere?  
+ */
+#define SHADOW_AUDIT_HASH           0x01  /* Check current hash bucket */
+#define SHADOW_AUDIT_HASH_FULL      0x02  /* Check every hash bucket */
+#define SHADOW_AUDIT_ENTRIES        0x04  /* Check this walk's shadows */
+#define SHADOW_AUDIT_ENTRIES_FULL   0x08  /* Check every shadow */
+#define SHADOW_AUDIT_ENTRIES_MFNS   0x10  /* Check gfn-mfn map in shadows */
+#define SHADOW_AUDIT_P2M            0x20  /* Check the p2m table */
+
+#ifdef NDEBUG
+#define SHADOW_AUDIT                   0
+#define SHADOW_AUDIT_ENABLE            0
+#else
+#define SHADOW_AUDIT                0x15  /* Basic audit of all except p2m. */
+#define SHADOW_AUDIT_ENABLE         shadow_audit_enable
+extern int shadow_audit_enable;
+#endif
+
+/* 
+ * Levels of optimization
+ * XXX should go in config files somewhere?  
+ */
+#define SHOPT_WRITABLE_HEURISTIC  0x01  /* Guess at RW PTEs via linear maps */
+#define SHOPT_EARLY_UNSHADOW      0x02  /* Unshadow l1s on fork or exit */
+
+#define SHADOW_OPTIMIZATIONS      0x03
+
+
+/* With shadow pagetables, the different kinds of address start 
+ * to get confusing.
+ * 
+ * Virtual addresses are what they usually are: the addresses that are used 
+ * to access memory while the guest is running.  The MMU translates from 
+ * virtual addresses to machine addresses. 
+ * 
+ * (Pseudo-)physical addresses are the abstraction of physical memory the
+ * guest uses for allocation and so forth.  For the purposes of this code, 
+ * we can largely ignore them.
+ *
+ * Guest frame numbers (gfns) are the entries that the guest puts in its
+ * pagetables.  For normal paravirtual guests, they are actual frame numbers,
+ * with the translation done by the guest.  
+ * 
+ * Machine frame numbers (mfns) are the entries that the hypervisor puts
+ * in the shadow page tables.
+ *
+ * Elsewhere in the xen code base, the name "gmfn" is generally used to refer
+ * to a "machine frame number, from the guest's perspective", or in other
+ * words, pseudo-physical frame numbers.  However, in the shadow code, the
+ * term "gmfn" means "the mfn of a guest page"; this combines naturally with
+ * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a
+ * guest L2 page), etc...
+ */
+
+/* With this defined, we do some ugly things to force the compiler to
+ * give us type safety between mfns and gfns and other integers.
+ * TYPE_SAFE(int, foo) defines a foo_t, and _foo() and foo_x() functions 
+ * that translate between int and foo_t.
+ * 
+ * It does have some performance cost because the types now have 
+ * a different storage attribute, so we may not want it on all the time. */
+#ifndef NDEBUG
+#define TYPE_SAFETY 1
+#endif
+
+#ifdef TYPE_SAFETY
+#define TYPE_SAFE(_type,_name)                                  \
+typedef struct { _type _name; } _name##_t;                      \
+static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \
+static inline _type _name##_x(_name##_t n) { return n._name; }
+#else
+#define TYPE_SAFE(_type,_name)                                          \
+typedef _type _name##_t;                                                \
+static inline _name##_t _##_name(_type n) { return n; }                 \
+static inline _type _name##_x(_name##_t n) { return n; }
+#endif
+
+TYPE_SAFE(unsigned long,mfn)
+#define SH_PRI_mfn "05lx"
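For reference, with TYPE_SAFETY defined the instantiation above expands roughly as follows (illustrative expansion only):

    /* typedef struct { unsigned long mfn; } mfn_t;
     * static inline mfn_t _mfn(unsigned long n) { return (mfn_t) { n }; }
     * static inline unsigned long mfn_x(mfn_t n) { return n.mfn; }
     *
     * so mfn_x(_mfn(5UL)) == 5UL, while passing a raw integer or a gfn where
     * an mfn_t is expected becomes a compile-time error. */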
+
+static inline int
+valid_mfn(mfn_t m)
+{
+    return VALID_MFN(mfn_x(m));
+}
+
+static inline mfn_t
+pagetable_get_mfn(pagetable_t pt)
+{
+    return _mfn(pagetable_get_pfn(pt));
+}
+
+static inline pagetable_t
+pagetable_from_mfn(mfn_t mfn)
+{
+    return pagetable_from_pfn(mfn_x(mfn));
+}
+
+static inline int
+shadow_vcpu_mode_translate(struct vcpu *v)
+{
+    // Returns true if this VCPU needs to be using the P2M table to translate
+    // between GFNs and MFNs.
+    //
+    // This is true of translated HVM domains on a vcpu which has paging
+    // enabled.  (HVM vcpus with paging disabled use the p2m table as
+    // their paging table, so no translation occurs in this case.)
+    //
+    return v->arch.shadow.hvm_paging_enabled;
+}
+
+
+/**************************************************************************/
+/* Mode-specific entry points into the shadow code */
+
+struct x86_emulate_ctxt;
+struct shadow_paging_mode {
+    int           (*page_fault            )(struct vcpu *v, unsigned long va,
+                                            struct cpu_user_regs *regs);
+    int           (*invlpg                )(struct vcpu *v, unsigned long va);
+    unsigned long (*gva_to_gpa            )(struct vcpu *v, unsigned long va);
+    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va);
+    void          (*update_cr3            )(struct vcpu *v);
+    int           (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn,
+                                            void *new_guest_entry, u32 size);
+    int           (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn,
+                                            void *new_guest_entry, u32 size);
+    int           (*map_and_validate_gl2he)(struct vcpu *v, mfn_t gmfn,
+                                            void *new_guest_entry, u32 size);
+    int           (*map_and_validate_gl3e )(struct vcpu *v, mfn_t gmfn,
+                                            void *new_guest_entry, u32 size);
+    int           (*map_and_validate_gl4e )(struct vcpu *v, mfn_t gmfn,
+                                            void *new_guest_entry, u32 size);
+    void          (*detach_old_tables     )(struct vcpu *v);
+    int           (*x86_emulate_write     )(struct vcpu *v, unsigned long va,
+                                            void *src, u32 bytes,
+                                            struct x86_emulate_ctxt *ctxt);
+    int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
+                                            unsigned long old, 
+                                            unsigned long new,
+                                            unsigned int bytes,
+                                            struct x86_emulate_ctxt *ctxt);
+    int           (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va,
+                                            unsigned long old_lo, 
+                                            unsigned long old_hi, 
+                                            unsigned long new_lo,
+                                            unsigned long new_hi,
+                                            struct x86_emulate_ctxt *ctxt);
+    mfn_t         (*make_monitor_table    )(struct vcpu *v);
+    void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+    int           (*guess_wrmap           )(struct vcpu *v, 
+                                            unsigned long vaddr, mfn_t gmfn);
+#endif
+    /* For outsiders to tell what mode we're in */
+    unsigned int shadow_levels;
+    unsigned int guest_levels;
+};
+
+static inline int shadow_guest_paging_levels(struct vcpu *v)
+{
+    ASSERT(v->arch.shadow.mode != NULL);
+    return v->arch.shadow.mode->guest_levels;
+}
+
+/**************************************************************************/
+/* Entry points into the shadow code */
+
+/* Turning on shadow test mode */
+int shadow_test_enable(struct domain *d);
+
+/* Handler for shadow control ops: enabling and disabling shadow modes, 
+ * and log-dirty bitmap ops all happen through here. */
+int shadow_domctl(struct domain *d, 
+                   xen_domctl_shadow_op_t *sc,
+                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
+
+/* Call when destroying a domain */
+void shadow_teardown(struct domain *d);
+
+/* Call once all of the references to the domain have gone away */
+void shadow_final_teardown(struct domain *d);
+
+
+/* Mark a page as dirty in the bitmap */
+void sh_do_mark_dirty(struct domain *d, mfn_t gmfn);
+static inline void mark_dirty(struct domain *d, unsigned long gmfn)
+{
+    if ( shadow_mode_log_dirty(d) )
+    {
+        shadow_lock(d);
+        sh_do_mark_dirty(d, _mfn(gmfn));
+        shadow_unlock(d);
+    }
+}
+
+/* Internal version, for when the shadow lock is already held */
+static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+    ASSERT(shadow_lock_is_acquired(d));
+    if ( shadow_mode_log_dirty(d) )
+        sh_do_mark_dirty(d, gmfn);
+}
+
+static inline int
+shadow_fault(unsigned long va, struct cpu_user_regs *regs)
+/* Called from pagefault handler in Xen, and from the HVM trap handlers
+ * for pagefaults.  Returns 1 if this fault was an artefact of the
+ * shadow code (and the guest should retry) or 0 if it is not (and the
+ * fault should be handled elsewhere or passed to the guest). */
+{
+    struct vcpu *v = current;
+    perfc_incrc(shadow_fault);
+    return v->arch.shadow.mode->page_fault(v, va, regs);
+}
+
+static inline int
+shadow_invlpg(struct vcpu *v, unsigned long va)
+/* Called when the guest requests an invlpg.  Returns 1 if the invlpg
+ * instruction should be issued on the hardware, or 0 if it's safe not
+ * to do so. */
+{
+    return v->arch.shadow.mode->invlpg(v, va);
+}
+
+static inline unsigned long
+shadow_gva_to_gpa(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to what the *guest*
+ * pagetables would map it to. */
+{
+    return v->arch.shadow.mode->gva_to_gpa(v, va);
+}
+
+static inline unsigned long
+shadow_gva_to_gfn(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to the guest frame number
+ * that the *guest* pagetables would map it to. */
+{
+    return v->arch.shadow.mode->gva_to_gfn(v, va);
+}
+
+static inline void
+shadow_update_cr3(struct vcpu *v)
+/* Updates all the things that are derived from the guest's CR3. 
+ * Called when the guest changes CR3. */
+{
+    shadow_lock(v->domain);
+    v->arch.shadow.mode->update_cr3(v);
+    shadow_unlock(v->domain);
+}
+
+
+/* Should be called after CR3 is updated.
+ * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
+ * 
+ * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
+ * shadow_vtable, etc).
+ *
+ * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
+ * for HVM guests, arch.monitor_table and hvm's guest CR3.
+ *
+ * Update ref counts to shadow tables appropriately.
+ * For PAE, relocate L3 entries, if necessary, into low memory.
+ */
+static inline void update_cr3(struct vcpu *v)
+{
+    unsigned long cr3_mfn=0;
+
+    if ( shadow_mode_enabled(v->domain) )
+    {
+        shadow_update_cr3(v);
+        return;
+    }
+
+#if CONFIG_PAGING_LEVELS == 4
+    if ( !(v->arch.flags & TF_kernel_mode) )
+        cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
+    else
+#endif
+        cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
+
+    make_cr3(v, cr3_mfn);
+}
+
+extern void sh_update_paging_modes(struct vcpu *v);
+
+/* Should be called to initialise paging structures if the paging mode
+ * has changed, and when bringing up a VCPU for the first time. */
+static inline void shadow_update_paging_modes(struct vcpu *v)
+{
+    ASSERT(shadow_mode_enabled(v->domain));
+    shadow_lock(v->domain);
+    sh_update_paging_modes(v);
+    shadow_unlock(v->domain);
+}
+
+static inline void
+shadow_detach_old_tables(struct vcpu *v)
+{
+    if ( v->arch.shadow.mode )
+        v->arch.shadow.mode->detach_old_tables(v);
+}
+
+static inline mfn_t
+shadow_make_monitor_table(struct vcpu *v)
+{
+    return v->arch.shadow.mode->make_monitor_table(v);
+}
+
+static inline void
+shadow_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
+{
+    v->arch.shadow.mode->destroy_monitor_table(v, mmfn);
+}
+
+/* Validate a pagetable change from the guest and update the shadows. */
+extern int shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn,
+                                        void *new_guest_entry);
+
+/* Update the shadows in response to a pagetable write from a HVM guest */
+extern void shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 
+                                            void *entry, u32 size);
+
+/* Remove all writeable mappings of a guest frame from the shadows.
+ * Returns non-zero if we need to flush TLBs. 
+ * level and fault_addr describe how we found this to be a pagetable;
+ * level==0 means we have some other reason for revoking write access. */
+extern int shadow_remove_write_access(struct vcpu *v, mfn_t readonly_mfn,
+                                       unsigned int level,
+                                       unsigned long fault_addr);
+
+/* Remove all mappings of the guest mfn from the shadows. 
+ * Returns non-zero if we need to flush TLBs. */
+extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
+
+void
+shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
+/* This is an HVM page that we think is no longer a pagetable.
+ * Unshadow it, and recursively unshadow pages that reference it. */
+
+/* Remove all shadows of the guest mfn. */
+extern void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all);
+static inline void shadow_remove_all_shadows(struct vcpu *v, mfn_t gmfn)
+{
+    sh_remove_shadows(v, gmfn, 1);
+}
+
+/* Add a page to a domain */
+void
+shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
+                               unsigned long mfn);
+
+/* Remove a page from a domain */
+void
+shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
+                                  unsigned long mfn);
+
+/*
+ * Definitions for the shadow_flags field in page_info.
+ * These flags are stored on *guest* pages...
+ * Bits 1-13 are encodings for the shadow types.
+ */
+#define PGC_SH_type_to_index(_type) ((_type) >> PGC_SH_type_shift)
+#define SHF_page_type_mask \
+    (((1u << (PGC_SH_type_to_index(PGC_SH_max_shadow) + 1u)) - 1u) - \
+     ((1u << PGC_SH_type_to_index(PGC_SH_min_shadow)) - 1u))
+
+#define SHF_L1_32   (1u << PGC_SH_type_to_index(PGC_SH_l1_32_shadow))
+#define SHF_FL1_32  (1u << PGC_SH_type_to_index(PGC_SH_fl1_32_shadow))
+#define SHF_L2_32   (1u << PGC_SH_type_to_index(PGC_SH_l2_32_shadow))
+#define SHF_L1_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l1_pae_shadow))
+#define SHF_FL1_PAE (1u << PGC_SH_type_to_index(PGC_SH_fl1_pae_shadow))
+#define SHF_L2_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l2_pae_shadow))
+#define SHF_L2H_PAE (1u << PGC_SH_type_to_index(PGC_SH_l2h_pae_shadow))
+#define SHF_L3_PAE  (1u << PGC_SH_type_to_index(PGC_SH_l3_pae_shadow))
+#define SHF_L1_64   (1u << PGC_SH_type_to_index(PGC_SH_l1_64_shadow))
+#define SHF_FL1_64  (1u << PGC_SH_type_to_index(PGC_SH_fl1_64_shadow))
+#define SHF_L2_64   (1u << PGC_SH_type_to_index(PGC_SH_l2_64_shadow))
+#define SHF_L3_64   (1u << PGC_SH_type_to_index(PGC_SH_l3_64_shadow))
+#define SHF_L4_64   (1u << PGC_SH_type_to_index(PGC_SH_l4_64_shadow))
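Since these SHF_* values are bit positions in page_info's shadow_flags (see the mm.h hunk above), a caller can test which shadows a guest frame currently has; an illustrative helper, not part of the patch:

    /* Illustrative only: does this guest page have any PAE L1 shadow? */
    static inline int sh_has_pae_l1_shadow(const struct page_info *pg)
    {
        return (pg->shadow_flags & (SHF_L1_PAE | SHF_FL1_PAE)) != 0;
    }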
+
+/* Used for hysteresis when automatically unhooking mappings on fork/exit */
+#define SHF_unhooked_mappings (1u<<31)
+
+/* 
+ * Allocation of shadow pages 
+ */
+
+/* Return the minimum acceptable number of shadow pages a domain needs */
+unsigned int shadow_min_acceptable_pages(struct domain *d);
+
+/* Set the pool of shadow pages to the required number of MB.
+ * Input will be rounded up to at least shadow_min_acceptable_pages().
+ * Returns 0 for success, 1 for failure. */
+unsigned int shadow_set_allocation(struct domain *d, 
+                                    unsigned int megabytes,
+                                    int *preempted);
+
+/* Return the size of the shadow pool, rounded up to the nearest MB */
+static inline unsigned int shadow_get_allocation(struct domain *d)
+{
+    unsigned int pg = d->arch.shadow.total_pages;
+    return ((pg >> (20 - PAGE_SHIFT))
+            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
+}
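A quick worked example of the rounding above, assuming 4 KiB pages (PAGE_SHIFT == 12, so 256 pages per MB):

    /* total_pages == 300:  300 >> 8 == 1 full MB, 300 & 255 == 44 != 0,
     * so shadow_get_allocation() reports 2 MB. */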
+
+/*
+ * Linked list for chaining entries in the shadow hash table. 
+ */
+struct shadow_hash_entry {
+    struct shadow_hash_entry *next;
+    mfn_t smfn;                 /* MFN of the shadow */
+#ifdef __x86_64__ /* Shorten 'n' so we don't waste a whole word on storing 't' */
+    unsigned long n:56;         /* MFN of guest PT or GFN of guest superpage */
+#else
+    unsigned long n;            /* MFN of guest PT or GFN of guest superpage */
+#endif
+    unsigned char t;            /* shadow type bits, or 0 for empty */
+};
+
+#define SHADOW_HASH_BUCKETS 251
+/* Other possibly useful primes are 509, 1021, 2039, 4093, 8191, 16381 */
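The bucket array and hash function themselves live in mm/shadow/common.c (added later in this changeset); purely as a hedged sketch, a chain built from these entries could be searched like this:

    /* Hedged sketch only: find the shadow of guest frame 'n' with shadow
     * type 't' on one hash chain.  The real lookup is in common.c. */
    static inline struct shadow_hash_entry *
    sh_hash_chain_find(struct shadow_hash_entry *bucket,
                       unsigned long n, unsigned char t)
    {
        struct shadow_hash_entry *e;
        for ( e = bucket; e != NULL; e = e->next )
            if ( e->n == n && e->t == t )
                return e;
        return NULL;
    }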
+
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_CACHE_WALKS
+/* Optimization: cache the results of guest walks.  This helps with MMIO
+ * and emulated writes, which tend to issue very similar walk requests
+ * repeatedly.  We keep the results of the last few walks, and blow
+ * away the cache on guest cr3 write, mode change, or page fault. */
+
+#define SH_WALK_CACHE_ENTRIES 4
+
+/* Rather than cache a guest walk, which would include mapped pointers 
+ * to pages, we cache what a TLB would remember about the walk: the 
+ * permissions and the l1 gfn */
+struct shadow_walk_cache {
+    unsigned long va;           /* The virtual address (or 0 == unused) */
+    unsigned long gfn;          /* The gfn from the effective l1e   */
+    u32 permissions;            /* The aggregated permission bits   */
+};
+#endif
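Note that SHOPT_CACHE_WALKS is not among the SHADOW_OPTIMIZATIONS bits defined above, so this block is compiled out in this changeset. Purely as a hedged sketch (the real maintenance code would live in the multi.c files, not shown here), probing such a cache might look like:

    /* Hypothetical probe over a per-vcpu array of SH_WALK_CACHE_ENTRIES
     * entries; 'cache' is an assumed parameter, not a field from this patch. */
    static inline struct shadow_walk_cache *
    sh_walk_cache_probe(struct shadow_walk_cache *cache, unsigned long va)
    {
        int i;
        for ( i = 0; i < SH_WALK_CACHE_ENTRIES; i++ )
            if ( cache[i].va == va && va != 0 )
                return &cache[i];
        return NULL;   /* miss: do a full guest walk */
    }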
+
+
+/**************************************************************************/
+/* Guest physmap (p2m) support */
+
+/* Walk another domain's P2M table, mapping pages as we go */
+extern mfn_t
+sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+
+
+/* General conversion function from gfn to mfn */
+static inline mfn_t
+sh_gfn_to_mfn(struct domain *d, unsigned long gfn)
+{
+    if ( !shadow_mode_translate(d) )
+        return _mfn(gfn);
+    else if ( likely(current->domain == d) )
+        return _mfn(get_mfn_from_gpfn(gfn));
+    else
+        return sh_gfn_to_mfn_foreign(d, gfn);
+}
+
+// vcpu-specific version of gfn_to_mfn().  This is where we hide the dirty
+// little secret that, for hvm guests with paging disabled, nearly all of the
+// shadow code actually think that the guest is running on *untranslated* page
+// tables (which is actually domain->phys_table).
+//
+static inline mfn_t
+sh_vcpu_gfn_to_mfn(struct vcpu *v, unsigned long gfn)
+{ 
+    if ( !shadow_vcpu_mode_translate(v) )
+        return _mfn(gfn);
+    if ( likely(current->domain == v->domain) )
+        return _mfn(get_mfn_from_gpfn(gfn));
+    return sh_gfn_to_mfn_foreign(v->domain, gfn);
+}
+
+static inline unsigned long
+sh_mfn_to_gfn(struct domain *d, mfn_t mfn)
+{
+    if ( shadow_mode_translate(d) )
+        return get_gpfn_from_mfn(mfn_x(mfn));
+    else
+        return mfn_x(mfn);
+}
+
+
 
 #endif /* _XEN_SHADOW_H */
 
@@ -49,7 +633,7 @@
  * mode: C
  * c-set-style: "BSD"
  * c-basic-offset: 4
- * tab-width: 4
  * indent-tabs-mode: nil
  * End:
  */
+      
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/mm/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/Makefile  Mon Aug 28 16:26:37 2006 -0600
@@ -0,0 +1,1 @@
+subdir-y += shadow
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/mm/shadow/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/shadow/Makefile   Mon Aug 28 16:26:37 2006 -0600
@@ -0,0 +1,15 @@
+ifneq ($(pae),n)
+obj-$(x86_32) += common.o g2_on_s3.o g3_on_s3.o
+else
+obj-$(x86_32) += common.o g2_on_s2.o
+endif
+
+obj-$(x86_64) += common.o g4_on_s4.o g3_on_s3.o g2_on_s3.o
+
+guest_levels  = $(subst g,,$(filter g%,$(subst ., ,$(subst _, ,$(1)))))
+shadow_levels = $(subst s,,$(filter s%,$(subst ., ,$(subst _, ,$(1)))))
+shadow_defns  = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1)) \
+                -DSHADOW_PAGING_LEVELS=$(call shadow_levels,$(1))
+
+g%.o: multi.c $(HDRS) Makefile
+       $(CC) $(CFLAGS) $(call shadow_defns,$(@F)) -c $< -o $@
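For example, for the target g3_on_s3.o the helper functions above extract guest_levels = 3 and shadow_levels = 3, so the pattern rule compiles multi.c with -DGUEST_PAGING_LEVELS=3 -DSHADOW_PAGING_LEVELS=3; each gN_on_sM.o is thus the same multi.c source built for a different (guest level, shadow level) pair.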
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/mm/shadow/common.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Aug 28 16:26:37 2006 -0600
@@ -0,0 +1,3407 @@
+/******************************************************************************
+ * arch/x86/mm/shadow/common.c
+ *
+ * Shadow code that does not need to be multiply compiled.
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#define SHADOW 1
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/irq.h>
+#include <xen/domain_page.h>
+#include <xen/guest_access.h>
+#include <xen/keyhandler.h>
+#include <asm/event.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/shadow.h>
+#include "private.h"
+
+#if SHADOW_AUDIT
+int shadow_audit_enable = 0;
+
+static void shadow_audit_key(unsigned char key)
+{
+    shadow_audit_enable = !shadow_audit_enable;
+    printk("%s shadow_audit_enable=%d\n",
+           __func__, shadow_audit_enable);
+}
+
+static int __init shadow_audit_key_init(void)
+{
+    register_keyhandler(
+        'O', shadow_audit_key,  "toggle shadow audits");
+    return 0;
+}
+__initcall(shadow_audit_key_init);
+#endif /* SHADOW_AUDIT */
+
+static void sh_free_log_dirty_bitmap(struct domain *d);
+
+int _shadow_mode_refcounts(struct domain *d)
+{
+    return shadow_mode_refcounts(d);
+}
+
+
+/**************************************************************************/
+/* x86 emulator support for the shadow code
+ */
+
+static int
+sh_x86_emulate_read_std(unsigned long addr,
+                         unsigned long *val,
+                         unsigned int bytes,
+                         struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *v = current;
+    if ( hvm_guest(v) )
+    {
+        *val = 0;
+        // XXX -- this is WRONG.
+        //        It entirely ignores the permissions in the page tables.
+        //        In this case, that is only a user vs supervisor access check.
+        //
+        if ( hvm_copy(val, addr, bytes, HVM_COPY_IN) )
+        {
+#if 0
+            SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+                           v->domain->domain_id, v->vcpu_id, 
+                           addr, *val, bytes);
+#endif
+            return X86EMUL_CONTINUE;
+        }
+
+        /* If we got here, there was nothing mapped here, or a bad GFN 
+         * was mapped here.  This should never happen: we're here because
+         * of a write fault at the end of the instruction we're emulating. */ 
+        SHADOW_PRINTK("read failed to va %#lx\n", addr);
+        return X86EMUL_PROPAGATE_FAULT;
+    }
+    else 
+    {
+        SHADOW_PRINTK("this operation is not emulated yet\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+}
+
+static int
+sh_x86_emulate_write_std(unsigned long addr,
+                          unsigned long val,
+                          unsigned int bytes,
+                          struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *v = current;
+#if 0
+    SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+                  v->domain->domain_id, v->vcpu_id, addr, val, bytes);
+#endif
+    if ( hvm_guest(v) )
+    {
+        // XXX -- this is WRONG.
+        //        It entirely ignores the permissions in the page tables.
+        //        In this case, that includes user vs supervisor, and
+        //        write access.
+        //
+        if ( hvm_copy(&val, addr, bytes, HVM_COPY_OUT) )
+            return X86EMUL_CONTINUE;
+
+        /* If we got here, there was nothing mapped here, or a bad GFN 
+         * was mapped here.  This should never happen: we're here because
+         * of a write fault at the end of the instruction we're emulating,
+         * which should be handled by sh_x86_emulate_write_emulated. */ 
+        SHADOW_PRINTK("write failed to va %#lx\n", addr);
+        return X86EMUL_PROPAGATE_FAULT;
+    }
+    else 
+    {
+        SHADOW_PRINTK("this operation is not emulated yet\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+}
+
+static int
+sh_x86_emulate_write_emulated(unsigned long addr,
+                               unsigned long val,
+                               unsigned int bytes,
+                               struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *v = current;
+#if 0
+    SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
+                  v->domain->domain_id, v->vcpu_id, addr, val, bytes);
+#endif
+    if ( hvm_guest(v) )
+    {
+        return v->arch.shadow.mode->x86_emulate_write(v, addr, &val, bytes, ctxt);
+    }
+    else 
+    {
+        SHADOW_PRINTK("this operation is not emulated yet\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+}
+
+static int 
+sh_x86_emulate_cmpxchg_emulated(unsigned long addr,
+                                 unsigned long old,
+                                 unsigned long new,
+                                 unsigned int bytes,
+                                 struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *v = current;
+#if 0
+    SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx n:=%#lx bytes=%u\n",
+                   v->domain->domain_id, v->vcpu_id, addr, old, new, bytes);
+#endif
+    if ( hvm_guest(v) )
+    {
+        return v->arch.shadow.mode->x86_emulate_cmpxchg(v, addr, old, new, 
+                                                    bytes, ctxt);
+    }
+    else 
+    {
+        SHADOW_PRINTK("this operation is not emulated yet\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+}
+
+static int 
+sh_x86_emulate_cmpxchg8b_emulated(unsigned long addr,
+                                   unsigned long old_lo,
+                                   unsigned long old_hi,
+                                   unsigned long new_lo,
+                                   unsigned long new_hi,
+                                   struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *v = current;
+#if 0
+    SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx:%lx n:=%#lx:%lx\n",
+                   v->domain->domain_id, v->vcpu_id, addr, old_hi, old_lo,
+                   new_hi, new_lo);
+#endif
+    if ( hvm_guest(v) )
+    {
+        return v->arch.shadow.mode->x86_emulate_cmpxchg8b(v, addr, old_lo,
+                                                           old_hi, new_lo,
+                                                           new_hi, ctxt);
+    }
+    else 
+    {
+        SHADOW_PRINTK("this operation is not emulated yet\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+}
+
+
+struct x86_emulate_ops shadow_emulator_ops = {
+    .read_std           = sh_x86_emulate_read_std,
+    .write_std          = sh_x86_emulate_write_std,
+    .read_emulated      = sh_x86_emulate_read_std,
+    .write_emulated     = sh_x86_emulate_write_emulated,
+    .cmpxchg_emulated   = sh_x86_emulate_cmpxchg_emulated,
+    .cmpxchg8b_emulated = sh_x86_emulate_cmpxchg8b_emulated,
+};
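+/* Note that .read_emulated is deliberately wired to sh_x86_emulate_read_std
+ * above, so "emulated" reads take the same hvm_copy() path as plain reads;
+ * only writes and cmpxchg go through the per-mode shadow handlers. */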
+
+
+/**************************************************************************/
+/* Code for "promoting" a guest page to the point where the shadow code is
+ * willing to let it be treated as a guest page table.  This generally
+ * involves making sure there are no writable mappings available to the guest
+ * for this page.
+ */
+void shadow_promote(struct vcpu *v, mfn_t gmfn, u32 type)
+{
+    struct page_info *page = mfn_to_page(gmfn);
+    unsigned long type_info;
+
+    ASSERT(valid_mfn(gmfn));
+
+    /* We should never try to promote a gmfn that has writeable mappings */
+    ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0);
+
+    // Is the page already shadowed?
+    if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
+    {
+        // No prior shadow exists...
+
+        // Grab a type-ref.  We don't really care if we are racing with another
+        // vcpu or not, or even what kind of type we get; we just want the type
+        // count to be > 0.
+        //
+        do {
+            type_info =
+                page->u.inuse.type_info & (PGT_type_mask | PGT_va_mask);
+        } while ( !get_page_type(page, type_info) );
+
+        // Now that the type ref is non-zero, we can safely use the
+        // shadow_flags.
+        //
+        page->shadow_flags = 0;
+    }
+
+    ASSERT(!test_bit(type >> PGC_SH_type_shift, &page->shadow_flags));
+    set_bit(type >> PGC_SH_type_shift, &page->shadow_flags);
+}
+
+void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
+{
+    struct page_info *page = mfn_to_page(gmfn);
+
+    ASSERT(test_bit(_PGC_page_table, &page->count_info));
+    ASSERT(test_bit(type >> PGC_SH_type_shift, &page->shadow_flags));
+
+    clear_bit(type >> PGC_SH_type_shift, &page->shadow_flags);
+
+    if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
+    {
+        // release the extra type ref
+        put_page_type(page);
+
+        // clear the is-a-page-table bit.
+        clear_bit(_PGC_page_table, &page->count_info);
+    }
+}
+
+/**************************************************************************/
+/* Validate a pagetable change from the guest and update the shadows.
+ * Returns a bitmask of SHADOW_SET_* flags. */
+
+static int
+__shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 
+                               void *entry, u32 size)
+{
+    int result = 0;
+    struct page_info *page = mfn_to_page(gmfn);
+
+    sh_mark_dirty(v->domain, gmfn);
+    
+    // Determine which types of shadows are affected, and update each.
+    //
+    // Always validate L1s before L2s to prevent another cpu with a linear
+    // mapping of this gmfn from seeing a walk that results from 
+    // using the new L2 value and the old L1 value.  (It is OK for such a
+    // guest to see a walk that uses the old L2 value with the new L1 value,
+    // as hardware could behave this way if one level of the pagewalk occurs
+    // before the store, and the next level of the pagewalk occurs after the
+    // store.)
+    //
+    // Ditto for L2s before L3s, etc.
+    //
+
+    if ( !(page->count_info & PGC_page_table) )
+        return 0;  /* Not shadowed at all */
+
+#if CONFIG_PAGING_LEVELS == 2
+    if ( page->shadow_flags & SHF_L1_32 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 2, 2)
+            (v, gmfn, entry, size);
+#else 
+    if ( page->shadow_flags & SHF_L1_32 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 2)
+            (v, gmfn, entry, size);
+#endif
+
+#if CONFIG_PAGING_LEVELS == 2
+    if ( page->shadow_flags & SHF_L2_32 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 2, 2)
+            (v, gmfn, entry, size);
+#else 
+    if ( page->shadow_flags & SHF_L2_32 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 2)
+            (v, gmfn, entry, size);
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3 
+    if ( page->shadow_flags & SHF_L1_PAE ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 3, 3)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L2_PAE ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 3, 3)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L2H_PAE ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2he, 3, 3)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L3_PAE ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 3, 3)
+            (v, gmfn, entry, size);
+#else /* 32-bit non-PAE hypervisor does not support PAE guests */
+    ASSERT((page->shadow_flags & (SHF_L3_PAE|SHF_L2_PAE|SHF_L1_PAE)) == 0);
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4 
+    if ( page->shadow_flags & SHF_L1_64 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl1e, 4, 4)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L2_64 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl2e, 4, 4)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L3_64 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl3e, 4, 4)
+            (v, gmfn, entry, size);
+    if ( page->shadow_flags & SHF_L4_64 ) 
+        result |= SHADOW_INTERNAL_NAME(sh_map_and_validate_gl4e, 4, 4)
+            (v, gmfn, entry, size);
+#else /* 32-bit/PAE hypervisor does not support 64-bit guests */
+    ASSERT((page->shadow_flags 
+            & (SHF_L4_64|SHF_L3_64|SHF_L2_64|SHF_L1_64)) == 0);
+#endif
+
+    return result;
+}
+
+
+int
+shadow_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry)
+/* This is the entry point from hypercalls. It returns a bitmask of all the 
+ * results of shadow_set_l*e() calls, so the caller knows to do TLB flushes. */
+{
+    int rc;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    rc = __shadow_validate_guest_entry(v, gmfn, entry, sizeof(l1_pgentry_t));
+    shadow_audit_tables(v);
+    return rc;
+}
+
+void
+shadow_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn,
+                                void *entry, u32 size)
+/* This is the entry point for emulated writes to pagetables in HVM guests */
+{
+    struct domain *d = v->domain;
+    int rc;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    rc = __shadow_validate_guest_entry(v, gmfn, entry, size);
+    if ( rc & SHADOW_SET_FLUSH )
+    {
+        // Flush everyone except the local processor, which will flush when it
+        // re-enters the HVM guest.
+        //
+        cpumask_t mask = d->domain_dirty_cpumask;
+        cpu_clear(v->processor, mask);
+        flush_tlb_mask(mask);
+    }
+    if ( rc & SHADOW_SET_ERROR ) 
+    {
+        /* This page is probably not a pagetable any more: tear it out of the 
+         * shadows, along with any tables that reference it */
+        shadow_remove_all_shadows_and_parents(v, gmfn);
+    }
+    /* We ignore the other bits: since we are about to change CR3 on
+     * VMENTER we don't need to do any extra TLB flushes. */ 
+}
+
+
+/**************************************************************************/
+/* Memory management for shadow pages. */ 
+
+/* Meaning of the count_info field in shadow pages
+ * ----------------------------------------------
+ * 
+ * A count of all references to this page from other shadow pages and
+ * guest CR3s (a.k.a. v->arch.shadow.table).  
+ *
+ * The top bits hold the shadow type and the pinned bit.  Top-level
+ * shadows are pinned so that they don't disappear when not in a CR3
+ * somewhere.
+ *
+ * We don't need to use get|put_page for this as the updates are all
+ * protected by the shadow lock.  We can't use get|put_page for this
+ * as the size of the count on shadow pages is different from that on
+ * normal guest pages.
+ */
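+/* A minimal illustrative sketch (the helpers here are hypothetical, not a
+ * new interface): this is how the fields described above are read back by
+ * the code later in this file. */
+#if 0
+static inline unsigned long sh_example_ref_count(struct page_info *pg)
+{
+    /* Low bits of count_info: the reference count described above */
+    return pg->count_info & PGC_SH_count_mask;
+}
+static inline u32 sh_example_shadow_type(struct page_info *pg)
+{
+    /* Top bits of count_info: the shadow type, decoded exactly as
+     * shadow_unhook_mappings() and sh_destroy_shadow() do below */
+    return (pg->count_info & PGC_SH_type_mask) >> PGC_SH_type_shift;
+}
+#endif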
+
+/* Meaning of the type_info field in shadow pages
+ * ----------------------------------------------
+ * 
+ * type_info use depends on the shadow type (from count_info)
+ * 
+ * PGC_SH_none : This page is in the shadow free pool.  type_info holds
+ *                the chunk order for our freelist allocator.
+ *
+ * PGC_SH_l*_shadow : This page is in use as a shadow. type_info 
+ *                     holds the mfn of the guest page being shadowed,
+ *
+ * PGC_SH_fl1_*_shadow : This page is being used to shatter a superpage.
+ *                        type_info holds the gfn being shattered.
+ *
+ * PGC_SH_monitor_table : This page is part of a monitor table.
+ *                         type_info is not used.
+ */
+
+/* Meaning of the _domain field in shadow pages
+ * --------------------------------------------
+ *
+ * In shadow pages, this field will always have its least significant bit
+ * set.  This ensures that all attempts to get_page() will fail (as all
+ * valid pickled domain pointers have a zero for their least significant bit).
+ * Instead, the remaining upper bits are used to record the shadow generation
+ * counter when the shadow was created.
+ */
+
+/* Meaning of the shadow_flags field
+ * ----------------------------------
+ * 
+ * In guest pages that are shadowed, one bit for each kind of shadow they have.
+ * 
+ * In shadow pages, will be used for holding a representation of the populated
+ * entries in this shadow (either a min/max, or a bitmap, or ...)
+ *
+ * In monitor-table pages, holds the level of the particular page (to save
+ * spilling the shadow types into an extra bit by having three types of monitor
+ * page).
+ */
+
+/* Meaning of the list_head struct in shadow pages
+ * -----------------------------------------------
+ *
+ * In free shadow pages, this is used to hold the free-lists of chunks.
+ *
+ * In top-level shadow tables, this holds a linked-list of all top-level
+ * shadows (used for recovering memory and destroying shadows). 
+ *
+ * In lower-level shadows, this holds the physical address of a higher-level
+ * shadow entry that holds a reference to this shadow (or zero).
+ */
+
+/* Allocating shadow pages
+ * -----------------------
+ *
+ * Most shadow pages are allocated singly, but there are two cases where we 
+ * need to allocate multiple pages together.
+ * 
+ * 1: Shadowing 32-bit guest tables on PAE or 64-bit shadows.
+ *    A 32-bit guest l1 table covers 4MB of virtual address space,
+ *    and needs to be shadowed by two PAE/64-bit l1 tables (covering 2MB
+ *    of virtual address space each).  Similarly, a 32-bit guest l2 table 
+ *    (4GB va) needs to be shadowed by four PAE/64-bit l2 tables (1GB va 
+ *    each).  These multi-page shadows are contiguous and aligned; 
+ *    functions for handling offsets into them are defined in shadow.c 
+ *    (shadow_l1_index() etc.)
+ *    
+ * 2: Shadowing PAE top-level pages.  Each guest page that contains
+ *    any PAE top-level pages requires two shadow pages to shadow it.
+ *    They contain alternating l3 tables and pae_l3_bookkeeping structs.
+ *
+ * This table shows the allocation behaviour of the different modes:
+ *
+ * Xen paging      32b  pae  pae  64b  64b  64b
+ * Guest paging    32b  32b  pae  32b  pae  64b
+ * PV or HVM        *   HVM   *   HVM  HVM   * 
+ * Shadow paging   32b  pae  pae  pae  pae  64b
+ *
+ * sl1 size         4k   8k   4k   8k   4k   4k
+ * sl2 size         4k  16k   4k  16k   4k   4k
+ * sl3 size         -    -    8k   -    8k   4k
+ * sl4 size         -    -    -    -    -    4k
+ *
+ * We allocate memory from xen in four-page units and break them down
+ * with a simple buddy allocator.  Can't use the xen allocator to handle
+ * this as it only works for contiguous zones, and a domain's shadow
+ * pool is made of fragments.
+ *
+ * In HVM guests, the p2m table is built out of shadow pages, and we provide 
+ * a function for the p2m management to steal pages, in max-order chunks, from 
+ * the free pool.  We don't provide for giving them back, yet.
+ */
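+/* Worked example of the table above (illustrative only): an entry of "8k"
+ * is a single order-1 allocation (two contiguous pages) and "16k" is a
+ * single order-2 allocation (four pages); shadow_order() below returns
+ * exactly these orders for the 32-bit-guest l1/l2 shadows and the PAE l3
+ * shadow, and PAGE_SIZE << order gives the sizes shown. */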
+
+/* Figure out the least acceptable quantity of shadow memory.
+ * The minimum memory requirement for always being able to free up a
+ * chunk of memory is very small -- only three max-order chunks per
+ * vcpu to hold the top level shadows and pages with Xen mappings in them.  
+ *
+ * But for a guest to be guaranteed to successfully execute a single
+ * instruction, we must be able to map a large number (about thirty) VAs
+ * at the same time, which means that to guarantee progress, we must
+ * allow for more than ninety allocated pages per vcpu.  We round that
+ * up to 128 pages, or half a megabyte per vcpu. */
+unsigned int shadow_min_acceptable_pages(struct domain *d) 
+{
+    u32 vcpu_count = 0;
+    struct vcpu *v;
+
+    for_each_vcpu(d, v)
+        vcpu_count++;
+
+    return (vcpu_count * 128);
+}
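+/* Worked example (illustrative): a 4-vcpu domain therefore has a floor of
+ * 4 * 128 = 512 shadow pages, i.e. 512 * 4kB = 2MB, before the roughly
+ * one-page-per-MB-of-RAM p2m allowance that set_sh_allocation() adds. */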
+
+/* Using the type_info field to store freelist order */
+#define SH_PFN_ORDER(_p) ((_p)->u.inuse.type_info)
+#define SH_SET_PFN_ORDER(_p, _o)                       \
+ do { (_p)->u.inuse.type_info = (_o); } while (0)
+ 
+
+/* Figure out the order of allocation needed for a given shadow type */
+static inline u32
+shadow_order(u32 shadow_type) 
+{
+#if CONFIG_PAGING_LEVELS > 2
+    static const u32 type_to_order[16] = {
+        0, /* PGC_SH_none           */
+        1, /* PGC_SH_l1_32_shadow   */
+        1, /* PGC_SH_fl1_32_shadow  */
+        2, /* PGC_SH_l2_32_shadow   */
+        0, /* PGC_SH_l1_pae_shadow  */
+        0, /* PGC_SH_fl1_pae_shadow */
+        0, /* PGC_SH_l2_pae_shadow  */
+        0, /* PGC_SH_l2h_pae_shadow */
+        1, /* PGC_SH_l3_pae_shadow  */
+        0, /* PGC_SH_l1_64_shadow   */
+        0, /* PGC_SH_fl1_64_shadow  */
+        0, /* PGC_SH_l2_64_shadow   */
+        0, /* PGC_SH_l3_64_shadow   */
+        0, /* PGC_SH_l4_64_shadow   */
+        2, /* PGC_SH_p2m_table      */
+        0  /* PGC_SH_monitor_table  */
+        };
+    u32 type = (shadow_type & PGC_SH_type_mask) >> PGC_SH_type_shift;
+    return type_to_order[type];
+#else  /* 32-bit Xen only ever shadows 32-bit guests on 32-bit shadows. */
+    return 0;
+#endif
+}
+
+
+/* Do we have a free chunk of at least this order? */
+static inline int chunk_is_available(struct domain *d, int order)
+{
+    int i;
+    
+    for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
+        if ( !list_empty(&d->arch.shadow.freelists[i]) )
+            return 1;
+    return 0;
+}
+
+/* Dispatcher function: call the per-mode function that will unhook the
+ * non-Xen mappings in this top-level shadow mfn */
+void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
+{
+    struct page_info *pg = mfn_to_page(smfn);
+    switch ( (pg->count_info & PGC_SH_type_mask) >> PGC_SH_type_shift )
+    {
+    case PGC_SH_l2_32_shadow >> PGC_SH_type_shift:
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,2,2)(v,smfn);
+#else
+        SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings,3,2)(v,smfn);
+#endif
+        break;
+#if CONFIG_PAGING_LEVELS >= 3
+    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_unhook_pae_mappings,3,3)(v,smfn);
+        break;
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
+    case PGC_SH_l4_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_unhook_64b_mappings,4,4)(v,smfn);
+        break;
+#endif
+    default:
+        SHADOW_PRINTK("top-level shadow has bad type %08lx\n", 
+                       (unsigned long)((pg->count_info & PGC_SH_type_mask)
+                                       >> PGC_SH_type_shift));
+        BUG();
+    }
+}
+
+
+/* Make sure there is at least one chunk of the required order available
+ * in the shadow page pool. This must be called before any calls to
+ * shadow_alloc().  Since this will free existing shadows to make room,
+ * it must be called early enough to avoid freeing shadows that the
+ * caller is currently working on. */
+void shadow_prealloc(struct domain *d, unsigned int order)
+{
+    /* Need a vcpu for calling unpins; for now, since we don't have
+     * per-vcpu shadows, any will do */
+    struct vcpu *v = d->vcpu[0];
+    struct list_head *l, *t;
+    struct page_info *pg;
+    mfn_t smfn;
+
+    if ( chunk_is_available(d, order) ) return; 
+    
+    /* Stage one: walk the list of top-level pages, unpinning them */
+    perfc_incrc(shadow_prealloc_1);
+    list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows)
+    {
+        pg = list_entry(l, struct page_info, list);
+        smfn = page_to_mfn(pg);
+
+#if CONFIG_PAGING_LEVELS >= 3
+        if ( (pg->count_info & PGC_SH_type_mask) == PGC_SH_l3_pae_shadow )
+        {
+            /* For PAE, we need to unpin each subshadow on this shadow */
+            SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn);
+        } 
+        else 
+#endif /* 32-bit code always takes this branch */
+        {
+            /* Unpin this top-level shadow */
+            sh_unpin(v, smfn);
+        }
+
+        /* See if that freed up a chunk of appropriate size */
+        if ( chunk_is_available(d, order) ) return;
+    }
+
+    /* Stage two: all shadow pages are in use in hierarchies that are
+     * loaded in cr3 on some vcpu.  Walk them, unhooking the non-Xen
+     * mappings. */
+    perfc_incrc(shadow_prealloc_2);
+    v = current;
+    if ( v->domain != d )
+        v = d->vcpu[0];
+    /* Walk the list from the tail: recently used toplevels have been pulled
+     * to the head */
+    list_for_each_backwards_safe(l, t, &d->arch.shadow.toplevel_shadows)
+    {
+        pg = list_entry(l, struct page_info, list);
+        smfn = page_to_mfn(pg);
+        shadow_unhook_mappings(v, smfn);
+
+        /* Need to flush TLB if we've altered our own tables */
+        if ( !shadow_mode_external(d) 
+             && pagetable_get_pfn(current->arch.shadow_table) == mfn_x(smfn) )
+            local_flush_tlb();
+        
+        /* See if that freed up a chunk of appropriate size */
+        if ( chunk_is_available(d, order) ) return;
+    }
+    
+    /* Nothing more we can do: all remaining shadows are of pages that
+     * hold Xen mappings for some vcpu.  This should never happen. */
+    SHADOW_PRINTK("Can't pre-allocate %i shadow pages!\n"
+                   "  shadow pages total = %u, free = %u, p2m=%u\n",
+                   1 << order, 
+                   d->arch.shadow.total_pages, 
+                   d->arch.shadow.free_pages, 
+                   d->arch.shadow.p2m_pages);
+    BUG();
+}
+
+
+/* Allocate another shadow's worth of (contiguous, aligned) pages,
+ * and fill in the type and backpointer fields of their page_infos. 
+ * Never fails to allocate. */
+mfn_t shadow_alloc(struct domain *d,  
+                    u32 shadow_type,
+                    unsigned long backpointer)
+{
+    struct page_info *pg = NULL;
+    unsigned int order = shadow_order(shadow_type);
+    cpumask_t mask;
+    void *p;
+    int i;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(order <= SHADOW_MAX_ORDER);
+    ASSERT(shadow_type != PGC_SH_none);
+    perfc_incrc(shadow_alloc);
+
+    /* Find smallest order which can satisfy the request. */
+    for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
+        if ( !list_empty(&d->arch.shadow.freelists[i]) )
+        {
+            pg = list_entry(d->arch.shadow.freelists[i].next, 
+                            struct page_info, list);
+            list_del(&pg->list);
+            
+            /* We may have to halve the chunk a number of times. */
+            while ( i != order )
+            {
+                i--;
+                SH_SET_PFN_ORDER(pg, i);
+                list_add_tail(&pg->list, &d->arch.shadow.freelists[i]);
+                pg += 1 << i;
+            }
+            d->arch.shadow.free_pages -= 1 << order;
+
+            /* Init page info fields and clear the pages */
+            for ( i = 0; i < 1<<order ; i++ ) 
+            {
+                pg[i].u.inuse.type_info = backpointer;
+                pg[i].count_info = shadow_type;
+                pg[i].shadow_flags = 0;
+                INIT_LIST_HEAD(&pg[i].list);
+                /* Before we overwrite the old contents of this page, 
+                 * we need to be sure that no TLB holds a pointer to it. */
+                mask = d->domain_dirty_cpumask;
+                tlbflush_filter(mask, pg[i].tlbflush_timestamp);
+                if ( unlikely(!cpus_empty(mask)) )
+                {
+                    perfc_incrc(shadow_alloc_tlbflush);
+                    flush_tlb_mask(mask);
+                }
+                /* Now safe to clear the page for reuse */
+                p = sh_map_domain_page(page_to_mfn(pg+i));
+                ASSERT(p != NULL);
+                clear_page(p);
+                sh_unmap_domain_page(p);
+                perfc_incr(shadow_alloc_count);
+            }
+            return page_to_mfn(pg);
+        }
+    
+    /* If we get here, we failed to allocate. This should never happen.
+     * It means that we didn't call shadow_prealloc() correctly before
+     * we allocated.  We can't recover by calling prealloc here, because
+     * we might free up higher-level pages that the caller is working on. */
+    SHADOW_PRINTK("Can't allocate %i shadow pages!\n", 1 << order);
+    BUG();
+}
+
+
+/* Return some shadow pages to the pool. */
+void shadow_free(struct domain *d, mfn_t smfn)
+{
+    struct page_info *pg = mfn_to_page(smfn); 
+    u32 shadow_type;
+    unsigned long order;
+    unsigned long mask;
+    int i;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    perfc_incrc(shadow_free);
+
+    shadow_type = pg->count_info & PGC_SH_type_mask;
+    ASSERT(shadow_type != PGC_SH_none);
+    ASSERT(shadow_type != PGC_SH_p2m_table);
+    order = shadow_order(shadow_type);
+
+    d->arch.shadow.free_pages += 1 << order;
+
+    for ( i = 0; i < 1<<order; i++ ) 
+    {
+        /* Strip out the type: this is now a free shadow page */
+        pg[i].count_info = 0;
+        /* Remember the TLB timestamp so we will know whether to flush 
+         * TLBs when we reuse the page.  Because the destructors leave the
+         * contents of the pages in place, we can delay TLB flushes until
+         * just before the allocator hands the page out again. */
+        pg[i].tlbflush_timestamp = tlbflush_current_time();
+        perfc_decr(shadow_alloc_count);
+    }
+
+    /* Merge chunks as far as possible. */
+    while ( order < SHADOW_MAX_ORDER )
+    {
+        mask = 1 << order;
+        if ( (mfn_x(page_to_mfn(pg)) & mask) ) {
+            /* Merge with predecessor block? */
+            if ( (((pg-mask)->count_info & PGC_SH_type_mask) != PGT_none) 
+                 || (SH_PFN_ORDER(pg-mask) != order) )
+                break;
+            list_del(&(pg-mask)->list);
+            pg -= mask;
+        } else {
+            /* Merge with successor block? */
+            if ( (((pg+mask)->count_info & PGC_SH_type_mask) != PGT_none)
+                 || (SH_PFN_ORDER(pg+mask) != order) )
+                break;
+            list_del(&(pg+mask)->list);
+        }
+        order++;
+    }
+
+    SH_SET_PFN_ORDER(pg, order);
+    list_add_tail(&pg->list, &d->arch.shadow.freelists[order]);
+}
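+/* Buddy-pairing sketch (illustrative; sh_example_buddy is hypothetical):
+ * the test on (mfn & mask) above is equivalent to saying the buddy of an
+ * order-'order' block at mfn m is at m ^ (1 << order).  E.g. for order 1,
+ * the block at mfn 0x102 coalesces with its predecessor at 0x100, and the
+ * block at 0x100 with its successor at 0x102. */
+#if 0
+static inline unsigned long sh_example_buddy(unsigned long mfn, int order)
+{
+    return mfn ^ (1UL << order);
+}
+#endif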
+
+/* Divert some memory from the pool to be used by the p2m mapping.
+ * This action is irreversible: the p2m mapping only ever grows.
+ * That's OK because the p2m table only exists for external domains,
+ * and those domains can't ever turn off shadow mode.
+ * Also, we only ever allocate a max-order chunk, so as to preserve
+ * the invariant that shadow_prealloc() always works.
+ * Returns 0 iff it can't get a chunk (the caller should then
+ * free up some pages in domheap and call set_sh_allocation);
+ * returns non-zero on success.
+ */
+static int
+shadow_alloc_p2m_pages(struct domain *d)
+{
+    struct page_info *pg;
+    u32 i;
+    ASSERT(shadow_lock_is_acquired(d));
+    
+    if ( d->arch.shadow.total_pages 
+         < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) )
+        return 0; /* Not enough shadow memory: need to increase it first */
+    
+    pg = mfn_to_page(shadow_alloc(d, PGC_SH_p2m_table, 0));
+    d->arch.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER);
+    d->arch.shadow.total_pages -= (1<<SHADOW_MAX_ORDER);
+    for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++)
+    {
+        /* Unlike shadow pages, mark p2m pages as owned by the domain */
+        page_set_owner(&pg[i], d);
+        list_add_tail(&pg[i].list, &d->arch.shadow.p2m_freelist);
+    }
+    return 1;
+}
+
+// Returns 0 if no memory is available...
+mfn_t
+shadow_alloc_p2m_page(struct domain *d)
+{
+    struct list_head *entry;
+    mfn_t mfn;
+    void *p;
+
+    if ( list_empty(&d->arch.shadow.p2m_freelist) &&
+         !shadow_alloc_p2m_pages(d) )
+        return _mfn(0);
+    entry = d->arch.shadow.p2m_freelist.next;
+    list_del(entry);
+    list_add_tail(entry, &d->arch.shadow.p2m_inuse);
+    mfn = page_to_mfn(list_entry(entry, struct page_info, list));
+    sh_get_ref(mfn, 0);
+    p = sh_map_domain_page(mfn);
+    clear_page(p);
+    sh_unmap_domain_page(p);
+
+    return mfn;
+}
+
+#if CONFIG_PAGING_LEVELS == 3
+static void p2m_install_entry_in_monitors(struct domain *d, 
+                                          l3_pgentry_t *l3e) 
+/* Special case, only used for external-mode domains on PAE hosts:
+ * update the mapping of the p2m table.  Once again, this is trivial in
+ * other paging modes (one top-level entry points to the top-level p2m,
+ * no maintenance needed), but PAE makes life difficult by needing a
+ * copy the eight l3es of the p2m table in eight l2h slots in the
+ * monitor table.  This function makes fresh copies when a p2m l3e
+ * changes. */
+{
+    l2_pgentry_t *ml2e;
+    struct vcpu *v;
+    unsigned int index;
+
+    index = ((unsigned long)l3e & ~PAGE_MASK) / sizeof(l3_pgentry_t);
+    ASSERT(index < MACHPHYS_MBYTES>>1);
+
+    for_each_vcpu(d, v) 
+    {
+        if ( pagetable_get_pfn(v->arch.monitor_table) == 0 ) 
+            continue;
+        ASSERT(shadow_mode_external(v->domain));
+
+        SHADOW_DEBUG(P2M, "d=%u v=%u index=%u mfn=%#lx\n",
+                      d->domain_id, v->vcpu_id, index, l3e_get_pfn(*l3e));
+
+        if ( v == current ) /* OK to use linear map of monitor_table */
+            ml2e = __linear_l2_table + l2_linear_offset(RO_MPT_VIRT_START);
+        else 
+        {
+            l3_pgentry_t *ml3e;
+            ml3e =
+                sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+            ASSERT(l3e_get_flags(ml3e[3]) & _PAGE_PRESENT);
+            ml2e = sh_map_domain_page(_mfn(l3e_get_pfn(ml3e[3])));
+            ml2e += l2_table_offset(RO_MPT_VIRT_START);
+            sh_unmap_domain_page(ml3e);
+        }
+        ml2e[index] = l2e_from_pfn(l3e_get_pfn(*l3e), __PAGE_HYPERVISOR);
+        if ( v != current )
+            sh_unmap_domain_page(ml2e);
+    }
+}
+#endif
+
+// Find the next level's P2M entry, checking for out-of-range gfn's...
+// Returns NULL on error.
+//
+static l1_pgentry_t *
+p2m_find_entry(void *table, unsigned long *gfn_remainder,
+                   unsigned long gfn, u32 shift, u32 max)
+{
+    u32 index;
+
+    index = *gfn_remainder >> shift;
+    if ( index >= max )
+    {
+        SHADOW_DEBUG(P2M, "gfn=0x%lx out of range "
+                      "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n",
+                       gfn, *gfn_remainder, shift, index, max);
+        return NULL;
+    }
+    *gfn_remainder &= (1 << shift) - 1;
+    return (l1_pgentry_t *)table + index;
+}
+
+// Walk one level of the P2M table, allocating a new table if required.
+// Returns 0 on error.
+//
+static int
+p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, 
+               unsigned long *gfn_remainder, unsigned long gfn, u32 shift, 
+               u32 max, unsigned long type)
+{
+    l1_pgentry_t *p2m_entry;
+    void *next;
+
+    if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
+                                      shift, max)) )
+        return 0;
+
+    if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
+    {
+        mfn_t mfn = shadow_alloc_p2m_page(d);
+        if ( mfn_x(mfn) == 0 )
+            return 0;
+        *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
+        mfn_to_page(mfn)->u.inuse.type_info = type | 1 | PGT_validated;
+        mfn_to_page(mfn)->count_info = 1;
+#if CONFIG_PAGING_LEVELS == 3
+        if (type == PGT_l2_page_table)
+        {
+            /* We have written to the p2m l3: need to sync the per-vcpu
+             * copies of it in the monitor tables */
+            p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry);
+        }
+#endif
+        /* The P2M can be shadowed: keep the shadows synced */
+        if ( d->vcpu[0] )
+            (void)__shadow_validate_guest_entry(d->vcpu[0], *table_mfn,
+                                                 p2m_entry, sizeof *p2m_entry);
+    }
+    *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
+    next = sh_map_domain_page(*table_mfn);
+    sh_unmap_domain_page(*table);
+    *table = next;
+
+    return 1;
+}
+
+// Returns 0 on error (out of memory)
+int
+shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+{
+    // XXX -- this might be able to be faster iff current->domain == d
+    mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
+    void *table = sh_map_domain_page(table_mfn);
+    unsigned long gfn_remainder = gfn;
+    l1_pgentry_t *p2m_entry;
+
+#if CONFIG_PAGING_LEVELS >= 4
+    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                         L4_PAGETABLE_SHIFT - PAGE_SHIFT,
+                         L4_PAGETABLE_ENTRIES, PGT_l3_page_table) )
+        return 0;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    // When using PAE Xen, we only allow 33 bits of pseudo-physical
+    // address in translated guests (i.e. 8 GBytes).  This restriction
+    // comes from wanting to map the P2M table into the 16MB RO_MPT hole
+    // in Xen's address space for translated PV guests.
+    //
+    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                         L3_PAGETABLE_SHIFT - PAGE_SHIFT,
+                         (CONFIG_PAGING_LEVELS == 3
+                          ? 8
+                          : L3_PAGETABLE_ENTRIES),
+                         PGT_l2_page_table) )
+        return 0;
+#endif
+    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
+                         L2_PAGETABLE_SHIFT - PAGE_SHIFT,
+                         L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
+        return 0;
+
+    p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
+                               0, L1_PAGETABLE_ENTRIES);
+    ASSERT(p2m_entry);
+    if ( valid_mfn(mfn) )
+        *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
+    else
+        *p2m_entry = l1e_empty();
+
+    /* The P2M can be shadowed: keep the shadows synced */
+    (void) __shadow_validate_guest_entry(d->vcpu[0], table_mfn, 
+                                          p2m_entry, sizeof *p2m_entry);
+
+    sh_unmap_domain_page(table);
+
+    return 1;
+}
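+/* Worked example of the walk above (illustrative, assuming the usual
+ * 4-level x86-64 shifts of 27/18/9/0 frame-number bits per level):
+ * gfn 0x12345 yields l4 index 0x12345 >> 27 = 0, l3 index
+ * 0x12345 >> 18 = 0, l2 index 0x12345 >> 9 = 0x91, and after masking with
+ * (1 << 9) - 1 an l1 index of 0x145.  Each p2m_next_level() call peels one
+ * such index off gfn_remainder before descending. */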
+
+// Allocate a new p2m table for a domain.
+//
+// The structure of the p2m table is that of a pagetable for xen (i.e. it is
+// controlled by CONFIG_PAGING_LEVELS).
+//
+// Returns 0 if p2m table could not be initialized
+//
+static int
+shadow_alloc_p2m_table(struct domain *d)
+{
+    mfn_t p2m_top;
+    struct list_head *entry;
+    unsigned int page_count = 0;
+    
+    SHADOW_PRINTK("allocating p2m table\n");
+    ASSERT(pagetable_get_pfn(d->arch.phys_table) == 0);
+
+    p2m_top = shadow_alloc_p2m_page(d);
+    /* Check the allocation succeeded before touching its page_info */
+    if ( mfn_x(p2m_top) == 0 )
+        return 0;
+
+    mfn_to_page(p2m_top)->count_info = 1;
+    mfn_to_page(p2m_top)->u.inuse.type_info =
+#if CONFIG_PAGING_LEVELS == 4
+        PGT_l4_page_table
+#elif CONFIG_PAGING_LEVELS == 3
+        PGT_l3_page_table
+#elif CONFIG_PAGING_LEVELS == 2
+        PGT_l2_page_table
+#endif
+        | 1 | PGT_validated;
+
+    d->arch.phys_table = pagetable_from_mfn(p2m_top);
+
+    SHADOW_PRINTK("populating p2m table\n");
+ 
+    for ( entry = d->page_list.next;
+          entry != &d->page_list;
+          entry = entry->next )
+    {
+        struct page_info *page = list_entry(entry, struct page_info, list);
+        mfn_t mfn = page_to_mfn(page);
+        unsigned long gfn = get_gpfn_from_mfn(mfn_x(mfn));
+        page_count++;
+        if (
+#ifdef __x86_64__
+            (gfn != 0x5555555555555555L)
+#else
+            (gfn != 0x55555555L)
+#endif
+             && gfn != INVALID_M2P_ENTRY
+             && !shadow_set_p2m_entry(d, gfn, mfn) )
+        {
+            SHADOW_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" 
SH_PRI_mfn "\n",
+                           gfn, mfn_x(mfn));
+            return 0;
+        }
+    }
+
+    SHADOW_PRINTK("p2m table initialised (%u pages)\n", page_count);
+    return 1;
+}
+
+mfn_t
+sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+/* Read another domain's p2m entries */
+{
+    mfn_t mfn;
+    unsigned long addr = gpfn << PAGE_SHIFT;
+    l2_pgentry_t *l2e;
+    l1_pgentry_t *l1e;
+    
+    ASSERT(shadow_mode_translate(d));
+    mfn = pagetable_get_mfn(d->arch.phys_table);
+
+
+#if CONFIG_PAGING_LEVELS > 2
+    if ( gpfn > (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t) ) 
+        /* This pfn is higher than the p2m map can hold */
+        return _mfn(INVALID_MFN);
+#endif
+
+
+#if CONFIG_PAGING_LEVELS >= 4
+    { 
+        l4_pgentry_t *l4e = sh_map_domain_page(mfn);
+        l4e += l4_table_offset(addr);
+        if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
+        {
+            sh_unmap_domain_page(l4e);
+            return _mfn(INVALID_MFN);
+        }
+        mfn = _mfn(l4e_get_pfn(*l4e));
+        sh_unmap_domain_page(l4e);
+    }
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+    {
+        l3_pgentry_t *l3e = sh_map_domain_page(mfn);
+        l3e += l3_table_offset(addr);
+        if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
+        {
+            sh_unmap_domain_page(l3e);
+            return _mfn(INVALID_MFN);
+        }
+        mfn = _mfn(l3e_get_pfn(*l3e));
+        sh_unmap_domain_page(l3e);
+    }
+#endif
+
+    l2e = sh_map_domain_page(mfn);
+    l2e += l2_table_offset(addr);
+    if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
+    {
+        sh_unmap_domain_page(l2e);
+        return _mfn(INVALID_MFN);
+    }
+    mfn = _mfn(l2e_get_pfn(*l2e));
+    sh_unmap_domain_page(l2e);
+
+    l1e = sh_map_domain_page(mfn);
+    l1e += l1_table_offset(addr);
+    if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
+    {
+        sh_unmap_domain_page(l1e);
+        return _mfn(INVALID_MFN);
+    }
+    mfn = _mfn(l1e_get_pfn(*l1e));
+    sh_unmap_domain_page(l1e);
+
+    return mfn;
+}
+
+unsigned long
+shadow_gfn_to_mfn_foreign(unsigned long gpfn)
+{
+    return mfn_x(sh_gfn_to_mfn_foreign(current->domain, gpfn));
+}
+
+
+static void shadow_p2m_teardown(struct domain *d)
+/* Return all the p2m pages to Xen.
+ * We know we don't have any extra mappings to these pages */
+{
+    struct list_head *entry, *n;
+    struct page_info *pg;
+
+    d->arch.phys_table = pagetable_null();
+
+    list_for_each_safe(entry, n, &d->arch.shadow.p2m_inuse)
+    {
+        pg = list_entry(entry, struct page_info, list);
+        list_del(entry);
+        /* Should have just the one ref we gave it in alloc_p2m_page() */
+        if ( (pg->count_info & PGC_SH_count_mask) != 1 )
+        {
+            SHADOW_PRINTK("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+                           pg->count_info, pg->u.inuse.type_info);
+        }
+        ASSERT(page_get_owner(pg) == d);
+        /* Free should not decrement domain's total allocation, since 
+         * these pages were allocated without an owner. */
+        page_set_owner(pg, NULL); 
+        free_domheap_pages(pg, 0);
+        d->arch.shadow.p2m_pages--;
+        perfc_decr(shadow_alloc_count);
+    }
+    list_for_each_safe(entry, n, &d->arch.shadow.p2m_freelist)
+    {
+        list_del(entry);
+        pg = list_entry(entry, struct page_info, list);
+        ASSERT(page_get_owner(pg) == d);
+        /* Free should not decrement domain's total allocation. */
+        page_set_owner(pg, NULL); 
+        free_domheap_pages(pg, 0);
+        d->arch.shadow.p2m_pages--;
+        perfc_decr(shadow_alloc_count);
+    }
+    ASSERT(d->arch.shadow.p2m_pages == 0);
+}
+
+/* Set the pool of shadow pages to the required number of pages.
+ * Input will be rounded up to at least shadow_min_acceptable_pages(),
+ * plus space for the p2m table.
+ * Returns 0 for success, non-zero for failure. */
+static unsigned int set_sh_allocation(struct domain *d, 
+                                       unsigned int pages,
+                                       int *preempted)
+{
+    struct page_info *pg;
+    unsigned int lower_bound;
+    int j;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    
+    /* Don't allocate less than the minimum acceptable, plus one page per
+     * megabyte of RAM (for the p2m table) */
+    lower_bound = shadow_min_acceptable_pages(d) + (d->tot_pages / 256);
+    if ( pages > 0 && pages < lower_bound )
+        pages = lower_bound;
+    /* Round up to largest block size */
+    pages = (pages + ((1<<SHADOW_MAX_ORDER)-1)) & ~((1<<SHADOW_MAX_ORDER)-1);
+
+    SHADOW_PRINTK("current %i target %i\n", 
+                   d->arch.shadow.total_pages, pages);
+
+    while ( d->arch.shadow.total_pages != pages ) 
+    {
+        if ( d->arch.shadow.total_pages < pages ) 
+        {
+            /* Need to allocate more memory from domheap */
+            pg = alloc_domheap_pages(NULL, SHADOW_MAX_ORDER, 0); 
+            if ( pg == NULL ) 
+            { 
+                SHADOW_PRINTK("failed to allocate shadow pages.\n");
+                return -ENOMEM;
+            }
+            d->arch.shadow.free_pages += 1<<SHADOW_MAX_ORDER;
+            d->arch.shadow.total_pages += 1<<SHADOW_MAX_ORDER;
+            for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ ) 
+            {
+                pg[j].u.inuse.type_info = 0;  /* Free page */
+                pg[j].tlbflush_timestamp = 0; /* Not in any TLB */
+            }
+            SH_SET_PFN_ORDER(pg, SHADOW_MAX_ORDER);
+            list_add_tail(&pg->list, 
+                          &d->arch.shadow.freelists[SHADOW_MAX_ORDER]);
+        } 
+        else if ( d->arch.shadow.total_pages > pages ) 
+        {
+            /* Need to return memory to domheap */
+            shadow_prealloc(d, SHADOW_MAX_ORDER);
+            ASSERT(!list_empty(&d->arch.shadow.freelists[SHADOW_MAX_ORDER]));
+            pg = list_entry(d->arch.shadow.freelists[SHADOW_MAX_ORDER].next, 
+                            struct page_info, list);
+            list_del(&pg->list);
+            d->arch.shadow.free_pages -= 1<<SHADOW_MAX_ORDER;
+            d->arch.shadow.total_pages -= 1<<SHADOW_MAX_ORDER;
+            free_domheap_pages(pg, SHADOW_MAX_ORDER);
+        }
+
+        /* Check to see if we need to yield and try again */
+        if ( preempted && hypercall_preempt_check() )
+        {
+            *preempted = 1;
+            return 0;
+        }
+    }
+
+    return 0;
+}
+
+unsigned int shadow_set_allocation(struct domain *d, 
+                                    unsigned int megabytes,
+                                    int *preempted)
+/* Hypercall interface to set the shadow memory allocation */
+{
+    unsigned int rv;
+    shadow_lock(d);
+    rv = set_sh_allocation(d, megabytes << (20 - PAGE_SHIFT), preempted); 
+    SHADOW_PRINTK("dom %u allocation now %u pages (%u MB)\n",
+                   d->domain_id,
+                   d->arch.shadow.total_pages,
+                   shadow_get_allocation(d));
+    shadow_unlock(d);
+    return rv;
+}
+
+/**************************************************************************/
+/* Hash table for storing the guest->shadow mappings */
+
+/* Hash function that takes a gfn or mfn, plus another byte of type info */
+typedef u32 key_t;
+static inline key_t sh_hash(unsigned long n, u8 t) 
+{
+    unsigned char *p = (unsigned char *)&n;
+    key_t k = t;
+    int i;
+    for ( i = 0; i < sizeof(n) ; i++ ) k = (u32)p[i] + (k<<6) + (k<<16) - k;
+    return k;
+}
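+/* Usage sketch: callers reduce the key to a bucket with
+ * sh_hash(n, t) % SHADOW_HASH_BUCKETS, as shadow_hash_lookup() and friends
+ * do below.  The per-byte step k = p[i] + (k << 6) + (k << 16) - k is the
+ * familiar sdbm-style mix, i.e. k = p[i] + k * 65599 for each byte. */
+#if 0
+static inline u32 sh_example_bucket(unsigned long n, u8 t)
+{
+    return sh_hash(n, t) % SHADOW_HASH_BUCKETS;   /* hypothetical helper */
+}
+#endif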
+
+#if SHADOW_AUDIT & (SHADOW_AUDIT_HASH|SHADOW_AUDIT_HASH_FULL)
+
+/* Before we get to the mechanism, define a pair of audit functions
+ * that sanity-check the contents of the hash table. */
+static void sh_hash_audit_bucket(struct domain *d, int bucket)
+/* Audit one bucket of the hash table */
+{
+    struct shadow_hash_entry *e, *x;
+    struct page_info *pg;
+
+    if ( !(SHADOW_AUDIT_ENABLE) )
+        return;
+
+    e = &d->arch.shadow.hash_table[bucket];
+    if ( e->t == 0 ) return; /* Bucket is empty */ 
+    while ( e )
+    {
+        /* Empty link? */
+        BUG_ON( e->t == 0 ); 
+        /* Bogus type? */
+        BUG_ON( e->t > (PGC_SH_max_shadow >> PGC_SH_type_shift) );
+        /* Wrong bucket? */
+        BUG_ON( sh_hash(e->n, e->t) % SHADOW_HASH_BUCKETS != bucket ); 
+        /* Duplicate entry? */
+        for ( x = e->next; x; x = x->next )
+            BUG_ON( x->n == e->n && x->t == e->t );
+        /* Bogus MFN? */
+        BUG_ON( !valid_mfn(e->smfn) );
+        pg = mfn_to_page(e->smfn);
+        /* Not a shadow? */
+        BUG_ON( page_get_owner(pg) != 0 );
+        /* Wrong kind of shadow? */
+        BUG_ON( (pg->count_info & PGC_SH_type_mask) >> PGC_SH_type_shift 
+                != e->t ); 
+        /* Bad backlink? */
+        BUG_ON( pg->u.inuse.type_info != e->n );
+        if ( e->t != (PGC_SH_fl1_32_shadow >> PGC_SH_type_shift)
+             && e->t != (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
+             && e->t != (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift) )
+        {
+            /* Bad shadow flags on guest page? */
+            BUG_ON( !(mfn_to_page(_mfn(e->n))->shadow_flags & (1<<e->t)) );
+        }
+        /* That entry was OK; on we go */
+        e = e->next;
+    }
+}
+
+#else
+#define sh_hash_audit_bucket(_d, _b)
+#endif /* Hashtable bucket audit */
+
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_HASH_FULL
+
+static void sh_hash_audit(struct domain *d)
+/* Full audit: audit every bucket in the table */
+{
+    int i;
+
+    if ( !(SHADOW_AUDIT_ENABLE) )
+        return;
+
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        sh_hash_audit_bucket(d, i);
+    }
+}
+
+#else
+#define sh_hash_audit(_d)
+#endif /* Hashtable bucket audit */
+
+/* Memory management interface for bucket allocation.
+ * These ought to come out of shadow memory, but at least on 32-bit
+ * machines we are forced to allocate them from xenheap so that we can
+ * address them. */
+static struct shadow_hash_entry *sh_alloc_hash_entry(struct domain *d)
+{
+    struct shadow_hash_entry *extra, *x;
+    int i;
+
+    /* We need to allocate a new node. Ensure the free list is not empty. 
+     * Allocate new entries in units the same size as the original table. */
+    if ( unlikely(d->arch.shadow.hash_freelist == NULL) )
+    {
+        size_t sz = sizeof(void *) + (SHADOW_HASH_BUCKETS * sizeof(*x));
+        extra = xmalloc_bytes(sz);
+
+        if ( extra == NULL )
+        {
+            /* No memory left! */
+            SHADOW_ERROR("xmalloc() failed when allocating hash buckets.\n");
+            domain_crash_synchronous();
+        }
+        memset(extra, 0, sz);
+
+        /* Record the allocation block so it can be correctly freed later. */
+        *((struct shadow_hash_entry **)&extra[SHADOW_HASH_BUCKETS]) = 
+            d->arch.shadow.hash_allocations;
+        d->arch.shadow.hash_allocations = &extra[0];
+
+        /* Thread a free chain through the newly-allocated nodes. */
+        for ( i = 0; i < (SHADOW_HASH_BUCKETS - 1); i++ )
+            extra[i].next = &extra[i+1];
+        extra[i].next = NULL;
+
+        /* Add the new nodes to the free list. */
+        d->arch.shadow.hash_freelist = &extra[0];
+    }
+
+    /* Allocate a new node from the free list. */
+    x = d->arch.shadow.hash_freelist;
+    d->arch.shadow.hash_freelist = x->next;
+    return x;
+}
+
+static void sh_free_hash_entry(struct domain *d, struct shadow_hash_entry *e)
+{
+    /* Mark the bucket as empty and return it to the free list */
+    e->t = 0; 
+    e->next = d->arch.shadow.hash_freelist;
+    d->arch.shadow.hash_freelist = e;
+}
+
+
+/* Allocate and initialise the table itself.  
+ * Returns 0 for success, 1 for error. */
+static int shadow_hash_alloc(struct domain *d)
+{
+    struct shadow_hash_entry *table;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(!d->arch.shadow.hash_table);
+
+    table = xmalloc_array(struct shadow_hash_entry, SHADOW_HASH_BUCKETS);
+    if ( !table ) return 1;
+    memset(table, 0, 
+           SHADOW_HASH_BUCKETS * sizeof (struct shadow_hash_entry));
+    d->arch.shadow.hash_table = table;
+    return 0;
+}
+
+/* Tear down the hash table and return all memory to Xen.
+ * This function does not care whether the table is populated. */
+static void shadow_hash_teardown(struct domain *d)
+{
+    struct shadow_hash_entry *a, *n;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+
+    /* Return the table itself */
+    xfree(d->arch.shadow.hash_table);
+    d->arch.shadow.hash_table = NULL;
+
+    /* Return any extra allocations */
+    a = d->arch.shadow.hash_allocations;
+    while ( a ) 
+    {
+        /* We stored a linked-list pointer at the end of each allocation */
+        n = *((struct shadow_hash_entry **)(&a[SHADOW_HASH_BUCKETS]));
+        xfree(a);
+        a = n;
+    }
+    d->arch.shadow.hash_allocations = NULL;
+    d->arch.shadow.hash_freelist = NULL;
+}
+
+
+mfn_t shadow_hash_lookup(struct vcpu *v, unsigned long n, u8 t)
+/* Find an entry in the hash table.  Returns the MFN of the shadow,
+ * or INVALID_MFN if it doesn't exist */
+{
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *p, *x, *head;
+    key_t key;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+    ASSERT(t);
+
+    sh_hash_audit(d);
+
+    perfc_incrc(shadow_hash_lookups);
+    key = sh_hash(n, t);
+
+    x = head = &d->arch.shadow.hash_table[key % SHADOW_HASH_BUCKETS];
+    p = NULL;
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+
+    do
+    {
+        ASSERT(x->t || ((x == head) && (x->next == NULL)));
+
+        if ( x->n == n && x->t == t )
+        {
+            /* Pull-to-front if 'x' isn't already the head item */
+            if ( unlikely(x != head) )
+            {
+                if ( unlikely(d->arch.shadow.hash_walking != 0) )
+                    /* Can't reorder: someone is walking the hash chains */
+                    return x->smfn;
+                else 
+                {
+                    /* Delete 'x' from list and reinsert after head. */
+                    p->next = x->next;
+                    x->next = head->next;
+                    head->next = x;
+                    
+                    /* Swap 'x' contents with head contents. */
+                    SWAP(head->n, x->n);
+                    SWAP(head->t, x->t);
+                    SWAP(head->smfn, x->smfn);
+                }
+            }
+            else
+            {
+                perfc_incrc(shadow_hash_lookup_head);
+            }
+            return head->smfn;
+        }
+
+        p = x;
+        x = x->next;
+    }
+    while ( x != NULL );
+
+    perfc_incrc(shadow_hash_lookup_miss);
+    return _mfn(INVALID_MFN);
+}
+
+void shadow_hash_insert(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn)
+/* Put a mapping (n,t)->smfn into the hash table */
+{
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *x, *head;
+    key_t key;
+    
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+    ASSERT(t);
+
+    sh_hash_audit(d);
+
+    perfc_incrc(shadow_hash_inserts);
+    key = sh_hash(n, t);
+
+    head = &d->arch.shadow.hash_table[key % SHADOW_HASH_BUCKETS];
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+
+    /* If the bucket is empty then insert the new page as the head item. */
+    if ( head->t == 0 )
+    {
+        head->n = n;
+        head->t = t;
+        head->smfn = smfn;
+        ASSERT(head->next == NULL);
+    }
+    else 
+    {
+        /* Insert a new entry directly after the head item. */
+        x = sh_alloc_hash_entry(d);
+        x->n = n; 
+        x->t = t;
+        x->smfn = smfn;
+        x->next = head->next;
+        head->next = x;
+    }
+    
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+}
+
+void shadow_hash_delete(struct vcpu *v, unsigned long n, u8 t, mfn_t smfn)
+/* Excise the mapping (n,t)->smfn from the hash table */
+{
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *p, *x, *head;
+    key_t key;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_table);
+    ASSERT(t);
+
+    sh_hash_audit(d);
+
+    perfc_incrc(shadow_hash_deletes);
+    key = sh_hash(n, t);
+
+    head = &d->arch.shadow.hash_table[key % SHADOW_HASH_BUCKETS];
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+
+    /* Match on head item? */
+    if ( head->n == n && head->t == t )
+    {
+        if ( (x = head->next) != NULL )
+        {
+            /* Overwrite head with contents of following node. */
+            head->n = x->n;
+            head->t = x->t;
+            head->smfn = x->smfn;
+
+            /* Delete following node. */
+            head->next = x->next;
+            sh_free_hash_entry(d, x);
+        }
+        else
+        {
+            /* This bucket is now empty. Initialise the head node. */
+            head->t = 0;
+        }
+    }
+    else 
+    {
+        /* Not at the head; need to walk the chain */
+        p = head;
+        x = head->next; 
+        
+        while(1)
+        {
+            ASSERT(x); /* We can't have hit the end, since our target is
+                        * still in the chain somewhere... */
+            if ( x->n == n && x->t == t )
+            {
+                /* Delete matching node. */
+                p->next = x->next;
+                sh_free_hash_entry(d, x);
+                break;
+            }
+            p = x;
+            x = x->next;
+        }
+    }
+
+    sh_hash_audit_bucket(d, key % SHADOW_HASH_BUCKETS);
+}
+
+typedef int (*hash_callback_t)(struct vcpu *v, mfn_t smfn, mfn_t other_mfn);
+
+static void hash_foreach(struct vcpu *v, 
+                         unsigned int callback_mask, 
+                         hash_callback_t callbacks[], 
+                         mfn_t callback_mfn)
+/* Walk the hash table looking at the types of the entries and 
+ * calling the appropriate callback function for each entry. 
+ * The mask determines which shadow types we call back for, and the array
+ * of callbacks tells us which function to call.
+ * Any callback may return non-zero to let us skip the rest of the scan. 
+ *
+ * WARNING: Callbacks MUST NOT add or remove hash entries unless they 
+ * then return non-zero to terminate the scan. */
+{
+    int i, done = 0;
+    struct domain *d = v->domain;
+    struct shadow_hash_entry *x;
+
+    /* Say we're here, to stop hash-lookups reordering the chains */
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d->arch.shadow.hash_walking == 0);
+    d->arch.shadow.hash_walking = 1;
+
+    callback_mask &= ~1; /* Never attempt to call back on empty buckets */
+    for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 
+    {
+        /* WARNING: This is not safe against changes to the hash table.
+         * The callback *must* return non-zero if it has inserted or
+         * deleted anything from the hash (lookups are OK, though). */
+        for ( x = &d->arch.shadow.hash_table[i]; x; x = x->next )
+        {
+            if ( callback_mask & (1 << x->t) ) 
+            {
+                ASSERT(x->t <= 15);
+                ASSERT(callbacks[x->t] != NULL);
+                if ( (done = callbacks[x->t](v, x->smfn, callback_mfn)) != 0 )
+                    break;
+            }
+        }
+        if ( done ) break; 
+    }
+    d->arch.shadow.hash_walking = 0; 
+}
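+/* Example of building a callback mask (the pattern used by
+ * shadow_remove_write_access() later in this file): each wanted shadow
+ * type contributes one bit,
+ *     1 << (PGC_SH_l1_32_shadow >> PGC_SH_type_shift) | ...
+ * and bit 0 (PGC_SH_none) is always stripped above so that empty buckets
+ * are never dispatched. */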
+
+
+/**************************************************************************/
+/* Destroy a shadow page: simple dispatcher to call the per-type destructor
+ * which will decrement refcounts appropriately and return memory to the 
+ * free pool. */
+
+void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
+{
+    struct page_info *pg = mfn_to_page(smfn);
+    u32 t = pg->count_info & PGC_SH_type_mask;
+
+    SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
+
+    /* Double-check, if we can, that the shadowed page belongs to this
+     * domain, (by following the back-pointer). */
+    ASSERT(t == PGC_SH_fl1_32_shadow  ||  
+           t == PGC_SH_fl1_pae_shadow ||  
+           t == PGC_SH_fl1_64_shadow  || 
+           t == PGC_SH_monitor_table  || 
+           (page_get_owner(mfn_to_page(_mfn(pg->u.inuse.type_info))) 
+            == v->domain)); 
+
+    /* The down-shifts here are so that the switch statement is on nice
+     * small numbers that the compiler will enjoy */
+    switch ( t >> PGC_SH_type_shift )
+    {
+#if CONFIG_PAGING_LEVELS == 2
+    case PGC_SH_l1_32_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 2, 2)(v, smfn); 
+        break;
+    case PGC_SH_l2_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 2, 2)(v, smfn);
+        break;
+#else /* PAE or 64bit */
+    case PGC_SH_l1_32_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 2)(v, smfn);
+        break;
+    case PGC_SH_l2_32_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 2)(v, smfn);
+        break;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3
+    case PGC_SH_l1_pae_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 3, 3)(v, smfn);
+        break;
+    case PGC_SH_l2_pae_shadow >> PGC_SH_type_shift:
+    case PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 3, 3)(v, smfn);
+        break;
+    case PGC_SH_l3_pae_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 3, 3)(v, smfn);
+        break;
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
+    case PGC_SH_l1_64_shadow >> PGC_SH_type_shift:
+    case PGC_SH_fl1_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l1_shadow, 4, 4)(v, smfn);
+        break;
+    case PGC_SH_l2_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l2_shadow, 4, 4)(v, smfn);
+        break;
+    case PGC_SH_l3_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l3_shadow, 4, 4)(v, smfn);
+        break;
+    case PGC_SH_l4_64_shadow >> PGC_SH_type_shift:
+        SHADOW_INTERNAL_NAME(sh_destroy_l4_shadow, 4, 4)(v, smfn);
+        break;
+#endif
+    default:
+        SHADOW_PRINTK("tried to destroy shadow of bad type %08lx\n", 
+                       (unsigned long)t);
+        BUG();
+    }    
+}
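+
+/* The down-shift trick above in one line: the shadow type is a bitfield
+ * packed into count_info, so masking and shifting yields a small integer
+ * (0..15) that switch statements and the callback tables can index
+ * directly.  The helper below is a standalone illustration with
+ * placeholder parameters, not the Xen definition of the field. */
+static inline unsigned int demo_shadow_type(unsigned long count_info,
+                                            unsigned long type_mask,
+                                            unsigned int type_shift)
+{
+    return (unsigned int)((count_info & type_mask) >> type_shift);
+}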
+
+/**************************************************************************/
+/* Remove all writeable mappings of a guest frame from the shadow tables 
+ * Returns non-zero if we need to flush TLBs. 
+ * level and fault_addr describe how we found this to be a pagetable;
+ * level==0 means we have some other reason for revoking write access. */
+
+int shadow_remove_write_access(struct vcpu *v, mfn_t gmfn, 
+                                unsigned int level,
+                                unsigned long fault_addr)
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,2,2), /* fl1_32  */
+#else 
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,2), /* fl1_32  */
+#endif
+        NULL, /* l2_32   */
+#if CONFIG_PAGING_LEVELS >= 3
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,3,3), /* fl1_pae */
+#else 
+        NULL, /* l1_pae  */
+        NULL, /* fl1_pae */
+#endif
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+        NULL, /* l3_pae  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_write_access,4,4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
+
+    static unsigned int callback_mask = 
+          1 << (PGC_SH_l1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_64_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift)
+        ;
+    struct page_info *pg = mfn_to_page(gmfn);
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    /* Only remove writable mappings if we are doing shadow refcounts.
+     * In guest refcounting, we trust Xen to already be restricting
+     * all the writes to the guest page tables, so we do not need to
+     * do more. */
+    if ( !shadow_mode_refcounts(v->domain) )
+        return 0;
+
+    /* Early exit if it's already a pagetable, or otherwise not writeable */
+    if ( sh_mfn_is_a_page_table(gmfn) 
+         || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
+        return 0;
+
+    perfc_incrc(shadow_writeable);
+
+    /* If this isn't a "normal" writeable page, the domain is trying to 
+     * put pagetables in special memory of some kind.  We can't allow that. */
+    if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_writable_page )
+    {
+        SHADOW_ERROR("can't remove write access to mfn %lx, type_info is %" 
+                      PRtype_info "\n",
+                      mfn_x(gmfn), mfn_to_page(gmfn)->u.inuse.type_info);
+        domain_crash(v->domain);
+    }
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+    if ( v == current && level != 0 )
+    {
+        unsigned long gfn;
+        /* Heuristic: there is likely to be only one writeable mapping,
+         * and that mapping is likely to be in the current pagetable,
+         * either in the guest's linear map (linux, windows) or in a
+         * magic slot used to map high memory regions (linux HIGHPTE) */
+
+#define GUESS(_a, _h) do {                                              \
+            if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) )          \
+                perfc_incrc(shadow_writeable_h_ ## _h);                \
+            if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )        \
+                return 1;                                               \
+        } while (0)
+
+        
+        /* Linux lowmem: first 1GB is mapped 1-to-1 above 0xC0000000 */
+        if ( v == current 
+             && (gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x40000000 )
+            GUESS(0xC0000000 + (gfn << PAGE_SHIFT), 4);
+
+        if ( v->arch.shadow.mode->guest_levels == 2 )
+        {
+            if ( level == 1 )
+                /* 32bit non-PAE w2k3: linear map at 0xC0000000 */
+                GUESS(0xC0000000UL + (fault_addr >> 10), 1);
+        }
+#if CONFIG_PAGING_LEVELS >= 3
+        else if ( v->arch.shadow.mode->guest_levels == 3 )
+        {
+            /* 32bit PAE w2k3: linear map at 0xC0000000 */
+            switch ( level ) 
+            {
+            case 1: GUESS(0xC0000000UL + (fault_addr >> 9), 2); break;
+            case 2: GUESS(0xC0600000UL + (fault_addr >> 18), 2); break;
+            }
+        }
+#if CONFIG_PAGING_LEVELS >= 4
+        else if ( v->arch.shadow.mode->guest_levels == 4 )
+        {
+            /* 64bit w2k3: linear map at 0x0000070000000000 */
+            switch ( level ) 
+            {
+            case 1: GUESS(0x70000000000UL + (fault_addr >> 9), 3); break;
+            case 2: GUESS(0x70380000000UL + (fault_addr >> 18), 3); break;
+            case 3: GUESS(0x70381C00000UL + (fault_addr >> 27), 3); break;
+            }
+        }
+#endif /* CONFIG_PAGING_LEVELS >= 4 */
+#endif /* CONFIG_PAGING_LEVELS >= 3 */
+
+#undef GUESS
+
+    }
+#endif
+    
+    /* Brute-force search of all the shadows, by walking the hash */
+    perfc_incrc(shadow_writeable_bf);
+    hash_foreach(v, callback_mask, callbacks, gmfn);
+
+    /* If that didn't catch the mapping, something is very wrong */
+    if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) != 0 )
+    {
+        SHADOW_ERROR("can't find all writeable mappings of mfn %lx: "
+                      "%lu left\n", mfn_x(gmfn),
+                      (mfn_to_page(gmfn)->u.inuse.type_info&PGT_count_mask));
+        domain_crash(v->domain);
+    }
+    
+    /* We killed at least one writeable mapping, so must flush TLBs. */
+    return 1;
+}
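+
+/* Where the GUESS() shifts above come from: in a linear (self) mapping the
+ * pagetable entry for virtual address va lives at base + (va >> PAGE_SHIFT)
+ * * entry_size.  With 4KB pages that is base + (va >> 12) * 4 for 4-byte
+ * entries, which the code writes as the single shift va >> 10 (the low bits
+ * it keeps only move the pointer within the same 4-byte entry); likewise
+ * va >> 9 for 8-byte PAE/64-bit entries, and >> 18 / >> 27 one and two
+ * levels up.  A standalone sketch with a hard-coded 4KB page size: */
+static inline unsigned long demo_linear_entry_addr(unsigned long map_base,
+                                                   unsigned long va,
+                                                   unsigned int log2_entry_size)
+{
+    /* e.g. log2_entry_size == 2 for 4-byte entries, 3 for 8-byte entries */
+    return map_base + ((va >> 12) << log2_entry_size);
+}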
+
+
+
+/**************************************************************************/
+/* Remove all mappings of a guest frame from the shadow tables.
+ * Returns non-zero if we need to flush TLBs. */
+
+int shadow_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
+{
+    struct page_info *page = mfn_to_page(gmfn);
+    int expected_count;
+
+    /* Dispatch table for getting per-type functions */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,2,2), /* fl1_32  */
+#else 
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,2), /* fl1_32  */
+#endif
+        NULL, /* l2_32   */
+#if CONFIG_PAGING_LEVELS >= 3
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,3,3), /* fl1_pae */
+#else 
+        NULL, /* l1_pae  */
+        NULL, /* fl1_pae */
+#endif
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+        NULL, /* l3_pae  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_all_mappings,4,4), /* fl1_64  */
+#else
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#endif
+        NULL, /* l2_64   */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
+
+    static unsigned int callback_mask = 
+          1 << (PGC_SH_l1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_32_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_pae_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_l1_64_shadow >> PGC_SH_type_shift)
+        | 1 << (PGC_SH_fl1_64_shadow >> PGC_SH_type_shift)
+        ;
+
+    perfc_incrc(shadow_mappings);
+    if ( (page->count_info & PGC_count_mask) == 0 )
+        return 0;
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    /* XXX TODO: 
+     * Heuristics for finding the (probably) single mapping of this gmfn */
+    
+    /* Brute-force search of all the shadows, by walking the hash */
+    perfc_incrc(shadow_mappings_bf);
+    hash_foreach(v, callback_mask, callbacks, gmfn);
+
+    /* If that didn't catch the mapping, something is very wrong */
+    expected_count = (page->count_info & PGC_allocated) ? 1 : 0;
+    if ( (page->count_info & PGC_count_mask) != expected_count )
+    {
+        /* Don't complain if we're in HVM and there's one extra mapping: 
+         * The qemu helper process has an untyped mapping of this dom's RAM */
+        if ( !(shadow_mode_external(v->domain)
+               && (page->count_info & PGC_count_mask) <= 2
+               && (page->u.inuse.type_info & PGT_count_mask) == 0) )
+        {
+            SHADOW_ERROR("can't find all mappings of mfn %lx: "
+                          "c=%08x t=%08lx\n", mfn_x(gmfn), 
+                          page->count_info, page->u.inuse.type_info);
+        }
+    }
+
+    /* We killed at least one mapping, so must flush TLBs. */
+    return 1;
+}
+
+
+/**************************************************************************/
+/* Remove all shadows of a guest frame from the shadow tables */
+
+static int sh_remove_shadow_via_pointer(struct vcpu *v, mfn_t smfn)
+/* Follow this shadow's up-pointer, if it has one, and remove the reference
+ * found there.  Returns 1 if that was the only reference to this shadow */
+{
+    struct page_info *pg = mfn_to_page(smfn);
+    mfn_t pmfn;
+    void *vaddr;
+    int rc;
+
+    ASSERT((pg->count_info & PGC_SH_type_mask) > 0);
+    ASSERT((pg->count_info & PGC_SH_type_mask) < PGC_SH_max_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l2_32_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l3_pae_shadow);
+    ASSERT((pg->count_info & PGC_SH_type_mask) != PGC_SH_l4_64_shadow);
+    
+    if (pg->up == 0) return 0;
+    pmfn = _mfn(pg->up >> PAGE_SHIFT);
+    ASSERT(valid_mfn(pmfn));
+    vaddr = sh_map_domain_page(pmfn);
+    ASSERT(vaddr);
+    vaddr += pg->up & (PAGE_SIZE-1);
+    ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
+    
+    /* Is this the only reference to this shadow? */
+    rc = ((pg->count_info & PGC_SH_count_mask) == 1) ? 1 : 0;
+
+    /* Blank the offending entry */
+    switch ((pg->count_info & PGC_SH_type_mask)) 
+    {
+    case PGC_SH_l1_32_shadow:
+    case PGC_SH_l2_32_shadow:
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,2,2)(v, vaddr, pmfn);
+#else
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,2)(v, vaddr, pmfn);
+#endif
+        break;
+#if CONFIG_PAGING_LEVELS >=3
+    case PGC_SH_l1_pae_shadow:
+    case PGC_SH_l2_pae_shadow:
+    case PGC_SH_l2h_pae_shadow:
+    case PGC_SH_l3_pae_shadow:
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,3,3)(v, vaddr, pmfn);
+        break;
+#if CONFIG_PAGING_LEVELS >= 4
+    case PGC_SH_l1_64_shadow:
+    case PGC_SH_l2_64_shadow:
+    case PGC_SH_l3_64_shadow:
+    case PGC_SH_l4_64_shadow:
+        SHADOW_INTERNAL_NAME(sh_clear_shadow_entry,4,4)(v, vaddr, pmfn);
+        break;
+#endif
+#endif
+    default: BUG(); /* Some weird unknown shadow type */
+    }
+    
+    sh_unmap_domain_page(vaddr);
+    if ( rc )
+        perfc_incrc(shadow_up_pointer);
+    else
+        perfc_incrc(shadow_unshadow_bf);
+
+    return rc;
+}
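+
+/* pg->up, as used above, packs two things into one word: the MFN of the
+ * single parent shadow that references this shadow (upper bits) and the
+ * byte offset of the referencing entry within that page (low PAGE_SHIFT
+ * bits).  A standalone pack/unpack sketch with a hard-coded 4KB page: */
+static inline unsigned long demo_pack_up(unsigned long parent_mfn,
+                                         unsigned long byte_offset)
+{
+    return (parent_mfn << 12) | (byte_offset & 0xfffUL);
+}
+static inline unsigned long demo_up_mfn(unsigned long up)    { return up >> 12; }
+static inline unsigned long demo_up_offset(unsigned long up) { return up & 0xfffUL; }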
+
+void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int all)
+/* Remove the shadows of this guest page.  
+ * If all != 0, find all shadows, if necessary by walking the tables.
+ * Otherwise, just try the (much faster) heuristics, which will remove 
+ * at most one reference to each shadow of the page. */
+{
+    struct page_info *pg;
+    mfn_t smfn;
+    u32 sh_flags;
+    unsigned char t;
+
+    /* Dispatch table for getting per-type functions: each level must
+     * be called with the function to remove a lower-level shadow. */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+        NULL, /* l1_32   */
+        NULL, /* fl1_32  */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,2,2), /* l2_32   */
+#else 
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,2), /* l2_32   */
+#endif
+        NULL, /* l1_pae  */
+        NULL, /* fl1_pae */
+#if CONFIG_PAGING_LEVELS >= 3
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2_pae  */
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,3,3), /* l2h_pae */
+        SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,3,3), /* l3_pae  */
+#else 
+        NULL, /* l2_pae  */
+        NULL, /* l2h_pae */
+        NULL, /* l3_pae  */
+#endif
+        NULL, /* l1_64   */
+        NULL, /* fl1_64  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_remove_l1_shadow,4,4), /* l2_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_l2_shadow,4,4), /* l3_64   */
+        SHADOW_INTERNAL_NAME(sh_remove_l3_shadow,4,4), /* l4_64   */
+#else
+        NULL, /* l2_64   */
+        NULL, /* l3_64   */
+        NULL, /* l4_64   */
+#endif
+        NULL, /* p2m     */
+        NULL  /* unused  */
+    };
+
+    /* Another lookup table, for choosing which mask to use */
+    static unsigned int masks[16] = {
+        0, /* none    */
+        1 << (PGC_SH_l2_32_shadow >> PGC_SH_type_shift), /* l1_32   */
+        0, /* fl1_32  */
+        0, /* l2_32   */
+        ((1 << (PGC_SH_l2h_pae_shadow >> PGC_SH_type_shift))
+         | (1 << (PGC_SH_l2_pae_shadow >> PGC_SH_type_shift))), /* l1_pae  */
+        0, /* fl1_pae */
+        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2_pae  */
+        1 << (PGC_SH_l3_pae_shadow >> PGC_SH_type_shift), /* l2h_pae  */
+        0, /* l3_pae  */
+        1 << (PGC_SH_l2_64_shadow >> PGC_SH_type_shift), /* l1_64   */
+        0, /* fl1_64  */
+        1 << (PGC_SH_l3_64_shadow >> PGC_SH_type_shift), /* l2_64   */
+        1 << (PGC_SH_l4_64_shadow >> PGC_SH_type_shift), /* l3_64   */
+        0, /* l4_64   */
+        0, /* p2m     */
+        0  /* unused  */
+    };
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    pg = mfn_to_page(gmfn);
+
+    /* Bail out now if the page is not shadowed */
+    if ( (pg->count_info & PGC_page_table) == 0 )
+        return;
+
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
+                   v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
+
+    /* Search for this shadow in all appropriate shadows */
+    perfc_incrc(shadow_unshadow);
+    sh_flags = pg->shadow_flags;
+
+    /* Lower-level shadows need to be excised from upper-level shadows.
+     * This call to hash_foreach() looks dangerous but is in fact OK: each
+     * call will remove at most one shadow, and terminate immediately when
+     * it does remove it, so we never walk the hash after doing a deletion.  */
+#define DO_UNSHADOW(_type) do {                                 \
+    t = (_type) >> PGC_SH_type_shift;                          \
+    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);              \
+    if ( !sh_remove_shadow_via_pointer(v, smfn) && all )       \
+        hash_foreach(v, masks[t], callbacks, smfn);             \
+} while (0)
+
+    /* Top-level shadows need to be unpinned */
+#define DO_UNPIN(_type) do {                                             \
+    t = (_type) >> PGC_SH_type_shift;                                   \
+    smfn = shadow_hash_lookup(v, mfn_x(gmfn), t);                       \
+    if ( mfn_to_page(smfn)->count_info & PGC_SH_pinned )                \
+        sh_unpin(v, smfn);                                              \
+    if ( (_type) == PGC_SH_l3_pae_shadow )                              \
+        SHADOW_INTERNAL_NAME(sh_unpin_all_l3_subshadows,3,3)(v, smfn); \
+} while (0)
+
+    if ( sh_flags & SHF_L1_32 )   DO_UNSHADOW(PGC_SH_l1_32_shadow);
+    if ( sh_flags & SHF_L2_32 )   DO_UNPIN(PGC_SH_l2_32_shadow);
+#if CONFIG_PAGING_LEVELS >= 3
+    if ( sh_flags & SHF_L1_PAE )  DO_UNSHADOW(PGC_SH_l1_pae_shadow);
+    if ( sh_flags & SHF_L2_PAE )  DO_UNSHADOW(PGC_SH_l2_pae_shadow);
+    if ( sh_flags & SHF_L2H_PAE ) DO_UNSHADOW(PGC_SH_l2h_pae_shadow);
+    if ( sh_flags & SHF_L3_PAE )  DO_UNPIN(PGC_SH_l3_pae_shadow);
+#if CONFIG_PAGING_LEVELS >= 4
+    if ( sh_flags & SHF_L1_64 )   DO_UNSHADOW(PGC_SH_l1_64_shadow);
+    if ( sh_flags & SHF_L2_64 )   DO_UNSHADOW(PGC_SH_l2_64_shadow);
+    if ( sh_flags & SHF_L3_64 )   DO_UNSHADOW(PGC_SH_l3_64_shadow);
+    if ( sh_flags & SHF_L4_64 )   DO_UNPIN(PGC_SH_l4_64_shadow);
+#endif
+#endif
+
+#undef DO_UNSHADOW
+#undef DO_UNPIN
+
+
+#if CONFIG_PAGING_LEVELS > 2
+    /* We may have caused some PAE l3 entries to change: need to 
+     * fix up the copies of them in various places */
+    if ( sh_flags & (SHF_L2_PAE|SHF_L2H_PAE) )
+        sh_pae_recopy(v->domain);
+#endif
+
+    /* If that didn't catch the shadows, something is wrong */
+    if ( all && (pg->count_info & PGC_page_table) )
+    {
+        SHADOW_ERROR("can't find all shadows of mfn %05lx 
(shadow_flags=%08x)\n",
+                      mfn_x(gmfn), pg->shadow_flags);
+        domain_crash(v->domain);
+    }
+}
+
+void
+shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn)
+/* Even harsher: this is an HVM page that we think is no longer a pagetable.
+ * Unshadow it, and recursively unshadow pages that reference it. */
+{
+    shadow_remove_all_shadows(v, gmfn);
+    /* XXX TODO:
+     * Rework this hashtable walker to return a linked-list of all 
+     * the shadows it modified, then do breadth-first recursion 
+     * to find the way up to higher-level tables and unshadow them too. 
+     *
+     * The current code (just tearing down each page's shadows as we
+     * detect that it is not a pagetable) is correct, but very slow. 
+     * It means extra emulated writes and slows down removal of mappings. */
+}
+
+/**************************************************************************/
+
+void sh_update_paging_modes(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    struct shadow_paging_mode *old_mode = v->arch.shadow.mode;
+    mfn_t old_guest_table;
+
+    ASSERT(shadow_lock_is_acquired(d));
+
+    // Valid transitions handled by this function:
+    // - For PV guests:
+    //     - after a shadow mode has been changed
+    // - For HVM guests:
+    //     - after a shadow mode has been changed
+    //     - changes in CR0.PG, CR4.PAE, CR4.PSE, or CR4.PGE
+    //
+
+    // Avoid determining the current shadow mode for uninitialized CPUs, as
+    // we can not yet determine whether it is an HVM or PV domain.
+    //
+    if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+    {
+        printk("%s: postponing determination of shadow mode\n", __func__);
+        return;
+    }
+
+    // First, tear down any old shadow tables held by this vcpu.
+    //
+    shadow_detach_old_tables(v);
+
+    if ( !hvm_guest(v) )
+    {
+        ///
+        /// PV guest
+        ///
+#if CONFIG_PAGING_LEVELS == 4
+        if ( pv_32bit_guest(v) )
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,3);
+        else
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4);
+#elif CONFIG_PAGING_LEVELS == 3
+        v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
+#elif CONFIG_PAGING_LEVELS == 2
+        v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
+#else
+#error unexpected paging mode
+#endif
+    }
+    else
+    {
+        ///
+        /// HVM guest
+        ///
+        ASSERT(shadow_mode_translate(d));
+        ASSERT(shadow_mode_external(d));
+
+        v->arch.shadow.hvm_paging_enabled = !!hvm_paging_enabled(v);
+        if ( !v->arch.shadow.hvm_paging_enabled )
+        {
+            
+            /* Set v->arch.guest_table to use the p2m map, and choose
+             * the appropriate shadow mode */
+            old_guest_table = pagetable_get_mfn(v->arch.guest_table);
+#if CONFIG_PAGING_LEVELS == 2
+            v->arch.guest_table =
+                pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2);
+#elif CONFIG_PAGING_LEVELS == 3 
+            v->arch.guest_table =
+                pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
+#else /* CONFIG_PAGING_LEVELS == 4 */
+            { 
+                l4_pgentry_t *l4e; 
+                /* Use the start of the first l3 table as a PAE l3 */
+                ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+                l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+                ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+                v->arch.guest_table =
+                    pagetable_from_pfn(l4e_get_pfn(l4e[0]));
+                sh_unmap_domain_page(l4e);
+            }
+            v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3);
+#endif
+            /* Fix up refcounts on guest_table */
+            get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d);
+            if ( mfn_x(old_guest_table) != 0 )
+                put_page(mfn_to_page(old_guest_table));
+        }
+        else
+        {
+#ifdef __x86_64__
+            if ( hvm_long_mode_enabled(v) )
+            {
+                // long mode guest...
+                v->arch.shadow.mode =
+                    &SHADOW_INTERNAL_NAME(sh_paging_mode, 4, 4);
+            }
+            else
+#endif
+                if ( hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PAE )
+                {
+#if CONFIG_PAGING_LEVELS >= 3
+                    // 32-bit PAE mode guest...
+                    v->arch.shadow.mode =
+                        &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 3);
+#else
+                    SHADOW_ERROR("PAE not supported in 32-bit Xen\n");
+                    domain_crash(d);
+                    return;
+#endif
+                }
+                else
+                {
+                    // 32-bit 2 level guest...
+#if CONFIG_PAGING_LEVELS >= 3
+                    v->arch.shadow.mode =
+                        &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2);
+#else
+                    v->arch.shadow.mode =
+                        &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2);
+#endif
+                }
+        }
+
+        if ( pagetable_get_pfn(v->arch.monitor_table) == 0 )
+        {
+            mfn_t mmfn = shadow_make_monitor_table(v);
+            v->arch.monitor_table = pagetable_from_mfn(mmfn);
+            v->arch.monitor_vtable = sh_map_domain_page(mmfn);
+        } 
+
+        if ( v->arch.shadow.mode != old_mode )
+        {
+            SHADOW_PRINTK("new paging mode: d=%u v=%u g=%u s=%u "
+                           "(was g=%u s=%u)\n",
+                           d->domain_id, v->vcpu_id, 
+                           v->arch.shadow.mode->guest_levels,
+                           v->arch.shadow.mode->shadow_levels,
+                           old_mode ? old_mode->guest_levels : 0,
+                           old_mode ? old_mode->shadow_levels : 0);
+            if ( old_mode &&
+                 (v->arch.shadow.mode->shadow_levels !=
+                  old_mode->shadow_levels) )
+            {
+                /* Need to make a new monitor table for the new mode */
+                mfn_t new_mfn, old_mfn;
+
+                if ( v != current ) 
+                {
+                    SHADOW_ERROR("Some third party (d=%u v=%u) is changing "
+                                  "this HVM vcpu's (d=%u v=%u) paging mode!\n",
+                                  current->domain->domain_id, current->vcpu_id,
+                                  v->domain->domain_id, v->vcpu_id);
+                    domain_crash(v->domain);
+                    return;
+                }
+
+                sh_unmap_domain_page(v->arch.monitor_vtable);
+                old_mfn = pagetable_get_mfn(v->arch.monitor_table);
+                v->arch.monitor_table = pagetable_null();
+                new_mfn = v->arch.shadow.mode->make_monitor_table(v);
+                v->arch.monitor_table = pagetable_from_mfn(new_mfn);
+                v->arch.monitor_vtable = sh_map_domain_page(new_mfn);
+                SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n",
+                               mfn_x(new_mfn));
+
+                /* Don't be running on the old monitor table when we 
+                 * pull it down!  Switch CR3, and warn the HVM code that
+                 * its host cr3 has changed. */
+                make_cr3(v, mfn_x(new_mfn));
+                write_ptbase(v);
+                hvm_update_host_cr3(v);
+                old_mode->destroy_monitor_table(v, old_mfn);
+            }
+        }
+
+        // XXX -- Need to deal with changes in CR4.PSE and CR4.PGE.
+        //        These are HARD: think about the case where two CPUs have
+        //        different values for CR4.PSE and CR4.PGE at the same time.
+        //        This *does* happen, at least for CR4.PGE...
+    }
+
+    v->arch.shadow.mode->update_cr3(v);
+}
+
+/**************************************************************************/
+/* Turning on and off shadow features */
+
+static void sh_new_mode(struct domain *d, u32 new_mode)
+/* Inform all the vcpus that the shadow mode has been changed */
+{
+    struct vcpu *v;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(d != current->domain);
+    d->arch.shadow.mode = new_mode;
+    if ( new_mode & SHM2_translate ) 
+        shadow_audit_p2m(d);
+    for_each_vcpu(d, v)
+        sh_update_paging_modes(v);
+}
+
+static int shadow_enable(struct domain *d, u32 mode)
+/* Turn on "permanent" shadow features: external, translate, refcount.
+ * Can only be called once on a domain, and these features cannot be
+ * disabled. 
+ * Returns 0 for success, -errno for failure. */
+{    
+    unsigned int old_pages;
+    int rv = 0;
+
+    mode |= SHM2_enable;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    /* Sanity check the arguments */
+    if ( (d == current->domain) ||
+         shadow_mode_enabled(d) ||
+         ((mode & SHM2_external) && !(mode & SHM2_translate)) )
+    {
+        rv = -EINVAL;
+        goto out;
+    }
+
+    // XXX -- eventually would like to require that all memory be allocated
+    // *after* shadow_enabled() is called...  So here, we would test to make
+    // sure that d->page_list is empty.
+#if 0
+    spin_lock(&d->page_alloc_lock);
+    if ( !list_empty(&d->page_list) )
+    {
+        spin_unlock(&d->page_alloc_lock);
+        rv = -EINVAL;
+        goto out;
+    }
+    spin_unlock(&d->page_alloc_lock);
+#endif
+
+    /* Init the shadow memory allocation if the user hasn't done so */
+    old_pages = d->arch.shadow.total_pages;
+    if ( old_pages == 0 )
+        if ( set_sh_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */
+        {
+            set_sh_allocation(d, 0, NULL);
+            rv = -ENOMEM;
+            goto out;
+        }
+
+    /* Init the hash table */
+    if ( shadow_hash_alloc(d) != 0 )
+    {
+        set_sh_allocation(d, old_pages, NULL);            
+        rv = -ENOMEM;
+        goto out;
+    }
+
+    /* Init the P2M table */
+    if ( mode & SHM2_translate )
+        if ( !shadow_alloc_p2m_table(d) )
+        {
+            shadow_hash_teardown(d);
+            set_sh_allocation(d, old_pages, NULL);
+            shadow_p2m_teardown(d);
+            rv = -ENOMEM;
+            goto out;
+        }
+
+    /* Update the bits */
+    sh_new_mode(d, mode);
+    shadow_audit_p2m(d);
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+    return rv;
+}
+
+void shadow_teardown(struct domain *d)
+/* Destroy the shadow pagetables of this domain and free its shadow memory.
+ * Should only be called for dying domains. */
+{
+    struct vcpu *v;
+    mfn_t mfn;
+
+    ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
+    ASSERT(d != current->domain);
+
+    if ( !shadow_lock_is_acquired(d) )
+        shadow_lock(d); /* Keep various asserts happy */
+
+    if ( shadow_mode_enabled(d) )
+    {
+        /* Release the shadow and monitor tables held by each vcpu */
+        for_each_vcpu(d, v)
+        {
+            shadow_detach_old_tables(v);
+            if ( shadow_mode_external(d) )
+            {
+                mfn = pagetable_get_mfn(v->arch.monitor_table);
+                if ( valid_mfn(mfn) && (mfn_x(mfn) != 0) )
+                    shadow_destroy_monitor_table(v, mfn);
+                v->arch.monitor_table = pagetable_null();
+            }
+        }
+    }
+
+    if ( d->arch.shadow.total_pages != 0 )
+    {
+        SHADOW_PRINTK("teardown of domain %u starts."
+                       "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->domain_id,
+                       d->arch.shadow.total_pages, 
+                       d->arch.shadow.free_pages, 
+                       d->arch.shadow.p2m_pages);
+        /* Destroy all the shadows and release memory to domheap */
+        set_sh_allocation(d, 0, NULL);
+        /* Release the hash table back to xenheap */
+        if (d->arch.shadow.hash_table) 
+            shadow_hash_teardown(d);
+        /* Release the log-dirty bitmap of dirtied pages */
+        sh_free_log_dirty_bitmap(d);
+        /* Should not have any more memory held */
+        SHADOW_PRINTK("teardown done."
+                       "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->arch.shadow.total_pages, 
+                       d->arch.shadow.free_pages, 
+                       d->arch.shadow.p2m_pages);
+        ASSERT(d->arch.shadow.total_pages == 0);
+    }
+
+    /* We leave the "permanent" shadow modes enabled, but clear the
+     * log-dirty mode bit.  We don't want any more mark_dirty()
+     * calls now that we've torn down the bitmap */
+    d->arch.shadow.mode &= ~SHM2_log_dirty;
+
+    shadow_unlock(d);
+}
+
+void shadow_final_teardown(struct domain *d)
+/* Called by arch_domain_destroy(), when it's safe to pull down the p2m map. */
+{
+
+    SHADOW_PRINTK("dom %u final teardown starts."
+                   "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                   d->domain_id,
+                   d->arch.shadow.total_pages, 
+                   d->arch.shadow.free_pages, 
+                   d->arch.shadow.p2m_pages);
+
+    /* Double-check that the domain didn't have any shadow memory.  
+     * It is possible for a domain that never got domain_kill()ed
+     * to get here with its shadow allocation intact. */
+    if ( d->arch.shadow.total_pages != 0 )
+        shadow_teardown(d);
+
+    /* It is now safe to pull down the p2m map. */
+    if ( d->arch.shadow.p2m_pages != 0 )
+        shadow_p2m_teardown(d);
+
+    SHADOW_PRINTK("dom %u final teardown done."
+                   "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                   d->domain_id,
+                   d->arch.shadow.total_pages, 
+                   d->arch.shadow.free_pages, 
+                   d->arch.shadow.p2m_pages);
+}
+
+static int shadow_one_bit_enable(struct domain *d, u32 mode)
+/* Turn on a single shadow mode feature */
+{
+    ASSERT(shadow_lock_is_acquired(d));
+
+    /* Sanity check the call */
+    if ( d == current->domain || (d->arch.shadow.mode & mode) )
+    {
+        return -EINVAL;
+    }
+
+    if ( d->arch.shadow.mode == 0 )
+    {
+        /* Init the shadow memory allocation and the hash table */
+        if ( set_sh_allocation(d, 1, NULL) != 0 
+             || shadow_hash_alloc(d) != 0 )
+        {
+            set_sh_allocation(d, 0, NULL);
+            return -ENOMEM;
+        }
+    }
+
+    /* Update the bits */
+    sh_new_mode(d, d->arch.shadow.mode | mode);
+
+    return 0;
+}
+
+static int shadow_one_bit_disable(struct domain *d, u32 mode) 
+/* Turn off a single shadow mode feature */
+{
+    struct vcpu *v;
+    ASSERT(shadow_lock_is_acquired(d));
+
+    /* Sanity check the call */
+    if ( d == current->domain || !(d->arch.shadow.mode & mode) )
+    {
+        return -EINVAL;
+    }
+
+    /* Update the bits */
+    sh_new_mode(d, d->arch.shadow.mode & ~mode);
+    if ( d->arch.shadow.mode == 0 )
+    {
+        /* Get this domain off shadows */
+        SHADOW_PRINTK("un-shadowing of domain %u starts."
+                       "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->domain_id,
+                       d->arch.shadow.total_pages, 
+                       d->arch.shadow.free_pages, 
+                       d->arch.shadow.p2m_pages);
+        for_each_vcpu(d, v)
+        {
+            shadow_detach_old_tables(v);
+#if CONFIG_PAGING_LEVELS == 4
+            if ( !(v->arch.flags & TF_kernel_mode) )
+                make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user));
+            else
+#endif
+                make_cr3(v, pagetable_get_pfn(v->arch.guest_table));
+
+        }
+
+        /* Pull down the memory allocation */
+        if ( set_sh_allocation(d, 0, NULL) != 0 )
+        {
+            // XXX - How can this occur?
+            //       Seems like a bug to return an error now that we've
+            //       disabled the relevant shadow mode.
+            //
+            return -ENOMEM;
+        }
+        shadow_hash_teardown(d);
+        SHADOW_PRINTK("un-shadowing of domain %u done."
+                       "  Shadow pages total = %u, free = %u, p2m=%u\n",
+                       d->domain_id,
+                       d->arch.shadow.total_pages, 
+                       d->arch.shadow.free_pages, 
+                       d->arch.shadow.p2m_pages);
+    }
+
+    return 0;
+}
+
+/* Enable/disable ops for the "test" and "log-dirty" modes */
+int shadow_test_enable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    if ( shadow_mode_enabled(d) )
+    {
+        SHADOW_ERROR("Don't support enabling test mode"
+                      "on already shadowed doms\n");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = shadow_one_bit_enable(d, SHM2_enable);
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+
+    return ret;
+}
+
+int shadow_test_disable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    shadow_lock(d);
+    ret = shadow_one_bit_disable(d, SHM2_enable);
+    shadow_unlock(d);
+    domain_unpause(d);
+
+    return ret;
+}
+
+static int
+sh_alloc_log_dirty_bitmap(struct domain *d)
+{
+    ASSERT(d->arch.shadow.dirty_bitmap == NULL);
+    d->arch.shadow.dirty_bitmap_size =
+        (d->shared_info->arch.max_pfn + (BITS_PER_LONG - 1)) &
+        ~(BITS_PER_LONG - 1);
+    d->arch.shadow.dirty_bitmap =
+        xmalloc_array(unsigned long,
+                      d->arch.shadow.dirty_bitmap_size / BITS_PER_LONG);
+    if ( d->arch.shadow.dirty_bitmap == NULL )
+    {
+        d->arch.shadow.dirty_bitmap_size = 0;
+        return -ENOMEM;
+    }
+    memset(d->arch.shadow.dirty_bitmap, 0, d->arch.shadow.dirty_bitmap_size/8);
+
+    return 0;
+}
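+
+/* The sizing arithmetic above, spelled out: the bitmap holds one bit per
+ * guest pfn, rounded up to a whole number of longs.  For example, with
+ * max_pfn = 100 and 64-bit longs, (100 + 63) & ~63 = 128 bits = 2 longs
+ * = 16 bytes (hence the size/8 in the memset).  A standalone helper with
+ * the word size passed in rather than taken from BITS_PER_LONG: */
+static inline unsigned long demo_bitmap_bits(unsigned long max_pfn,
+                                             unsigned int bits_per_long)
+{
+    return (max_pfn + (bits_per_long - 1)) & ~((unsigned long)bits_per_long - 1);
+}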
+
+static void
+sh_free_log_dirty_bitmap(struct domain *d)
+{
+    d->arch.shadow.dirty_bitmap_size = 0;
+    if ( d->arch.shadow.dirty_bitmap )
+    {
+        xfree(d->arch.shadow.dirty_bitmap);
+        d->arch.shadow.dirty_bitmap = NULL;
+    }
+}
+
+static int shadow_log_dirty_enable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    if ( shadow_mode_log_dirty(d) )
+    {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if ( shadow_mode_enabled(d) )
+    {
+        SHADOW_ERROR("Don't (yet) support enabling log-dirty"
+                      "on already shadowed doms\n");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = sh_alloc_log_dirty_bitmap(d);
+    if ( ret != 0 )
+    {
+        sh_free_log_dirty_bitmap(d);
+        goto out;
+    }
+
+    ret = shadow_one_bit_enable(d, SHM2_log_dirty);
+    if ( ret != 0 )
+        sh_free_log_dirty_bitmap(d);
+
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+    return ret;
+}
+
+static int shadow_log_dirty_disable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    shadow_lock(d);
+    ret = shadow_one_bit_disable(d, SHM2_log_dirty);
+    if ( !shadow_mode_log_dirty(d) )
+        sh_free_log_dirty_bitmap(d);
+    shadow_unlock(d);
+    domain_unpause(d);
+
+    return ret;
+}
+
+/**************************************************************************/
+/* P2M map manipulations */
+
+static void
+sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
+{
+    struct vcpu *v;
+
+    if ( !shadow_mode_translate(d) )
+        return;
+
+    v = current;
+    if ( v->domain != d )
+        v = d->vcpu[0];
+
+
+    SHADOW_DEBUG(P2M, "removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
+
+    ASSERT(mfn_x(sh_gfn_to_mfn(d, gfn)) == mfn);
+    //ASSERT(sh_mfn_to_gfn(d, mfn) == gfn);
+
+    shadow_remove_all_shadows_and_parents(v, _mfn(mfn));
+    if ( shadow_remove_all_mappings(v, _mfn(mfn)) )
+        flush_tlb_mask(d->domain_dirty_cpumask);
+    shadow_set_p2m_entry(d, gfn, _mfn(INVALID_MFN));
+    set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+}
+
+void
+shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn,
+                                  unsigned long mfn)
+{
+    shadow_lock(d);
+    shadow_audit_p2m(d);
+    sh_p2m_remove_page(d, gfn, mfn);
+    shadow_audit_p2m(d);
+    shadow_unlock(d);    
+}
+
+void
+shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn,
+                               unsigned long mfn)
+{
+    struct vcpu *v;
+    unsigned long ogfn;
+    mfn_t omfn;
+
+    if ( !shadow_mode_translate(d) )
+        return;
+
+    v = current;
+    if ( v->domain != d )
+        v = d->vcpu[0];
+
+    shadow_lock(d);
+    shadow_audit_p2m(d);
+
+    SHADOW_DEBUG(P2M, "adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
+
+    omfn = sh_gfn_to_mfn(d, gfn);
+    if ( valid_mfn(omfn) )
+    {
+        /* Get rid of the old mapping, especially any shadows */
+        shadow_remove_all_shadows_and_parents(v, omfn);
+        if ( shadow_remove_all_mappings(v, omfn) )
+            flush_tlb_mask(d->domain_dirty_cpumask);
+        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+    }        
+
+    ogfn = sh_mfn_to_gfn(d, _mfn(mfn));
+    if (
+#ifdef __x86_64__
+        (ogfn != 0x5555555555555555L)
+#else
+        (ogfn != 0x55555555L)
+#endif
+        && (ogfn != INVALID_M2P_ENTRY)
+        && (ogfn != gfn) )
+    {
+        /* This machine frame is already mapped at another physical address */
+        SHADOW_DEBUG(P2M, "aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
+                       mfn, ogfn, gfn);
+        if ( valid_mfn(omfn = sh_gfn_to_mfn(d, ogfn)) ) 
+        {
+            SHADOW_DEBUG(P2M, "old gfn=%#lx -> mfn %#lx\n", 
+                           ogfn , mfn_x(omfn));
+            if ( mfn_x(omfn) == mfn ) 
+                sh_p2m_remove_page(d, ogfn, mfn);
+        }
+    }
+
+    shadow_set_p2m_entry(d, gfn, _mfn(mfn));
+    set_gpfn_from_mfn(mfn, gfn);
+    shadow_audit_p2m(d);
+    shadow_unlock(d);
+}
+
+/**************************************************************************/
+/* Log-dirty mode support */
+
+/* Convert a shadow to log-dirty mode. */
+void shadow_convert_to_log_dirty(struct vcpu *v, mfn_t smfn)
+{
+    BUG();
+}
+
+
+/* Read a domain's log-dirty bitmap and stats.  
+ * If the operation is a CLEAN, clear the bitmap and stats as well. */
+static int shadow_log_dirty_op(
+    struct domain *d, struct xen_domctl_shadow_op *sc)
+{
+    int i, rv = 0, clean = 0;
+
+    domain_pause(d);
+    shadow_lock(d);
+
+    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
+
+    SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n", 
+                  (clean) ? "clean" : "peek",
+                  d->domain_id,
+                  d->arch.shadow.fault_count, 
+                  d->arch.shadow.dirty_count);
+
+    sc->stats.fault_count = d->arch.shadow.fault_count;
+    sc->stats.dirty_count = d->arch.shadow.dirty_count;    
+        
+    if ( clean ) 
+    {
+        struct list_head *l, *t;
+        struct page_info *pg;
+
+        /* Need to revoke write access to the domain's pages again. 
+         * In future, we'll have a less heavy-handed approach to this, 
+         * but for now, we just unshadow everything except Xen. */
+        list_for_each_safe(l, t, &d->arch.shadow.toplevel_shadows)
+        {
+            pg = list_entry(l, struct page_info, list);
+            shadow_unhook_mappings(d->vcpu[0], page_to_mfn(pg));
+        }
+
+        d->arch.shadow.fault_count = 0;
+        d->arch.shadow.dirty_count = 0;
+    }
+
+    if ( guest_handle_is_null(sc->dirty_bitmap) ||
+         (d->arch.shadow.dirty_bitmap == NULL) )
+    {
+        rv = -EINVAL;
+        goto out;
+    }
+ 
+    if ( sc->pages > d->arch.shadow.dirty_bitmap_size )
+        sc->pages = d->arch.shadow.dirty_bitmap_size; 
+
+#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
+    for ( i = 0; i < sc->pages; i += CHUNK )
+    {
+        int bytes = ((((sc->pages - i) > CHUNK) 
+                      ? CHUNK 
+                      : (sc->pages - i)) + 7) / 8;
+     
+        if ( copy_to_guest_offset(
+                 sc->dirty_bitmap, 
+                 i/(8*sizeof(unsigned long)),
+                 d->arch.shadow.dirty_bitmap + (i/(8*sizeof(unsigned long))),
+                 (bytes + sizeof(unsigned long) - 1) / sizeof(unsigned long)) )
+        {
+            rv = -EINVAL;
+            goto out;
+        }
+
+        if ( clean )
+            memset(d->arch.shadow.dirty_bitmap + (i/(8*sizeof(unsigned long))),
+                   0, bytes);
+    }
+#undef CHUNK
+
+ out:
+    shadow_unlock(d);
+    domain_unpause(d);
+    return rv;
+}
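+
+/* The CHUNK arithmetic above in isolation: the bitmap is transferred (and
+ * optionally cleared) 8192 bits (1KB) at a time, so each pass covers
+ * bytes = (min(CHUNK, pages_left) + 7) / 8, rounded up to whole longs,
+ * starting at long-sized offset i / (8 * sizeof(unsigned long)).  A
+ * minimal local version of the same loop over a plain destination buffer
+ * (the guest-handle copy is replaced by direct assignment): */
+static void demo_copy_and_clear(unsigned long *dst, unsigned long *src,
+                                unsigned long pages, int clear)
+{
+    const unsigned long chunk = 8 * 1024;          /* bits per pass */
+    unsigned long i, j;
+
+    for ( i = 0; i < pages; i += chunk )
+    {
+        unsigned long bits = (pages - i > chunk) ? chunk : (pages - i);
+        unsigned long nlongs = (bits + 8 * sizeof(unsigned long) - 1)
+                               / (8 * sizeof(unsigned long));
+        unsigned long off = i / (8 * sizeof(unsigned long));
+        for ( j = 0; j < nlongs; j++ )
+        {
+            dst[off + j] = src[off + j];
+            if ( clear )
+                src[off + j] = 0;
+        }
+    }
+}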
+
+
+/* Mark a page as dirty */
+void sh_do_mark_dirty(struct domain *d, mfn_t gmfn)
+{
+    unsigned long pfn;
+
+    ASSERT(shadow_lock_is_acquired(d));
+    ASSERT(shadow_mode_log_dirty(d));
+
+    if ( !valid_mfn(gmfn) )
+        return;
+
+    ASSERT(d->arch.shadow.dirty_bitmap != NULL);
+
+    /* We /really/ mean PFN here, even for non-translated guests. */
+    pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+
+    /*
+     * Values with the MSB set denote MFNs that aren't really part of the 
+     * domain's pseudo-physical memory map (e.g., the shared info frame).
+     * Nothing to do here...
+     */
+    if ( unlikely(!VALID_M2P(pfn)) )
+        return;
+
+    /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
+    if ( likely(pfn < d->arch.shadow.dirty_bitmap_size) ) 
+    { 
+        if ( !__test_and_set_bit(pfn, d->arch.shadow.dirty_bitmap) )
+        {
+            SHADOW_DEBUG(LOGDIRTY, 
+                          "marked mfn %" SH_PRI_mfn " (pfn=%lx), dom %d\n",
+                          mfn_x(gmfn), pfn, d->domain_id);
+            d->arch.shadow.dirty_count++;
+        }
+    }
+    else
+    {
+        SHADOW_PRINTK("mark_dirty OOR! "
+                       "mfn=%" SH_PRI_mfn " pfn=%lx max=%x (dom %d)\n"
+                       "owner=%d c=%08x t=%" PRtype_info "\n",
+                       mfn_x(gmfn), 
+                       pfn, 
+                       d->arch.shadow.dirty_bitmap_size,
+                       d->domain_id,
+                       (page_get_owner(mfn_to_page(gmfn))
+                        ? page_get_owner(mfn_to_page(gmfn))->domain_id
+                        : -1),
+                       mfn_to_page(gmfn)->count_info, 
+                       mfn_to_page(gmfn)->u.inuse.type_info);
+    }
+}
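+
+/* The bitmap update above relies on a test-and-set: return the old value
+ * of bit 'nr' and set it, so a page is counted as newly dirty only on a
+ * 0->1 transition.  It can be non-atomic because every caller holds the
+ * shadow lock.  A plain-C equivalent of that operation: */
+static inline int demo_test_and_set_bit(unsigned long nr, unsigned long *bitmap)
+{
+    unsigned long mask = 1UL << (nr % (8 * sizeof(unsigned long)));
+    unsigned long *word = bitmap + nr / (8 * sizeof(unsigned long));
+    int old = (*word & mask) != 0;
+
+    *word |= mask;
+    return old;
+}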
+
+
+/**************************************************************************/
+/* Shadow-control XEN_DOMCTL dispatcher */
+
+int shadow_domctl(struct domain *d, 
+                   xen_domctl_shadow_op_t *sc,
+                   XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
+{
+    int rc, preempted = 0;
+
+    if ( unlikely(d == current->domain) )
+    {
+        DPRINTK("Don't try to do a shadow op on yourself!\n");
+        return -EINVAL;
+    }
+
+    switch ( sc->op )
+    {
+    case XEN_DOMCTL_SHADOW_OP_OFF:
+        if ( shadow_mode_log_dirty(d) )
+            if ( (rc = shadow_log_dirty_disable(d)) != 0 ) 
+                return rc;
+        if ( d->arch.shadow.mode & SHM2_enable )
+            if ( (rc = shadow_test_disable(d)) != 0 ) 
+                return rc;
+        return 0;
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
+        return shadow_test_enable(d);
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+        return shadow_log_dirty_enable(d);
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
+        return shadow_enable(d, SHM2_refcounts|SHM2_translate);
+
+    case XEN_DOMCTL_SHADOW_OP_CLEAN:
+    case XEN_DOMCTL_SHADOW_OP_PEEK:
+        return shadow_log_dirty_op(d, sc);
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE:
+        if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
+            return shadow_log_dirty_enable(d);
+        return shadow_enable(d, sc->mode << SHM2_shift);
+
+    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
+        sc->mb = shadow_get_allocation(d);
+        return 0;
+
+    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
+        rc = shadow_set_allocation(d, sc->mb, &preempted);
+        if ( preempted )
+            /* Not finished.  Set up to re-run the call. */
+            rc = hypercall_create_continuation(
+                __HYPERVISOR_domctl, "h", u_domctl);
+        else 
+            /* Finished.  Return the new allocation */
+            sc->mb = shadow_get_allocation(d);
+        return rc;
+
+    default:
+        SHADOW_ERROR("Bad shadow op %u\n", sc->op);
+        return -EINVAL;
+    }
+}
+
+
+/**************************************************************************/
+/* Auditing shadow tables */
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL
+
+void shadow_audit_tables(struct vcpu *v) 
+{
+    /* Dispatch table for getting per-type functions */
+    static hash_callback_t callbacks[16] = {
+        NULL, /* none    */
+#if CONFIG_PAGING_LEVELS == 2
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,2,2),  /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,2,2), /* fl1_32  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,2,2),  /* l2_32   */
+#else 
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,2),  /* l1_32   */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,2), /* fl1_32  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,2),  /* l2_32   */
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,3,3),  /* l1_pae  */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,3,3), /* fl1_pae */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2_pae  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,3,3),  /* l2h_pae */
+        SHADOW_INTERNAL_NAME(sh_audit_l3_table,3,3),  /* l3_pae  */
+#if CONFIG_PAGING_LEVELS >= 4
+        SHADOW_INTERNAL_NAME(sh_audit_l1_table,4,4),  /* l1_64   */
+        SHADOW_INTERNAL_NAME(sh_audit_fl1_table,4,4), /* fl1_64  */
+        SHADOW_INTERNAL_NAME(sh_audit_l2_table,4,4),  /* l2_64   */
+        SHADOW_INTERNAL_NAME(sh_audit_l3_table,4,4),  /* l3_64   */
+        SHADOW_INTERNAL_NAME(sh_audit_l4_table,4,4),  /* l4_64   */
+#endif /* CONFIG_PAGING_LEVELS >= 4 */
+#endif /* CONFIG_PAGING_LEVELS > 2 */
+        NULL  /* All the rest */
+    };
+    unsigned int mask; 
+
+    if ( !(SHADOW_AUDIT_ENABLE) )
+        return;
+    
+    if ( SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL )
+        mask = ~1; /* Audit every table in the system */
+    else 
+    {
+        /* Audit only the current mode's tables */
+        switch ( v->arch.shadow.mode->guest_levels )
+        {
+        case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break;
+        case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE
+                        |SHF_L2H_PAE|SHF_L3_PAE); break;
+        case 4: mask = (SHF_L1_64|SHF_FL1_64|SHF_L2_64  
+                        |SHF_L3_64|SHF_L4_64); break;
+        default: BUG();
+        }
+    }
+
+    hash_foreach(v, mask, callbacks, _mfn(INVALID_MFN));
+}
+
+#endif /* Shadow audit */
+
+
+/**************************************************************************/
+/* Auditing p2m tables */
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_P2M
+
+void shadow_audit_p2m(struct domain *d)
+{
+    struct list_head *entry;
+    struct page_info *page;
+    struct domain *od;
+    unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
+    mfn_t p2mfn;
+    unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
+    int test_linear;
+    
+    if ( !(SHADOW_AUDIT_ENABLE) || !shadow_mode_translate(d) )
+        return;
+
+    //SHADOW_PRINTK("p2m audit starts\n");
+
+    test_linear = ( (d == current->domain) && current->arch.monitor_vtable );
+    if ( test_linear )
+        local_flush_tlb(); 
+
+    /* Audit part one: walk the domain's page allocation list, checking 
+     * the m2p entries. */
+    for ( entry = d->page_list.next;
+          entry != &d->page_list;
+          entry = entry->next )
+    {
+        page = list_entry(entry, struct page_info, list);
+        mfn = mfn_x(page_to_mfn(page));
+
+        // SHADOW_PRINTK("auditing guest page, mfn=%#lx\n", mfn); 
+
+        od = page_get_owner(page);
+
+        if ( od != d ) 
+        {
+            SHADOW_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
+                           mfn, od, (od?od->domain_id:-1), d, d->domain_id);
+            continue;
+        }
+
+        gfn = get_gpfn_from_mfn(mfn);
+        if ( gfn == INVALID_M2P_ENTRY ) 
+        {
+            orphans_i++;
+            //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
+            //               mfn); 
+            continue;
+        }
+
+        if ( gfn == 0x55555555 ) 
+        {
+            orphans_d++;
+            //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", 
+            //               mfn); 
+            continue;
+        }
+
+        p2mfn = sh_gfn_to_mfn_foreign(d, gfn);
+        if ( mfn_x(p2mfn) != mfn )
+        {
+            mpbad++;
+            SHADOW_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
+                           " (-> gfn %#lx)\n",
+                           mfn, gfn, mfn_x(p2mfn),
+                           (mfn_valid(p2mfn)
+                            ? get_gpfn_from_mfn(mfn_x(p2mfn))
+                            : -1u));
+            /* This m2p entry is stale: the domain has another frame in
+             * this physical slot.  No great disaster, but for neatness,
+             * blow away the m2p entry. */ 
+            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+        }
+
+        if ( test_linear )
+        {
+            lp2mfn = get_mfn_from_gpfn(gfn);
+            if ( lp2mfn != mfn_x(p2mfn) )
+            {
+                SHADOW_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
+                               "(!= mfn %#lx)\n", gfn, lp2mfn, p2mfn);
+            }
+        }
+
+        // SHADOW_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", 
+        //                mfn, gfn, p2mfn, lp2mfn); 
+    }   
+
+    /* Audit part two: walk the domain's p2m table, checking the entries. */
+    if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
+    {
+        l2_pgentry_t *l2e;
+        l1_pgentry_t *l1e;
+        int i1, i2;
+        
+#if CONFIG_PAGING_LEVELS == 4
+        l4_pgentry_t *l4e;
+        l3_pgentry_t *l3e;
+        int i3, i4;
+        l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+#elif CONFIG_PAGING_LEVELS == 3
+        l3_pgentry_t *l3e;
+        int i3;
+        l3e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+#else /* CONFIG_PAGING_LEVELS == 2 */
+        l2e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+#endif
+
+        gfn = 0;
+#if CONFIG_PAGING_LEVELS >= 3
+#if CONFIG_PAGING_LEVELS >= 4
+        for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
+        {
+            if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
+            {
+                gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
+                continue;
+            }
+            l3e = sh_map_domain_page(_mfn(l4e_get_pfn(l4e[i4])));
+#endif /* now at levels 3 or 4... */
+            for ( i3 = 0; 
+                  i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); 
+                  i3++ )
+            {
+                if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
+                {
+                    gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
+                    continue;
+                }
+                l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[i3])));
+#endif /* all levels... */
+                for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
+                {
+                    if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
+                    {
+                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
+                        continue;
+                    }
+                    l1e = sh_map_domain_page(_mfn(l2e_get_pfn(l2e[i2])));
+                    
+                    for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
+                    {
+                        if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+                            continue;
+                        mfn = l1e_get_pfn(l1e[i1]);
+                        ASSERT(valid_mfn(_mfn(mfn)));
+                        m2pfn = get_gpfn_from_mfn(mfn);
+                        if ( m2pfn != gfn )
+                        {
+                            pmbad++;
+                            SHADOW_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
+                                           " -> gfn %#lx\n", gfn, mfn, m2pfn);
+                            BUG();
+                        }
+                    }
+                    sh_unmap_domain_page(l1e);
+                }
+#if CONFIG_PAGING_LEVELS >= 3
+                sh_unmap_domain_page(l2e);
+            }
+#if CONFIG_PAGING_LEVELS >= 4
+            sh_unmap_domain_page(l3e);
+        }
+#endif
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
+        sh_unmap_domain_page(l4e);
+#elif CONFIG_PAGING_LEVELS == 3
+        sh_unmap_domain_page(l3e);
+#else /* CONFIG_PAGING_LEVELS == 2 */
+        sh_unmap_domain_page(l2e);
+#endif
+
+    }
+
+    //SHADOW_PRINTK("p2m audit complete\n");
+    //if ( orphans_i | orphans_d | mpbad | pmbad ) 
+    //    SHADOW_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
+    //                   orphans_i + orphans_d, orphans_i, orphans_d,
+    if ( mpbad | pmbad ) 
+        SHADOW_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
+                       pmbad, mpbad);
+}
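+
+/* The gfn bookkeeping in the walk above: when an entry at level N is not
+ * present, the audit skips every gfn that entry would have covered, i.e.
+ * 1 << (LEVELN_SHIFT - PAGE_SHIFT) frames.  For example, an absent PAE L2
+ * entry covers 512 frames (2MB), a non-PAE L2 entry 1024 frames (4MB),
+ * and an L3 entry 2^18 frames (1GB).  As a standalone helper with a
+ * hard-coded 4KB page: */
+static inline unsigned long demo_frames_per_entry(unsigned int level_shift)
+{
+    return 1UL << (level_shift - 12);    /* 12 == log2(4KB page size) */
+}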
+
+#endif /* p2m audit */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End: 
+ */
diff -r 896fcdd49c7f -r 684fdcfb251a xen/arch/x86/mm/shadow/multi.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c    Mon Aug 28 16:26:37 2006 -0600
@@ -0,0 +1,4492 @@
+/******************************************************************************
+ * arch/x86/mm/shadow/multi.c
+ *
+ * Simple, mostly-synchronous shadow page tables. 
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+// DESIGN QUESTIONS:
+// Why use subshadows for PAE guests?
+// - reduces pressure in the hash table
+// - reduces shadow size (64-vs-4096 bytes of shadow for 32 bytes of guest L3)
+// - would need to find space in the page_info to store 7 more bits of
+//   backpointer
+// - independent shadows of 32 byte chunks makes it non-obvious how to quickly
+//   figure out when to demote the guest page from l3 status
+//
+// PAE Xen HVM guests are restricted to 8GB of pseudo-physical address space.
+// - Want to map the P2M table into the 16MB RO_MPT hole in Xen's address
+//   space for both PV and HVM guests.
+//
+
+#define SHADOW 1
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/shadow.h>
+#include <asm/flushtlb.h>
+#include <asm/hvm/hvm.h>
+#include "private.h"
+#include "types.h"
+
+/* The first cut: an absolutely synchronous, trap-and-emulate version,
+ * supporting only HVM guests (and so only "external" shadow mode). 
+ *
+ * THINGS TO DO LATER:
+ * 
+ * FIX GVA_TO_GPA
+ * The current interface returns an unsigned long, which is not big enough
+ * to hold a physical address in PAE.  Should return a gfn instead.
+ * 
+ * TEARDOWN HEURISTICS
+ * Also: have a heuristic for when to destroy a previous paging-mode's 
+ * shadows.  When a guest is done with its start-of-day 32-bit tables
+ * and reuses the memory we want to drop those shadows.  Start with 
+ * shadows in a page in two modes as a hint, but beware of clever tricks 
+ * like reusing a pagetable for both PAE and 64-bit during boot...
+ *
+ * PAE LINEAR MAPS
+ * Rework shadow_get_l*e() to have the option of using map_domain_page()
+ * instead of linear maps.  Add appropriate unmap_l*e calls in the users. 
+ * Then we can test the speed difference made by linear maps.  If the 
+ * map_domain_page() version is OK on PAE, we could maybe allow a lightweight 
+ * l3-and-l2h-only shadow mode for PAE PV guests that would allow them 
+ * to share l2h pages again. 
+ *
+ * PAE L3 COPYING
+ * In this code, we copy all 32 bytes of a PAE L3 every time we change an 
+ * entry in it, and every time we change CR3.  We copy it for the linear 
+ * mappings (ugh! PAE linear mappings) and we copy it to the low-memory
+ * buffer so it fits in CR3.  Maybe we can avoid some of this recopying 
+ * by using the shadow directly in some places. 
+ * Also, for SMP, need to actually respond to seeing shadow.pae_flip_pending.
+ *
+ * GUEST_WALK_TABLES TLB FLUSH COALESCE
+ * guest_walk_tables can do up to three remote TLB flushes as it walks to
+ * the first l1 of a new pagetable.  Should coalesce the flushes to the end, 
+ * and if we do flush, re-do the walk.  If anything has changed, then 
+ * pause all the other vcpus and do the walk *again*.
+ *
+ * WP DISABLED
+ * Consider how to implement having the WP bit of CR0 set to 0.  
+ * Since we need to be able to cause write faults to pagetables, this might
+ * end up looking like not having the (guest) pagetables present at all in 
+ * HVM guests...
+ *
+ * PSE disabled / PSE36
+ * We don't support any modes other than PSE enabled, PSE36 disabled.
+ * Neither of those would be hard to change, but we'd need to be able to 
+ * deal with shadows made in one mode and used in another.
+ */
+
+#define FETCH_TYPE_PREFETCH 1
+#define FETCH_TYPE_DEMAND   2
+#define FETCH_TYPE_WRITE    4
+typedef enum {
+    ft_prefetch     = FETCH_TYPE_PREFETCH,
+    ft_demand_read  = FETCH_TYPE_DEMAND,
+    ft_demand_write = FETCH_TYPE_DEMAND | FETCH_TYPE_WRITE,
+} fetch_type_t;
+
+#ifdef DEBUG_TRACE_DUMP
+static const char *fetch_type_names[] = {
+    [ft_prefetch]     = "prefetch",
+    [ft_demand_read]  = "demand read",
+    [ft_demand_write] = "demand write",
+};
+#endif
+
+/* XXX forward declarations */
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+static unsigned long hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab,
+                                        int clear_res);
+#endif
+static inline void sh_update_linear_entries(struct vcpu *v);
+
+/**************************************************************************/
+/* Hash table mapping from guest pagetables to shadows
+ *
+ * Normal case: maps the mfn of a guest page to the mfn of its shadow page.
+ * FL1's:       maps the *gfn* of the start of a superpage to the mfn of a
+ *              shadow L1 which maps its "splinters".
+ * PAE CR3s:    maps the 32-byte aligned, 32-bit CR3 value to the mfn of the
+ *              PAE L3 info page for that CR3 value.
+ */
+
+static inline mfn_t 
+get_fl1_shadow_status(struct vcpu *v, gfn_t gfn)
+/* Look for FL1 shadows in the hash table */
+{
+    mfn_t smfn = shadow_hash_lookup(v, gfn_x(gfn),
+                                     PGC_SH_fl1_shadow >> PGC_SH_type_shift);
+
+    if ( unlikely(shadow_mode_log_dirty(v->domain) && valid_mfn(smfn)) )
+    {
+        struct page_info *page = mfn_to_page(smfn);
+        if ( !(page->count_info & PGC_SH_log_dirty) )
+            shadow_convert_to_log_dirty(v, smfn);
+    }
+
+    return smfn;
+}
+
+static inline mfn_t 
+get_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
+/* Look for shadows in the hash table */
+{
+    mfn_t smfn = shadow_hash_lookup(v, mfn_x(gmfn),
+                                     shadow_type >> PGC_SH_type_shift);
+    perfc_incrc(shadow_get_shadow_status);
+
+    if ( unlikely(shadow_mode_log_dirty(v->domain) && valid_mfn(smfn)) )
+    {
+        struct page_info *page = mfn_to_page(smfn);
+        if ( !(page->count_info & PGC_SH_log_dirty) )
+            shadow_convert_to_log_dirty(v, smfn);
+    }
+
+    return smfn;
+}
+
+static inline void 
+set_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
+/* Put an FL1 shadow into the hash table */
+{
+    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
+                   gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
+
+    if ( unlikely(shadow_mode_log_dirty(v->domain)) )
+        // mark this shadow as a log dirty shadow...
+        set_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
+    else
+        clear_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
+
+    shadow_hash_insert(v, gfn_x(gfn),
+                        PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn);
+}
+
+static inline void 
+set_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
+/* Put a shadow into the hash table */
+{
+    struct domain *d = v->domain;
+    int res;
+
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
+                   d->domain_id, v->vcpu_id, mfn_x(gmfn),
+                   shadow_type, mfn_x(smfn));
+
+    if ( unlikely(shadow_mode_log_dirty(d)) )
+        // mark this shadow as a log dirty shadow...
+        set_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
+    else
+        clear_bit(_PGC_SH_log_dirty, &mfn_to_page(smfn)->count_info);
+
+    res = get_page(mfn_to_page(gmfn), d);
+    ASSERT(res == 1);
+
+    shadow_hash_insert(v, mfn_x(gmfn), shadow_type >> PGC_SH_type_shift,
+                        smfn);
+}
+
+static inline void 
+delete_fl1_shadow_status(struct vcpu *v, gfn_t gfn, mfn_t smfn)
+/* Remove a shadow from the hash table */
+{
+    SHADOW_PRINTK("gfn=%"SH_PRI_gfn", type=%08x, smfn=%05lx\n",
+                   gfn_x(gfn), PGC_SH_fl1_shadow, mfn_x(smfn));
+
+    shadow_hash_delete(v, gfn_x(gfn),
+                        PGC_SH_fl1_shadow >> PGC_SH_type_shift, smfn);
+}
+
+static inline void 
+delete_shadow_status(struct vcpu *v, mfn_t gmfn, u32 shadow_type, mfn_t smfn)
+/* Remove a shadow from the hash table */
+{
+    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, type=%08x, smfn=%05lx\n",
+                   v->domain->domain_id, v->vcpu_id,
+                   mfn_x(gmfn), shadow_type, mfn_x(smfn));
+    shadow_hash_delete(v, mfn_x(gmfn),
+                        shadow_type >> PGC_SH_type_shift, smfn);
+    put_page(mfn_to_page(gmfn));
+}
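
Taken together, the helpers above give the usual lookup-then-insert pattern for shadow pages. A minimal sketch of that pattern, using only the helpers above; make_new_shadow() is a hypothetical stand-in for whatever allocates a fresh shadow page, and the demo_* name is illustrative:

    /* Sketch: find the shadow of gmfn, creating and registering one if
     * none exists yet.  make_new_shadow() is hypothetical. */
    static mfn_t demo_get_or_make_shadow(struct vcpu *v, mfn_t gmfn, u32 type)
    {
        mfn_t smfn = get_shadow_status(v, gmfn, type);
        if ( valid_mfn(smfn) )
            return smfn;                        /* already in the hash table */
        smfn = make_new_shadow(v, gmfn, type);  /* hypothetical allocator */
        set_shadow_status(v, gmfn, type, smfn); /* hash insert + page ref */
        return smfn;
    }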
+
+/**************************************************************************/
+/* CPU feature support querying */
+
+static inline int
+guest_supports_superpages(struct vcpu *v)
+{
+    /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
+     * CR4.PSE is set or the guest is in PAE or long mode */
+    return (hvm_guest(v) && (GUEST_PAGING_LEVELS != 2 
+                             || (hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PSE)));
+}
+
+static inline int
+guest_supports_nx(struct vcpu *v)
+{
+    if ( !hvm_guest(v) )
+        return cpu_has_nx;
+
+    // XXX - fix this!
+    return 1;
+}
+
+
+/**************************************************************************/
+/* Functions for walking the guest page tables */
+
+
+/* Walk the guest pagetables, filling the walk_t with what we see. 
+ * Takes an uninitialised walk_t.  The caller must call unmap_walk() 
+ * on the walk_t before discarding it or calling guest_walk_tables again. 
+ * If "guest_op" is non-zero, we are serving a genuine guest memory access, 
+ * and must (a) be under the shadow lock, and (b) remove write access
+ * from any guest PT pages we see, as we will be using their contents to
+ * perform shadow updates.
+ * Returns 0 for success or non-zero if the guest pagetables are malformed.
+ * N.B. Finding a not-present entry does not cause a non-zero return code. */
+static inline int 
+guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
+{
+    ASSERT(!guest_op || shadow_lock_is_acquired(v->domain));
+
+    perfc_incrc(shadow_guest_walk);
+    memset(gw, 0, sizeof(*gw));
+    gw->va = va;
+
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    /* Get l4e from the top level table */
+    gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
+    gw->l4e = (guest_l4e_t *)v->arch.guest_vtable + guest_l4_table_offset(va);
+    /* Walk down to the l3e */
+    if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
+    gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e));
+    if ( !valid_mfn(gw->l3mfn) ) return 1;
+    /* This mfn is a pagetable: make sure the guest can't write to it. */
+    if ( guest_op && shadow_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask); 
+    gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
+        + guest_l3_table_offset(va);
+#else /* PAE only... */
+    /* Get l3e from the top level table */
+    gw->l3mfn = pagetable_get_mfn(v->arch.guest_table);
+    gw->l3e = (guest_l3e_t *)v->arch.guest_vtable + guest_l3_table_offset(va);
+#endif /* PAE or 64... */
+    /* Walk down to the l2e */
+    if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
+    gw->l2mfn = vcpu_gfn_to_mfn(v, guest_l3e_get_gfn(*gw->l3e));
+    if ( !valid_mfn(gw->l2mfn) ) return 1;
+    /* This mfn is a pagetable: make sure the guest can't write to it. */
+    if ( guest_op && shadow_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
+        flush_tlb_mask(v->domain->domain_dirty_cpumask); 
+    gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
+        + guest_l2_table_offset(va);
+#else /* 32-bit only... */
+    /* Get l2e from the top level table */
+    gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
+    gw->l2e = (guest_l2e_t *)v->arch.guest_vtable + guest_l2_table_offset(va);
+#endif /* All levels... */
+    
+    if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
+    if ( guest_supports_superpages(v) &&
+         (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) ) 
+    {
+        /* Special case: this guest VA is in a PSE superpage, so there's
+         * no guest l1e.  We make one up so that the propagation code
+         * can generate a shadow l1 table.  Start with the gfn of the 
+         * first 4k-page of the superpage. */
+        gfn_t start = guest_l2e_get_gfn(*gw->l2e);
+        /* Grant full access in the l1e, since all the guest entry's 
+         * access controls are enforced in the shadow l2e.  This lets 
+         * us reflect l2 changes later without touching the l1s. */
+        int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
+                     _PAGE_ACCESSED|_PAGE_DIRTY);
+        /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
+         * of the level 1 */
+        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) ) 
+            flags |= _PAGE_PAT; 
+        /* Increment the pfn by the right number of 4k pages.  
+         * The ~0x1 is to mask out the PAT bit mentioned above. */
+        start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
+        gw->eff_l1e = guest_l1e_from_gfn(start, flags);
+        gw->l1e = NULL;
+        gw->l1mfn = _mfn(INVALID_MFN);
+    } 
+    else 
+    {
+        /* Not a superpage: carry on and find the l1e. */
+        gw->l1mfn = vcpu_gfn_to_mfn(v, guest_l2e_get_gfn(*gw->l2e));
+        if ( !valid_mfn(gw->l1mfn) ) return 1;
+        /* This mfn is a pagetable: make sure the guest can't write to it. */
+        if ( guest_op 
+             && shadow_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
+            flush_tlb_mask(v->domain->domain_dirty_cpumask); 
+        gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
+            + guest_l1_table_offset(va);
+        gw->eff_l1e = *gw->l1e;
+    }
+
+    return 0;
+}
+
+/* Given a walk_t, translate the gw->va into the guest's notion of the
+ * corresponding frame number. */
+static inline gfn_t
+guest_walk_to_gfn(walk_t *gw)
+{
+    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+        return _gfn(INVALID_GFN);
+    return guest_l1e_get_gfn(gw->eff_l1e);
+}
+
+/* Given a walk_t, translate the gw->va into the guest's notion of the
+ * corresponding physical address. */
+static inline paddr_t
+guest_walk_to_gpa(walk_t *gw)
+{
+    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
+        return 0;
+    return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
+}
+
+
+/* Unmap (and reinitialise) a guest walk.  
+ * Call this to dispose of any walk filled in by guest_walk_tables() */
+static void unmap_walk(struct vcpu *v, walk_t *gw)
+{
+#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
+    if ( gw->l3e != NULL ) sh_unmap_domain_page(gw->l3e);
+#endif
+    if ( gw->l2e != NULL ) sh_unmap_domain_page(gw->l2e);
+#endif
+    if ( gw->l1e != NULL ) sh_unmap_domain_page(gw->l1e);
+#ifdef DEBUG
+    memset(gw, 0, sizeof(*gw));
+#endif
+}
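
The intended calling pattern for the walker is walk, consume, then always unmap. A minimal sketch using only the functions above (the demo_* name is illustrative; passing guest_op == 0 means the shadow lock need not be held):

    /* Sketch: translate a guest VA to a gfn with the walker above. */
    static gfn_t demo_va_to_gfn(struct vcpu *v, unsigned long va)
    {
        walk_t gw;
        gfn_t gfn = _gfn(INVALID_GFN);

        if ( guest_walk_tables(v, va, &gw, 0 /* not a guest op */) == 0 )
            gfn = guest_walk_to_gfn(&gw);    /* INVALID_GFN if not present */
        unmap_walk(v, &gw);                  /* always dispose of the walk */
        return gfn;
    }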
+
+
+/* Pretty-print the contents of a guest-walk */
+static inline void print_gw(walk_t *gw)
+{
+    SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va);
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    SHADOW_PRINTK("   l4mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l4mfn));
+    SHADOW_PRINTK("   l4e=%p\n", gw->l4e);
+    if ( gw->l4e )
+        SHADOW_PRINTK("   *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4);
+#endif /* PAE or 64... */
+    SHADOW_PRINTK("   l3mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l3mfn));
+    SHADOW_PRINTK("   l3e=%p\n", gw->l3e);
+    if ( gw->l3e )
+        SHADOW_PRINTK("   *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3);
+#endif /* All levels... */
+    SHADOW_PRINTK("   l2mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l2mfn));
+    SHADOW_PRINTK("   l2e=%p\n", gw->l2e);
+    if ( gw->l2e )
+        SHADOW_PRINTK("   *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2);
+    SHADOW_PRINTK("   l1mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l1mfn));
+    SHADOW_PRINTK("   l1e=%p\n", gw->l1e);
+    if ( gw->l1e )
+        SHADOW_PRINTK("   *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1);
+    SHADOW_PRINTK("   eff_l1e=%" SH_PRI_gpte "\n", gw->eff_l1e.l1);
+}
+
+
+#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
+/* Lightweight audit: pass all the shadows associated with this guest walk
+ * through the audit mechanisms */
+static void sh_audit_gw(struct vcpu *v, walk_t *gw) 
+{
+    mfn_t smfn;
+
+    if ( !(SHADOW_AUDIT_ENABLE) )
+        return;
+
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+    if ( valid_mfn(gw->l4mfn)
+         && valid_mfn((smfn = get_shadow_status(v, gw->l4mfn, 
+                                                PGC_SH_l4_shadow))) )
+        (void) sh_audit_l4_table(v, smfn, _mfn(INVALID_MFN));
+#endif /* PAE or 64... */
+    if ( valid_mfn(gw->l3mfn)
+         && valid_mfn((smfn = get_shadow_status(v, gw->l3mfn, 
+                                                PGC_SH_l3_shadow))) )
+        (void) sh_audit_l3_table(v, smfn, _mfn(INVALID_MFN));
+#endif /* All levels... */
+    if ( valid_mfn(gw->l2mfn) )
+    {
+        if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn, 
+                                                 PGC_SH_l2_shadow))) )
+            (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
+#if GUEST_PAGING_LEVELS == 3
+        if ( valid_mfn((smfn = get_shadow_status(v, gw->l2mfn, 
+                                                 PGC_SH_l2h_shadow))) )
+            (void) sh_audit_l2_table(v, smfn, _mfn(INVALID_MFN));
+#endif
+    }
+    if ( valid_mfn(gw->l1mfn)
+         && valid_mfn((smfn = get_shadow_status(v, gw->l1mfn, 
+                                                PGC_SH_l1_shadow))) )
+        (void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
+    else if ( gw->l2e
+              && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
+              && valid_mfn( 
+              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
+        (void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
+}
+
+#else
+#define sh_audit_gw(_v, _gw) do {} while(0)
+#endif /* audit code */
+
+
+
+/**************************************************************************/
+/* Function to write to the guest tables, for propagating accessed and 
+ * dirty bits from the shadow to the guest.
+ * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
+ * and an operation type.  The guest entry is always passed as an l1e: 
+ * since we only ever write flags, that's OK.
+ * Returns the new flag bits of the guest entry. */
+
+static u32 guest_set_ad_bits(struct vcpu *v,
+                             mfn_t gmfn, 
+                             guest_l1e_t *ep,
+                             unsigned int level, 
+                             fetch_type_t ft)
+{
+    u32 flags, shflags, bit;
+    struct page_info *pg;
+    int res = 0;
+
+    ASSERT(valid_mfn(gmfn)
+           && (sh_mfn_is_a_page_table(gmfn)
+               || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) 
+                   == 0)));
+    ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
+    ASSERT(level <= GUEST_PAGING_LEVELS);
+    ASSERT(ft == ft_demand_read || ft == ft_demand_write);
+    ASSERT(shadow_lock_is_acquired(v->domain));
+
+    flags = guest_l1e_get_flags(*ep);
+
+    /* PAE l3s do not have A and D bits */
+    if ( unlikely(GUEST_PAGING_LEVELS == 3 && level == 3) )
+        return flags;
+
+    /* Need the D bit as well for writes, in l1es and 32bit/PAE PSE l2es. */
+    if ( ft == ft_demand_write  
+         && (level == 1 || 
+             (level == 2 && GUEST_PAGING_LEVELS < 4 
+              && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
+    {
+        if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) 
+             == (_PAGE_DIRTY | _PAGE_ACCESSED) )
+            return flags;  /* Guest already has A and D bits set */
+        flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
+        perfc_incrc(shadow_ad_update);
+    }
+    else 
+    {
+        if ( flags & _PAGE_ACCESSED )
+            return flags;  /* Guest already has A bit set */
+        flags |= _PAGE_ACCESSED;
+        perfc_incrc(shadow_a_update);
+    }
+
+    /* Set the bit(s) */
+    sh_mark_dirty(v->domain, gmfn);
+    SHADOW_DEBUG(A_AND_D, "gfn = %"SH_PRI_gfn", "
+                  "old flags = %#x, new flags = %#x\n", 
+                  guest_l1e_get_gfn(*ep), guest_l1e_get_flags(*ep), flags);
+    *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
+    
+    /* May need to propagate this change forward to other kinds of shadow */
+    pg = mfn_to_page(gmfn);
+    if ( !sh_mfn_is_a_page_table(gmfn) ) 
+    {
+        /* This guest pagetable is not yet shadowed at all. */
+        // MAF: I think this assert is busted...  If this gmfn has not yet
+        // been promoted, then it seems perfectly reasonable for there to be
+        // outstanding type refs to it...
+        /* TJD: No. If the gmfn has not been promoted, we must at least 
+         * have recognised that it is a pagetable, and pulled write access.
+         * The type count should only be non-zero if it is actually a page 
+         * table.  The test above was incorrect, though, so I've fixed it. */
+        ASSERT((pg->u.inuse.type_info & PGT_count_mask) == 0);
+        return flags;  
+    }
+
+    shflags = pg->shadow_flags & SHF_page_type_mask;
+    while ( shflags )
+    {
+        bit = find_first_set_bit(shflags);
+        ASSERT(shflags & (1u << bit));
+        shflags &= ~(1u << bit);
+        if ( !(pg->shadow_flags & (1u << bit)) )
+            continue;
+        switch ( bit )
+        {
+        case PGC_SH_type_to_index(PGC_SH_l1_shadow):
+            if (level != 1) 
+                res |= sh_map_and_validate_gl1e(v, gmfn, ep, sizeof (*ep));
+            break;
+        case PGC_SH_type_to_index(PGC_SH_l2_shadow):
+            if (level != 2) 
+                res |= sh_map_and_validate_gl2e(v, gmfn, ep, sizeof (*ep));
+            break;
+#if GUEST_PAGING_LEVELS == 3 /* PAE only */
+        case PGC_SH_type_to_index(PGC_SH_l2h_shadow):
+            if (level != 2) 
+                res |= sh_map_and_validate_gl2he(v, gmfn, ep, sizeof (*ep));
+            break;
+#endif
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+        case PGC_SH_type_to_index(PGC_SH_l3_shadow):
+            if (level != 3) 
+                res |= sh_map_and_validate_gl3e(v, gmfn, ep, sizeof (*ep));
+            break;
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+        case PGC_SH_type_to_index(PGC_SH_l4_shadow):
+            if (level != 4) 
+                res |= sh_map_and_validate_gl4e(v, gmfn, ep, sizeof (*ep));
+            break;
+#endif 
+#endif
+        default:
+            SHADOW_ERROR("mfn %"SH_PRI_mfn" is shadowed in multiple "
+                          "modes: A&D bits may be out of sync (flags=%#x).\n", 
+                          mfn_x(gmfn), pg->shadow_flags); 
+            /* XXX Shadows in other modes will not be updated, so will
+             * have their A and D bits out of sync. */
+        }
+    }
+    
+    /* We should never need to flush the TLB or recopy PAE entries */
+    ASSERT( res == 0 || res == SHADOW_SET_CHANGED );
+    return flags;
+}
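
Concretely, the flag transitions the function above is expected to produce look like this (a sketch of expectations, assuming a shadowable, non-PAE-l3 entry):

    /* Illustrative flag transitions for guest_set_ad_bits():
     *   ft_demand_write, level 1, guest flags P|RW|US    -> P|RW|US|A|D
     *   ft_demand_read,  level 1, guest flags P|RW|US    -> P|RW|US|A
     *   any type, level 3 of a 3-level guest (PAE l3e)   -> flags returned unchanged
     */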
+
+/**************************************************************************/
+/* Functions to compute the correct index into a shadow page, given an
+ * index into the guest page (as returned by guest_get_index()).
+ * This is trivial when the shadow and guest use the same sized PTEs, but
+ * gets more interesting when those sizes are mismatched (e.g. 32-bit guest,
+ * PAE- or 64-bit shadows).
+ *
+ * These functions also increment the shadow mfn, when necessary.  When PTE
+ * sizes are mismatched, it takes 2 shadow L1 pages for a single guest L1
+ * page.  In this case, we allocate 2 contiguous pages for the shadow L1, and
+ * use simple pointer arithmetic on a pointer to the guest L1e to figure out
+ * which shadow page we really want.  Similarly, when PTE sizes are
+ * mismatched, we shadow a guest L2 page with 4 shadow L2 pages.  (The easiest
+ * way to see this is: a 32-bit guest L2 page maps 4GB of virtual address
+ * space, while a PAE- or 64-bit shadow L2 page maps 1GB of virtual address
+ * space.)
+ *
+ * For PAE guests, for every 32-bytes of guest L3 page table, we use 64-bytes
+ * of shadow (to store both the shadow, and the info that would normally be
+ * stored in page_info fields).  This arrangement allows the shadow and the
+ * "page_info" fields to always be stored in the same page (in fact, in
+ * the same cache line), avoiding an extra call to map_domain_page().
+ */
+
+static inline u32
+guest_index(void *ptr)
+{
+    return (u32)((unsigned long)ptr & ~PAGE_MASK) / sizeof(guest_l1e_t);
+}
+
+static inline u32
+shadow_l1_index(mfn_t *smfn, u32 guest_index)
+{
+#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
+    *smfn = _mfn(mfn_x(*smfn) +
+                 (guest_index / SHADOW_L1_PAGETABLE_ENTRIES));
+    return (guest_index % SHADOW_L1_PAGETABLE_ENTRIES);
+#else
+    return guest_index;
+#endif
+}
+
+static inline u32
+shadow_l2_index(mfn_t *smfn, u32 guest_index)
+{
+#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
+    // Because we use 2 shadow l2 entries for each guest entry, the number of
+    // guest entries per shadow page is SHADOW_L2_PAGETABLE_ENTRIES/2
+    //
+    *smfn = _mfn(mfn_x(*smfn) +
+                 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
+
+    // We multiply by two to get the index of the first of the two entries
+    // used to shadow the specified guest entry.
+    return (guest_index % (SHADOW_L2_PAGETABLE_ENTRIES / 2)) * 2;
+#else
+    return guest_index;
+#endif
+}
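
A worked example of the arithmetic above for the mismatched case (32-bit guest on PAE or 64-bit shadows, where SHADOW_L1_PAGETABLE_ENTRIES and SHADOW_L2_PAGETABLE_ENTRIES are both 512):

    /* Worked example (guest l1/l2 pages have 1024 4-byte entries each):
     *   shadow_l1_index(&smfn, 700): smfn advances by 700/512 = 1 page,
     *                                returns 700 % 512        = 188
     *   shadow_l2_index(&smfn, 700): smfn advances by 700/256 = 2 pages,
     *                                returns (700 % 256) * 2  = 376
     */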
+
+#if GUEST_PAGING_LEVELS >= 3
+
+static inline u32
+shadow_l3_index(mfn_t *smfn, u32 guest_index)
+{
+#if GUEST_PAGING_LEVELS == 3
+    u32 group_id;
+
+    // Because we use twice the space in L3 shadows as was consumed in guest
+    // L3s, the number of guest entries per shadow page is
+    // SHADOW_L2_PAGETABLE_ENTRIES/2.  (Note this is *not*
+    // SHADOW_L3_PAGETABLE_ENTRIES, which in this case is 4...)
+    //
+    *smfn = _mfn(mfn_x(*smfn) +
+                 (guest_index / (SHADOW_L2_PAGETABLE_ENTRIES / 2)));
+
+    // We store PAE L3 shadows in groups of 4, alternating shadows and
+    // pae_l3_bookkeeping structs.  So the effective shadow index is
+    // the group_id * 8 + the offset within the group.
+    //
+    guest_index %= (SHADOW_L2_PAGETABLE_ENTRIES / 2);
+    group_id = guest_index / 4;
+    return (group_id * 8) + (guest_index % 4);
+#else
+    return guest_index;
+#endif
+}
+
+#endif // GUEST_PAGING_LEVELS >= 3
+
+#if GUEST_PAGING_LEVELS >= 4
+
+static inline u32
+shadow_l4_index(mfn_t *smfn, u32 guest_index)
+{
+    return guest_index;
+}
+
+#endif // GUEST_PAGING_LEVELS >= 4
+
+
+/**************************************************************************/
+/* Functions which compute shadow entries from their corresponding guest
+ * entries.
+ *
+ * These are the "heart" of the shadow code.
+ *
+ * There are two sets of these: those that are called on demand faults (read
+ * faults and write faults), and those that are essentially called to
+ * "prefetch" (or propagate) entries from the guest into the shadow.  The read
+ * fault and write fault are handled as two separate cases for L1 entries (due
+ * to the _PAGE_DIRTY bit handling), but for L[234], they are grouped together
+ * into the respective demand_fault functions.
+ */
+
+#define CHECK(_cond)                                    \
+do {                                                    \
+    if (unlikely(!(_cond)))                             \
+    {                                                   \
+        printk("%s %s %d ASSERTION (%s) FAILED\n",      \
+               __func__, __FILE__, __LINE__, #_cond);   \
+        return -1;                                      \
+    }                                                   \
+} while (0)
+
+// The function below tries to capture all of the flag manipulation for the
+// demand and propagate functions into one place.
+//
+static always_inline u32
+sh_propagate_flags(struct vcpu *v, mfn_t target_mfn, 
+                    u32 gflags, guest_l1e_t *guest_entry_ptr, mfn_t gmfn, 
+                    int mmio, int level, fetch_type_t ft)
+{
+    struct domain *d = v->domain;
+    u32 pass_thru_flags;
+    u32 sflags;
+
+    // XXX -- might want to think about PAT support for HVM guests...
+
+#ifndef NDEBUG
+    // MMIO can only occur from L1e's
+    //
+    if ( mmio )
+        CHECK(level == 1);
+
+    // We should always have a pointer to the guest entry if it's a non-PSE
+    // non-MMIO demand access.
+    if ( ft & FETCH_TYPE_DEMAND )
+        CHECK(guest_entry_ptr || level == 1);
+#endif
+
+    // A not-present guest entry has a special signature in the shadow table,
+    // so that we do not have to consult the guest tables multiple times...
+    //
+    if ( unlikely(!(gflags & _PAGE_PRESENT)) )
+        return _PAGE_SHADOW_GUEST_NOT_PRESENT;
+
+    // Must have a valid target_mfn, unless this is mmio, or unless this is a
+    // prefetch.  In the case of a prefetch, an invalid mfn means that we can
+    // not usefully shadow anything, and so we return early.
+    //
+    if ( !valid_mfn(target_mfn) )
+    {
+        CHECK((ft == ft_prefetch) || mmio);
+        if ( !mmio )
+            return 0;
+    }
+
+    // PAE does not allow NX, RW, USER, ACCESSED, or DIRTY bits in its L3e's...
+    //
+    if ( (SHADOW_PAGING_LEVELS == 3) && (level == 3) )
+        pass_thru_flags = _PAGE_PRESENT;
+    else
+    {
+        pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
+                           _PAGE_RW | _PAGE_PRESENT);
+        if ( guest_supports_nx(v) )
+            pass_thru_flags |= _PAGE_NX_BIT;
+    }
+
+    // PAE guests can not put NX, RW, USER, ACCESSED, or DIRTY bits into their
+    // L3e's; they are all implied.  So we emulate them here.
+    //
+    if ( (GUEST_PAGING_LEVELS == 3) && (level == 3) )
+        gflags = pass_thru_flags;
+
+    // Propagate bits from the guest to the shadow.
+    // Some of these may be overwritten, below.
+    // Since we know the guest's PRESENT bit is set, we also set the shadow's
+    // SHADOW_PRESENT bit.
+    //
+    sflags = (gflags & pass_thru_flags) | _PAGE_SHADOW_PRESENT;
+
+    // Copy the guest's RW bit into the SHADOW_RW bit.
+    //
+    if ( gflags & _PAGE_RW )
+        sflags |= _PAGE_SHADOW_RW;
+
+    // Set the A&D bits for higher level shadows.
+    // Higher level entries do not, strictly speaking, have dirty bits, but
+    // since we use shadow linear tables, each of these entries may, at some
+    // point in time, also serve as a shadow L1 entry.
+    // By setting both the  A&D bits in each of these, we eliminate the burden
+    // on the hardware to update these bits on initial accesses.
+    //
+    if ( (level > 1) && !((SHADOW_PAGING_LEVELS == 3) && (level == 3)) )
+        sflags |= _PAGE_ACCESSED | _PAGE_DIRTY;
+
+
+    // Set the A and D bits in the guest entry, if we need to.
+    if ( guest_entry_ptr && (ft & FETCH_TYPE_DEMAND) )
+        gflags = guest_set_ad_bits(v, gmfn, guest_entry_ptr, level, ft);
+    
+    // If the A or D bit has not yet been set in the guest, then we must
+    // prevent the corresponding kind of access.
+    //
+    if ( unlikely(!((GUEST_PAGING_LEVELS == 3) && (level == 3)) &&
+                  !(gflags & _PAGE_ACCESSED)) )
+        sflags &= ~_PAGE_PRESENT;
+
+    /* D bits exist in l1es, and 32bit/PAE PSE l2es, but not 64bit PSE l2es */
+    if ( unlikely( ((level == 1) 
+                    || ((level == 2) && (GUEST_PAGING_LEVELS < 4) 
+                        && guest_supports_superpages(v) &&
+                        (gflags & _PAGE_PSE)))
+                   && !(gflags & _PAGE_DIRTY)) )
+        sflags &= ~_PAGE_RW;
+
+    // MMIO caching
+    //
+    // MMIO mappings are marked as not present, but we set the SHADOW_MMIO bit
+    // to cache the fact that this entry  is in MMIO space.
+    //
+    if ( (level == 1) && mmio )
+    {
+        sflags &= ~(_PAGE_PRESENT);
+        sflags |= _PAGE_SHADOW_MMIO;
+    }
+    else 
+    {
+        // shadow_mode_log_dirty support
+        //
+        // Only allow the guest write access to a page a) on a demand fault,
+        // or b) if the page is already marked as dirty.
+        //
+        if ( unlikely((level == 1) &&
+                      !(ft & FETCH_TYPE_WRITE) &&
+                      shadow_mode_log_dirty(d) &&
+                      !sh_mfn_is_dirty(d, target_mfn)) )
+        {
+            sflags &= ~_PAGE_RW;
+        }
+        
+        // protect guest page tables
+        //
+        if ( unlikely((level == 1) &&
+                      sh_mfn_is_a_page_table(target_mfn)) )
+        {
+            if ( shadow_mode_trap_reads(d) )
+            {
+                // if we are trapping both reads & writes, then mark this page
+                // as not present...
+                //
+                sflags &= ~_PAGE_PRESENT;
+            }
+            else
+            {
+                // otherwise, just prevent any writes...
+                //
+                sflags &= ~_PAGE_RW;
+            }
+        }
+    }
+
+    return sflags;
+}
+
+#undef CHECK
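
To make the flag plumbing concrete, the sketch below records roughly what sh_propagate_flags() yields for a few level-1, non-MMIO inputs, assuming a valid target mfn that is not a page table and log-dirty mode off:

    /* Illustrative level-1 results (P=PRESENT, A=ACCESSED, D=DIRTY, US=USER):
     *   guest entry not present              -> _PAGE_SHADOW_GUEST_NOT_PRESENT
     *   P|RW|US|A|D,  ft_demand_write        -> P|RW|US|A|D plus SHADOW_PRESENT|SHADOW_RW
     *   P|RW|US|A,    ft_prefetch (no D yet) -> P|US|A plus SHADOW_PRESENT|SHADOW_RW;
     *                                           RW withheld so the first write faults
     *   P|RW|US,      ft_prefetch (no A yet) -> PRESENT withheld so the first access faults
     */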
+
+#if GUEST_PAGING_LEVELS >= 4
+static void
+l4e_propagate_from_guest(struct vcpu *v, 
+                         guest_l4e_t *gl4e,
+                         mfn_t gl4mfn,
+                         mfn_t sl3mfn,
+                         shadow_l4e_t *sl4p,
+                         fetch_type_t ft)
+{
+    u32 gflags = guest_l4e_get_flags(*gl4e);
+    u32 sflags = sh_propagate_flags(v, sl3mfn, gflags, (guest_l1e_t *) gl4e,
+                                     gl4mfn, 0, 4, ft);
+
+    *sl4p = shadow_l4e_from_mfn(sl3mfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "%s gl4e=%" SH_PRI_gpte " sl4e=%" SH_PRI_pte "\n",
+                  fetch_type_names[ft], gl4e->l4, sl4p->l4);
+    ASSERT(sflags != -1);
+}
+#endif // GUEST_PAGING_LEVELS >= 4
+
+#if GUEST_PAGING_LEVELS >= 3
+static void
+l3e_propagate_from_guest(struct vcpu *v,
+                         guest_l3e_t *gl3e,
+                         mfn_t gl3mfn, 
+                         mfn_t sl2mfn, 
+                         shadow_l3e_t *sl3p,
+                         fetch_type_t ft)
+{
+    u32 gflags = guest_l3e_get_flags(*gl3e);
+    u32 sflags = sh_propagate_flags(v, sl2mfn, gflags, (guest_l1e_t *) gl3e,
+                                     gl3mfn, 0, 3, ft);
+
+    *sl3p = shadow_l3e_from_mfn(sl2mfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "%s gl3e=%" SH_PRI_gpte " sl3e=%" SH_PRI_pte "\n",
+                  fetch_type_names[ft], gl3e->l3, sl3p->l3);
+    ASSERT(sflags != -1);
+}
+#endif // GUEST_PAGING_LEVELS >= 3
+
+static void
+l2e_propagate_from_guest(struct vcpu *v, 
+                         guest_l2e_t *gl2e,
+                         mfn_t gl2mfn,
+                         mfn_t sl1mfn, 
+                         shadow_l2e_t *sl2p,
+                         fetch_type_t ft)
+{
+    u32 gflags = guest_l2e_get_flags(*gl2e);
+    u32 sflags = sh_propagate_flags(v, sl1mfn, gflags, (guest_l1e_t *) gl2e, 
+                                     gl2mfn, 0, 2, ft);
+
+    *sl2p = shadow_l2e_from_mfn(sl1mfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "%s gl2e=%" SH_PRI_gpte " sl2e=%" SH_PRI_pte "\n",
+                  fetch_type_names[ft], gl2e->l2, sl2p->l2);
+    ASSERT(sflags != -1);
+}
+
+static inline int
+l1e_read_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
+               int mmio)
+/* returns 1 if emulation is required, and 0 otherwise */
+{
+    struct domain *d = v->domain;
+    u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
+    u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
+                                     mmio, 1, ft_demand_read);
+
+    if ( shadow_mode_trap_reads(d) && !mmio && sh_mfn_is_a_page_table(gmfn) )
+    {
+        // emulation required!
+        *sl1p = shadow_l1e_empty();
+        return 1;
+    }
+
+    *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
+                  (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
+
+    ASSERT(sflags != -1);
+    return 0;
+}
+
+static inline int
+l1e_write_fault(struct vcpu *v, walk_t *gw, mfn_t gmfn, shadow_l1e_t *sl1p,
+                int mmio)
+/* returns 1 if emulation is required, and 0 otherwise */
+{
+    struct domain *d = v->domain;
+    u32 gflags = guest_l1e_get_flags(gw->eff_l1e);
+    u32 sflags = sh_propagate_flags(v, gmfn, gflags, gw->l1e, gw->l1mfn,
+                                     mmio, 1, ft_demand_write);
+
+    sh_mark_dirty(d, gmfn);
+
+    if ( !mmio && sh_mfn_is_a_page_table(gmfn) )
+    {
+        // emulation required!
+        *sl1p = shadow_l1e_empty();
+        return 1;
+    }
+
+    *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "va=%p eff_gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
+                  (void *)gw->va, gw->eff_l1e.l1, sl1p->l1);
+
+    ASSERT(sflags != -1);
+    return 0;
+}
+
+static inline void
+l1e_propagate_from_guest(struct vcpu *v, guest_l1e_t gl1e, shadow_l1e_t *sl1p,
+                         int mmio)
+{
+    gfn_t gfn = guest_l1e_get_gfn(gl1e);
+    mfn_t gmfn = (mmio) ? _mfn(gfn_x(gfn)) : vcpu_gfn_to_mfn(v, gfn);
+    u32 gflags = guest_l1e_get_flags(gl1e);
+    u32 sflags = sh_propagate_flags(v, gmfn, gflags, 0, _mfn(INVALID_MFN), 
+                                     mmio, 1, ft_prefetch);
+
+    *sl1p = shadow_l1e_from_mfn(gmfn, sflags);
+
+    SHADOW_DEBUG(PROPAGATE,
+                  "gl1e=%" SH_PRI_gpte " sl1e=%" SH_PRI_pte "\n",
+                  gl1e.l1, sl1p->l1);
+
+    ASSERT(sflags != -1);
+}
+
+
+/**************************************************************************/
+/* These functions update shadow entries (and do bookkeeping on the shadow
+ * tables they are in).  It is intended that they are the only
+ * functions which ever write (non-zero) data onto a shadow page.
+ *
+ * They return a set of flags: 
+ * SHADOW_SET_CHANGED -- we actually wrote a new value to the shadow.
+ * SHADOW_SET_FLUSH   -- the caller must cause a TLB flush.
+ * SHADOW_SET_ERROR   -- the input is not a valid entry (for example, if
+ *                        shadow_get_page_from_l1e() fails).
+ * SHADOW_SET_L3PAE_RECOPY -- one or more vcpu's need to have their local
+ *                             copies of their PAE L3 entries re-copied.
+ */
+
+static inline void safe_write_entry(void *dst, void *src) 
+/* Copy one PTE safely when processors might be running on the
+ * destination pagetable.   This does *not* give safety against
+ * concurrent writes (that's what the shadow lock is for), just 
+ * stops the hardware picking up partially written entries. */
+{
+    volatile unsigned long *d = dst;
+    unsigned long *s = src;
+    ASSERT(!((unsigned long) d & (sizeof (shadow_l1e_t) - 1)));
+#if CONFIG_PAGING_LEVELS == 3
+    /* In PAE mode, pagetable entries are larger
+     * than machine words, so won't get written atomically.  We need to make
+     * sure any other cpu running on these shadows doesn't see a
+     * half-written entry.  Do this by marking the entry not-present first,
+     * then writing the high word before the low word. */
+    BUILD_BUG_ON(sizeof (shadow_l1e_t) != 2 * sizeof (unsigned long));
+    d[0] = 0;
+    d[1] = s[1];
+    d[0] = s[0];
+#else
+    /* In 32-bit and 64-bit, sizeof(pte) == sizeof(ulong) == 1 word,
+     * which will be an atomic write, since the entry is aligned. */
+    BUILD_BUG_ON(sizeof (shadow_l1e_t) != sizeof (unsigned long));
+    *d = *s;
+#endif
+}
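
The ordering in the PAE branch above is what keeps a cross-CPU reader from seeing a dangerous intermediate state; a sketch of the hazard it avoids:

    /* If the low word were written first, another CPU could briefly observe
     *     { new low word (PRESENT, new flags) | old high word (old frame) }
     * i.e. a present entry pointing at the wrong frame.  By zeroing d[0]
     * first, every intermediate state is simply a not-present entry. */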
+
+
+static inline void 
+shadow_write_entries(void *d, void *s, int entries, mfn_t mfn)
+/* This function does the actual writes to shadow pages.
+ * It must not be called directly, since it doesn't do the bookkeeping
+ * that shadow_set_l*e() functions do. */
+{
+    shadow_l1e_t *dst = d;
+    shadow_l1e_t *src = s;
+    void *map = NULL;
+    int i;
+
+    /* Because we mirror access rights at all levels in the shadow, an
+     * l2 (or higher) entry with the RW bit cleared will leave us with
+     * no write access through the linear map.  
+     * We detect that by writing to the shadow with copy_to_user() and 
+     * using map_domain_page() to get a writeable mapping if we need to. */
+    if ( __copy_to_user(d, d, sizeof (unsigned long)) != 0 ) 
+    {
+        perfc_incrc(shadow_linear_map_failed);
+        map = sh_map_domain_page(mfn);
+        ASSERT(map != NULL);
+        dst = map + ((unsigned long)dst & (PAGE_SIZE - 1));
+    }
+
+
+    for ( i = 0; i < entries; i++ )
+        safe_write_entry(dst++, src++);
+
+    if ( map != NULL ) sh_unmap_domain_page(map);
+
+    /* XXX TODO:
+     * Update min/max field in page_info struct of this mfn */
+}
+
+static inline int
+perms_strictly_increased(u32 old_flags, u32 new_flags) 
+/* Given the flags of two entries, are the new flags a strict
+ * increase in rights over the old ones? */
+{
+    u32 of = old_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
+    u32 nf = new_flags & (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX);
+    /* Flip the NX bit, since it's the only one that decreases rights;
+     * we calculate as if it were an "X" bit. */
+    of ^= _PAGE_NX_BIT;
+    nf ^= _PAGE_NX_BIT;
+    /* If the changed bits are all set in the new flags, then rights strictly 
+     * increased between old and new. */
+    return ((of | (of ^ nf)) == nf);
+}
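
Some example evaluations of the test above (the NX bit is inverted before comparing, so adding NX counts as removing a right):

    /* perms_strictly_increased() examples:
     *   P       -> P|RW      : 1  (write access added)
     *   P|RW    -> P         : 0  (write access removed)
     *   P|RW    -> P|RW|NX   : 0  (execute removed via NX)
     *   P|NX    -> P|RW|NX   : 1  (write added, NX unchanged)
     */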
+
+static inline int
+shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
+{
+    int res;
+    mfn_t mfn;
+    struct domain *owner;
+    shadow_l1e_t sanitized_sl1e =
+        shadow_l1e_remove_flags(sl1e, _PAGE_SHADOW_RW | _PAGE_SHADOW_PRESENT);
+
+    //ASSERT(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT);
+    //ASSERT((shadow_l1e_get_flags(sl1e) & L1_DISALLOW_MASK) == 0);
+
+    if ( !shadow_mode_refcounts(d) )
+        return 1;
+
+    res = get_page_from_l1e(sanitized_sl1e, d);
+
+    // If a privileged domain is attempting to install a map of a page it does
+    // not own, we let it succeed anyway.
+    //
+    if ( unlikely(!res) &&
+         IS_PRIV(d) &&
+         !shadow_mode_translate(d) &&
+         valid_mfn(mfn = shadow_l1e_get_mfn(sl1e)) &&
+         (owner = page_get_owner(mfn_to_page(mfn))) &&
+         (d != owner) )
+    {
+        res = get_page_from_l1e(sanitized_sl1e, owner);
+        SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx "
+                       "which is owned by domain %d: %s\n",
+                       d->domain_id, mfn_x(mfn), owner->domain_id,
+                       res ? "success" : "failed");
+    }
+
+    if ( unlikely(!res) )
+    {
+        perfc_incrc(shadow_get_page_fail);
+        SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n");
+    }
+
+    return res;
+}
+
+static inline void
+shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
+{ 
+    if ( !shadow_mode_refcounts(d) )
+        return;
+
+    put_page_from_l1e(sl1e, d);
+}
+
+#if GUEST_PAGING_LEVELS >= 4
+static int shadow_set_l4e(struct vcpu *v, 
+                          shadow_l4e_t *sl4e, 
+                          shadow_l4e_t new_sl4e, 
+                          mfn_t sl4mfn)
+{
+    int flags = 0;
+    shadow_l4e_t old_sl4e;
+    paddr_t paddr;
+    ASSERT(sl4e != NULL);
+    old_sl4e = *sl4e;
+
+    if ( old_sl4e.l4 == new_sl4e.l4 ) return 0; /* Nothing to do */
+    
+    paddr = ((((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT) 
+             | (((unsigned long)sl4e) & ~PAGE_MASK));
+
+    if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT ) 
+    {
+        /* About to install a new reference */        
+        sh_get_ref(shadow_l4e_get_mfn(new_sl4e), paddr);
+    } 
+
+    /* Write the new entry */
+    shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
+    flags |= SHADOW_SET_CHANGED;
+
+    if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT ) 
+    {
+        /* We lost a reference to an old mfn. */
+        mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e);
+        if ( (mfn_x(osl3mfn) != mfn_x(shadow_l4e_get_mfn(new_sl4e)))
+             || !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e), 
+                                          shadow_l4e_get_flags(new_sl4e)) )
+        {
+            flags |= SHADOW_SET_FLUSH;
+        }
+        sh_put_ref(v, osl3mfn, paddr);
+    }
+    return flags;
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+#if GUEST_PAGING_LEVELS >= 3
+static int shadow_set_l3e(struct vcpu *v, 
+                          shadow_l3e_t *sl3e, 
+                          shadow_l3e_t new_sl3e, 
+                          mfn_t sl3mfn)
+{
+    int flags = 0;
+    shadow_l3e_t old_sl3e;
+    paddr_t paddr;
+    ASSERT(sl3e != NULL);
+    old_sl3e = *sl3e;
+
+    if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */
+
+    paddr = ((((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT) 
+             | (((unsigned long)sl3e) & ~PAGE_MASK));
+    
+    if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT ) 
+    {
+        /* About to install a new reference */        
+        sh_get_ref(shadow_l3e_get_mfn(new_sl3e), paddr);
+    } 
+
+    /* Write the new entry */
+    shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
+    flags |= SHADOW_SET_CHANGED;
+
+#if GUEST_PAGING_LEVELS == 3 
+    /* We wrote a guest l3e in a PAE pagetable.  This table is copied in
+     * the linear pagetable entries of its l2s, and may also be copied
+     * to a low memory location to make it fit in CR3.  Report that we
+     * need to resync those copies (we can't wait for the guest to flush
+     * the TLB because it might be an increase in rights). */
+    {
+        struct vcpu *vcpu;
+
+        struct pae_l3_bookkeeping *info = sl3p_to_info(sl3e);
+        for_each_vcpu(v->domain, vcpu)
+        {
+            if (info->vcpus & (1 << vcpu->vcpu_id))
+            {
+                // Remember that this flip/update needs to occur.
+                vcpu->arch.shadow.pae_flip_pending = 1;
+                flags |= SHADOW_SET_L3PAE_RECOPY;
+            }
+        }
+    }
+#endif
+
+    if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT ) 
+    {
+        /* We lost a reference to an old mfn. */
+        mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e);
+        if ( (mfn_x(osl2mfn) != mfn_x(shadow_l3e_get_mfn(new_sl3e))) ||
+             !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e), 
+                                       shadow_l3e_get_flags(new_sl3e)) ) 
+        {
+            flags |= SHADOW_SET_FLUSH;
+        }
+        sh_put_ref(v, osl2mfn, paddr);
+    }
+    return flags;
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */ 
+
+static int shadow_set_l2e(struct vcpu *v, 
+                          shadow_l2e_t *sl2e, 
+                          shadow_l2e_t new_sl2e, 
+                          mfn_t sl2mfn)
+{
+    int flags = 0;
+    shadow_l2e_t old_sl2e;
+    paddr_t paddr;
+
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+    /* In 2-on-3 we work with pairs of l2es pointing at two-page
+     * shadows.  Reference counting and up-pointers track from the first
+     * page of the shadow to the first l2e, so make sure that we're 
+     * working with those:     
+     * Align the pointer down so it's pointing at the first of the pair */
+    sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~(sizeof(shadow_l2e_t)));
+    /* Align the mfn of the shadow entry too */
+    new_sl2e.l2 &= ~(1<<PAGE_SHIFT);
+#endif
+
+    ASSERT(sl2e != NULL);
+    old_sl2e = *sl2e;
+    
+    if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */
+    
+    paddr = ((((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT)
+             | (((unsigned long)sl2e) & ~PAGE_MASK));
+
+    if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT ) 
+    {
+        /* About to install a new reference */
+        sh_get_ref(shadow_l2e_get_mfn(new_sl2e), paddr);
+    } 
+
+    /* Write the new entry */
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+    {
+        shadow_l2e_t pair[2] = { new_sl2e, new_sl2e };
+        /* The l1 shadow is two pages long and needs to be pointed to by
+         * two adjacent shadow l2es.  The pair have the same flags, but point
+         * at odd and even MFNs */
+        ASSERT(!(pair[0].l2 & (1<<PAGE_SHIFT)));
+        pair[1].l2 |= (1<<PAGE_SHIFT);
+        shadow_write_entries(sl2e, &pair, 2, sl2mfn);
+    }
+#else /* normal case */
+    shadow_write_entries(sl2e, &new_sl2e, 1, sl2mfn);
+#endif
+    flags |= SHADOW_SET_CHANGED;
+
+    if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT ) 
+    {
+        /* We lost a reference to an old mfn. */
+        mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e);
+        if ( (mfn_x(osl1mfn) != mfn_x(shadow_l2e_get_mfn(new_sl2e))) ||
+             !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e), 
+                                       shadow_l2e_get_flags(new_sl2e)) ) 
+        {
+            flags |= SHADOW_SET_FLUSH;
+        }
+        sh_put_ref(v, osl1mfn, paddr);
+    }
+    return flags;
+}
+
+static int shadow_set_l1e(struct vcpu *v, 
+                          shadow_l1e_t *sl1e, 
+                          shadow_l1e_t new_sl1e,
+                          mfn_t sl1mfn)
+{
+    int flags = 0;
+    struct domain *d = v->domain;
+    shadow_l1e_t old_sl1e;
+    ASSERT(sl1e != NULL);
+    
+    old_sl1e = *sl1e;
+
+    if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */
+    
+    if ( shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT ) 
+    {
+        /* About to install a new reference */        
+        if ( shadow_mode_refcounts(d) ) {
+            if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 ) 
+            {
+                /* Doesn't look like a pagetable. */
+                flags |= SHADOW_SET_ERROR;
+                new_sl1e = shadow_l1e_empty();
+            }
+        }
+    } 
+
+    /* Write the new entry */
+    shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
+    flags |= SHADOW_SET_CHANGED;
+
+    if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_PRESENT ) 
+    {
+        /* We lost a reference to an old mfn. */
+        /* N.B. Unlike higher-level sets, never need an extra flush 
+         * when writing an l1e.  Because it points to the same guest frame 
+         * as the guest l1e did, it's the guest's responsibility to
+         * trigger a flush later. */
+        if ( shadow_mode_refcounts(d) ) 
+        {
+            shadow_put_page_from_l1e(old_sl1e, d);
+        } 
+    }
+    return flags;
+}
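
A sketch of how callers are expected to consume the returned flag bits, using shadow_set_l2e() and the flush primitive already used earlier in this file (the demo_* name is illustrative):

    /* Sketch: install a shadow l2e and honour the returned flags. */
    static void demo_install_sl2e(struct vcpu *v, shadow_l2e_t *sl2p,
                                  shadow_l2e_t new_sl2e, mfn_t sl2mfn)
    {
        int flags = shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn);

        if ( flags & SHADOW_SET_FLUSH )
            /* An existing mapping lost rights: remote TLBs may be stale. */
            flush_tlb_mask(v->domain->domain_dirty_cpumask);
        /* SHADOW_SET_ERROR (l1 setter) and SHADOW_SET_L3PAE_RECOPY (PAE l3
         * setter) would be handled in the same style where they can occur. */
    }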
+
+
+/**************************************************************************/
+/* These functions take a vcpu and a virtual address, and return a pointer
+ * to the appropriate level N entry from the shadow tables.  
+ * If the necessary tables are not present in the shadow, they return NULL. */
+
+/* N.B. The use of GUEST_PAGING_LEVELS here is correct.  If the shadow has
+ * more levels than the guest, the upper levels are always fixed and do not 
+ * reflect any information from the guest, so we do not use these functions 
+ * to access them. */
+
+#if GUEST_PAGING_LEVELS >= 4
+static shadow_l4e_t *
+shadow_get_l4e(struct vcpu *v, unsigned long va)
+{
+    /* Reading the top level table is always valid. */
+    return sh_linear_l4_table(v) + shadow_l4_linear_offset(va);
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+
+#if GUEST_PAGING_LEVELS >= 3
+static shadow_l3e_t *
+shadow_get_l3e(struct vcpu *v, unsigned long va)
+{
+#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
+    /* Get the l4 */
+    shadow_l4e_t *sl4e = shadow_get_l4e(v, va);
+    ASSERT(sl4e != NULL);
+    if ( !(shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT) )
+        return NULL;
+    ASSERT(valid_mfn(shadow_l4e_get_mfn(*sl4e)));
+    /* l4 was present; OK to get the l3 */
+    return sh_linear_l3_table(v) + shadow_l3_linear_offset(va);
+#else /* PAE... */
+    /* Top level is always mapped */
+    ASSERT(v->arch.shadow_vtable);
+    return ((shadow_l3e_t *)v->arch.shadow_vtable) +
+        shadow_l3_linear_offset(va);
+#endif 
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+
+static shadow_l2e_t *
+shadow_get_l2e(struct vcpu *v, unsigned long va)
+{
+#if GUEST_PAGING_LEVELS >= 3  /* 64bit/PAE... */
+    /* Get the l3 */
+    shadow_l3e_t *sl3e = shadow_get_l3e(v, va);
+    if ( sl3e == NULL || !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
+        return NULL;
+    ASSERT(valid_mfn(shadow_l3e_get_mfn(*sl3e)));
+    /* l3 was present; OK to get the l2 */
+#endif
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(va);
+}
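
A minimal sketch of the NULL-checking these accessors require of their callers (the demo_* name is illustrative):

    /* Sketch: read the shadow l2e mapping va, if the upper levels exist. */
    static inline int demo_read_sl2e(struct vcpu *v, unsigned long va,
                                     shadow_l2e_t *out)
    {
        shadow_l2e_t *sl2p = shadow_get_l2e(v, va);
        if ( sl2p == NULL )
            return 0;            /* higher-level shadow entry not present */
        *out = *sl2p;
        return 1;
    }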
+
+
+#if 0 // avoid the compiler warning for now...
+
+static shadow_l1e_t *
+shadow_get_l1e(struct vcpu *v, unsigned long va)
+{
+    /* Get the l2 */
+    shadow_l2e_t *sl2e = shadow_get_l2e(v, va);
+    if ( sl2e == NULL || !(shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT) )
+        return NULL;
+    ASSERT(valid_mfn(shadow_l2e_get_mfn(*sl2e)));
+    /* l2 was present; OK to get the l1 */
+    return sh_linear_l1_table(v) + shadow_l1_linear_offset(va);
+}
+
+#endif
+
+
+/**************************************************************************/
+/* Macros to walk pagetables.  These take the shadow of a pagetable and 
+ * walk every "interesting" entry.  That is, they don't touch Xen mappings, 
+ * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every 
+ * second entry (since pairs of entries are managed together). For multi-page
+ * shadows they walk all pages.
+ * 
+ * Arguments are an MFN, the variable to point to each entry, a variable 
+ * to indicate that we are done (we will shortcut to the end of the scan 
+ * when _done != 0), a variable to indicate that we should avoid Xen mappings,
+ * and the code. 
+ *
+ * WARNING: These macros have side-effects.  They change the values of both 
+ * the pointer and the MFN. */ 
+
+static inline void increment_ptr_to_guest_entry(void *ptr)
+{
+    if ( ptr )
+    {
+        guest_l1e_t **entry = ptr;
+        (*entry)++;
+    }
+}
+
+/* All kinds of l1: touch all entries */
+#define _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)       \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l1e_t *_sp = map_shadow_page((_sl1mfn));                     \
+    ASSERT((mfn_to_page(_sl1mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l1_shadow                                         \
+           || (mfn_to_page(_sl1mfn)->count_info & PGC_SH_type_mask)    \
+           == PGC_SH_fl1_shadow);                                      \
+    for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        (_sl1e) = _sp + _i;                                             \
+        if ( shadow_l1e_get_flags(*(_sl1e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        increment_ptr_to_guest_entry(_gl1p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+/* 32-bit l1, on PAE or 64-bit shadows: need to walk both pages of shadow */
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+#define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done,  _code)       \
+do {                                                                    \
+    int __done = 0;                                                     \
+    _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p,                         \
+                         ({ (__done = _done); }), _code);               \
+    _sl1mfn = _mfn(mfn_x(_sl1mfn) + 1);                                 \
+    if ( !__done )                                                      \
+        _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p,                     \
+                             ({ (__done = _done); }), _code);           \
+} while (0)
+#else /* Everything else; l1 shadows are only one page */
+#define SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)        \
+       _SHADOW_FOREACH_L1E(_sl1mfn, _sl1e, _gl1p, _done, _code)
+#endif
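
A sketch of how the l1 iterator is meant to be used, bearing in mind the warning above that the macro may advance both the mfn and the guest pointer (the demo_* name is illustrative):

    /* Sketch: count the present entries in an l1 shadow. */
    static int demo_count_present_sl1es(mfn_t sl1mfn)
    {
        shadow_l1e_t *sl1e;
        int done = 0, count = 0;
        SHADOW_FOREACH_L1E(sl1mfn, sl1e, NULL, done, { count++; });
        return count;
    }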
+    
+
+#if GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS > 2
+
+/* 32-bit l2 on PAE/64: four pages, touch every second entry, and avoid Xen */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)    \
+do {                                                                      \
+    int _i, _j, __done = 0;                                               \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)         \
+           == PGC_SH_l2_32_shadow);                                      \
+    for ( _j = 0; _j < 4 && !__done; _j++ )                               \
+    {                                                                     \
+        shadow_l2e_t *_sp = map_shadow_page(_sl2mfn);                     \
+        for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i += 2 )         \
+            if ( (!(_xen))                                                \
+                 || ((_j * SHADOW_L2_PAGETABLE_ENTRIES) + _i)             \
+                 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT) ) \
+            {                                                             \
+                (_sl2e) = _sp + _i;                                       \
+                if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )     \
+                    {_code}                                               \
+                if ( (__done = (_done)) ) break;                          \
+                increment_ptr_to_guest_entry(_gl2p);                      \
+            }                                                             \
+        unmap_shadow_page(_sp);                                           \
+        _sl2mfn = _mfn(mfn_x(_sl2mfn) + 1);                               \
+    }                                                                     \
+} while (0)
+
+#elif GUEST_PAGING_LEVELS == 2
+
+/* 32-bit on 32-bit: avoid Xen entries */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)     \
+do {                                                                       \
+    int _i;                                                                \
+    shadow_l2e_t *_sp = map_shadow_page((_sl2mfn));                        \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)          \
+           == PGC_SH_l2_32_shadow);                                       \
+    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                 \
+        if ( (!(_xen))                                                     \
+             ||                                                            \
+             (_i < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
+        {                                                                  \
+            (_sl2e) = _sp + _i;                                            \
+            if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )          \
+                {_code}                                                    \
+            if ( _done ) break;                                            \
+            increment_ptr_to_guest_entry(_gl2p);                           \
+        }                                                                  \
+    unmap_shadow_page(_sp);                                                \
+} while (0)
+
+#elif GUEST_PAGING_LEVELS == 3
+
+/* PAE: if it's an l2h, don't touch Xen mappings */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)     \
+do {                                                                       \
+    int _i;                                                                \
+    shadow_l2e_t *_sp = map_shadow_page((_sl2mfn));                        \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)          \
+           == PGC_SH_l2_pae_shadow                                        \
+           || (mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l2h_pae_shadow);                                     \
+    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                 \
+        if ( (!(_xen))                                                     \
+             || ((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)    \
+                 != PGC_SH_l2h_pae_shadow)                                \
+             || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES))                  \
+                 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
+        {                                                                  \
+            (_sl2e) = _sp + _i;                                            \
+            if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )          \
+                {_code}                                                    \
+            if ( _done ) break;                                            \
+            increment_ptr_to_guest_entry(_gl2p);                           \
+        }                                                                  \
+    unmap_shadow_page(_sp);                                                \
+} while (0)
+
+#else 
+
+/* 64-bit l2: touch all entries */
+#define SHADOW_FOREACH_L2E(_sl2mfn, _sl2e, _gl2p, _done, _xen, _code)  \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l2e_t *_sp = map_shadow_page((_sl2mfn));                     \
+    ASSERT((mfn_to_page(_sl2mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l2_64_shadow);                                    \
+    for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        (_sl2e) = _sp + _i;                                             \
+        if ( shadow_l2e_get_flags(*(_sl2e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        increment_ptr_to_guest_entry(_gl2p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+#endif /* different kinds of l2 */
+
+#if GUEST_PAGING_LEVELS == 3
+
+/* PAE l3 subshadow: touch all entries (FOREACH_L2E will find Xen l2es). */
+#define SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p, _done, _code)             \
+do {                                                                    \
+    int _i;                                                             \
+    for ( _i = 0; _i < 4; _i++ )                                        \
+    {                                                                   \
+        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        _sl3e++;                                                        \
+        increment_ptr_to_guest_entry(_gl3p);                            \
+    }                                                                   \
+} while (0)
+
+/* PAE l3 full shadow: call subshadow walk on all valid l3 subshadows */
+#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
+do {                                                                    \
+    int _i, _j, _k, __done = 0;                                         \
+    ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l3_pae_shadow);                                   \
+    /* The subshadows are split, 64 on each page of the shadow */       \
+    for ( _j = 0; _j < 2 && !__done; _j++ )                             \
+    {                                                                   \
+        void *_sp = sh_map_domain_page(_sl3mfn);                       \
+        for ( _i = 0; _i < 64; _i++ )                                   \
+        {                                                               \
+            /* Every second 32-byte region is a bookkeeping entry */    \
+            _sl3e = (shadow_l3e_t *)(_sp + (64 * _i));                  \
+            if ( (sl3p_to_info(_sl3e))->refcount > 0 )                  \
+                SHADOW_FOREACH_L3E_SUB(_sl3e, _gl3p,                   \
+                                        ({ __done = (_done); __done; }), \
+                                        _code);                         \
+            else                                                        \
+                for ( _k = 0 ; _k < 4 ; _k++ )                          \
+                    increment_ptr_to_guest_entry(_gl3p);                \
+            if ( __done ) break;                                        \
+        }                                                               \
+        sh_unmap_domain_page(_sp);                                     \
+        _sl3mfn = _mfn(mfn_x(_sl3mfn) + 1);                             \
+    }                                                                   \
+} while (0)
+
+#elif GUEST_PAGING_LEVELS == 4
+
+/* 64-bit l3: touch all entries */
+#define SHADOW_FOREACH_L3E(_sl3mfn, _sl3e, _gl3p, _done, _code)        \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l3e_t *_sp = map_shadow_page((_sl3mfn));                     \
+    ASSERT((mfn_to_page(_sl3mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l3_64_shadow);                                    \
+    for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        (_sl3e) = _sp + _i;                                             \
+        if ( shadow_l3e_get_flags(*(_sl3e)) & _PAGE_PRESENT )           \
+            {_code}                                                     \
+        if ( _done ) break;                                             \
+        increment_ptr_to_guest_entry(_gl3p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+/* 64-bit l4: avoid Xen mappings */
+#define SHADOW_FOREACH_L4E(_sl4mfn, _sl4e, _gl4p, _done, _xen, _code)  \
+do {                                                                    \
+    int _i;                                                             \
+    shadow_l4e_t *_sp = map_shadow_page((_sl4mfn));                     \
+    ASSERT((mfn_to_page(_sl4mfn)->count_info & PGC_SH_type_mask)       \
+           == PGC_SH_l4_64_shadow);                                    \
+    for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ )              \
+    {                                                                   \
+        if ( (!(_xen)) || is_guest_l4_slot(_i) )                        \
+        {                                                               \
+            (_sl4e) = _sp + _i;                                         \
+            if ( shadow_l4e_get_flags(*(_sl4e)) & _PAGE_PRESENT )       \
+                {_code}                                                 \
+            if ( _done ) break;                                         \
+        }                                                               \
+        increment_ptr_to_guest_entry(_gl4p);                            \
+    }                                                                   \
+    unmap_shadow_page(_sp);                                             \
+} while (0)
+
+#endif
+
+
+
+/**************************************************************************/
+/* Functions to install Xen mappings and linear mappings in shadow pages */
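+
+/* Shadows of guest root tables in non-external domains need the
+ * hypervisor's own mappings: an l4 for 4-on-4, an l3 plus its l2h for
+ * 3-on-3 PV guests, or an l2 for 2-on-2.  The installers below copy the
+ * relevant Xen slots from the idle pagetables, add the per-domain
+ * mappings and, where possible at this point, the linear-pagetable and
+ * read-only p2m entries. */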
+
+static mfn_t sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type);
+
+// XXX -- this function should probably be moved to shadow-common.c, but that
+//        probably wants to wait until the shadow types have been moved from
+//        shadow-types.h to shadow-private.h
+//
+#if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
+void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
+{
+    struct domain *d = v->domain;
+    shadow_l4e_t *sl4e;
+
+    sl4e = sh_map_domain_page(sl4mfn);
+    ASSERT(sl4e != NULL);
+    ASSERT(sizeof (l4_pgentry_t) == sizeof (shadow_l4e_t));
+    
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+           &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
+           ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    sl4e[shadow_l4_table_offset(PERDOMAIN_VIRT_START)] =
+        shadow_l4e_from_mfn(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3)),
+                            __PAGE_HYPERVISOR);
+
+    /* Linear mapping */
+    sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] =
+        shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR);
+    sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
+        shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR);
+
+    if ( shadow_mode_translate(v->domain) )
+    {
+        /* install domain-specific P2M table */
+        sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] =
+            shadow_l4e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
+                                __PAGE_HYPERVISOR);
+    }
+
+    sh_unmap_domain_page(sl4e);    
+}
+#endif
+
+#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
+// For 3-on-3 PV guests, we need to make sure the xen mappings are in
+// place, which means that we need to populate the l2h entry in the l3
+// table.
+
+void sh_install_xen_entries_in_l2h(struct vcpu *v, 
+                                    mfn_t sl2hmfn)
+{
+    struct domain *d = v->domain;
+    shadow_l2e_t *sl2e;
+    int i;
+
+    sl2e = sh_map_domain_page(sl2hmfn);
+    ASSERT(sl2e != NULL);
+    ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
+    
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
+           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+        sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            shadow_l2e_from_mfn(
+                page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
+                __PAGE_HYPERVISOR);
+    
+    /* We don't set up a linear mapping here because we can't until this
+     * l2h is installed in an l3e.  sh_update_linear_entries() handles
+     * the linear mappings when the l3 is loaded. */
+
+    if ( shadow_mode_translate(d) )
+    {
+        /* Install the domain-specific p2m table */
+        l3_pgentry_t *p2m;
+        ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
+        p2m = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
+        for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
+        {
+            sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START) + i] =
+                shadow_l2e_from_mfn(_mfn(l3e_get_pfn(p2m[i])),
+                                    __PAGE_HYPERVISOR);
+        }
+        sh_unmap_domain_page(p2m);
+    }
+    
+    sh_unmap_domain_page(sl2e);
+}
+
+void sh_install_xen_entries_in_l3(struct vcpu *v, mfn_t gl3mfn, mfn_t sl3mfn)
+{
+    shadow_l3e_t *sl3e;
+    guest_l3e_t *gl3e = v->arch.guest_vtable;
+    shadow_l3e_t new_sl3e;
+    gfn_t l2gfn;
+    mfn_t l2gmfn, l2smfn;
+    int r;
+
+    ASSERT(!shadow_mode_external(v->domain));
+    ASSERT(guest_l3e_get_flags(gl3e[3]) & _PAGE_PRESENT);
+    l2gfn = guest_l3e_get_gfn(gl3e[3]);
+    l2gmfn = sh_gfn_to_mfn(v->domain, gfn_x(l2gfn));
+    l2smfn = get_shadow_status(v, l2gmfn, PGC_SH_l2h_shadow);
+    if ( !valid_mfn(l2smfn) )
+    {
+        l2smfn = sh_make_shadow(v, l2gmfn, PGC_SH_l2h_shadow);
+    }
+    l3e_propagate_from_guest(v, &gl3e[3], gl3mfn, l2smfn, &new_sl3e,
+                             ft_prefetch);
+    sl3e = sh_map_domain_page(sl3mfn);
+    r = shadow_set_l3e(v, &sl3e[3], new_sl3e, sl3mfn);
+    sh_unmap_domain_page(sl3e);
+}
+#endif
+
+
+#if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
+void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
+{
+    struct domain *d = v->domain;
+    shadow_l2e_t *sl2e;
+    int i;
+
+    sl2e = sh_map_domain_page(sl2mfn);
+    ASSERT(sl2e != NULL);
+    ASSERT(sizeof (l2_pgentry_t) == sizeof (shadow_l2e_t));
+    
+    /* Copy the common Xen mappings from the idle domain */
+    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
+           &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
+           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+
+    /* Install the per-domain mappings for this domain */
+    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
+        sl2e[shadow_l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            shadow_l2e_from_mfn(
+                page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i),
+                __PAGE_HYPERVISOR);
+
+    /* Linear mapping */
+    sl2e[shadow_l2_table_offset(LINEAR_PT_VIRT_START)] =
+        shadow_l2e_from_mfn(gl2mfn, __PAGE_HYPERVISOR);
+    sl2e[shadow_l2_table_offset(SH_LINEAR_PT_VIRT_START)] =
+        shadow_l2e_from_mfn(sl2mfn, __PAGE_HYPERVISOR);
+
+    if ( shadow_mode_translate(d) )
+    {
+        /* install domain-specific P2M table */
+        sl2e[shadow_l2_table_offset(RO_MPT_VIRT_START)] =
+            shadow_l2e_from_mfn(pagetable_get_mfn(d->arch.phys_table),
+                                __PAGE_HYPERVISOR);
+    }
+
+    sh_unmap_domain_page(sl2e);
+}
+#endif
+
+
+
+
+
+/**************************************************************************/
+/* Create a shadow of a given guest page.
+ */
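+/* Allocate a fresh shadow of the given type for gmfn, install the Xen
+ * mappings when this is a top-level shadow type of a non-external domain
+ * (using the installers above), promote the guest page to its shadowed
+ * type, and record the new shadow with set_shadow_status(). */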
+static mfn_t
+sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
+{
+    mfn_t smfn = shadow_alloc(v->domain, shadow_type, mfn_x(gmfn));
+    SHADOW_DEBUG(MAKE_SHADOW, "(%05lx, %u)=>%05lx\n",
+                  mfn_x(gmfn), shadow_type, mfn_x(smfn));
+
+    if ( shadow_type != PGC_SH_guest_root_type )
+        /* Lower-level shadow, not yet linked from a higher level */
+        mfn_to_page(smfn)->up = 0;
+
+    // Create the Xen mappings...
+    if ( !shadow_mode_external(v->domain) )
+    {
+        switch (shadow_type) 
+        {
+#if CONFIG_PAGING_LEVELS == 4 && GUEST_PAGING_LEVELS == 4
+        case PGC_SH_l4_shadow:
+            sh_install_xen_entries_in_l4(v, gmfn, smfn); break;
+#endif
+#if CONFIG_PAGING_LEVELS == 3 && GUEST_PAGING_LEVELS == 3
+        case PGC_SH_l3_shadow:
+            sh_install_xen_entries_in_l3(v, gmfn, smfn); break;
+        case PGC_SH_l2h_shadow:
+            sh_install_xen_entries_in_l2h(v, smfn); break;
+#endif
+#if CONFIG_PAGING_LEVELS == 2 && GUEST_PAGING_LEVELS == 2
+        case PGC_SH_l2_shadow:
+            sh_install_xen_entries_in_l2(v, gmfn, smfn); break;
+#endif
+        default: /* Do nothing */ break;
+        }
+    }
+    
+    shadow_promote(v, gmfn, shadow_type);
+    set_shadow_status(v, gmfn, shadow_type, smfn);
+
+    return smfn;
+}
+
+/* Make a splintered superpage shadow */
+static mfn_t
+make_fl1_shadow(struct vcpu *v, gfn_t gfn)
+{
+    mfn_t smfn = shadow_alloc(v->domain, PGC_SH_fl1_shadow,
+                               (unsigned long) gfn_x(gfn));
+
+    SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" SH_PRI_mfn "\n",
+                  gfn_x(gfn), mfn_x(smfn));
+
+    set_fl1_shadow_status(v, gfn, smfn);
+    return smfn;
+}
+
+
+#if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
+mfn_t
+sh_make_monitor_table(struct vcpu *v)
+{
+
+    ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
+    
+#if CONFIG_PAGING_LEVELS == 4    
+    {
+        struct domain *d = v->domain;
+        mfn_t m4mfn;
+        m4mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        sh_install_xen_entries_in_l4(v, m4mfn, m4mfn);
+        /* Remember the level of this table */
+        mfn_to_page(m4mfn)->shadow_flags = 4;
+#if SHADOW_PAGING_LEVELS < 4
+        // Install a monitor l3 table in slot 0 of the l4 table.
+        // This is used for shadow linear maps.
+        {
+            mfn_t m3mfn; 
+            l4_pgentry_t *l4e;
+            m3mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+            mfn_to_page(m3mfn)->shadow_flags = 3;
+            l4e = sh_map_domain_page(m4mfn);
+            l4e[0] = l4e_from_pfn(mfn_x(m3mfn), __PAGE_HYPERVISOR);
+            sh_unmap_domain_page(l4e);
+        }
+#endif /* SHADOW_PAGING_LEVELS < 4 */
+        return m4mfn;
+    }
+
+#elif CONFIG_PAGING_LEVELS == 3
+
+    {
+        struct domain *d = v->domain;
+        mfn_t m3mfn, m2mfn; 
+        l3_pgentry_t *l3e;
+        l2_pgentry_t *l2e;
+        int i;
+
+        m3mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        /* Remember the level of this table */
+        mfn_to_page(m3mfn)->shadow_flags = 3;
+
+        // Install a monitor l2 table in slot 3 of the l3 table.
+        // This is used for all Xen entries, including linear maps
+        m2mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        mfn_to_page(m2mfn)->shadow_flags = 2;
+        l3e = sh_map_domain_page(m3mfn);
+        l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
+        sh_install_xen_entries_in_l2h(v, m2mfn);
+        /* Install the monitor's own linear map */
+        l2e = sh_map_domain_page(m2mfn);
+        for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+            l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
+                (l3e_get_flags(l3e[i]) & _PAGE_PRESENT) 
+                ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR) 
+                : l2e_empty();
+        sh_unmap_domain_page(l2e);
+        sh_unmap_domain_page(l3e);
+
+        SHADOW_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
+        return m3mfn;
+    }
+
+#elif CONFIG_PAGING_LEVELS == 2
+
+    {
+        struct domain *d = v->domain;
+        mfn_t m2mfn;
+        m2mfn = shadow_alloc(d, PGC_SH_monitor_table, 0);
+        sh_install_xen_entries_in_l2(v, m2mfn, m2mfn);
+        /* Remember the level of this table */
+        mfn_to_page(m2mfn)->shadow_flags = 2;
+        return m2mfn;
+    }
+
+#else
+#error this should not happen
+#endif /* CONFIG_PAGING_LEVELS */
+}
+#endif /* SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS */
+
+/**************************************************************************/
+/* These functions also take a virtual address and return the level-N
+ * shadow table mfn and entry, but they create the shadow pagetables if
+ * they are needed.  The fetch_type_t argument tells us whether we are
+ * handling a demand fault (so we know what to do about accessed bits &c).
+ * If the necessary tables are not present in the guest, they return NULL. */
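+/* They chain together top-down: shadow_get_and_create_l1e() calls
+ * ..._l2e(), which calls ..._l3e(), which on 64-bit guests calls
+ * ..._l4e().  At each level the shadow of the next table is looked up
+ * with get_shadow_status() and, if it does not exist yet, built with
+ * sh_make_shadow() (or make_fl1_shadow() when splintering a superpage),
+ * then wired into the level above with lNe_propagate_from_guest() and
+ * shadow_set_lNe(). */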
+#if GUEST_PAGING_LEVELS >= 4
+static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v, 
+                                                walk_t *gw, 
+                                                mfn_t *sl4mfn)
+{
+    /* There is always a shadow of the top level table.  Get it. */
+    *sl4mfn = pagetable_get_mfn(v->arch.shadow_table);
+    /* Reading the top level table is always valid. */
+    return sh_linear_l4_table(v) + shadow_l4_linear_offset(gw->va);
+}
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+
+
+#if GUEST_PAGING_LEVELS >= 3
+static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v, 
+                                                walk_t *gw, 
+                                                mfn_t *sl3mfn,
+                                                fetch_type_t ft)
+{
+#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
+    mfn_t sl4mfn;
+    shadow_l4e_t *sl4e;
+    if ( !valid_mfn(gw->l3mfn) ) return NULL; /* No guest page. */
+    /* Get the l4e */
+    sl4e = shadow_get_and_create_l4e(v, gw, &sl4mfn);
+    ASSERT(sl4e != NULL);
+    if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT ) 
+    {
+        *sl3mfn = shadow_l4e_get_mfn(*sl4e);
+        ASSERT(valid_mfn(*sl3mfn));
+    } 
+    else 
+    {
+        int r;
+        shadow_l4e_t new_sl4e;
+        /* No l3 shadow installed: find and install it. */
+        *sl3mfn = get_shadow_status(v, gw->l3mfn, PGC_SH_l3_shadow);
+        if ( !valid_mfn(*sl3mfn) ) 
+        {
+            /* No l3 shadow of this page exists at all: make one. */
+            *sl3mfn = sh_make_shadow(v, gw->l3mfn, PGC_SH_l3_shadow);
+        }
+        /* Install the new sl3 table in the sl4e */
+        l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn, 
+                                 *sl3mfn, &new_sl4e, ft);
+        r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
+        ASSERT((r & SHADOW_SET_FLUSH) == 0);
+    }
+    /* Now follow it down a level.  Guaranteed to succeed. */
+    return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
+#else /* PAE... */
+    /* There is always a shadow of the top level table.  Get it. */
+    *sl3mfn = pagetable_get_mfn(v->arch.shadow_table);
+    /* This next line is important: the shadow l3 table is in an 8k
+     * shadow and we need to return the right mfn of the pair. This call
+     * will set it for us as a side-effect. */
+    (void) shadow_l3_index(sl3mfn, guest_index(gw->l3e));
+    ASSERT(v->arch.shadow_vtable);
+    return ((shadow_l3e_t *)v->arch.shadow_vtable) 
+        + shadow_l3_table_offset(gw->va);
+#endif /* GUEST_PAGING_LEVELS >= 4 */
+}
+#endif /* GUEST_PAGING_LEVELS >= 3 */
+
+
+static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v, 
+                                                walk_t *gw, 
+                                                mfn_t *sl2mfn,
+                                                fetch_type_t ft)
+{
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64bit... */
+    mfn_t sl3mfn = _mfn(INVALID_MFN);
+    shadow_l3e_t *sl3e;
+    if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
+    /* Get the l3e */
+    sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft);
+    ASSERT(sl3e != NULL);  /* Since we know guest PT is valid this far */
+    if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT ) 
+    {
+        *sl2mfn = shadow_l3e_get_mfn(*sl3e);
+        ASSERT(valid_mfn(*sl2mfn));
+    } 
+    else 
+    {
+        int r;
+        shadow_l3e_t new_sl3e;
+        /* No l2 shadow installed: find and install it. */
+        *sl2mfn = get_shadow_status(v, gw->l2mfn, PGC_SH_l2_shadow);
+        if ( !valid_mfn(*sl2mfn) ) 
+        {
+            /* No l2 shadow of this page exists at all: make one. */
+            *sl2mfn = sh_make_shadow(v, gw->l2mfn, PGC_SH_l2_shadow);
+        }
+        /* Install the new sl2 table in the sl3e */
+        l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn, 
+                                 *sl2mfn, &new_sl3e, ft);
+        r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
+        ASSERT((r & SHADOW_SET_FLUSH) == 0);
+#if GUEST_PAGING_LEVELS == 3 
+        /* Need to sync up the linear maps, as we are about to use them */
+        ASSERT( r & SHADOW_SET_L3PAE_RECOPY );
+        sh_pae_recopy(v->domain);
+#endif
+    }
+    /* Now follow it down a level.  Guaranteed to succeed. */
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
+#else /* 32bit... */
+    /* There is always a shadow of the top level table.  Get it. */
+    *sl2mfn = pagetable_get_mfn(v->arch.shadow_table);
+    /* This next line is important: the guest l2 has a 16k
+     * shadow, and we need to return the right mfn of the four. This
+     * call will set it for us as a side-effect. */
+    (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
+    /* Reading the top level table is always valid. */
+    return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
+#endif 
+}
+
+
+static shadow_l1e_t * shadow_get_and_create_l1e(struct vcpu *v, 
+                                                walk_t *gw, 
+                                                mfn_t *sl1mfn,
+                                                fetch_type_t ft)
+{
+    mfn_t sl2mfn;
+    shadow_l2e_t *sl2e;
+
+    /* Get the l2e */
+    sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft);
+    if ( sl2e == NULL ) return NULL;
+    if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT ) 
+    {
+        *sl1mfn = shadow_l2e_get_mfn(*sl2e);
+        ASSERT(valid_mfn(*sl1mfn));
+    } 
+    else 
+    {
+        shadow_l2e_t new_sl2e;
+        int r, flags = guest_l2e_get_flags(*gw->l2e);
+        /* No l1 shadow installed: find and install it. */
+        if ( !(flags & _PAGE_PRESENT) )
+            return NULL; /* No guest page. */
+        if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) ) 
+        {
+            /* Splintering a superpage */
+            gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
+            *sl1mfn = get_fl1_shadow_status(v, l2gfn);
+            if ( !valid_mfn(*sl1mfn) ) 
+            {
+                /* No fl1 shadow of this superpage exists at all: make one. */
+                *sl1mfn = make_fl1_shadow(v, l2gfn);
+            }
+        } 
+        else 
+        {
+            /* Shadowing an actual guest l1 table */
+            if ( !valid_mfn(gw->l2mfn) ) return NULL; /* No guest page. */
+            *sl1mfn = get_shadow_status(v, gw->l1mfn, PGC_SH_l1_shadow);
+            if ( !valid_mfn(*sl1mfn) ) 
+            {
+                /* No l1 shadow of this page exists at all: make one. */
+                *sl1mfn = sh_make_shadow(v, gw->l1mfn, PGC_SH_l1_shadow);
+            }
+        }
+        /* Install the new sl1 table in the sl2e */
+        l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn, 
+                                 *sl1mfn, &new_sl2e, ft);
+        r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
+        ASSERT((r & SHADOW_SET_FLUSH) == 0);        
+        /* This next line is important: in 32-on-PAE and 32-on-64 modes,
+         * the guest l1 table has an 8k shadow, and we need to return
+         * the right mfn of the pair. This call will set it for us as a
+         * side-effect.  (In all other cases, it's a no-op and will be
+         * compiled out.) */
+        (void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
+    }
+    /* Now follow it down a level.  Guaranteed to succeed. */
+    return sh_linear_l1_table(v) + shadow_l1_linear_offset(gw->va);
+}
+
+
+
+/**************************************************************************/
+/* Destructors for shadow tables: 
+ * Unregister the shadow, decrement refcounts of any entries present in it,
+ * and release the memory.
+ *
+ * N.B. These destructors do not clear the contents of the shadows.
+ *      This allows us to delay TLB shootdowns until the page is being reused.
+ *      See shadow_alloc() and shadow_free() for how this is handled.
+ */
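+/* Each destructor below follows the same pattern: remove the shadow from
+ * the hash (delete_shadow_status(), or delete_fl1_shadow_status() for
+ * splintered superpage shadows), demote the guest page, walk the shadow
+ * with the matching SHADOW_FOREACH_LNE() macro to drop the reference held
+ * by each present entry (for l1 shadows only when the domain refcounts
+ * its pagetables), and hand the page back with shadow_free(). */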
+
+#if GUEST_PAGING_LEVELS >= 4
+void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
+{
+    shadow_l4e_t *sl4e;
+    u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+    mfn_t gmfn, sl4mfn;
+    int xen_mappings;
+
+    SHADOW_DEBUG(DESTROY_SHADOW,
+                  "%s(%05lx)\n", __func__, mfn_x(smfn));
+    ASSERT(t == PGC_SH_l4_shadow);
+
+    /* Record that the guest page isn't shadowed any more (in this type) */
+    gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+    delete_shadow_status(v, gmfn, t, smfn);
+    shadow_demote(v, gmfn, t);
+    /* Take this shadow off the list of root shadows */
+    list_del_init(&mfn_to_page(smfn)->list);
+
+    /* Decrement refcounts of all the old entries */
+    xen_mappings = (!shadow_mode_external(v->domain));
+    sl4mfn = smfn; 
+    SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, xen_mappings, {
+        if ( shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT ) 
+        {
+            sh_put_ref(v, shadow_l4e_get_mfn(*sl4e),
+                        (((paddr_t)mfn_x(sl4mfn)) << PAGE_SHIFT) 
+                        | ((unsigned long)sl4e & ~PAGE_MASK));
+        }
+    });
+    
+    /* Put the memory back in the pool */
+    shadow_free(v->domain, smfn);
+}
+#endif    
+
+#if GUEST_PAGING_LEVELS >= 3
+void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
+{
+    shadow_l3e_t *sl3e;
+    u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+    mfn_t gmfn, sl3mfn;
+
+    SHADOW_DEBUG(DESTROY_SHADOW,
+                  "%s(%05lx)\n", __func__, mfn_x(smfn));
+    ASSERT(t == PGC_SH_l3_shadow);
+
+    /* Record that the guest page isn't shadowed any more (in this type) */
+    gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+    delete_shadow_status(v, gmfn, t, smfn);
+    shadow_demote(v, gmfn, t);
+#if GUEST_PAGING_LEVELS == 3
+    /* Take this shadow off the list of root shadows */
+    list_del_init(&mfn_to_page(smfn)->list);
+#endif
+
+    /* Decrement refcounts of all the old entries */
+    sl3mfn = smfn; 
+    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
+        if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT ) 
+            sh_put_ref(v, shadow_l3e_get_mfn(*sl3e),
+                        (((paddr_t)mfn_x(sl3mfn)) << PAGE_SHIFT) 
+                        | ((unsigned long)sl3e & ~PAGE_MASK));
+    });
+
+    /* Put the memory back in the pool */
+    shadow_free(v->domain, smfn);
+}
+#endif    
+
+
+#if GUEST_PAGING_LEVELS == 3
+static void sh_destroy_l3_subshadow(struct vcpu *v, 
+                                     shadow_l3e_t *sl3e)
+/* Tear down just a single 4-entry l3 on a 2-page l3 shadow. */
+{
+    int i;
+    ASSERT((unsigned long)sl3e % (4 * sizeof (shadow_l3e_t)) == 0); 
+    for ( i = 0; i < GUEST_L3_PAGETABLE_ENTRIES; i++ ) 
+        if ( shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT ) 
+            sh_put_ref(v, shadow_l3e_get_mfn(sl3e[i]),
+                        maddr_from_mapped_domain_page(sl3e));
+}
+#endif
+
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+void sh_unpin_all_l3_subshadows(struct vcpu *v, mfn_t smfn)
+/* Walk a full PAE l3 shadow, unpinning all of the subshadows on it */
+{
+    int i, j;
+    struct pae_l3_bookkeeping *bk;
+    
+    ASSERT((mfn_to_page(smfn)->count_info & PGC_SH_type_mask) 
+           == PGC_SH_l3_pae_shadow);
+    /* The subshadows are split, 64 on each page of the shadow */
+    for ( i = 0; i < 2; i++ ) 
+    {
+        void *p = sh_map_domain_page(_mfn(mfn_x(smfn) + i));
+        for ( j = 0; j < 64; j++ )
+        {
+            /* Every second 32-byte region is a bookkeeping entry */
+            bk = (struct pae_l3_bookkeeping *)(p + (64 * j) + 32);
+            if ( bk->pinned )
+                sh_unpin_l3_subshadow(v, (shadow_l3e_t *)(p + (64*j)), smfn);
+            /* Check whether we've just freed the whole shadow */
+            if ( (mfn_to_page(smfn)->count_info & PGC_SH_count_mask) == 0 ) 
+            {
+                sh_unmap_domain_page(p);
+                return;
+            }
+        }
+        sh_unmap_domain_page(p);
+    }
+}
+#endif
+
+void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
+{
+    shadow_l2e_t *sl2e;
+    u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+    mfn_t gmfn, sl2mfn;
+    int xen_mappings;
+
+    SHADOW_DEBUG(DESTROY_SHADOW,
+                  "%s(%05lx)\n", __func__, mfn_x(smfn));
+    ASSERT(t == PGC_SH_l2_shadow 
+           || t == PGC_SH_l2h_pae_shadow);
+
+    /* Record that the guest page isn't shadowed any more (in this type) */
+    gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+    delete_shadow_status(v, gmfn, t, smfn);
+    shadow_demote(v, gmfn, t);
+#if GUEST_PAGING_LEVELS == 2
+    /* Take this shadow off the list of root shadows */
+    list_del_init(&mfn_to_page(smfn)->list);
+#endif
+
+    /* Decrement refcounts of all the old entries */
+    sl2mfn = smfn;
+    xen_mappings = (!shadow_mode_external(v->domain) &&
+                    ((GUEST_PAGING_LEVELS == 2) ||
+                     ((GUEST_PAGING_LEVELS == 3) &&
+                      (t == PGC_SH_l2h_pae_shadow))));
+    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
+        if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT ) 
+            sh_put_ref(v, shadow_l2e_get_mfn(*sl2e),
+                        (((paddr_t)mfn_x(sl2mfn)) << PAGE_SHIFT) 
+                        | ((unsigned long)sl2e & ~PAGE_MASK));
+    });
+
+    /* Put the memory back in the pool */
+    shadow_free(v->domain, smfn);
+}
+
+void sh_destroy_l1_shadow(struct vcpu *v, mfn_t smfn)
+{
+    struct domain *d = v->domain;
+    shadow_l1e_t *sl1e;
+    u32 t = mfn_to_page(smfn)->count_info & PGC_SH_type_mask;
+
+    SHADOW_DEBUG(DESTROY_SHADOW,
+                  "%s(%05lx)\n", __func__, mfn_x(smfn));
+    ASSERT(t == PGC_SH_l1_shadow || t == PGC_SH_fl1_shadow);
+
+    /* Record that the guest page isn't shadowed any more (in this type) */
+    if ( t == PGC_SH_fl1_shadow )
+    {
+        gfn_t gfn = _gfn(mfn_to_page(smfn)->u.inuse.type_info);
+        delete_fl1_shadow_status(v, gfn, smfn);
+    }
+    else 
+    {
+        mfn_t gmfn = _mfn(mfn_to_page(smfn)->u.inuse.type_info);
+        delete_shadow_status(v, gmfn, t, smfn);
+        shadow_demote(v, gmfn, t);
+    }
+    
+    if ( shadow_mode_refcounts(d) )
+    {
+        /* Decrement refcounts of all the old entries */
+        mfn_t sl1mfn = smfn; 
+        SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
+            if ( shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT ) 
+                shadow_put_page_from_l1e(*sl1e, d);
+        });
+    }
+    
+    /* Put the memory back in the pool */
+    shadow_free(v->domain, smfn);
+}
+
+#if SHADOW_PAGING_LEVELS == GUEST_PAGING_LEVELS
+void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
+{
+    struct domain *d = v->domain;
+    ASSERT((mfn_to_page(mmfn)->count_info & PGC_SH_type_mask)
+           == PGC_SH_monitor_table);
+
+#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
+    /* Need to destroy the l3 monitor page in slot 0 too */
+    {
+        l4_pgentry_t *l4e = sh_map_domain_page(mmfn);
+        ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
+        shadow_free(d, _mfn(l4e_get_pfn(l4e[0])));
+        sh_unmap_domain_page(l4e);
+    }
+#elif CONFIG_PAGING_LEVELS == 3
+    /* Need to destroy the l2 monitor page in slot 3 too */
+    {
+        l3_pgentry_t *l3e = sh_map_domain_page(mmfn);
+        ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+        shadow_free(d, _mfn(l3e_get_pfn(l3e[3])));
+        sh_unmap_domain_page(l3e);
+    }
+#endif
+
+    /* Put the memory back in the pool */
+    shadow_free(d, mmfn);
+}
+#endif
+
+/**************************************************************************/
+/* Functions to destroy non-Xen mappings in a pagetable hierarchy.
+ * These are called from common code when we are running out of shadow
+ * memory, and unpinning all the top-level shadows hasn't worked. 
+ *
+ * This implementation is pretty crude and slow, but we hope that it won't 
+ * be called very often. */
+
+#if GUEST_PAGING_LEVELS == 2
+
+void sh_unhook_32b_mappings(struct vcpu *v, mfn_t sl2mfn)
+{    
+    shadow_l2e_t *sl2e;
+    int xen_mappings = !shadow_mode_external(v->domain);
+    SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, xen_mappings, {
+        (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+    });
+}
+
+#elif GUEST_PAGING_LEVELS == 3
+
+void sh_unhook_pae_mappings(struct vcpu *v, mfn_t sl3mfn)
+/* Walk a full PAE l3 shadow, unhooking entries from all the subshadows */
+{
+    shadow_l3e_t *sl3e;
+    SHADOW_FOREACH_L3E(sl3mfn, sl3e, 0, 0, {
+        if ( (shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) {
+            mfn_t sl2mfn = shadow_l3e_get_mfn(*sl3e);
+            if ( (mfn_to_page(sl2mfn)->count_info & PGC_SH_type_mask) 
+                 == PGC_SH_l2h_pae_shadow ) 
+            {
+                /* High l2: need to pick particular l2es to unhook */
+                shadow_l2e_t *sl2e;
+                SHADOW_FOREACH_L2E(sl2mfn, sl2e, 0, 0, 1, {
+                    (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
+                });
+            }
+            else
+            {
+                /* Normal l2: can safely unhook the whole l3e */
+                (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
+            }
+        }
+    });
+    /* We've changed PAE L3 entries: must sync up various copies of them */
+    sh_pae_recopy(v->domain);
+}
+
+#elif GUEST_PAGING_LEVELS == 4
+
+void sh_unhook_64b_mappings(struct vcpu *v, mfn_t sl4mfn)
+{
+    shadow_l4e_t *sl4e;
+    int xen_mappings = !shadow_mode_external(v->domain);
+    SHADOW_FOREACH_L4E(sl4mfn, sl4e, 0, 0, xen_mappings, {
+        (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
+    });
+}
+
+#endif
+
+/**************************************************************************/
+/* Internal translation functions.
+ * These functions require a pointer to the shadow entry that will be updated.
+ */
+
+/* These functions take a new guest entry, translate it to shadow and write 
+ * the shadow entry.
+ *
+ * They return the same bitmaps as the shadow_set_lXe() functions.
+ */
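+/* Each validate_glNe() has the same shape: if the new guest entry is
+ * present, look up (but do not create) the shadow of the table it points
+ * to with get_shadow_status(); then build the shadow entry with
+ * lNe_propagate_from_guest() and install it with shadow_set_lNe(),
+ * accumulating SHADOW_SET_* flags in the return value.  validate_gl1e()
+ * translates the gfn directly to an mfn instead of looking up a shadow. */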
+
+#if GUEST_PAGING_LEVELS >= 4
+static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
+{
+    shadow_l4e_t new_sl4e;
+    guest_l4e_t *new_gl4e = new_ge;
+    shadow_l4e_t *sl4p = se;
+    mfn_t sl3mfn = _mfn(INVALID_MFN);
+    int result = 0;
+
+    perfc_incrc(shadow_validate_gl4e_calls);
+
+    if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
+    {
+        gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
+        mfn_t gl3mfn = vcpu_gfn_to_mfn(v, gl3gfn);
+        if ( valid_mfn(gl3mfn) )
+            sl3mfn = get_shadow_status(v, gl3mfn, PGC_SH_l3_shadow);
+        else
+            result |= SHADOW_SET_ERROR;
+    }
+    l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
+                             sl3mfn, &new_sl4e, ft_prefetch);
+    result |= shadow_set_l4e(v, sl4p, new_sl4e, sl4mfn);
+    return result;
+}
+#endif // GUEST_PAGING_LEVELS >= 4
+
+#if GUEST_PAGING_LEVELS >= 3
+static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
+{
+    shadow_l3e_t new_sl3e;
+    guest_l3e_t *new_gl3e = new_ge;
+    shadow_l3e_t *sl3p = se;
+    mfn_t sl2mfn = _mfn(INVALID_MFN);
+    int result = 0;
+
+    perfc_incrc(shadow_validate_gl3e_calls);
+
+    if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
+    {
+        gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
+        mfn_t gl2mfn = vcpu_gfn_to_mfn(v, gl2gfn);
+        if ( valid_mfn(gl2mfn) )
+            sl2mfn = get_shadow_status(v, gl2mfn, PGC_SH_l2_shadow);
+        else
+            result |= SHADOW_SET_ERROR;
+    }
+    l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN), 
+                             sl2mfn, &new_sl3e, ft_prefetch);
+    result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
+
+#if GUEST_PAGING_LEVELS == 3
+    /* We have changed a PAE l3 entry: need to sync up the possible copies 
+     * of it */
+    if ( result & SHADOW_SET_L3PAE_RECOPY )
+        sh_pae_recopy(v->domain);
+#endif
+
+    return result;
+}
+#endif // GUEST_PAGING_LEVELS >= 3
+
+static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
+{
+    shadow_l2e_t new_sl2e;
+    guest_l2e_t *new_gl2e = new_ge;
+    shadow_l2e_t *sl2p = se;
+    mfn_t sl1mfn = _mfn(INVALID_MFN);
+    int result = 0;
+
+    perfc_incrc(shadow_validate_gl2e_calls);
+
+    if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
+    {
+        gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
+        if ( guest_supports_superpages(v) &&
+             (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
+        {
+            // superpage -- need to look up the shadow L1 which holds the
+            // splinters...
+            sl1mfn = get_fl1_shadow_status(v, gl1gfn);
+#if 0
+            // XXX - it's possible that we want to do some kind of prefetch
+            // for superpage fl1's here, but this is *not* on the demand path,
+            // so we'll hold off trying that for now...
+            //
+            if ( !valid_mfn(sl1mfn) )
+                sl1mfn = make_fl1_shadow(v, gl1gfn);
+#endif
+        }
+        else
+        {
+            mfn_t gl1mfn = vcpu_gfn_to_mfn(v, gl1gfn);
+            if ( valid_mfn(gl1mfn) )
+                sl1mfn = get_shadow_status(v, gl1mfn, PGC_SH_l1_shadow);
+            else
+                result |= SHADOW_SET_ERROR;
+        }
+    }
+    l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
+                             sl1mfn, &new_sl2e, ft_prefetch);
+    result |= shadow_set_l2e(v, sl2p, new_sl2e, sl2mfn);
+
+    return result;
+}
+
+static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
+{
+    shadow_l1e_t new_sl1e;
+    guest_l1e_t *new_gl1e = new_ge;
+    shadow_l1e_t *sl1p = se;
+    gfn_t gfn;
+    mfn_t mfn;
+    int result = 0;
+
+    perfc_incrc(shadow_validate_gl1e_calls);
+
+    gfn = guest_l1e_get_gfn(*new_gl1e);
+    mfn = vcpu_gfn_to_mfn(v, gfn);
+
+    l1e_propagate_from_guest(v, *new_gl1e, &new_sl1e, 
+                             /* mmio? */ !valid_mfn(mfn));
+    
+    result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
+    return result;
+}
+
+
+/**************************************************************************/
+/* Functions which translate and install the shadows of arbitrary guest
+ * entries that we have just seen the guest write. */
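+/* sh_map_and_validate() below does the work; it is parameterised by a
+ * shadow_index() callback (to find the right page and slot of a possibly
+ * multi-page shadow) and a validate_ge() callback (one of the
+ * validate_glNe() functions above).  The sh_map_and_validate_glNe()
+ * wrappers simply pick the right pair for each level of guest table. */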
+
+
+static inline int 
+sh_map_and_validate(struct vcpu *v, mfn_t gmfn,
+                     void *new_gp, u32 size, u32 sh_type, 
+                     u32 (*shadow_index)(mfn_t *smfn, u32 idx),
+                     int (*validate_ge)(struct vcpu *v, void *ge, 
+                                        mfn_t smfn, void *se))
+/* Generic function for mapping and validating. */
+{
+    mfn_t smfn, smfn2, map_mfn;
+    shadow_l1e_t *sl1p;
+    u32 shadow_idx, guest_idx;
+    int result = 0;
+
+    /* Align address and size to guest entry boundaries */
+    size += (unsigned long)new_gp & (sizeof (guest_l1e_t) - 1);
+    new_gp = (void *)((unsigned long)new_gp & ~(sizeof (guest_l1e_t) - 1));
+    size = (size + sizeof (guest_l1e_t) - 1) & ~(sizeof (guest_l1e_t) - 1);
+    ASSERT(size + (((unsigned long)new_gp) & ~PAGE_MASK) <= PAGE_SIZE);
+
+    /* Map the shadow page */
+    smfn = get_shadow_status(v, gmfn, sh_type);
+    ASSERT(valid_mfn(smfn)); /* Otherwise we would not have been called */
+    guest_idx = guest_index(new_gp);
+    map_mfn = smfn;
+    shadow_idx = shadow_index(&map_mfn, guest_idx);
+    sl1p = map_shadow_page(map_mfn);
+
+    /* Validate one entry at a time */
+    while ( size )
+    {
+        smfn2 = smfn;
+        guest_idx = guest_index(new_gp);
+        shadow_idx = shadow_index(&smfn2, guest_idx);
+        if ( mfn_x(smfn2) != mfn_x(map_mfn) )
+        {
+            /* We have moved to another page of the shadow */
+            map_mfn = smfn2;
+            unmap_shadow_page(sl1p);
+            sl1p = map_shadow_page(map_mfn);
+        }
+        result |= validate_ge(v,
+                              new_gp,
+                              map_mfn,
+                              &sl1p[shadow_idx]);
+        size -= sizeof(guest_l1e_t);
+        new_gp += sizeof(guest_l1e_t);
+    }
+    unmap_shadow_page(sl1p);
+    return result;
+}
+
+
+int
+sh_map_and_validate_gl4e(struct vcpu *v, mfn_t gl4mfn,
+                          void *new_gl4p, u32 size)
+{
+#if GUEST_PAGING_LEVELS >= 4
+    return sh_map_and_validate(v, gl4mfn, new_gl4p, size, 
+                                PGC_SH_l4_shadow, 
+                                shadow_l4_index, 
+                                validate_gl4e);
+#else // ! GUEST_PAGING_LEVELS >= 4
+    SHADOW_PRINTK("called in wrong paging mode!\n");
+    BUG();
+    return 0;
+#endif 
+}
+    
+int
+sh_map_and_validate_gl3e(struct vcpu *v, mfn_t gl3mfn,
+                          void *new_gl3p, u32 size)
+{
+#if GUEST_PAGING_LEVELS >= 3
+    return sh_map_and_validate(v, gl3mfn, new_gl3p, size, 
+                                PGC_SH_l3_shadow, 
+                                shadow_l3_index, 
+                                validate_gl3e);
+#else // ! GUEST_PAGING_LEVELS >= 3
+    SHADOW_PRINTK("called in wrong paging mode!\n");
+    BUG();
+    return 0;
+#endif
+}
+
+int
+sh_map_and_validate_gl2e(struct vcpu *v, mfn_t gl2mfn,
+                          void *new_gl2p, u32 size)
+{
+    return sh_map_and_validate(v, gl2mfn, new_gl2p, size, 
+                                PGC_SH_l2_shadow, 
+                                shadow_l2_index, 
+                                validate_gl2e);
+}
+
+int
+sh_map_and_validate_gl2he(struct vcpu *v, mfn_t gl2mfn,
+                           void *new_gl2p, u32 size)
+{
+#if GUEST_PAGING_LEVELS == 3
+    return sh_map_and_validate(v, gl2mfn, new_gl2p, size, 
+                                PGC_SH_l2h_shadow, 
+                                shadow_l2_index, 
+                                validate_gl2e);
+#else /* Non-PAE guests don't have different kinds of l2 table */
+    SHADOW_PRINTK("called in wrong paging mode!\n");
+    BUG();
+    return 0;
+#endif
+}
+
+int
+sh_map_and_validate_gl1e(struct vcpu *v, mfn_t gl1mfn,
+                          void *new_gl1p, u32 size)
+{
+    return sh_map_and_validate(v, gl1mfn, new_gl1p, size, 
+                                PGC_SH_l1_shadow, 
+                                shadow_l1_index, 
+                                validate_gl1e);
+}
+
+
+/**************************************************************************/
+/* Optimization: If we see two emulated writes of zeros to the same
+ * page-table without another kind of page fault in between, we guess
+ * that this is a batch of changes (for process destruction) and
+ * unshadow the page so we don't take a pagefault on every entry.  This
+ * should also make finding writeable mappings of pagetables much
+ * easier. */
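+/* reset_early_unshadow() is called from sh_page_fault() below whenever a
+ * fault is fixed, turned into MMIO, or handed back to the guest, so the
+ * "last emulated mfn" record only survives across back-to-back emulated
+ * writes; the emulation path itself does not reset it. */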
+
+/* Look to see if this is the second emulated write in a row to this
+ * page, and unshadow/unhook if it is */
+static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn)
+{
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    if ( v->arch.shadow.last_emulated_mfn == mfn_x(gmfn) &&
+         sh_mfn_is_a_page_table(gmfn) )
+    {
+        u32 flags = mfn_to_page(gmfn)->shadow_flags;
+        mfn_t smfn;
+        if ( !(flags & (SHF_L2_32|SHF_L3_PAE|SHF_L4_64)) )
+        {
+            perfc_incrc(shadow_early_unshadow);
+            sh_remove_shadows(v, gmfn, 0 /* Can fail to unshadow */ );
+            return;
+        }
+        /* SHF_unhooked_mappings is set to make sure we only unhook
+         * once in a single batch of updates. It is reset when this
+         * top-level page is loaded into CR3 again */
+        if ( !(flags & SHF_unhooked_mappings) ) 
+        {
+            perfc_incrc(shadow_early_unshadow_top);
+            mfn_to_page(gmfn)->shadow_flags |= SHF_unhooked_mappings;
+            if ( flags & SHF_L2_32 )
+            {
+                smfn = get_shadow_status(v, gmfn, PGC_SH_l2_32_shadow);
+                shadow_unhook_mappings(v, smfn);
+            }
+            if ( flags & SHF_L3_PAE ) 
+            {
+                smfn = get_shadow_status(v, gmfn, PGC_SH_l3_pae_shadow);
+                shadow_unhook_mappings(v, smfn);
+            }
+            if ( flags & SHF_L4_64 ) 
+            {
+                smfn = get_shadow_status(v, gmfn, PGC_SH_l4_64_shadow);
+                shadow_unhook_mappings(v, smfn);
+            }
+        }
+    }
+    v->arch.shadow.last_emulated_mfn = mfn_x(gmfn);
+#endif
+}
+
+/* Stop counting towards early unshadows, as we've seen a real page fault */
+static inline void reset_early_unshadow(struct vcpu *v)
+{
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    v->arch.shadow.last_emulated_mfn = INVALID_MFN;
+#endif
+}
+
+
+
+/**************************************************************************/
+/* Entry points into the shadow code */
+
+/* Called from pagefault handler in Xen, and from the HVM trap handlers
+ * for pagefaults.  Returns 1 if this fault was an artefact of the
+ * shadow code (and the guest should retry) or 0 if it is not (and the
+ * fault should be handled elsewhere or passed to the guest). */
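+/* Outline of the flow below: walk the guest pagetables; bail early for
+ * faults the guest must handle itself (not-present, user access to a
+ * supervisor page, write to a read-only mapping, NX violation);
+ * otherwise build the chain of shadow tables with
+ * shadow_get_and_create_l1e(), compute and install the new sl1e, and
+ * finish by fixing the fault, handing the access to handle_mmio(), or
+ * emulating the faulting instruction when the propagation code asks for
+ * it. */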
+
+static int sh_page_fault(struct vcpu *v, 
+                          unsigned long va, 
+                          struct cpu_user_regs *regs)
+{
+    struct domain *d = v->domain;
+    walk_t gw;
+    u32 accumulated_gflags;
+    gfn_t gfn;
+    mfn_t gmfn, sl1mfn=_mfn(0);
+    shadow_l1e_t sl1e, *ptr_sl1e;
+    paddr_t gpa;
+    struct cpu_user_regs emul_regs;
+    struct x86_emulate_ctxt emul_ctxt;
+    int r, mmio;
+    fetch_type_t ft = 0;
+
+    //
+    // XXX: Need to think about eventually mapping superpages directly in the
+    //      shadow (when possible), as opposed to splintering them into a
+    //      bunch of 4K maps.
+    //
+
+    SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
+                   v->domain->domain_id, v->vcpu_id, va, regs->error_code);
+    
+    shadow_lock(d);
+
+    shadow_audit_tables(v);
+                   
+    if ( guest_walk_tables(v, va, &gw, 1) != 0 )
+    {
+        SHADOW_PRINTK("malformed guest pagetable!\n");
+        print_gw(&gw);
+    }
+
+    sh_audit_gw(v, &gw);
+
+    // We do not look at the gw->l1e, as that will not exist for superpages.
+    // Instead, we use the gw->eff_l1e...
+    //
+    // We need not check all the levels of the guest page table entries for
+    // present vs not-present, as the eff_l1e will always be not present if
+    // one of the higher level entries is not present.
+    //
+    if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
+    {
+        if ( hvm_guest(v) && !shadow_vcpu_mode_translate(v) )
+        {
+            /* Not present in the p2m map, which means this is mmio */
+            gpa = va;
+            goto mmio;
+        }
+
+        perfc_incrc(shadow_fault_bail_not_present);
+        goto not_a_shadow_fault;
+    }
+
+    // All levels of the guest page table are now known to be present.
+    accumulated_gflags = accumulate_guest_flags(&gw);
+
+    // Check for attempts to access supervisor-only pages from user mode,
+    // i.e. ring 3.  Such errors are not caused or dealt with by the shadow
+    // code.
+    //
+    if ( (regs->error_code & PFEC_user_mode) &&
+         !(accumulated_gflags & _PAGE_USER) )
+    {
+        /* illegal user-mode access to supervisor-only page */
+        perfc_incrc(shadow_fault_bail_user_supervisor);
+        goto not_a_shadow_fault;
+    }
+
+    // Was it a write fault?
+    //
+    if ( regs->error_code & PFEC_write_access )
+    {
+        if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
+        {
+            perfc_incrc(shadow_fault_bail_ro_mapping);
+            goto not_a_shadow_fault;
+        }
+    }
+    else // must have been either an insn fetch or read fault
+    {
+        // Check for NX bit violations: attempts to execute code that is
+        // marked "do not execute".  Such errors are not caused or dealt with
+        // by the shadow code.
+        //
+        if ( regs->error_code & PFEC_insn_fetch )
+        {
+            if ( accumulated_gflags & _PAGE_NX_BIT )
+            {
+                /* NX prevented this code fetch */
+                perfc_incrc(shadow_fault_bail_nx);
+                goto not_a_shadow_fault;
+            }
+        }
+    }
+
+    /* Is this an MMIO access? */
+    gfn = guest_l1e_get_gfn(gw.eff_l1e);
+    mmio = ( hvm_guest(v) 
+             && shadow_vcpu_mode_translate(v) 
+             && mmio_space(gfn_to_paddr(gfn)) );
+
+    /* For MMIO, the shadow holds the *gfn*; for normal accesses, it holds
+     * the equivalent mfn. */
+    if ( mmio ) 
+        gmfn = _mfn(gfn_x(gfn));
+    else
+    {
+        gmfn = vcpu_gfn_to_mfn(v, gfn);
+        if ( !valid_mfn(gmfn) )
+        {
+            perfc_incrc(shadow_fault_bail_bad_gfn);
+            SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n", 
+                           gfn_x(gfn), mfn_x(gmfn));
+            goto not_a_shadow_fault;
+        }
+    }
+
+    /* Make sure there is enough free shadow memory to build a chain of
+     * shadow tables: one SHADOW_MAX_ORDER chunk will always be enough
+     * to allocate all we need.  (We never allocate a top-level shadow
+     * on this path, only a 32b l1, pae l2+1 or 64b l3+2+1) */
+    shadow_prealloc(d, SHADOW_MAX_ORDER);
+
+    /* Acquire the shadow.  This must happen before we figure out the rights 
+     * for the shadow entry, since we might promote a page here. */
+    // XXX -- this code will need to change somewhat if/when the shadow code
+    // can directly map superpages...
+    ft = ((regs->error_code & PFEC_write_access) ?
+          ft_demand_write : ft_demand_read);
+    ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft);
+    ASSERT(ptr_sl1e);
+
+    /* Calculate the shadow entry */
+    if ( ft == ft_demand_write )
+    {
+        if ( l1e_write_fault(v, &gw, gmfn, &sl1e, mmio) )
+        {
+            perfc_incrc(shadow_fault_emulate_write);
+            goto emulate;
+        }
+    }
+    else if ( l1e_read_fault(v, &gw, gmfn, &sl1e, mmio) )
+    {
+        perfc_incrc(shadow_fault_emulate_read);
+        goto emulate;
+    }
+
+    /* Quick sanity check: we never make an MMIO entry that's got the 
+     * _PAGE_PRESENT flag set in it. */
+    ASSERT(!mmio || !(shadow_l1e_get_flags(sl1e) & _PAGE_PRESENT));
+
+    r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
+
+    if ( mmio ) 
+    {
+        gpa = guest_walk_to_gpa(&gw);
+        goto mmio;
+    }
+
+#if 0
+    if ( !(r & SHADOW_SET_CHANGED) )
+        debugtrace_printk("%s: shadow_set_l1e(va=%p, sl1e=%" SH_PRI_pte
+                          ") did not change anything\n",
+                          __func__, gw.va, l1e_get_intpte(sl1e));
+#endif
+
+    perfc_incrc(shadow_fault_fixed);
+    d->arch.shadow.fault_count++;
+    reset_early_unshadow(v);
+
+ done:
+    sh_audit_gw(v, &gw);
+    unmap_walk(v, &gw);
+    SHADOW_PRINTK("fixed\n");
+    shadow_audit_tables(v);
+    shadow_unlock(d);
+    return EXCRET_fault_fixed;
+
+ emulate:
+
+    /* Take the register set we were called with */
+    emul_regs = *regs;
+    if ( hvm_guest(v) )
+    {
+        /* Add the guest's segment selectors, rip, rsp, rflags */ 
+        hvm_store_cpu_guest_regs(v, &emul_regs, NULL);
+    }
+    emul_ctxt.regs = &emul_regs;
+    emul_ctxt.cr2 = va;
+    emul_ctxt.mode = hvm_guest(v) ? hvm_guest_x86_mode(v) : X86EMUL_MODE_HOST;
+
+    SHADOW_PRINTK("emulate: eip=%#lx\n", emul_regs.eip);
+
+    v->arch.shadow.propagate_fault = 0;
+    if ( x86_emulate_memop(&emul_ctxt, &shadow_emulator_ops) )
+    {
+        SHADOW_PRINTK("emulator failure, unshadowing mfn %#lx\n", 
+                       mfn_x(gmfn));
+        perfc_incrc(shadow_fault_emulate_failed);
+        /* If this is actually a page table, then we have a bug, and need 
+         * to support more operations in the emulator.  More likely, 
+         * though, this is a hint that this page should not be shadowed. */
+        shadow_remove_all_shadows(v, gmfn);
+        /* This means that actual missing operations will cause the 
+         * guest to loop on the same page fault. */
+        goto done;
+    }
+    if ( v->arch.shadow.propagate_fault )
+    {
+        /* Emulation triggered another page fault */
+        goto not_a_shadow_fault;
+    }
+
+    /* Emulator has changed the user registers: write back */
+    if ( hvm_guest(v) )
+    {
+        /* Write back the guest's segment selectors, rip, rsp, rflags */ 
+        hvm_load_cpu_guest_regs(v, &emul_regs);
+        /* And don't overwrite those in the caller's regs. */
+        emul_regs.eip = regs->eip;
+        emul_regs.cs = regs->cs;
+        emul_regs.eflags = regs->eflags;
+        emul_regs.esp = regs->esp;
+        emul_regs.ss = regs->ss;
+        emul_regs.es = regs->es;
+        emul_regs.ds = regs->ds;
+        emul_regs.fs = regs->fs;
+        emul_regs.gs = regs->gs;
+    }
+    *regs = emul_regs;
+
+    goto done;
+
+ mmio:
+    perfc_incrc(shadow_fault_mmio);
+    if ( !hvm_apic_support(d) && (gpa >= 0xFEC00000) )
+    {
+        /* Need to deal with these disabled-APIC accesses, as
+         * handle_mmio() apparently does not currently do that. */
+        /* TJD: What about it, then?   For now, I'm turning this BUG() 
+         * into a domain_crash() since we don't want to kill Xen. */
+        SHADOW_ERROR("disabled-APIC access: not supported.\n");
+        domain_crash(d); 
+    }
+    sh_audit_gw(v, &gw);
+    unmap_walk(v, &gw);
+    SHADOW_PRINTK("mmio\n");
+    shadow_audit_tables(v);
+    reset_early_unshadow(v);
+    shadow_unlock(d);
+    sh_log_mmio(v, gpa);
+    handle_mmio(va, gpa);
+    return EXCRET_fault_fixed;
+
+ not_a_shadow_fault:
+    sh_audit_gw(v, &gw);
+    unmap_walk(v, &gw);
+    SHADOW_PRINTK("not a shadow fault\n");
+    shadow_audit_tables(v);
+    reset_early_unshadow(v);
+    shadow_unlock(d);
+    return 0;
+}
+
+
+static int
+sh_invlpg(struct vcpu *v, unsigned long va)
+/* Called when the guest requests an invlpg.  Returns 1 if the invlpg
+ * instruction should be issued on the hardware, or 0 if it's safe not
+ * to do so. */
+{
+    shadow_l2e_t *ptr_sl2e = shadow_get_l2e(v, va);
+
+    // XXX -- might be a good thing to prefetch the va into the shadow
+
+    // no need to flush anything if there's no SL2...
+    //
+    if ( !ptr_sl2e )
+        return 0;
+
+    // If there's nothing shadowed for this particular sl2e, then
+    // there is no need to do an invlpg, either...
+    //
+    if ( !(shadow_l2e_get_flags(*ptr_sl2e) & _PAGE_PRESENT) )
+        return 0;
+
+    // Check to see if the SL2 is a splintered superpage...
+    // If so, then we'll need to flush the entire TLB (because that's
+    // easier than invalidating all of the individual 4K pages).
+    //
+    if ( (mfn_to_page(shadow_l2e_get_mfn(*ptr_sl2e))->count_info &
+          PGC_SH_type_mask) == PGC_SH_fl1_shadow )
+    {
+        local_flush_tlb();
+        return 0;
+    }
+
+    return 1;
+}
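+
+/* A minimal usage sketch for sh_invlpg()'s return value; the wrapper function
+ * and its name are purely illustrative, and the raw invlpg instruction stands
+ * in for whatever per-VA flush primitive a real caller would use. */
+#if 0
+static void example_handle_guest_invlpg(struct vcpu *v, unsigned long va)
+{
+    if ( sh_invlpg(v, va) )
+        /* Something is shadowed at this va: invalidate the stale TLB entry. */
+        __asm__ __volatile__ ( "invlpg %0" : : "m" (*(char *)va) : "memory" );
+    /* Otherwise sh_invlpg() has either flushed the whole TLB already or found
+     * nothing shadowed here, so the hardware invlpg can safely be skipped. */
+}
+#endif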
+
+static unsigned long
+sh_gva_to_gfn(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to what the *guest*
+ * pagetables would map it to. */
+{
+    walk_t gw;
+    gfn_t gfn;
+
+    guest_walk_tables(v, va, &gw, 0);
+    gfn = guest_walk_to_gfn(&gw);
+    unmap_walk(v, &gw);
+
+    return gfn_x(gfn);
+}
+
+
+static unsigned long
+sh_gva_to_gpa(struct vcpu *v, unsigned long va)
+/* Called to translate a guest virtual address to the guest physical
+ * address that the *guest* pagetables would map it to. */
+{
+    unsigned long gfn = sh_gva_to_gfn(v, va);
+    if ( gfn == INVALID_GFN )
+        return 0;
+    else
+        return (gfn << PAGE_SHIFT) | (va & ~PAGE_MASK);
+}
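+
+/* Worked example of the arithmetic above, assuming 4kB pages
+ * (PAGE_SHIFT == 12): if the guest walk of va = 0x12345678 yields gfn 0x54,
+ * the returned gpa is (0x54 << 12) | 0x678 == 0x54678.  If the walk fails
+ * (gfn == INVALID_GFN), 0 is returned instead. */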
+
+
+// XXX -- should this be in this file?
+//        Or should it be moved to shadow-common.c?
+//
+/* returns a lowmem machine address of the copied HVM L3 root table
+ * If clear_res != 0, then clear the PAE-l3 reserved bits in the copy,
+ * otherwise blank out any entries with reserved bits in them.  */
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+static unsigned long
+hvm_pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab, int clear_res)
+{
+    int i, f;
+    int res = (_PAGE_RW|_PAGE_NX_BIT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY);
+    l3_pgentry_t new_l3e, *copy = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+    memcpy(copy, l3tab, 4 * sizeof(l3_pgentry_t));
+    for ( i = 0; i < 4; i++ )
+    {
+        f = l3e_get_flags(l3tab[i]);
+        if ( (f & _PAGE_PRESENT) && (!(f & res) || clear_res) )
+            new_l3e = l3e_from_pfn(l3e_get_pfn(l3tab[i]), f & ~res);
+        else
+            new_l3e = l3e_empty();
+        safe_write_entry(&copy[i], &new_l3e);
+    }
+    return __pa(copy);
+}
+#endif
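+
+/* Usage sketch for the two modes of hvm_pae_copy_root(), mirroring its call
+ * sites later in this file: while the vcpu is not yet in translated (paging
+ * enabled) mode, the reserved bits are simply cleared from the copy; once
+ * paging is enabled, any l3e that has reserved bits set is dropped entirely.
+ *
+ *     v->arch.hvm_vcpu.hw_cr3 =
+ *         hvm_pae_copy_root(v, v->arch.shadow_vtable,
+ *                           !shadow_vcpu_mode_translate(v));
+ */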
+
+
+static inline void
+sh_update_linear_entries(struct vcpu *v)
+/* Sync up all the linear mappings for this vcpu's pagetables */
+{
+    struct domain *d = v->domain;
+
+    /* Linear pagetables in PV guests
+     * ------------------------------
+     *
+     * Guest linear pagetables, which map the guest pages, are at
+     * LINEAR_PT_VIRT_START.  Shadow linear pagetables, which map the
+     * shadows, are at SH_LINEAR_PT_VIRT_START.  Most of the time these
+     * are set up at shadow creation time, but (of course!) the PAE case
+     * is subtler.  Normal linear mappings are made by having an entry
+     * in the top-level table that points to itself (shadow linear) or
+     * to the guest top-level table (guest linear).  For PAE, to set up
+     * a linear map requires us to copy the four top-level entries into 
+     * level-2 entries.  That means that every time we change a PAE l3e,
+     * we need to reflect the change into the copy.
+     *
+     * Linear pagetables in HVM guests
+     * -------------------------------
+     *
+     * For HVM guests, the linear pagetables are installed in the monitor
+     * tables (since we can't put them in the shadow).  Shadow linear
+     * pagetables, which map the shadows, are at SH_LINEAR_PT_VIRT_START,
+     * and we use the linear pagetable slot at LINEAR_PT_VIRT_START for 
+     * a linear pagetable of the monitor tables themselves.  We have 
+     * the same issue of having to re-copy PAE l3 entries whenever we use
+     * PAE shadows. 
+     *
+     * Because HVM guests run on the same monitor tables regardless of the 
+     * shadow tables in use, the linear mapping of the shadow tables has to 
+     * be updated every time v->arch.shadow_table changes. 
+     */
+
+    /* Don't try to update the monitor table if it doesn't exist */
+    if ( shadow_mode_external(d) 
+         && pagetable_get_pfn(v->arch.monitor_table) == 0 ) 
+        return;
+
+#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 4)
+    
+    /* For PV, one l4e points at the guest l4, one points at the shadow
+     * l4.  No maintenance required. 
+     * For HVM, just need to update the l4e that points to the shadow l4. */
+
+    if ( shadow_mode_external(d) )
+    {
+        /* Use the linear map if we can; otherwise make a new mapping */
+        if ( v == current ) 
+        {
+            __linear_l4_table[l4_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
+                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                             __PAGE_HYPERVISOR);
+        } 
+        else
+        { 
+            l4_pgentry_t *ml4e;
+            ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+            ml4e[l4_table_offset(SH_LINEAR_PT_VIRT_START)] = 
+                l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                             __PAGE_HYPERVISOR);
+            sh_unmap_domain_page(ml4e);
+        }
+    }
+
+#elif (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS == 3)
+
+    /* This case only exists in HVM.  To give ourselves a linear map of the 
+     * shadows, we need to extend a PAE shadow to 4 levels.  We do this by 
+     * having a monitor l3 in slot 0 of the monitor l4 table, and 
+     * copying the PAE l3 entries into it.  Then, by having the monitor l4e
+     * for shadow pagetables also point to the monitor l4, we can use it
+     * to access the shadows. */
+
+    if ( shadow_mode_external(d) )
+    {
+        /* Install copies of the shadow l3es into the monitor l3 table.
+         * The monitor l3 table is hooked into slot 0 of the monitor
+         * l4 table, so we use l3 linear indices 0 to 3 */
+        shadow_l3e_t *sl3e;
+        l3_pgentry_t *ml3e;
+        mfn_t l3mfn;
+        int i;
+
+        /* Use linear mappings if we can; otherwise make new mappings */
+        if ( v == current ) 
+        {
+            ml3e = __linear_l3_table;
+            l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0]));
+#if GUEST_PAGING_LEVELS == 2
+            /* Shadow l3 tables are made up by update_cr3 */
+            sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+#else
+            sl3e = v->arch.shadow_vtable;
+#endif
+        }
+        else 
+        {   
+            l4_pgentry_t *ml4e;
+            ml4e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+            ASSERT(l4e_get_flags(ml4e[0]) & _PAGE_PRESENT);
+            l3mfn = _mfn(l4e_get_pfn(ml4e[0]));
+            ml3e = sh_map_domain_page(l3mfn);
+            sh_unmap_domain_page(ml4e);
+#if GUEST_PAGING_LEVELS == 2
+            /* Shadow l3 tables are made up by update_cr3 */
+            sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+#else
+            sl3e = sh_map_domain_page(pagetable_get_mfn(v->arch.shadow_table));
+#endif
+        }
+
+        for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+        {
+            ml3e[i] = 
+                (shadow_l3e_get_flags(sl3e[i]) & _PAGE_PRESENT) 
+                ? l3e_from_pfn(mfn_x(shadow_l3e_get_mfn(sl3e[i])), 
+                               __PAGE_HYPERVISOR) 
+                : l3e_empty();
+        }
+
+        if ( v != current ) 
+        {
+            sh_unmap_domain_page(ml3e);
+#if GUEST_PAGING_LEVELS != 2
+            sh_unmap_domain_page(sl3e);
+#endif
+        }
+    }
+
+#elif CONFIG_PAGING_LEVELS == 3
+
+    /* PV: need to copy the guest's l3 entries into the guest-linear-map l2
+     * entries in the shadow, and the shadow's l3 entries into the 
+     * shadow-linear-map l2 entries in the shadow.  This is safe to do 
+     * because Xen does not let guests share high-slot l2 tables between l3s,
+     * so we know we're not treading on anyone's toes. 
+     *
+     * HVM: need to copy the shadow's l3 entries into the
+     * shadow-linear-map l2 entries in the monitor table.  This is safe
+     * because we have one monitor table for each vcpu.  The monitor's
+     * own l3es don't need to be copied because they never change.  
+     * XXX That might change if we start stuffing things into the rest
+     * of the monitor's virtual address space. 
+     */ 
+    {
+        l2_pgentry_t *l2e, new_l2e;
+        shadow_l3e_t *guest_l3e = NULL, *shadow_l3e;
+        int i;
+
+#if GUEST_PAGING_LEVELS == 2
+        /* Shadow l3 tables were built by update_cr3 */
+        if ( shadow_mode_external(d) )
+            shadow_l3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab;
+        else
+            BUG(); /* PV 2-on-3 is not supported yet */
+        
+#else /* GUEST_PAGING_LEVELS == 3 */
+        
+        /* Use local vcpu's mappings if we can; otherwise make new mappings */
+        if ( v == current ) 
+        {
+            shadow_l3e = v->arch.shadow_vtable;
+            if ( !shadow_mode_external(d) )
+                guest_l3e = v->arch.guest_vtable;
+        }
+        else 
+        {
+            mfn_t smfn;
+            int idx;
+            
+            /* Map the shadow l3 */
+            smfn = pagetable_get_mfn(v->arch.shadow_table);
+            idx = shadow_l3_index(&smfn, guest_index(v->arch.shadow_vtable));
+            shadow_l3e = sh_map_domain_page(smfn);
+            shadow_l3e += idx;
+            if ( !shadow_mode_external(d) )
+            {
+                /* Also the guest l3 */
+                mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table); 
+                guest_l3e = sh_map_domain_page(gmfn);
+                guest_l3e += guest_index(v->arch.guest_vtable);
+            }
+        }
+#endif /* GUEST_PAGING_LEVELS */
+        
+        /* Choose where to write the entries, using linear maps if possible */
+        if ( v == current && shadow_mode_external(d) ) 
+        {
+            /* From the monitor tables, it's safe to use linear maps to update
+             * monitor l2s */
+            l2e = __linear_l2_table + (3 * L2_PAGETABLE_ENTRIES);
+        }
+        else if ( shadow_mode_external(d) ) 
+        {
+            /* Map the monitor table's high l2 */
+            l3_pgentry_t *l3e;
+            l3e = sh_map_domain_page(
+                pagetable_get_mfn(v->arch.monitor_table));
+            ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
+            l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[3])));
+            sh_unmap_domain_page(l3e);
+        } 
+        else 
+        {
+            /* Map the shadow table's high l2 */
+            ASSERT(shadow_l3e_get_flags(shadow_l3e[3]) & _PAGE_PRESENT);
+            l2e = sh_map_domain_page(shadow_l3e_get_mfn(shadow_l3e[3]));
+        }
+        
+        
+        if ( !shadow_mode_external(d) )
+        {
+            /* Write linear mapping of guest. */
+            for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+            { 
+                new_l2e = (shadow_l3e_get_flags(guest_l3e[i]) & _PAGE_PRESENT) 
+                    ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(guest_l3e[i])),
+                                   __PAGE_HYPERVISOR) 
+                    : l2e_empty();
+                safe_write_entry(
+                    &l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i],
+                    &new_l2e);
+            }
+        }
+        
+        /* Write linear mapping of shadow. */
+        for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ )
+        {
+            new_l2e = (shadow_l3e_get_flags(shadow_l3e[i]) & _PAGE_PRESENT) 
+                ? l2e_from_pfn(mfn_x(shadow_l3e_get_mfn(shadow_l3e[i])),
+                               __PAGE_HYPERVISOR) 
+                : l2e_empty();
+            safe_write_entry(
+                &l2e[l2_table_offset(SH_LINEAR_PT_VIRT_START) + i],
+                &new_l2e);
+        }
+        
+        if ( v != current || !shadow_mode_external(d) )
+            sh_unmap_domain_page(l2e);
+        
+#if GUEST_PAGING_LEVELS == 3
+        if ( v != current )
+        {
+            sh_unmap_domain_page(shadow_l3e);
+            if ( !shadow_mode_external(d) )
+                sh_unmap_domain_page(guest_l3e);
+        }
+#endif
+    }
+
+#elif CONFIG_PAGING_LEVELS == 2
+
+    /* For PV, one l2e points at the guest l2, one points at the shadow
+     * l2. No maintenance required. 
+     * For HVM, just need to update the l2e that points to the shadow l2. */
+
+    if ( shadow_mode_external(d) )
+    {
+        /* Use the linear map if we can; otherwise make a new mapping */
+        if ( v == current ) 
+        {
+            __linear_l2_table[l2_linear_offset(SH_LINEAR_PT_VIRT_START)] = 
+                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                             __PAGE_HYPERVISOR);
+        } 
+        else
+        { 
+            l2_pgentry_t *ml2e;
+            ml2e = sh_map_domain_page(pagetable_get_mfn(v->arch.monitor_table));
+            ml2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = 
+                l2e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                             __PAGE_HYPERVISOR);
+            sh_unmap_domain_page(ml2e);
+        }
+    }
+
+#else
+#error this should not happen
+#endif
+}
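+
+/* The linear-pagetable trick described at the top of
+ * sh_update_linear_entries(), reduced to a minimal sketch for the 4-level
+ * case (illustrative only; the real work above also handles the PAE copies
+ * and the not-current-vcpu mappings): */
+#if 0
+static void example_install_linear_maps(struct vcpu *v, l4_pgentry_t *l4t)
+{
+    /* Shadow-linear slot: map the shadows through the shadow top level. */
+    l4t[l4_table_offset(SH_LINEAR_PT_VIRT_START)] =
+        l4e_from_pfn(pagetable_get_pfn(v->arch.shadow_table),
+                     __PAGE_HYPERVISOR);
+    /* Guest-linear slot: map the guest's own tables through its top level. */
+    l4t[l4_table_offset(LINEAR_PT_VIRT_START)] =
+        l4e_from_pfn(pagetable_get_pfn(v->arch.guest_table),
+                     __PAGE_HYPERVISOR);
+}
+#endif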
+
+
+// XXX -- should this be in this file?
+//        Or should it be moved to shadow-common.c?
+//
+#if (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+void sh_pae_recopy(struct domain *d)
+/* Called whenever we write to the l3 entries of a PAE pagetable which 
+ * is currently in use.  Each vcpu that is using the table needs to 
+ * resync its copies of the l3s in linear maps and any low-memory
+ * copies it might have made for fitting into 32bit CR3.
+ * Since linear maps are also resynced when we change CR3, we don't
+ * need to worry about changes to PAE l3es that are not currently in use.*/
+{
+    struct vcpu *v;
+    cpumask_t flush_mask = CPU_MASK_NONE;
+    ASSERT(shadow_lock_is_acquired(d));
+    
+    for_each_vcpu(d, v)
+    {
+        if ( !v->arch.shadow.pae_flip_pending ) 
+            continue;
+
+        cpu_set(v->processor, flush_mask);
+        
+        SHADOW_PRINTK("d=%u v=%u\n", v->domain->domain_id, v->vcpu_id);
+
+        /* This vcpu has a copy in its linear maps */
+        sh_update_linear_entries(v);
+        if ( hvm_guest(v) )
+        {
+            /* This vcpu has a copy in its HVM PAE l3 */
+            v->arch.hvm_vcpu.hw_cr3 = 
+                hvm_pae_copy_root(v, v->arch.shadow_vtable,
+                                  !shadow_vcpu_mode_translate(v));
+        }
+#if CONFIG_PAGING_LEVELS == 3
+        else 
+        {
+            /* This vcpu might have copied the l3 to below 4GB */
+            if ( v->arch.cr3 >> PAGE_SHIFT 
+                 != pagetable_get_pfn(v->arch.shadow_table) )
+            {
+                /* Recopy to where that copy is. */
+                int i;
+                l3_pgentry_t *dst, *src;
+                dst = __va(v->arch.cr3 & ~0x1f); /* Mask cache control bits */
+                src = v->arch.shadow_vtable;
+                for ( i = 0 ; i < 4 ; i++ ) 
+                    safe_write_entry(dst + i, src + i);
+            }
+        }
+#endif
+        v->arch.shadow.pae_flip_pending = 0;        
+    }
+
+    flush_tlb_mask(flush_mask);
+}
+#endif /* (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3) */
+
+
+/* removes:
+ *     vcpu->arch.guest_vtable
+ *     vcpu->arch.shadow_table
+ *     vcpu->arch.shadow_vtable
+ * Does all appropriate management/bookkeeping/refcounting/etc...
+ */
+static void
+sh_detach_old_tables(struct vcpu *v)
+{
+    mfn_t smfn;
+
+    ////
+    //// vcpu->arch.guest_vtable
+    ////
+    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
+         v->arch.guest_vtable )
+    {
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        sh_unmap_domain_page_global(v->arch.guest_vtable);
+        v->arch.guest_vtable = NULL;
+    }
+
+    ////
+    //// vcpu->arch.shadow_table
+    ////
+    smfn = pagetable_get_mfn(v->arch.shadow_table);
+    if ( mfn_x(smfn) )
+    {
+        ASSERT(v->arch.shadow_vtable);
+
+#if GUEST_PAGING_LEVELS == 3
+        // PAE guests do not (necessarily) use an entire page for their
+        // 4-entry L3s, so we have to deal with them specially.
+        //
+        sh_put_ref_l3_subshadow(v, v->arch.shadow_vtable, smfn);
+#else
+        sh_put_ref(v, smfn, 0);
+#endif
+
+#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+        {
+            struct pae_l3_bookkeeping *info =
+                sl3p_to_info(v->arch.shadow_vtable);
+            ASSERT(test_bit(v->vcpu_id, &info->vcpus));
+            clear_bit(v->vcpu_id, &info->vcpus);
+        }
+#endif
+        v->arch.shadow_table = pagetable_null();
+    }
+
+    ////
+    //// vcpu->arch.shadow_vtable
+    ////
+    if ( (shadow_mode_external(v->domain) || (GUEST_PAGING_LEVELS == 3)) &&
+         v->arch.shadow_vtable )
+    {
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        //
+        sh_unmap_domain_page_global(v->arch.shadow_vtable);
+        v->arch.shadow_vtable = NULL;
+    }
+}
+
+static void
+sh_update_cr3(struct vcpu *v)
+/* Updates vcpu->arch.shadow_table after the guest has changed CR3.
+ * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
+ * if appropriate).
+ * HVM guests should also ensure that hvm_get_guest_ctrl_reg(v, 3) returns
+ * the new CR3 value.
+ */
+{
+    struct domain *d = v->domain;
+    mfn_t gmfn, smfn;
+#if GUEST_PAGING_LEVELS == 3
+    u32 guest_idx=0;
+#endif
+
+    ASSERT(shadow_lock_is_acquired(v->domain));
+    ASSERT(v->arch.shadow.mode);
+
+    ////
+    //// vcpu->arch.guest_table is already set
+    ////
+    
+#ifndef NDEBUG 
+    /* Double-check that the HVM code has sent us a sane guest_table */
+    if ( hvm_guest(v) )
+    {
+        gfn_t gfn;
+
+        ASSERT(shadow_mode_external(d));
+
+        // Is paging enabled on this vcpu?
+        if ( shadow_vcpu_mode_translate(v) )
+        {
+            gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3)));
+            gmfn = vcpu_gfn_to_mfn(v, gfn);
+            ASSERT(valid_mfn(gmfn));
+            ASSERT(pagetable_get_pfn(v->arch.guest_table) == mfn_x(gmfn));
+        } 
+        else 
+        {
+            /* Paging disabled: guest_table points at (part of) p2m */
+#if SHADOW_PAGING_LEVELS != 3 /* in 3-on-4, guest-table is in slot 0 of p2m */
+            /* For everything else, they should be the same */
+            ASSERT(v->arch.guest_table.pfn == d->arch.phys_table.pfn);
+#endif
+        }
+    }
+#endif
+
+    SHADOW_PRINTK("d=%u v=%u guest_table=%05lx\n",
+                   d->domain_id, v->vcpu_id, 
+                   (unsigned long)pagetable_get_pfn(v->arch.guest_table));
+
+#if GUEST_PAGING_LEVELS == 4
+    if ( !(v->arch.flags & TF_kernel_mode) )
+        gmfn = pagetable_get_mfn(v->arch.guest_table_user);
+    else
+#endif
+        gmfn = pagetable_get_mfn(v->arch.guest_table);
+
+    sh_detach_old_tables(v);
+
+    if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) )
+    {
+        ASSERT(v->arch.cr3 == 0);
+        return;
+    }
+
+    ////
+    //// vcpu->arch.guest_vtable
+    ////
+    if ( shadow_mode_external(d) )
+    {
+#if GUEST_PAGING_LEVELS == 3
+        if ( shadow_vcpu_mode_translate(v) ) 
+            /* Paging enabled: find where in the page the l3 table is */
+            guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3));
+        else
+            /* Paging disabled: l3 is at the start of a page (in the p2m) */ 
+            guest_idx = 0; 
+
+        // Ignore the low 2 bits of guest_idx -- they are really just
+        // cache control.
+        guest_idx &= ~3;
+        // XXX - why does this need a global map?
+        v->arch.guest_vtable =
+            (guest_l3e_t *)sh_map_domain_page_global(gmfn) + guest_idx;
+#else
+        // XXX - why does this need a global map?
+        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+#endif
+    }
+    else
+    {
+#ifdef __x86_64__
+        v->arch.guest_vtable = __linear_l4_table;
+#elif GUEST_PAGING_LEVELS == 3
+        // XXX - why does this need a global map?
+        v->arch.guest_vtable = sh_map_domain_page_global(gmfn);
+#else
+        v->arch.guest_vtable = __linear_l2_table;
+#endif
+    }
+
+#if 0
+    printk("%s %s %d gmfn=%05lx guest_vtable=%p\n",
+           __func__, __FILE__, __LINE__, gmfn, v->arch.guest_vtable);
+#endif
+
+    ////
+    //// vcpu->arch.shadow_table
+    ////
+    smfn = get_shadow_status(v, gmfn, PGC_SH_guest_root_type);
+    if ( valid_mfn(smfn) )
+    {
+        /* Pull this root shadow to the front of the list of roots. */
+        list_del(&mfn_to_page(smfn)->list);
+        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+    }
+    else
+    {
+        /* This guest MFN is a pagetable.  Must revoke write access. */
+        if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) 
+             != 0 )
+            flush_tlb_mask(d->domain_dirty_cpumask); 
+        /* Make sure there's enough free shadow memory. */
+        shadow_prealloc(d, SHADOW_MAX_ORDER); 
+        /* Shadow the page. */
+        smfn = sh_make_shadow(v, gmfn, PGC_SH_guest_root_type);
+        list_add(&mfn_to_page(smfn)->list, &d->arch.shadow.toplevel_shadows);
+    }
+    ASSERT(valid_mfn(smfn));
+    v->arch.shadow_table = pagetable_from_mfn(smfn);
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW
+    /* Once again OK to unhook entries from this table if we see fork/exit */
+    ASSERT(sh_mfn_is_a_page_table(gmfn));
+    mfn_to_page(gmfn)->shadow_flags &= ~SHF_unhooked_mappings;
+#endif
+
+
+    ////
+    //// vcpu->arch.shadow_vtable
+    ////
+    if ( shadow_mode_external(d) )
+    {
+#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+        mfn_t adjusted_smfn = smfn;
+        u32 shadow_idx = shadow_l3_index(&adjusted_smfn, guest_idx);
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        v->arch.shadow_vtable =
+            (shadow_l3e_t *)sh_map_domain_page_global(adjusted_smfn) +
+            shadow_idx;
+#else
+        // Q: why does this need to use (un)map_domain_page_*global* ?
+        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
+#endif
+    }
+    else
+    {
+#if SHADOW_PAGING_LEVELS == 4
+        v->arch.shadow_vtable = __sh_linear_l4_table;
+#elif GUEST_PAGING_LEVELS == 3
+        // XXX - why does this need a global map?
+        v->arch.shadow_vtable = sh_map_domain_page_global(smfn);
+#else
+        v->arch.shadow_vtable = __sh_linear_l2_table;
+#endif
+    }
+
+    ////
+    //// Take a ref to the new shadow table, and pin it.
+    ////
+    //
+    // This ref is logically "held" by v->arch.shadow_table entry itself.
+    // Release the old ref.
+    //
+#if GUEST_PAGING_LEVELS == 3
+    // PAE guests do not (necessarily) use an entire page for their
+    // 4-entry L3s, so we have to deal with them specially.
+    //
+    // XXX - might want to revisit this if/when we do multiple compilation for
+    //       HVM-vs-PV guests, as PAE PV guests could get away without doing
+    //       subshadows.
+    //
+    sh_get_ref_l3_subshadow(v->arch.shadow_vtable, smfn);
+    sh_pin_l3_subshadow(v->arch.shadow_vtable, smfn);
+#else
+    sh_get_ref(smfn, 0);
+    sh_pin(smfn);
+#endif
+
+#if (SHADOW_PAGING_LEVELS == 3) && (GUEST_PAGING_LEVELS == 3)
+    // PAE 3-on-3 shadows have to keep track of which vcpus are using
+    // which l3 subshadow, in order to handle the SHADOW_SET_L3PAE_RECOPY
+    // case from validate_gl3e().  Search for SHADOW_SET_L3PAE_RECOPY
+    // in the code for more info.
+    //
+    {
+        struct pae_l3_bookkeeping *info =
+            sl3p_to_info(v->arch.shadow_vtable);
+        ASSERT(!test_bit(v->vcpu_id, &info->vcpus));
+        set_bit(v->vcpu_id, &info->vcpus);
+    }
+#endif
+
+    debugtrace_printk("%s cr3 gmfn=%05lx smfn=%05lx\n",
+                      __func__, gmfn, smfn);
+
+    ///
+    /// v->arch.cr3 and, if appropriate, v->arch.hvm_vcpu.hw_cr3
+    ///
+    if ( shadow_mode_external(d) )
+    {
+        ASSERT(hvm_guest(v));
+        make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
+
+#if (GUEST_PAGING_LEVELS == 2) && (SHADOW_PAGING_LEVELS != 2)
+#if SHADOW_PAGING_LEVELS != 3
+#error unexpected combination of GUEST and SHADOW paging levels
+#endif
+        /* 2-on-3: make a PAE l3 table that points at the four-page l2 */
+        {
+            mfn_t smfn = pagetable_get_mfn(v->arch.shadow_table);
+            int i;
+
+            ASSERT(v->arch.hvm_vcpu.hw_cr3 ==
+                   virt_to_maddr(v->arch.hvm_vcpu.hvm_lowmem_l3tab));
+            for (i = 0; i < 4; i++)
+            {
+                v->arch.hvm_vcpu.hvm_lowmem_l3tab[i] =
+                    shadow_l3e_from_mfn(_mfn(mfn_x(smfn)+i), _PAGE_PRESENT);
+            }
+        }
+#elif (GUEST_PAGING_LEVELS == 3) && (SHADOW_PAGING_LEVELS == 3)
+        /* 3-on-3: copy the shadow l3 to slots that are below 4GB.
+         * If paging is disabled, clear l3e reserved bits; otherwise 
+         * remove entries that have reserved bits set. */
+        v->arch.hvm_vcpu.hw_cr3 =
+            hvm_pae_copy_root(v, v->arch.shadow_vtable, 
+                              !shadow_vcpu_mode_translate(v));
+#else
+        /* 2-on-2 or 4-on-4: just put the shadow top-level into cr3 */
+        v->arch.hvm_vcpu.hw_cr3 =
+            pagetable_get_paddr(v->arch.shadow_table);
+#endif
+    }
+    else // not shadow_mode_external...
+    {
+        /* We don't support PV except guest == shadow == config levels */
+        BUG_ON(GUEST_PAGING_LEVELS != SHADOW_PAGING_LEVELS);
+        make_cr3(v, pagetable_get_pfn(v->arch.shadow_table));
+    }
+
+    /* Fix up the linear pagetable mappings */
+    sh_update_linear_entries(v);
+}
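+
+/* A sketch of the calling convention implied by the comment on
+ * sh_update_cr3(), for the PV flavour: the guest_table field is updated
+ * first, and the call is made with the shadow lock held.  The surrounding
+ * helper and its name are illustrative only. */
+#if 0
+static void example_new_guest_cr3(struct vcpu *v, mfn_t new_gmfn)
+{
+    struct domain *d = v->domain;
+
+    shadow_lock(d);
+    v->arch.guest_table = pagetable_from_mfn(new_gmfn);
+    sh_update_cr3(v);   /* rebuilds shadow_table, the vtables and (hw_)cr3 */
+    shadow_unlock(d);
+}
+#endif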
+
+
+/**************************************************************************/
+/* Functions to revoke guest rights */
+
+#if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
+static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn)
+/* Look up this vaddr in the current shadow and see if it's a writeable
+ * mapping of this gmfn.  If so, remove it.  Returns 1 if it worked. */
+{
+    shadow_l1e_t sl1e, *sl1p;
+    shadow_l2e_t *sl2p;
+#if GUEST_PAGING_LEVELS >= 3
+    shadow_l3e_t *sl3p;
+#if GUEST_PAGING_LEVELS >= 4
+    shadow_l4e_t *sl4p;
+#endif
+#endif
+    mfn_t sl1mfn;
+
+
+    /* Carefully look in the shadow linear map for the l1e we expect */
+    if ( v->arch.shadow_vtable == NULL ) return 0;
+#if GUEST_PAGING_LEVELS >= 4
+    sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr);
+    if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) )
+        return 0;
+    sl3p = sh_linear_l3_table(v) + shadow_l3_linear_offset(vaddr);
+    if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) )
+        return 0;
+#elif GUEST_PAGING_LEVELS == 3
+    sl3p = ((shadow_l3e_t *) v->arch.shadow_vtable) 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog