WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 27 Jul 2007 02:47:20 -0700
Delivery-date: Fri, 27 Jul 2007 02:45:34 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1182365367 21600
# Node ID 810885428743660169e7382ec9596373ca6ce48f
# Parent  c20bc60f9243d08199cb0a9a837cbe11c6b3dcdc
# Parent  005dd6b1cf8e0008aba7984b828274a40e8d7d95
merge with xen-unstable.hg
---
 docs/src/user.tex                       |    1 
 tools/blktap/drivers/Makefile           |    1 
 tools/blktap/drivers/block-aio.c        |   49 ++++-----
 tools/blktap/drivers/block-qcow.c       |   48 ++++-----
 tools/blktap/drivers/tapaio.c           |  164 ++++++++++++++++++++++++++++++++
 tools/blktap/drivers/tapaio.h           |   58 +++++++++++
 tools/examples/init.d/xendomains        |   33 ++++--
 tools/ioemu/block-raw.c                 |    2 
 tools/ioemu/target-i386-dm/exec-dm.c    |   42 ++++++--
 tools/ioemu/vl.c                        |   12 ++
 tools/libxc/xc_core.c                   |    2 
 tools/python/xen/xend/XendDomainInfo.py |    2 
 tools/python/xen/xend/server/blkif.py   |    5 
 xen/arch/ia64/xen/domain.c              |    9 -
 xen/arch/ia64/xen/xenmem.c              |    2 
 xen/arch/x86/apic.c                     |    4 
 xen/arch/x86/boot/cmdline.S             |   40 +++++--
 xen/arch/x86/boot/trampoline.S          |   11 --
 xen/arch/x86/boot/video.S               |   59 ++++++-----
 xen/arch/x86/boot/video.h               |    9 -
 xen/arch/x86/boot/x86_32.S              |    4 
 xen/arch/x86/boot/x86_64.S              |    2 
 xen/arch/x86/domain.c                   |   37 +++++--
 xen/arch/x86/domain_build.c             |    6 -
 xen/arch/x86/flushtlb.c                 |    4 
 xen/arch/x86/hvm/hvm.c                  |   21 ++--
 xen/arch/x86/hvm/irq.c                  |   81 ++++++++-------
 xen/arch/x86/hvm/svm/asid.c             |   72 +++++++-------
 xen/arch/x86/hvm/svm/intr.c             |  146 ++++++++++++++++------------
 xen/arch/x86/hvm/svm/svm.c              |   60 +++++------
 xen/arch/x86/hvm/svm/vmcb.c             |    6 -
 xen/arch/x86/hvm/vioapic.c              |   34 +++---
 xen/arch/x86/hvm/vlapic.c               |    9 -
 xen/arch/x86/hvm/vmx/intr.c             |  106 ++++++++++----------
 xen/arch/x86/hvm/vmx/vmcs.c             |    2 
 xen/arch/x86/hvm/vmx/vmx.c              |   59 ++++++++---
 xen/arch/x86/hvm/vpic.c                 |    3 
 xen/arch/x86/hvm/vpt.c                  |   40 ++++---
 xen/arch/x86/mm.c                       |   10 -
 xen/arch/x86/setup.c                    |   10 +
 xen/arch/x86/traps.c                    |   14 ++
 xen/arch/x86/x86_32/traps.c             |    1 
 xen/arch/x86/x86_64/compat_kexec.S      |   65 +++++++++++-
 xen/arch/x86/x86_64/traps.c             |    1 
 xen/common/compat/memory.c              |    7 +
 xen/common/domctl.c                     |    4 
 xen/common/grant_table.c                |   12 +-
 xen/common/kernel.c                     |   10 -
 xen/common/kexec.c                      |    4 
 xen/common/perfc.c                      |    2 
 xen/drivers/char/console.c              |    2 
 xen/drivers/video/vga.c                 |    3 
 xen/include/asm-ia64/guest_access.h     |   25 ++--
 xen/include/asm-x86/event.h             |    1 
 xen/include/asm-x86/guest_access.h      |   68 +++++++------
 xen/include/asm-x86/hvm/hvm.h           |   33 +++++-
 xen/include/asm-x86/hvm/irq.h           |   12 +-
 xen/include/asm-x86/hvm/support.h       |    1 
 xen/include/asm-x86/hvm/svm/asid.h      |    1 
 xen/include/asm-x86/hvm/vcpu.h          |    4 
 xen/include/asm-x86/hvm/vlapic.h        |    2 
 xen/include/asm-x86/hvm/vmx/vmx.h       |   13 +-
 xen/include/asm-x86/hvm/vpic.h          |    2 
 xen/include/asm-x86/hvm/vpt.h           |    3 
 xen/include/xen/compat.h                |   62 +++++++-----
 xen/include/xen/xencomm.h               |   43 ++++----
 66 files changed, 1080 insertions(+), 580 deletions(-)

diff -r c20bc60f9243 -r 810885428743 docs/src/user.tex
--- a/docs/src/user.tex Wed Jun 20 12:47:52 2007 -0600
+++ b/docs/src/user.tex Wed Jun 20 12:49:27 2007 -0600
@@ -3178,6 +3178,7 @@ editing \path{grub.conf}.
   \begin{description}
   \item[ ask ] Display a vga menu allowing manual selection of video
   mode.
+  \item[ current ] Use existing vga mode without modification.
   \item[ text-$<$mode$>$ ] Select text-mode resolution, where mode is
   one of 80x25, 80x28, 80x30, 80x34, 80x43, 80x50, 80x60.
   \item[ gfx-$<$mode$>$ ] Select VESA graphics mode
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile     Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/Makefile     Wed Jun 20 12:49:27 2007 -0600
@@ -35,6 +35,7 @@ BLK-OBJS  += block-ram.o
 BLK-OBJS  += block-ram.o
 BLK-OBJS  += block-qcow.o
 BLK-OBJS  += aes.o
+BLK-OBJS  += tapaio.o
 
 all: $(IBIN) qcow-util
 
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/block-aio.c
--- a/tools/blktap/drivers/block-aio.c  Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/block-aio.c  Wed Jun 20 12:49:27 2007 -0600
@@ -43,14 +43,7 @@
 #include <sys/ioctl.h>
 #include <linux/fs.h>
 #include "tapdisk.h"
-
-
-/**
- * We used a kernel patch to return an fd associated with the AIO context
- * so that we can concurrently poll on synchronous and async descriptors.
- * This is signalled by passing 1 as the io context to io_setup.
- */
-#define REQUEST_ASYNC_FD 1
+#include "tapaio.h"
 
 #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
 
@@ -65,14 +58,13 @@ struct tdaio_state {
        int fd;
        
        /* libaio state */
-       io_context_t       aio_ctx;
+       tap_aio_context_t  aio_ctx;
        struct iocb        iocb_list  [MAX_AIO_REQS];
        struct iocb       *iocb_free  [MAX_AIO_REQS];
        struct pending_aio pending_aio[MAX_AIO_REQS];
        int                iocb_free_count;
        struct iocb       *iocb_queue[MAX_AIO_REQS];
        int                iocb_queued;
-       int                poll_fd; /* NB: we require aio_poll support */
        struct io_event    aio_events[MAX_AIO_REQS];
 };
 
@@ -148,7 +140,7 @@ static inline void init_fds(struct disk_
        for(i = 0; i < MAX_IOFD; i++) 
                dd->io_fd[i] = 0;
 
-       dd->io_fd[0] = prv->poll_fd;
+       dd->io_fd[0] = prv->aio_ctx.pollfd;
 }
 
 /* Open the disk file and initialize aio state. */
@@ -162,12 +154,9 @@ int tdaio_open (struct disk_driver *dd, 
        /* Initialize AIO */
        prv->iocb_free_count = MAX_AIO_REQS;
        prv->iocb_queued     = 0;
-       
-       prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
-       prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
-
-       if (prv->poll_fd < 0) {
-               ret = prv->poll_fd;
+
+       ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
+       if (ret < 0) {
                 if (ret == -EAGAIN) {
                         DPRINTF("Couldn't setup AIO context.  If you are "
                                 "trying to concurrently use a large number "
@@ -176,9 +165,7 @@ int tdaio_open (struct disk_driver *dd, 
                                 "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
                                 "aio-max-nr')\n");
                 } else {
-                        DPRINTF("Couldn't get fd for AIO poll support.  This "
-                                "is probably because your kernel does not "
-                                "have the aio-poll patch applied.\n");
+                        DPRINTF("Couldn't setup AIO context.\n");
                 }
                goto done;
        }
@@ -286,7 +273,7 @@ int tdaio_submit(struct disk_driver *dd)
        if (!prv->iocb_queued)
                return 0;
 
-       ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+       ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, 
prv->iocb_queue);
        
        /* XXX: TODO: Handle error conditions here. */
        
@@ -300,7 +287,7 @@ int tdaio_close(struct disk_driver *dd)
 {
        struct tdaio_state *prv = (struct tdaio_state *)dd->private;
        
-       io_destroy(prv->aio_ctx);
+       io_destroy(prv->aio_ctx.aio_ctx);
        close(prv->fd);
 
        return 0;
@@ -308,15 +295,13 @@ int tdaio_close(struct disk_driver *dd)
 
 int tdaio_do_callbacks(struct disk_driver *dd, int sid)
 {
-       int ret, i, rsp = 0;
+       int i, nr_events, rsp = 0;
        struct io_event *ep;
        struct tdaio_state *prv = (struct tdaio_state *)dd->private;
 
-       /* Non-blocking test for completed io. */
-       ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
-                          NULL);
-                       
-       for (ep=prv->aio_events,i=ret; i-->0; ep++) {
+       nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+       for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
                struct iocb        *io  = ep->obj;
                struct pending_aio *pio;
                
@@ -327,6 +312,14 @@ int tdaio_do_callbacks(struct disk_drive
 
                prv->iocb_free[prv->iocb_free_count++] = io;
        }
+
+       if (nr_events) {
+               nr_events = tap_aio_more_events(&prv->aio_ctx);
+               goto repeat;
+       }
+
+       tap_aio_continue(&prv->aio_ctx);
+
        return rsp;
 }
 
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/block-qcow.c Wed Jun 20 12:49:27 2007 -0600
@@ -38,6 +38,7 @@
 #include "bswap.h"
 #include "aes.h"
 #include "tapdisk.h"
+#include "tapaio.h"
 
 #if 1
 #define ASSERT(_p) \
@@ -52,9 +53,6 @@
     (uint64_t)( \
         (l + (s - 1)) - ((l + (s - 1)) % s)); \
 })
-
-/******AIO DEFINES******/
-#define REQUEST_ASYNC_FD 1
 
 struct pending_aio {
         td_callback_t cb;
@@ -145,7 +143,7 @@ struct tdqcow_state {
        AES_KEY aes_encrypt_key;       /*AES key*/
        AES_KEY aes_decrypt_key;       /*AES key*/
         /* libaio state */
-        io_context_t        aio_ctx;
+        tap_aio_context_t   aio_ctx;
         int                 max_aio_reqs;
         struct iocb        *iocb_list;
         struct iocb       **iocb_free;
@@ -153,7 +151,6 @@ struct tdqcow_state {
         int                 iocb_free_count;
         struct iocb       **iocb_queue;
         int                 iocb_queued;
-        int                 poll_fd;      /* NB: we require aio_poll support */
         struct io_event    *aio_events;
 };
 
@@ -179,7 +176,7 @@ static void free_aio_state(struct disk_d
 
 static int init_aio_state(struct disk_driver *dd)
 {
-        int i;
+       int i, ret;
        struct td_state     *bs = dd->td_state;
        struct tdqcow_state  *s = (struct tdqcow_state *)dd->private;
         long     ioidx;
@@ -216,12 +213,9 @@ static int init_aio_state(struct disk_dr
                 goto fail;
         }
 
-        /*Signal kernel to create Poll FD for Asyc completion events*/
-        s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;   
-        s->poll_fd = io_setup(s->max_aio_reqs, &s->aio_ctx);
-
-       if (s->poll_fd < 0) {
-                if (s->poll_fd == -EAGAIN) {
+       ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs);
+       if (ret < 0) {
+                if (ret == -EAGAIN) {
                         DPRINTF("Couldn't setup AIO context.  If you are "
                                 "trying to concurrently use a large number "
                                 "of blktap-based disks, you may need to "
@@ -229,9 +223,7 @@ static int init_aio_state(struct disk_dr
                                 "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
                                 "aio-max-nr')\n");
                 } else {
-                        DPRINTF("Couldn't get fd for AIO poll support.  This "
-                                "is probably because your kernel does not "
-                                "have the aio-poll patch applied.\n");
+                        DPRINTF("Couldn't setup AIO context.\n");
                 }
                goto fail;
        }
@@ -845,7 +837,7 @@ static inline void init_fds(struct disk_
        for(i = 0; i < MAX_IOFD; i++) 
                dd->io_fd[i] = 0;
 
-       dd->io_fd[0] = s->poll_fd;
+       dd->io_fd[0] = s->aio_ctx.pollfd;
 }
 
 /* Open the disk file and initialize qcow state. */
@@ -1144,7 +1136,7 @@ int tdqcow_submit(struct disk_driver *dd
        if (!prv->iocb_queued)
                return 0;
 
-       ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+       ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, 
prv->iocb_queue);
 
         /* XXX: TODO: Handle error conditions here. */
 
@@ -1172,7 +1164,7 @@ int tdqcow_close(struct disk_driver *dd)
                close(fd);
        }
 
-       io_destroy(s->aio_ctx);
+       io_destroy(s->aio_ctx.aio_ctx);
        free(s->name);
        free(s->l1_table);
        free(s->l2_cache);
@@ -1184,17 +1176,15 @@ int tdqcow_close(struct disk_driver *dd)
 
 int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
 {
-        int ret, i, rsp = 0,*ptr;
+        int ret, i, nr_events, rsp = 0,*ptr;
         struct io_event *ep;
         struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
 
         if (sid > MAX_IOFD) return 1;
-       
-       /* Non-blocking test for completed io. */
-        ret = io_getevents(prv->aio_ctx, 0, prv->max_aio_reqs, prv->aio_events,
-                           NULL);
-
-        for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
+
+        nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+        for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
                 struct iocb        *io  = ep->obj;
                 struct pending_aio *pio;
 
@@ -1215,6 +1205,14 @@ int tdqcow_do_callbacks(struct disk_driv
 
                 prv->iocb_free[prv->iocb_free_count++] = io;
         }
+
+        if (nr_events) {
+                nr_events = tap_aio_more_events(&prv->aio_ctx);
+                goto repeat;
+        }
+
+        tap_aio_continue(&prv->aio_ctx);
+
         return rsp;
 }
 
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/tapaio.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.c     Wed Jun 20 12:49:27 2007 -0600
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "tapaio.h"
+#include "tapdisk.h"
+#include <unistd.h>
+
+/**
+ * We used a kernel patch to return an fd associated with the AIO context
+ * so that we can concurrently poll on synchronous and async descriptors.
+ * This is signalled by passing 1 as the io context to io_setup.
+ */
+#define REQUEST_ASYNC_FD 1
+
+/*
+ * If we don't have any way to do epoll on aio events in a normal kernel,
+ * wait for aio events in a separate thread and return completion status
+ * that via a pipe that can be waited on normally.
+ *
+ * To keep locking problems between the completion thread and the submit
+ * thread to a minimum, there's a handshake which allows only one thread
+ * to be doing work on the completion queue at a time:
+ *
+ * 1) main thread sends completion thread a command via the command pipe;
+ * 2) completion thread waits for aio events and returns the number
+ *    received on the completion pipe
+ * 3) main thread processes the received ctx->aio_events events
+ * 4) loop back to 1) to let the completion thread refill the aio_events
+ *    buffer.
+ *
+ * This workaround needs to disappear once the kernel provides a single
+ * mechanism for waiting on both aio and normal fd wakeups.
+ */
+static void *
+tap_aio_completion_thread(void *arg)
+{
+       tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
+       int command;
+       int nr_events;
+       int rc;
+
+       while (1) {
+               rc = read(ctx->command_fd[0], &command, sizeof(command));
+
+               do {
+                       rc = io_getevents(ctx->aio_ctx, 1,
+                                         ctx->max_aio_events, ctx->aio_events,
+                                         NULL);
+                       if (rc) {
+                               nr_events = rc;
+                               rc = write(ctx->completion_fd[1], &nr_events,
+                                          sizeof(nr_events));
+                       }
+               } while (!rc);
+       }
+}
+
+void
+tap_aio_continue(tap_aio_context_t *ctx)
+{
+        int cmd = 0;
+
+        if (!ctx->poll_in_thread)
+                return;
+
+        if (write(ctx->command_fd[1], &cmd, sizeof(cmd)) < 0)
+                DPRINTF("Cannot write to command pipe\n");
+}
+
+int
+tap_aio_setup(tap_aio_context_t *ctx,
+              struct io_event *aio_events,
+              int max_aio_events)
+{
+        int ret;
+
+        ctx->aio_events = aio_events;
+        ctx->max_aio_events = max_aio_events;
+        ctx->poll_in_thread = 0;
+
+        ctx->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
+        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+        if (ret < 0 && ret != -EINVAL)
+                return ret;
+        else if (ret > 0) {
+                ctx->pollfd = ret;
+                return ctx->pollfd;
+        }
+
+        ctx->aio_ctx = (io_context_t) 0;
+        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+        if (ret < 0)
+                return ret;
+
+        if ((ret = pipe(ctx->command_fd)) < 0) {
+                DPRINTF("Unable to create command pipe\n");
+                return -1;
+        }
+        if ((ret = pipe(ctx->completion_fd)) < 0) {
+                DPRINTF("Unable to create completion pipe\n");
+                return -1;
+        }
+
+        if ((ret = pthread_create(&ctx->aio_thread, NULL,
+                                  tap_aio_completion_thread, ctx)) != 0) {
+                DPRINTF("Unable to create completion thread\n");
+                return -1;
+        }
+
+        ctx->pollfd = ctx->completion_fd[0];
+        ctx->poll_in_thread = 1;
+
+        tap_aio_continue(ctx);
+
+        return 0;
+}
+
+int
+tap_aio_get_events(tap_aio_context_t *ctx)
+{
+        int nr_events = 0;
+
+        if (!ctx->poll_in_thread)
+                nr_events = io_getevents(ctx->aio_ctx, 1,
+                                         ctx->max_aio_events, ctx->aio_events, 
NULL);
+        else
+                read(ctx->completion_fd[0], &nr_events, sizeof(nr_events));
+
+        return nr_events;
+}
+
+int tap_aio_more_events(tap_aio_context_t *ctx)
+{
+        return io_getevents(ctx->aio_ctx, 0,
+                            ctx->max_aio_events, ctx->aio_events, NULL);
+}
+
+
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/tapaio.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.h     Wed Jun 20 12:49:27 2007 -0600
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __TAPAIO_H__
+#define __TAPAIO_H__
+
+#include <pthread.h>
+#include <libaio.h>
+
+struct tap_aio_context {
+        io_context_t     aio_ctx;
+
+        struct io_event *aio_events;
+        int              max_aio_events;
+
+        pthread_t        aio_thread;
+        int              command_fd[2];
+        int              completion_fd[2];
+        int              pollfd;
+        unsigned int     poll_in_thread : 1;
+};
+
+typedef struct tap_aio_context tap_aio_context_t;
+
+int  tap_aio_setup      (tap_aio_context_t *ctx,
+                         struct io_event *aio_events,
+                         int max_aio_events);
+void tap_aio_continue   (tap_aio_context_t *ctx);
+int  tap_aio_get_events (tap_aio_context_t *ctx);
+int  tap_aio_more_events(tap_aio_context_t *ctx);
+
+#endif /* __TAPAIO_H__ */
diff -r c20bc60f9243 -r 810885428743 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains  Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/examples/init.d/xendomains  Wed Jun 20 12:49:27 2007 -0600
@@ -182,25 +182,31 @@ rdnames()
 
 parseln()
 {
-    name=`echo "$1" | cut -c0-17`
-    name=${name%% *}
-    rest=`echo "$1" | cut -c18- `
-    read id mem cpu vcpu state tm < <(echo "$rest")
+    if [[ "$1" =~ "\(domain" ]]; then
+        name=;id=
+    else if [[ "$1" =~ "\(name" ]]; then
+        name=$(echo $1 | sed -e 's/^.*(name \(.*\))$/\1/')
+    else if [[ "$1" =~ "\(domid" ]]; then
+        id=$(echo $1 | sed -e 's/^.*(domid \(.*\))$/\1/')
+    fi; fi; fi
+
+    [ -n "$name" -a -n "$id" ] && return 0 || return 1
 }
 
 is_running()
 {
     rdname $1
     RC=1
+    name=;id=
     while read LN; do
-       parseln "$LN"
+       parseln "$LN" || continue
        if test $id = 0; then continue; fi
        case $name in 
            ($NM)
                RC=0
                ;;
        esac
-    done < <(xm list | grep -v '^Name')
+    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
     return $RC
 }
 
@@ -267,13 +273,14 @@ start()
 
 all_zombies()
 {
+    name=;id=
     while read LN; do
-       parseln "$LN"
+       parseln "$LN" || continue
        if test $id = 0; then continue; fi
        if test "$state" != "-b---d" -a "$state" != "-----d"; then
            return 1;
        fi
-    done < <(xm list | grep -v '^Name')
+    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
     return 0
 }
 
@@ -309,8 +316,9 @@ stop()
        rdnames
     fi
     echo -n "Shutting down Xen domains:"
+    name=;id=
     while read LN; do
-       parseln "$LN"
+       parseln "$LN" || continue
        if test $id = 0; then continue; fi
        echo -n " $name"
        if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
@@ -384,7 +392,7 @@ stop()
            fi
            kill $WDOG_PID >/dev/null 2>&1
        fi
-    done < <(xm list | grep -v '^Name')
+    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
 
     # NB. this shuts down ALL Xen domains (politely), not just the ones in
     # AUTODIR/*
@@ -409,15 +417,16 @@ stop()
 
 check_domain_up()
 {
+    name=;id=
     while read LN; do
-       parseln "$LN"
+       parseln "$LN" || continue
        if test $id = 0; then continue; fi
        case $name in 
            ($1)
                return 0
                ;;
        esac
-    done < <(xm list | grep -v "^Name")
+    done < <(xm list -l | grep '(\(domain\|domid\|name\)')
     return 1
 }
 
diff -r c20bc60f9243 -r 810885428743 tools/ioemu/block-raw.c
--- a/tools/ioemu/block-raw.c   Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/block-raw.c   Wed Jun 20 12:49:27 2007 -0600
@@ -166,7 +166,7 @@ typedef struct RawAIOCB {
     struct RawAIOCB *next;
 } RawAIOCB;
 
-static int aio_sig_num = SIGUSR2;
+const int aio_sig_num = SIGUSR2;
 static RawAIOCB *first_aio; /* AIO issued */
 static int aio_initialized = 0;
 
diff -r c20bc60f9243 -r 810885428743 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c      Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/target-i386-dm/exec-dm.c      Wed Jun 20 12:49:27 2007 -0600
@@ -443,19 +443,40 @@ extern unsigned long logdirty_bitmap_siz
  * Forcing a word-sized read/write prevents the guest from seeing a partially
  * written word-sized atom.
  */
-void memcpy_words(void *dst, void *src, size_t n)
-{
-    while (n >= sizeof(long)) {
-        *((long *)dst) = *((long *)src);
-        dst = ((long *)dst) + 1;
-        src = ((long *)src) + 1;
-        n -= sizeof(long);
-    }
-
-    if (n & 4) {
+#if defined(__x86_64__) || defined(__i386__)
+static void memcpy_words(void *dst, void *src, size_t n)
+{
+    asm (
+        "   movl %%edx,%%ecx \n"
+#ifdef __x86_64
+        "   shrl $3,%%ecx    \n"
+        "   andl $7,%%edx    \n"
+        "   rep  movsq       \n"
+        "   test $4,%%edx    \n"
+        "   jz   1f          \n"
+        "   movsl            \n"
+#else /* __i386__ */
+        "   shrl $2,%%ecx    \n"
+        "   andl $3,%%edx    \n"
+        "   rep  movsl       \n"
+#endif
+        "1: test $2,%%edx    \n"
+        "   jz   1f          \n"
+        "   movsw            \n"
+        "1: test $1,%%edx    \n"
+        "   jz   1f          \n"
+        "   movsb            \n"
+        "1:                  \n"
+        : : "S" (src), "D" (dst), "d" (n) : "ecx" );
+}
+#else
+static void memcpy_words(void *dst, void *src, size_t n)
+{
+    while (n >= sizeof(uint32_t)) {
         *((uint32_t *)dst) = *((uint32_t *)src);
         dst = ((uint32_t *)dst) + 1;
         src = ((uint32_t *)src) + 1;
+        n -= sizeof(uint32_t);
     }
 
     if (n & 2) {
@@ -470,6 +491,7 @@ void memcpy_words(void *dst, void *src, 
         src = ((uint8_t *)src) + 1;
     }
 }
+#endif
 
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, 
                             int len, int is_write)
diff -r c20bc60f9243 -r 810885428743 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/vl.c  Wed Jun 20 12:49:27 2007 -0600
@@ -7059,6 +7059,18 @@ int main(int argc, char **argv)
 #endif
 
     char qemu_dm_logfilename[128];
+    
+    /* Ensure that SIGUSR2 is blocked by default when a new thread is created,
+       then only the threads that use the signal unblock it -- this fixes a
+       race condition in Qcow support where the AIO signal is misdelivered.  */
+    {
+        extern const int aio_sig_num;
+        sigset_t set;
+
+        sigemptyset(&set);
+        sigaddset(&set, aio_sig_num);
+        sigprocmask(SIG_BLOCK, &set, NULL);
+    }
 
     LIST_INIT (&vm_change_state_head);
 #ifndef _WIN32
diff -r c20bc60f9243 -r 810885428743 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c     Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/libxc/xc_core.c     Wed Jun 20 12:49:27 2007 -0600
@@ -156,7 +156,7 @@ struct xc_core_section_headers {
     Elf64_Shdr  *shdrs;
 };
 #define SHDR_INIT       16
-#define SHDR_INC        4
+#define SHDR_INC        4U
 
 static struct xc_core_section_headers*
 xc_core_shdr_init(void)
diff -r c20bc60f9243 -r 810885428743 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py   Wed Jun 20 12:49:27 2007 -0600
@@ -983,7 +983,7 @@ class XendDomainInfo:
                 self.info['VCPUs_live'] = vcpus
                 self._writeDom(self._vcpuDomDetails())
         else:
-            self.info['VCPUs_live'] = vcpus
+            self.info['VCPUs_max'] = vcpus
             xen.xend.XendDomain.instance().managed_config_save(self)
         log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
                  vcpus)
diff -r c20bc60f9243 -r 810885428743 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/python/xen/xend/server/blkif.py     Wed Jun 20 12:49:27 2007 -0600
@@ -98,6 +98,11 @@ class BlkifController(DevController):
 
         if (dev_type == 'cdrom' and new_front['device-type'] == 'cdrom' and
             dev == new_back['dev'] and mode == 'r'):
+            # dummy device
+            self.writeBackend(devid,
+                              'type', new_back['type'],
+                              'params', '')
+            # new backend-device
             self.writeBackend(devid,
                               'type', new_back['type'],
                               'params', new_back['params'])
diff -r c20bc60f9243 -r 810885428743 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/ia64/xen/domain.c        Wed Jun 20 12:49:27 2007 -0600
@@ -1146,9 +1146,8 @@ static void __init loaddomainelfimage(st
                        dom_imva = __va_ul(page_to_maddr(p));
                        if (filesz > 0) {
                                if (filesz >= PAGE_SIZE)
-                                       memcpy((void *) dom_imva,
-                                              (void *) elfaddr,
-                                              PAGE_SIZE);
+                                       copy_page((void *) dom_imva,
+                                                 (void *) elfaddr);
                                else {
                                        // copy partial page
                                        memcpy((void *) dom_imva,
@@ -1166,7 +1165,7 @@ static void __init loaddomainelfimage(st
                        }
                        else if (memsz > 0) {
                                 /* always zero out entire page */
-                               memset((void *) dom_imva, 0, PAGE_SIZE);
+                               clear_page((void *) dom_imva);
                        }
                        memsz -= PAGE_SIZE;
                        filesz -= PAGE_SIZE;
@@ -1367,7 +1366,7 @@ int __init construct_dom0(struct domain 
        if (start_info_page == NULL)
                panic("can't allocate start info page");
        si = page_to_virt(start_info_page);
-       memset(si, 0, PAGE_SIZE);
+       clear_page(si);
        snprintf(si->magic, sizeof(si->magic), "xen-%i.%i-ia64",
                xen_major_version(), xen_minor_version());
        si->nr_pages     = max_pages;
diff -r c20bc60f9243 -r 810885428743 xen/arch/ia64/xen/xenmem.c
--- a/xen/arch/ia64/xen/xenmem.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/ia64/xen/xenmem.c        Wed Jun 20 12:49:27 2007 -0600
@@ -90,7 +90,7 @@ alloc_dir_page(void)
                panic("Not enough memory for virtual frame table!\n");
        ++table_size;
        dir = mfn << PAGE_SHIFT;
-       memset(__va(dir), 0, PAGE_SIZE);
+       clear_page(__va(dir));
        return dir;
 }
 
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/apic.c       Wed Jun 20 12:49:27 2007 -0600
@@ -817,7 +817,7 @@ void __init init_apic_mappings(void)
      */
     if (!smp_found_config && detect_init_APIC()) {
         apic_phys = __pa(alloc_xenheap_page());
-        memset(__va(apic_phys), 0, PAGE_SIZE);
+        clear_page(__va(apic_phys));
     } else
         apic_phys = mp_lapic_addr;
 
@@ -852,7 +852,7 @@ void __init init_apic_mappings(void)
             } else {
 fake_ioapic_page:
                 ioapic_phys = __pa(alloc_xenheap_page());
-                memset(__va(ioapic_phys), 0, PAGE_SIZE);
+                clear_page(__va(ioapic_phys));
             }
             set_fixmap_nocache(idx, ioapic_phys);
             apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/cmdline.S
--- a/xen/arch/x86/boot/cmdline.S       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/cmdline.S       Wed Jun 20 12:49:27 2007 -0600
@@ -119,30 +119,31 @@ 3:      pop     %edi
         ret
 
 .Lfind_option:
-        push    %ebx
-        push    4+8(%esp)
-        push    4+8(%esp)
+        mov     4(%esp),%eax
+        dec     %eax
+        push    %ebx
+1:      pushl   4+8(%esp)
+        inc     %eax
+        push    %eax
         call    .Lstrstr
         add     $8,%esp
         test    %eax,%eax
         jz      3f
         cmp     %eax,4+4(%esp)
-        je      1f
+        je      2f
         cmpb    $' ',-1(%eax)
-        jne     2f
-1:      mov     %eax,%ebx
-        push    4+8(%esp)
+        jne     1b
+2:      mov     %eax,%ebx
+        pushl   4+8(%esp)
         call    .Lstrlen
         add     $4,%esp
-        xchg    %eax,%ebx
-        add     %eax,%ebx
+        xadd    %eax,%ebx
         cmpb    $'\0',(%ebx)
         je      3f
         cmpb    $' ',(%ebx)
         je      3f
         cmpb    $'=',(%ebx)
-        je      3f
-2:      xor     %eax,%eax
+        jne     1b
 3:      pop     %ebx
         ret
 
@@ -297,7 +298,7 @@ 1:      lodsw
         call    .Lstr_prefix
         add     $8,%esp
         test    %eax,%eax
-        jnz     .Lcmdline_exit
+        jnz     .Lparse_vga_current
 
         /* We have 'vga=mode-<mode>'. */
         add     $5,%ebx
@@ -305,6 +306,19 @@ 1:      lodsw
         call    .Latoi
         add     $4,%esp
         mov     %ax,bootsym_phys(boot_vid_mode)
+        jmp     .Lcmdline_exit
+
+.Lparse_vga_current:
+        /* Check for 'vga=current'. */
+        push    %ebx
+        pushl   $sym_phys(.Lvga_current)
+        call    .Lstr_prefix
+        add     $8,%esp
+        test    %eax,%eax
+        jnz     .Lcmdline_exit
+
+        /* We have 'vga=current'. */
+        movw    $VIDEO_CURRENT_MODE,bootsym_phys(boot_vid_mode)
 
 .Lcmdline_exit:
         popa
@@ -328,6 +342,8 @@ 1:      lodsw
         .asciz  "gfx-"
 .Lvga_mode:
         .asciz  "mode-"
+.Lvga_current:
+        .asciz  "current"
 .Lno_rm_opt:
         .asciz  "no-real-mode"
 .Ledid_opt:
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/trampoline.S
--- a/xen/arch/x86/boot/trampoline.S    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/trampoline.S    Wed Jun 20 12:49:27 2007 -0600
@@ -13,12 +13,11 @@ trampoline_realmode_entry:
         cli
         lidt    bootsym(idt_48)
         lgdt    bootsym(gdt_48)
+        mov     $1,%bl                    # EBX != 0 indicates we are an AP
         xor     %ax, %ax
         inc     %ax
         lmsw    %ax                       # CR0.PE = 1 (enter protected mode)
-        mov     $1,%bl                    # EBX != 0 indicates we are an AP
-        jmp     1f
-1:      ljmpl   $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
+        ljmpl   $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
 
 idt_48: .word   0, 0, 0 # base = limit = 0
 gdt_48: .word   6*8-1
@@ -135,10 +134,9 @@ trampoline_boot_cpu_entry:
         ljmp    $BOOT_PSEUDORM_CS,$bootsym(1f)
         .code16
 1:      mov     %eax,%cr0                 # CR0.PE = 0 (leave protected mode)
-        jmp     1f
 
         /* Load proper real-mode values into %cs, %ds, %es and %ss. */
-1:      ljmp    $(BOOT_TRAMPOLINE>>4),$bootsym(1f)
+        ljmp    $(BOOT_TRAMPOLINE>>4),$bootsym(1f)
 1:      mov     $(BOOT_TRAMPOLINE>>4),%ax
         mov     %ax,%ds
         mov     %ax,%es
@@ -166,10 +164,9 @@ 1:      mov     $(BOOT_TRAMPOLINE>>4),%a
         xor     %ax,%ax
         inc     %ax
         lmsw    %ax                       # CR0.PE = 1 (enter protected mode)
-        jmp     1f
 
         /* Load proper protected-mode values into all segment registers. */
-1:      ljmpl   $BOOT_CS32,$bootsym_phys(1f)
+        ljmpl   $BOOT_CS32,$bootsym_phys(1f)
         .code32
 1:      mov     $BOOT_DS,%eax
         mov     %eax,%ds
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/video.S
--- a/xen/arch/x86/boot/video.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/video.S Wed Jun 20 12:49:27 2007 -0600
@@ -15,7 +15,10 @@
 
 #include "video.h"
 
-#define modelist (0x3000)
+/* Scratch space layout. */
+#define modelist       (0x3000)
+#define vesa_glob_info (modelist + 1024)
+#define vesa_mode_info (vesa_glob_info + 1024)
 
 /* Retrieve Extended Display Identification Data. */
 #define CONFIG_FIRMWARE_EDID
@@ -109,7 +112,7 @@ mopar2: movb    %al, _param(PARAM_VIDEO_
 
 # Fetching of VESA frame buffer parameters
 mopar_gr:
-        leaw    modelist+1024, %di
+        leaw    vesa_mode_info, %di
         movb    $0x23, _param(PARAM_HAVE_VGA)
         movw    16(%di), %ax
         movw    %ax, _param(PARAM_LFB_LINELENGTH)
@@ -128,9 +131,7 @@ mopar_gr:
         movl    %eax, _param(PARAM_LFB_COLORS+4)
 
 # get video mem size
-        leaw    modelist+1024, %di
-        movw    $0x4f00, %ax
-        int     $0x10
+        leaw    vesa_glob_info, %di
         xorl    %eax, %eax
         movw    18(%di), %ax
         movl    %eax, _param(PARAM_LFB_SIZE)
@@ -183,7 +184,10 @@ dac_done:
 
         movw    %es, _param(PARAM_VESAPM_SEG)
         movw    %di, _param(PARAM_VESAPM_OFF)
-no_pm:  ret
+
+no_pm:  pushw   %ds
+        popw    %es
+        ret
 
 # The video mode menu
 mode_menu:
@@ -428,17 +432,13 @@ setmenu:
         jmp     mode_set
 
 check_vesa:
-#ifdef CONFIG_FIRMWARE_EDID
-        leaw    modelist+1024, %di
+        leaw    vesa_glob_info, %di
         movw    $0x4f00, %ax
         int     $0x10
         cmpw    $0x004f, %ax
         jnz     setbad
 
-        movw    4(%di), %ax
-        movw    %ax, bootsym(vbe_version)
-#endif
-        leaw    modelist+1024, %di
+        leaw    vesa_mode_info, %di
         subb    $VIDEO_FIRST_VESA>>8, %bh
         movw    %bx, %cx                # Get mode information structure
         movw    $0x4f01, %ax
@@ -447,7 +447,7 @@ check_vesa:
         cmpw    $0x004f, %ax
         jnz     setbad
 
-        movb    (%di), %al              # Check capabilities.
+        movb    (%di), %al              # Check mode attributes.
         andb    $0x99, %al
         cmpb    $0x99, %al
         jnz     _setbad                 # Doh! No linear frame buffer.
@@ -530,6 +530,7 @@ spec_inits:
         .word   bootsym(set_8pixel)
         .word   bootsym(set_80x43)
         .word   bootsym(set_80x28)
+        .word   bootsym(set_current)
         .word   bootsym(set_80x30)
         .word   bootsym(set_80x34)
         .word   bootsym(set_80x60)
@@ -575,6 +576,7 @@ set14:  movw    $0x1111, %ax            
         movb    $0x01, %ah              # Define cursor scan lines 11-12
         movw    $0x0b0c, %cx
         int     $0x10
+set_current:
         stc
         ret
 
@@ -695,33 +697,34 @@ vga_modes_end:
 # Detect VESA modes.
 vesa_modes:
         movw    %di, %bp                # BP=original mode table end
-        addw    $0x200, %di             # Buffer space
+        leaw    vesa_glob_info, %di
         movw    $0x4f00, %ax            # VESA Get card info call
         int     $0x10
+        movw    %di, %si
         movw    %bp, %di
         cmpw    $0x004f, %ax            # Successful?
         jnz     ret0
         
-        cmpw    $0x4556, 0x200(%di)     # 'VE'
+        cmpw    $0x4556, (%si)          # 'VE'
         jnz     ret0
         
-        cmpw    $0x4153, 0x202(%di)     # 'SA'
+        cmpw    $0x4153, 2(%si)         # 'SA'
         jnz     ret0
         
         movw    $bootsym(vesa_name), bootsym(card_name) # Set name to "VESA 
VGA"
         pushw   %gs
-        lgsw    0x20e(%di), %si         # GS:SI=mode list
+        lgsw    0xe(%si), %si           # GS:SI=mode list
         movw    $128, %cx               # Iteration limit
 vesa1:
         gs;     lodsw
-        cmpw    $0xffff, %ax                        # End of the table?
+        cmpw    $0xffff, %ax            # End of the table?
         jz      vesar
         
-        cmpw    $0x0080, %ax                        # Check validity of mode ID
+        cmpw    $0x0080, %ax            # Check validity of mode ID
         jc      vesa2
         
-        orb     %ah, %ah        # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
-        jz      vesan                # Certain BIOSes report 0x80-0xff!
+        orb     %ah, %ah                # Valid IDs 0x0000-0x007f/0x0100-0x07ff
+        jz      vesan                   # Certain BIOSes report 0x80-0xff!
 
         cmpw    $0x0800, %ax
         jnc     vesae
@@ -891,8 +894,13 @@ store_edid:
         cmpb    $1, bootsym(opt_edid)   # EDID disabled on cmdline (edid=no)?
         je      .Lno_edid
 
-        cmpw    $0x0200, bootsym(vbe_version)  # only do EDID on >= VBE2.0
-        jl      .Lno_edid
+        leaw    vesa_glob_info, %di
+        movw    $0x4f00, %ax
+        int     $0x10
+        cmpw    $0x004f, %ax
+        jne     .Lno_edid
+        cmpw    $0x0200, 4(%di)         # only do EDID on >= VBE2.0
+        jb      .Lno_edid
 
         xorw    %di, %di                # Report Capability
         pushw   %di
@@ -901,6 +909,8 @@ store_edid:
         xorw    %bx, %bx
         xorw    %cx, %cx
         int     $0x10
+        pushw   %ds
+        popw    %es
         cmpw    $0x004f, %ax            # Call failed?
         jne     .Lno_edid
 
@@ -920,8 +930,6 @@ store_edid:
         movw    $0x01, %bx
         movw    $0x00, %cx
         movw    $0x00, %dx
-        pushw   %ds
-        popw    %es
         movw    $bootsym(boot_edid_info), %di
         int     $0x10
 
@@ -940,7 +948,6 @@ card_name:      .word   0       # Pointe
 card_name:      .word   0       # Pointer to adapter name
 graphic_mode:   .byte   0       # Graphic mode with a linear frame buffer
 dac_size:       .byte   6       # DAC bit depth
-vbe_version:    .word   0       # VBE bios version
 
 # Status messages
 keymsg:         .ascii  "Press <RETURN> to see video modes available,"
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/video.h
--- a/xen/arch/x86/boot/video.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/video.h Wed Jun 20 12:49:27 2007 -0600
@@ -16,10 +16,11 @@
 #define VIDEO_80x50         0x0f01
 #define VIDEO_80x43         0x0f02
 #define VIDEO_80x28         0x0f03
-#define VIDEO_80x30         0x0f04
-#define VIDEO_80x34         0x0f05
-#define VIDEO_80x60         0x0f06
-#define VIDEO_LAST_SPECIAL  0x0f07
+#define VIDEO_CURRENT_MODE  0x0f04
+#define VIDEO_80x30         0x0f05
+#define VIDEO_80x34         0x0f06
+#define VIDEO_80x60         0x0f07
+#define VIDEO_LAST_SPECIAL  0x0f08
 
 #define ASK_VGA             0xfffd
 #define VIDEO_VESA_BY_SIZE  0xffff
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/x86_32.S        Wed Jun 20 12:49:27 2007 -0600
@@ -30,9 +30,7 @@ 1:      mov     %eax,(%edi)
         loop    1b
                 
         /* Pass off the Multiboot info structure to C land. */
-        mov     multiboot_ptr,%eax
-        add     $__PAGE_OFFSET,%eax
-        push    %eax
+        pushl   multiboot_ptr
         call    __start_xen
         ud2     /* Force a panic (invalid opcode). */
 
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/x86_64.S        Wed Jun 20 12:49:27 2007 -0600
@@ -51,8 +51,6 @@ 1:      movq    %rax,(%rdi)
 
         /* Pass off the Multiboot info structure to C land. */
         mov     multiboot_ptr(%rip),%edi
-        lea     start-0x100000(%rip),%rax
-        add     %rax,%rdi
         call    __start_xen
         ud2     /* Force a panic (invalid opcode). */
 
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/domain.c     Wed Jun 20 12:49:27 2007 -0600
@@ -232,26 +232,28 @@ static int setup_compat_l4(struct vcpu *
     l4_pgentry_t *l4tab;
     int rc;
 
-    if ( !pg )
+    if ( pg == NULL )
         return -ENOMEM;
 
     /* This page needs to look like a pagetable so that it can be shadowed */
     pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated;
 
     l4tab = copy_page(page_to_virt(pg), idle_pg_table);
+    l4tab[0] = l4e_empty();
     l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_page(pg, __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
         l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3),
                        __PAGE_HYPERVISOR);
+
+    if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
+    {
+        free_domheap_page(pg);
+        return rc;
+    }
+
     v->arch.guest_table = pagetable_from_page(pg);
     v->arch.guest_table_user = v->arch.guest_table;
-
-    if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
-    {
-        free_domheap_page(pg);
-        return rc;
-    }
 
     return 0;
 }
@@ -318,11 +320,11 @@ int switch_compat(struct domain *d)
     gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
     for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
     {
+        if ( (d->vcpu[vcpuid] != NULL) &&
+             (setup_compat_l4(d->vcpu[vcpuid]) != 0) )
+            goto undo_and_fail;
         d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
                                  FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
-        if (d->vcpu[vcpuid]
-            && setup_compat_l4(d->vcpu[vcpuid]) != 0)
-            return -ENOMEM;
     }
 
     d->arch.physaddr_bitsize =
@@ -330,6 +332,19 @@ int switch_compat(struct domain *d)
         + (PAGE_SIZE - 2);
 
     return 0;
+
+ undo_and_fail:
+    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
+    release_arg_xlat_area(d);
+    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
+    while ( vcpuid-- != 0 )
+    {
+        if ( d->vcpu[vcpuid] != NULL )
+            release_compat_l4(d->vcpu[vcpuid]);
+        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
+                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
+    }
+    return -ENOMEM;
 }
 
 #else
@@ -461,7 +476,7 @@ int arch_domain_create(struct domain *d)
         if ( (d->shared_info = alloc_xenheap_page()) == NULL )
             goto fail;
 
-        memset(d->shared_info, 0, PAGE_SIZE);
+        clear_page(d->shared_info);
         share_xen_page_with_guest(
             virt_to_page(d->shared_info), d, XENSHARE_writable);
     }
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/domain_build.c       Wed Jun 20 12:49:27 2007 -0600
@@ -505,7 +505,7 @@ int __init construct_dom0(
     v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
 #else
     l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
-    memcpy(l2tab, idle_pg_table, PAGE_SIZE);
+    copy_page(l2tab, idle_pg_table);
     l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
         l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
     v->arch.guest_table = pagetable_from_paddr((unsigned long)l2start);
@@ -645,7 +645,7 @@ int __init construct_dom0(
             panic("Not enough RAM for domain 0 PML4.\n");
         l4start = l4tab = page_to_virt(page);
     }
-    memcpy(l4tab, idle_pg_table, PAGE_SIZE);
+    copy_page(l4tab, idle_pg_table);
     l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
     l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
@@ -823,7 +823,7 @@ int __init construct_dom0(
 
     /* Set up start info area. */
     si = (start_info_t *)vstartinfo_start;
-    memset(si, 0, PAGE_SIZE);
+    clear_page(si);
     si->nr_pages = nr_pages;
 
     si->shared_info = virt_to_maddr(d->shared_info);
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/flushtlb.c
--- a/xen/arch/x86/flushtlb.c   Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/flushtlb.c   Wed Jun 20 12:49:27 2007 -0600
@@ -80,6 +80,8 @@ void write_cr3(unsigned long cr3)
 
     t = pre_flush();
 
+    hvm_flush_guest_tlbs();
+
 #ifdef USER_MAPPINGS_ARE_GLOBAL
     __pge_off();
     __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
@@ -103,6 +105,8 @@ void local_flush_tlb(void)
 
     t = pre_flush();
 
+    hvm_flush_guest_tlbs();
+
 #ifdef USER_MAPPINGS_ARE_GLOBAL
     __pge_off();
     __pge_on();
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c    Wed Jun 20 12:49:27 2007 -0600
@@ -831,11 +831,24 @@ void hvm_update_guest_cr3(struct vcpu *v
     hvm_funcs.update_guest_cr3(v);
 }
 
+static void hvm_latch_shinfo_size(struct domain *d)
+{
+    /*
+     * Called from operations which are among the very first executed by
+     * PV drivers on initialisation or after save/restore. These are sensible
+     * points at which to sample the execution mode of the guest and latch
+     * 32- or 64-bit format for shared state.
+     */
+    if ( current->domain == d )
+        d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
+}
+
 /* Initialise a hypercall transfer page for a VMX domain using
    paravirtualised drivers. */
 void hvm_hypercall_page_initialise(struct domain *d,
                                    void *hypercall_page)
 {
+    hvm_latch_shinfo_size(d);
     hvm_funcs.init_hypercall_page(d, hypercall_page);
 }
 
@@ -1065,13 +1078,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
                 break;
             case HVM_PARAM_CALLBACK_IRQ:
                 hvm_set_callback_via(d, a.value);
-                /*
-                 * Since this operation is one of the very first executed
-                 * by PV drivers on initialisation or after save/restore, it
-                 * is a sensible point at which to sample the execution mode of
-                 * the guest and latch 32- or 64-bit format for shared state.
-                 */
-                d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
+                hvm_latch_shinfo_size(d);
                 break;
             }
             d->arch.hvm_domain.params[a.index] = a.value;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/irq.c    Wed Jun 20 12:49:27 2007 -0600
@@ -285,43 +285,49 @@ void hvm_set_callback_via(struct domain 
     }
 }
 
-int cpu_has_pending_irq(struct vcpu *v)
+enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v)
 {
     struct hvm_domain *plat = &v->domain->arch.hvm_domain;
 
-    /* APIC */
+    if ( unlikely(v->arch.hvm_vcpu.nmi_pending) )
+        return hvm_intack_nmi;
+
     if ( vlapic_has_interrupt(v) != -1 )
-        return 1;
-
-    /* PIC */
+        return hvm_intack_lapic;
+
     if ( !vlapic_accept_pic_intr(v) )
-        return 0;
-
-    return plat->vpic[0].int_output;
-}
-
-int cpu_get_interrupt(struct vcpu *v, int *type)
-{
-    int vector;
-
-    if ( (vector = cpu_get_apic_interrupt(v, type)) != -1 )
-        return vector;
-
-    if ( (v->vcpu_id == 0) &&
-         ((vector = cpu_get_pic_interrupt(v, type)) != -1) )
-        return vector;
-
-    return -1;
-}
-
-int get_isa_irq_vector(struct vcpu *v, int isa_irq, int type)
+        return hvm_intack_none;
+
+    return plat->vpic[0].int_output ? hvm_intack_pic : hvm_intack_none;
+}
+
+int hvm_vcpu_ack_pending_irq(struct vcpu *v, enum hvm_intack type, int *vector)
+{
+    switch ( type )
+    {
+    case hvm_intack_nmi:
+        return test_and_clear_bool(v->arch.hvm_vcpu.nmi_pending);
+    case hvm_intack_lapic:
+        return ((*vector = cpu_get_apic_interrupt(v)) != -1);
+    case hvm_intack_pic:
+        ASSERT(v->vcpu_id == 0);
+        return ((*vector = cpu_get_pic_interrupt(v)) != -1);
+    default:
+        break;
+    }
+
+    return 0;
+}
+
+int get_isa_irq_vector(struct vcpu *v, int isa_irq, enum hvm_intack src)
 {
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
 
-    if ( type == APIC_DM_EXTINT )
+    if ( src == hvm_intack_pic )
         return (v->domain->arch.hvm_domain.vpic[isa_irq >> 3].irq_base
                 + (isa_irq & 7));
 
+    ASSERT(src == hvm_intack_lapic);
     return domain_vioapic(v->domain)->redirtbl[gsi].fields.vector;
 }
 
@@ -337,19 +343,20 @@ int is_isa_irq_masked(struct vcpu *v, in
             domain_vioapic(v->domain)->redirtbl[gsi].fields.mask);
 }
 
-/*
- * TODO: 1. Should not need special treatment of event-channel events.
- *       2. Should take notice of interrupt shadows (or clear them).
- */
 int hvm_local_events_need_delivery(struct vcpu *v)
 {
-    int pending;
-
-    pending = (vcpu_info(v, evtchn_upcall_pending) || cpu_has_pending_irq(v));
-    if ( unlikely(pending) )
-        pending = hvm_interrupts_enabled(v); 
-
-    return pending;
+    enum hvm_intack type;
+
+    /* TODO: Get rid of event-channel special case. */
+    if ( vcpu_info(v, evtchn_upcall_pending) )
+        type = hvm_intack_pic;
+    else
+        type = hvm_vcpu_has_pending_irq(v);
+
+    if ( likely(type == hvm_intack_none) )
+        return 0;
+
+    return hvm_interrupts_enabled(v, type);
 }
 
 #if 0 /* Keep for debugging */
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/asid.c
--- a/xen/arch/x86/hvm/svm/asid.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/asid.c       Wed Jun 20 12:49:27 2007 -0600
@@ -60,7 +60,7 @@ struct svm_asid_data {
    u64 core_asid_generation;
    u32 next_asid;
    u32 max_asid;
-   u32 erratum170;
+   u32 erratum170:1;
 };
 
 static DEFINE_PER_CPU(struct svm_asid_data, svm_asid_data);
@@ -140,25 +140,21 @@ void svm_asid_init_vcpu(struct vcpu *v)
 }
 
 /*
- * Increase the Generation to make free ASIDs.  Flush physical TLB and give
- * ASID.
- */
-static void svm_asid_handle_inc_generation(struct vcpu *v)
-{
-    struct svm_asid_data *data = svm_asid_core_data();
-
-    if ( likely(data->core_asid_generation <  SVM_ASID_LAST_GENERATION) )
-    {
-        /* Handle ASID overflow. */
+ * Increase the Generation to make free ASIDs, and indirectly cause a 
+ * TLB flush of all ASIDs on the next vmrun.
+ */
+void svm_asid_inc_generation(void)
+{
+    struct svm_asid_data *data = svm_asid_core_data();
+
+    if ( likely(data->core_asid_generation < SVM_ASID_LAST_GENERATION) )
+    {
+        /* Move to the next generation.  We can't flush the TLB now
+         * because you need to vmrun to do that, and current might not
+         * be a HVM vcpu, but the first HVM vcpu that runs after this 
+         * will pick up ASID 1 and flush the TLBs. */
         data->core_asid_generation++;
-        data->next_asid = SVM_ASID_FIRST_GUEST_ASID + 1;
-
-        /* Handle VCPU. */
-        v->arch.hvm_svm.vmcb->guest_asid = SVM_ASID_FIRST_GUEST_ASID;
-        v->arch.hvm_svm.asid_generation  = data->core_asid_generation;
-
-        /* Trigger flush of physical TLB. */
-        v->arch.hvm_svm.vmcb->tlb_control = 1;
+        data->next_asid = SVM_ASID_FIRST_GUEST_ASID;
         return;
     }
 
@@ -168,11 +164,12 @@ static void svm_asid_handle_inc_generati
      * this core (flushing TLB always). So correctness is established; it
      * only runs a bit slower.
      */
-    printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n");
-    data->erratum170 = 1;
-    data->core_asid_generation = SVM_ASID_INVALID_GENERATION;
-
-    svm_asid_init_vcpu(v);
+    if ( !data->erratum170 )
+    {
+        printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n");
+        data->erratum170 = 1;
+        data->core_asid_generation = SVM_ASID_INVALID_GENERATION;
+    }
 }
 
 /*
@@ -202,18 +199,21 @@ asmlinkage void svm_asid_handle_vmrun(vo
         return;
     }
 
-    /* Different ASID generations trigger fetching of a fresh ASID. */
-    if ( likely(data->next_asid <= data->max_asid) )
-    {
-        /* There is a free ASID. */
-        v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++;
-        v->arch.hvm_svm.asid_generation  = data->core_asid_generation;
-        v->arch.hvm_svm.vmcb->tlb_control = 0;
-        return;
-    }
-
-    /* Slow path, may cause TLB flush. */
-    svm_asid_handle_inc_generation(v);
+    /* If there are no free ASIDs, need to go to a new generation */
+    if ( unlikely(data->next_asid > data->max_asid) )
+        svm_asid_inc_generation();
+
+    /* Now guaranteed to be a free ASID. */
+    v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++;
+    v->arch.hvm_svm.asid_generation  = data->core_asid_generation;
+
+    /* When we assign ASID 1, flush all TLB entries.  We need to do it 
+     * here because svm_asid_inc_generation() can be called at any time, 
+     * but the TLB flush can only happen on vmrun. */
+    if ( v->arch.hvm_svm.vmcb->guest_asid == SVM_ASID_FIRST_GUEST_ASID )
+        v->arch.hvm_svm.vmcb->tlb_control = 1;
+    else
+        v->arch.hvm_svm.vmcb->tlb_control = 0;
 }
 
 void svm_asid_inv_asid(struct vcpu *v)
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c       Wed Jun 20 12:49:27 2007 -0600
@@ -15,7 +15,6 @@
  * You should have received a copy of the GNU General Public License along with
  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
  */
 
 #include <xen/config.h>
@@ -39,100 +38,119 @@
 #include <xen/domain_page.h>
 #include <asm/hvm/trace.h>
 
-/*
- * Most of this code is copied from vmx_io.c and modified 
- * to be suitable for SVM.
- */
-
-static inline int svm_inject_extint(struct vcpu *v, int trap)
+static void svm_inject_dummy_vintr(struct vcpu *v)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     vintr_t intr = vmcb->vintr;
 
-    /* Update only relevant fields */    
     intr.fields.irq = 1;
     intr.fields.intr_masking = 1;
-    intr.fields.vector = trap;
+    intr.fields.vector = 0;
     intr.fields.prio = 0xF;
     intr.fields.ign_tpr = 1;
     vmcb->vintr = intr;
+}
+    
+static void svm_inject_nmi(struct vcpu *v)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    eventinj_t event;
 
-    return 0;
+    event.bytes = 0;
+    event.fields.v = 1;
+    event.fields.type = EVENTTYPE_NMI;
+    event.fields.vector = 2;
+
+    ASSERT(vmcb->eventinj.fields.v == 0);
+    vmcb->eventinj = event;
+}
+    
+static void svm_inject_extint(struct vcpu *v, int vector)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    eventinj_t event;
+
+    event.bytes = 0;
+    event.fields.v = 1;
+    event.fields.type = EVENTTYPE_INTR;
+    event.fields.vector = vector;
+
+    ASSERT(vmcb->eventinj.fields.v == 0);
+    vmcb->eventinj = event;
 }
     
 asmlinkage void svm_intr_assist(void) 
 {
     struct vcpu *v = current;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    int intr_type = APIC_DM_EXTINT;
-    int intr_vector = -1;
+    enum hvm_intack intr_source;
+    int intr_vector;
 
     /*
-     * Previous Interrupt delivery caused this intercept?
+     * Previous event delivery caused this intercept?
      * This will happen if the injection is latched by the processor (hence
-     * clearing vintr.fields.irq) but then subsequently a fault occurs (e.g.,
-     * due to lack of shadow mapping of guest IDT or guest-kernel stack).
-     * 
-     * NB. Exceptions that fault during delivery are lost. This needs to be
-     * fixed but we'll usually get away with it since faults are usually
-     * idempotent. But this isn't the case for e.g. software interrupts!
+     * clearing vintr.fields.irq or eventinj.v) but then subsequently a fault
+     * occurs (e.g., due to lack of shadow mapping of guest IDT or guest-kernel
+     * stack).
      */
-    if ( vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0) )
+    if ( vmcb->exitintinfo.fields.v )
     {
-        intr_vector = vmcb->exitintinfo.fields.vector;
+        vmcb->eventinj = vmcb->exitintinfo;
         vmcb->exitintinfo.bytes = 0;
         HVMTRACE_1D(REINJ_VIRQ, v, intr_vector);
-        svm_inject_extint(v, intr_vector);
         return;
     }
 
-    /*
-     * Previous interrupt still pending? This occurs if we return from VMRUN
-     * very early in the entry-to-guest process. Usually this is because an
-     * external physical interrupt was pending when we executed VMRUN.
-     */
-    if ( vmcb->vintr.fields.irq )
-        return;
-
-    /* Crank the handle on interrupt state and check for new interrrupts. */
+    /* Crank the handle on interrupt state. */
     pt_update_irq(v);
     hvm_set_callback_irq_level();
-    if ( !cpu_has_pending_irq(v) )
-        return;
 
-    /*
-     * If the guest can't take an interrupt right now, create a 'fake'
-     * virtual interrupt on to intercept as soon as the guest _can_ take
-     * interrupts.  Do not obtain the next interrupt from the vlapic/pic
-     * if unable to inject.
-     *
-     * Also do this if there is an exception pending.  This is because
-     * the delivery of the exception can arbitrarily delay the injection
-     * of the vintr (for example, if the exception is handled via an
-     * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
-     * - the vTPR could be modified upwards, so we need to wait until the
-     *   exception is delivered before we can safely decide that an
-     *   interrupt is deliverable; and
-     * - the guest might look at the APIC/PIC state, so we ought not to have 
-     *   cleared the interrupt out of the IRR.
-     */
-    if ( irq_masked(vmcb->rflags) || vmcb->interrupt_shadow 
-         || vmcb->eventinj.fields.v )  
+    do {
+        intr_source = hvm_vcpu_has_pending_irq(v);
+        if ( likely(intr_source == hvm_intack_none) )
+            return;
+
+        /*
+         * If the guest can't take an interrupt right now, create a 'fake'
+         * virtual interrupt on to intercept as soon as the guest _can_ take
+         * interrupts.  Do not obtain the next interrupt from the vlapic/pic
+         * if unable to inject.
+         *
+         * Also do this if there is an injection already pending. This is
+         * because the event delivery can arbitrarily delay the injection
+         * of the vintr (for example, if the exception is handled via an
+         * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
+         * - the vTPR could be modified upwards, so we need to wait until the
+         *   exception is delivered before we can safely decide that an
+         *   interrupt is deliverable; and
+         * - the guest might look at the APIC/PIC state, so we ought not to
+         *   have cleared the interrupt out of the IRR.
+         *
+         * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt
+         * shadow. This is hard to do without hardware support. We should also
+         * track 'NMI blocking' from NMI injection until IRET. This can be done
+         * quite easily in software by intercepting the unblocking IRET.
+         */
+        if ( !hvm_interrupts_enabled(v, intr_source) ||
+             vmcb->eventinj.fields.v )
+        {
+            vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
+            HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
+            svm_inject_dummy_vintr(v);
+            return;
+        }
+    } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
+
+    if ( intr_source == hvm_intack_nmi )
     {
-        vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
-        HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
-        svm_inject_extint(v, 0x0); /* actual vector doesn't matter */
-        return;
+        svm_inject_nmi(v);
     }
-
-    /* Okay, we can deliver the interrupt: grab it and update PIC state. */
-    intr_vector = cpu_get_interrupt(v, &intr_type);
-    BUG_ON(intr_vector < 0);
-
-    HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
-    svm_inject_extint(v, intr_vector);
-
-    pt_intr_post(v, intr_vector, intr_type);
+    else
+    {
+        HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
+        svm_inject_extint(v, intr_vector);
+        pt_intr_post(v, intr_vector, intr_source);
+    }
 }
 
 /*
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c        Wed Jun 20 12:49:27 2007 -0600
@@ -312,26 +312,8 @@ int svm_vmcb_save(struct vcpu *v, struct
     c->sysenter_esp = vmcb->sysenter_esp;
     c->sysenter_eip = vmcb->sysenter_eip;
 
-    /* Save any event/interrupt that was being injected when we last
-     * exited.  Although there are three(!) VMCB fields that can contain
-     * active events, we only need to save at most one: because the
-     * intr_assist logic never delivers an IRQ when any other event is
-     * active, we know that the only possible collision is if we inject
-     * a fault while exitintinfo contains a valid event (the delivery of
-     * which caused the last exit).  In that case replaying just the
-     * first event should cause the same behaviour when we restore. */
-    if ( vmcb->vintr.fields.irq 
-         && /* Check it's not a fake interrupt (see svm_intr_assist()) */
-         !(vmcb->general1_intercepts & GENERAL1_INTERCEPT_VINTR) )
-    {
-        c->pending_vector = vmcb->vintr.fields.vector;
-        c->pending_type = 0; /* External interrupt */
-        c->pending_error_valid = 0;
-        c->pending_reserved = 0;
-        c->pending_valid = 1;
-        c->error_code = 0;
-    }
-    else if ( vmcb->exitintinfo.fields.v )
+    /* Save any event/interrupt that was being injected when we last exited. */
+    if ( vmcb->exitintinfo.fields.v )
     {
         c->pending_event = vmcb->exitintinfo.bytes & 0xffffffff;
         c->error_code = vmcb->exitintinfo.fields.errorcode;
@@ -569,10 +551,15 @@ static inline void svm_restore_dr(struct
         __restore_debug_registers(v);
 }
 
-static int svm_interrupts_enabled(struct vcpu *v)
-{
-    unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
-    return !irq_masked(eflags); 
+static int svm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    if ( type == hvm_intack_nmi )
+        return !vmcb->interrupt_shadow;
+
+    ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
+    return !irq_masked(vmcb->rflags) && !vmcb->interrupt_shadow; 
 }
 
 static int svm_guest_x86_mode(struct vcpu *v)
@@ -596,6 +583,14 @@ static void svm_update_guest_cr3(struct 
 static void svm_update_guest_cr3(struct vcpu *v)
 {
     v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
+}
+
+static void svm_flush_guest_tlbs(void)
+{
+    /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
+     * next VMRUN.  (If ASIDs are disabled, the whole TLB is flushed on
+     * VMRUN anyway). */
+    svm_asid_inc_generation();
 }
 
 static void svm_update_vtpr(struct vcpu *v, unsigned long value)
@@ -770,8 +765,6 @@ static void svm_init_hypercall_page(stru
 {
     char *p;
     int i;
-
-    memset(hypercall_page, 0, PAGE_SIZE);
 
     for ( i = 0; i < (PAGE_SIZE / 32); i++ )
     {
@@ -948,6 +941,7 @@ static struct hvm_function_table svm_fun
     .get_segment_register = svm_get_segment_register,
     .update_host_cr3      = svm_update_host_cr3,
     .update_guest_cr3     = svm_update_guest_cr3,
+    .flush_guest_tlbs     = svm_flush_guest_tlbs,
     .update_vtpr          = svm_update_vtpr,
     .stts                 = svm_stts,
     .set_tsc_offset       = svm_set_tsc_offset,
@@ -957,7 +951,7 @@ static struct hvm_function_table svm_fun
     .event_injection_faulted = svm_event_injection_faulted
 };
 
-void svm_npt_detect(void)
+static void svm_npt_detect(void)
 {
     u32 eax, ebx, ecx, edx;
 
@@ -1017,6 +1011,9 @@ int start_svm(struct cpuinfo_x86 *c)
 
     hvm_enable(&svm_function_table);
 
+    if ( opt_hap_enabled )
+        printk("SVM: Nested paging enabled.\n");
+        
     return 1;
 }
 
@@ -1477,7 +1474,7 @@ static void svm_io_instruction(struct vc
 
     /* Copy current guest state into io instruction state structure. */
     memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
-    hvm_store_cpu_guest_regs(v, regs, NULL);
+    svm_store_cpu_guest_regs(v, regs, NULL);
 
     info.bytes = vmcb->exitinfo1;
 
@@ -2148,11 +2145,14 @@ static inline void svm_do_msr_access(
 
 static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
 {
+    enum hvm_intack type = hvm_vcpu_has_pending_irq(current);
+
     __update_guest_eip(vmcb, 1);
 
     /* Check for interrupt not handled or new interrupt. */
-    if ( (vmcb->rflags & X86_EFLAGS_IF) &&
-         (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) ) {
+    if ( vmcb->eventinj.fields.v ||
+         ((type != hvm_intack_none) && svm_interrupts_enabled(current, type)) )
+    {
         HVMTRACE_1D(HLT, current, /*int pending=*/ 1);
         return;
     }
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Wed Jun 20 12:49:27 2007 -0600
@@ -56,7 +56,7 @@ struct vmcb_struct *alloc_vmcb(void)
         return NULL;
     }
 
-    memset(vmcb, 0, PAGE_SIZE);
+    clear_page(vmcb);
     return vmcb;
 }
 
@@ -72,11 +72,11 @@ struct host_save_area *alloc_host_save_a
     hsa = alloc_xenheap_page();
     if ( hsa == NULL )
     {
-        printk(XENLOG_WARNING "Warning: failed to allocate vmcb.\n");
+        printk(XENLOG_WARNING "Warning: failed to allocate hsa.\n");
         return NULL;
     }
 
-    memset(hsa, 0, PAGE_SIZE);
+    clear_page(hsa);
     return hsa;
 }
 
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vioapic.c        Wed Jun 20 12:49:27 2007 -0600
@@ -254,17 +254,11 @@ static void ioapic_inj_irq(
     HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d",
                 vector, trig_mode, delivery_mode);
 
-    switch ( delivery_mode )
-    {
-    case dest_Fixed:
-    case dest_LowestPrio:
-        if ( vlapic_set_irq(target, vector, trig_mode) )
-            vcpu_kick(vlapic_vcpu(target));
-        break;
-    default:
-        gdprintk(XENLOG_WARNING, "error delivery mode %d\n", delivery_mode);
-        break;
-    }
+    ASSERT((delivery_mode == dest_Fixed) ||
+           (delivery_mode == dest_LowestPrio));
+
+    if ( vlapic_set_irq(target, vector, trig_mode) )
+        vcpu_kick(vlapic_vcpu(target));
 }
 
 static uint32_t ioapic_get_delivery_bitmask(
@@ -368,7 +362,6 @@ static void vioapic_deliver(struct hvm_h
     }
 
     case dest_Fixed:
-    case dest_ExtINT:
     {
         uint8_t bit;
         for ( bit = 0; deliver_bitmask != 0; bit++ )
@@ -393,10 +386,21 @@ static void vioapic_deliver(struct hvm_h
         break;
     }
 
-    case dest_SMI:
     case dest_NMI:
-    case dest_INIT:
-    case dest__reserved_2:
+    {
+        uint8_t bit;
+        for ( bit = 0; deliver_bitmask != 0; bit++ )
+        {
+            if ( !(deliver_bitmask & (1 << bit)) )
+                continue;
+            deliver_bitmask &= ~(1 << bit);
+            if ( ((v = vioapic_domain(vioapic)->vcpu[bit]) != NULL) &&
+                 !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
+                vcpu_kick(v);
+        }
+        break;
+    }
+
     default:
         gdprintk(XENLOG_WARNING, "Unsupported delivery mode %d\n",
                  delivery_mode);
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vlapic.c Wed Jun 20 12:49:27 2007 -0600
@@ -294,7 +294,8 @@ static int vlapic_accept_irq(struct vcpu
         break;
 
     case APIC_DM_NMI:
-        gdprintk(XENLOG_WARNING, "Ignoring guest NMI\n");
+        if ( !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
+            vcpu_kick(v);
         break;
 
     case APIC_DM_INIT:
@@ -747,7 +748,7 @@ int vlapic_has_interrupt(struct vcpu *v)
     return highest_irr;
 }
 
-int cpu_get_apic_interrupt(struct vcpu *v, int *mode)
+int cpu_get_apic_interrupt(struct vcpu *v)
 {
     int vector = vlapic_has_interrupt(v);
     struct vlapic *vlapic = vcpu_vlapic(v);
@@ -757,8 +758,6 @@ int cpu_get_apic_interrupt(struct vcpu *
  
     vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
     vlapic_clear_irr(vector, vlapic);
-
-    *mode = APIC_DM_FIXED;
     return vector;
 }
 
@@ -935,7 +934,7 @@ int vlapic_init(struct vcpu *v)
        return -ENOMEM;
     }
 
-    memset(vlapic->regs, 0, PAGE_SIZE);
+    clear_page(vlapic->regs);
 
     vlapic_reset(vlapic);
 
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/intr.c       Wed Jun 20 12:49:27 2007 -0600
@@ -102,8 +102,8 @@ static void update_tpr_threshold(struct 
 
 asmlinkage void vmx_intr_assist(void)
 {
-    int has_ext_irq, intr_vector, intr_type = 0;
-    unsigned long eflags, intr_shadow;
+    int intr_vector;
+    enum hvm_intack intr_source;
     struct vcpu *v = current;
     unsigned int idtv_info_field;
     unsigned long inst_len;
@@ -114,65 +114,67 @@ asmlinkage void vmx_intr_assist(void)
 
     update_tpr_threshold(vcpu_vlapic(v));
 
-    has_ext_irq = cpu_has_pending_irq(v);
+    do {
+        intr_source = hvm_vcpu_has_pending_irq(v);
 
-    if ( unlikely(v->arch.hvm_vmx.vector_injected) )
-    {
-        v->arch.hvm_vmx.vector_injected = 0;
-        if ( unlikely(has_ext_irq) )
-            enable_irq_window(v);
-        return;
-    }
+        if ( unlikely(v->arch.hvm_vmx.vector_injected) )
+        {
+            v->arch.hvm_vmx.vector_injected = 0;
+            if ( unlikely(intr_source != hvm_intack_none) )
+                enable_irq_window(v);
+            return;
+        }
 
-    /* This could be moved earlier in the VMX resume sequence. */
-    idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
-    if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
-    {
-        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+        /* This could be moved earlier in the VMX resume sequence. */
+        idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
+        if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
+        {
+            __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+
+            /*
+             * Safe: the length will only be interpreted for software
+             * exceptions and interrupts. If we get here then delivery of some
+             * event caused a fault, and this always results in defined
+             * VM_EXIT_INSTRUCTION_LEN.
+             */
+            inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
+            __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
+
+            if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
+                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                          __vmread(IDT_VECTORING_ERROR_CODE));
+            if ( unlikely(intr_source != hvm_intack_none) )
+                enable_irq_window(v);
+
+            HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
+            return;
+        }
+
+        if ( likely(intr_source == hvm_intack_none) )
+            return;
 
         /*
-         * Safe: the length will only be interpreted for software exceptions
-         * and interrupts. If we get here then delivery of some event caused a
-         * fault, and this always results in defined VM_EXIT_INSTRUCTION_LEN.
+         * TODO: Better NMI handling. Shouldn't wait for EFLAGS.IF==1, but
+         * should wait for exit from 'NMI blocking' window (NMI injection to
+         * next IRET). This requires us to use the new 'virtual NMI' support.
          */
-        inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
-        __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
+        if ( !hvm_interrupts_enabled(v, intr_source) )
+        {
+            enable_irq_window(v);
+            return;
+        }
+    } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
 
-        if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
-            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
-                      __vmread(IDT_VECTORING_ERROR_CODE));
-        if ( unlikely(has_ext_irq) )
-            enable_irq_window(v);
-
-        HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
-        return;
+    if ( intr_source == hvm_intack_nmi )
+    {
+        vmx_inject_nmi(v);
     }
-
-    if ( likely(!has_ext_irq) )
-        return;
-
-    intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
-    if ( unlikely(intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS)) )
+    else
     {
-        enable_irq_window(v);
-        HVM_DBG_LOG(DBG_LEVEL_1, "interruptibility");
-        return;
+        HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
+        vmx_inject_extint(v, intr_vector);
+        pt_intr_post(v, intr_vector, intr_source);
     }
-
-    eflags = __vmread(GUEST_RFLAGS);
-    if ( irq_masked(eflags) )
-    {
-        enable_irq_window(v);
-        return;
-    }
-
-    intr_vector = cpu_get_interrupt(v, &intr_type);
-    BUG_ON(intr_vector < 0);
-
-    HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
-    vmx_inject_extint(v, intr_vector, VMX_DELIVER_NO_ERROR_CODE);
-
-    pt_intr_post(v, intr_vector, intr_type);
 }
 
 /*
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Wed Jun 20 12:49:27 2007 -0600
@@ -158,7 +158,7 @@ static struct vmcs_struct *vmx_alloc_vmc
         return NULL;
     }
 
-    memset(vmcs, 0, PAGE_SIZE);
+    clear_page(vmcs);
     vmcs->vmcs_revision_id = vmcs_revision_id;
 
     return vmcs;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Jun 20 12:49:27 2007 -0600
@@ -1070,8 +1070,6 @@ static void vmx_init_hypercall_page(stru
     char *p;
     int i;
 
-    memset(hypercall_page, 0, PAGE_SIZE);
-
     for ( i = 0; i < (PAGE_SIZE / 32); i++ )
     {
         p = (char *)(hypercall_page + (i * 32));
@@ -1115,16 +1113,26 @@ static int vmx_nx_enabled(struct vcpu *v
     return v->arch.hvm_vmx.efer & EFER_NX;
 }
 
-static int vmx_interrupts_enabled(struct vcpu *v) 
-{
-    unsigned long eflags = __vmread(GUEST_RFLAGS); 
-    return !irq_masked(eflags); 
-}
-
+static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+    unsigned long intr_shadow, eflags;
+
+    ASSERT(v == current);
+
+    intr_shadow  = __vmread(GUEST_INTERRUPTIBILITY_INFO);
+    intr_shadow &= VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS;
+
+    if ( type == hvm_intack_nmi )
+        return !intr_shadow;
+
+    ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
+    eflags = __vmread(GUEST_RFLAGS);
+    return !irq_masked(eflags) && !intr_shadow;
+}
 
 static void vmx_update_host_cr3(struct vcpu *v)
 {
-    ASSERT( (v == current) || !vcpu_runnable(v) );
+    ASSERT((v == current) || !vcpu_runnable(v));
     vmx_vmcs_enter(v);
     __vmwrite(HOST_CR3, v->arch.cr3);
     vmx_vmcs_exit(v);
@@ -1132,12 +1140,18 @@ static void vmx_update_host_cr3(struct v
 
 static void vmx_update_guest_cr3(struct vcpu *v)
 {
-    ASSERT( (v == current) || !vcpu_runnable(v) );
+    ASSERT((v == current) || !vcpu_runnable(v));
     vmx_vmcs_enter(v);
     __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
     vmx_vmcs_exit(v);
 }
 
+static void vmx_flush_guest_tlbs(void)
+{
+    /* No tagged TLB support on VMX yet.  The fact that we're in Xen 
+     * at all means any guest will have a clean TLB when it's next run,
+     * because VMRESUME will flush it for us. */
+}
 
 static void vmx_inject_exception(
     unsigned int trapnr, int errcode, unsigned long cr2)
@@ -1205,6 +1219,7 @@ static struct hvm_function_table vmx_fun
     .get_segment_register = vmx_get_segment_register,
     .update_host_cr3      = vmx_update_host_cr3,
     .update_guest_cr3     = vmx_update_guest_cr3,
+    .flush_guest_tlbs     = vmx_flush_guest_tlbs,
     .update_vtpr          = vmx_update_vtpr,
     .stts                 = vmx_stts,
     .set_tsc_offset       = vmx_set_tsc_offset,
@@ -1837,7 +1852,7 @@ static void vmx_io_instruction(unsigned 
 
     /* Copy current guest state into io instruction state structure. */
     memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
-    hvm_store_cpu_guest_regs(current, regs, NULL);
+    vmx_store_cpu_guest_regs(current, regs, NULL);
 
     HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, "
                 "exit_qualification = %lx",
@@ -2549,7 +2564,8 @@ static inline int vmx_do_msr_read(struct
 
     HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
 
-    switch (ecx) {
+    switch ( ecx )
+    {
     case MSR_IA32_TIME_STAMP_COUNTER:
         msr_content = hvm_get_guest_time(v);
         break;
@@ -2565,6 +2581,8 @@ static inline int vmx_do_msr_read(struct
     case MSR_IA32_APICBASE:
         msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
         break;
+    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+        goto gp_fault;
     default:
         if ( long_mode_do_msr_read(regs) )
             goto done;
@@ -2576,8 +2594,8 @@ static inline int vmx_do_msr_read(struct
             regs->edx = edx;
             goto done;
         }
-        vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
-        return 0;
+
+        goto gp_fault;
     }
 
     regs->eax = msr_content & 0xFFFFFFFF;
@@ -2589,6 +2607,10 @@ done:
                 ecx, (unsigned long)regs->eax,
                 (unsigned long)regs->edx);
     return 1;
+
+gp_fault:
+    vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+    return 0;
 }
 
 static int vmx_alloc_vlapic_mapping(struct domain *d)
@@ -2667,7 +2689,8 @@ static inline int vmx_do_msr_write(struc
     msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
     HVMTRACE_2D(MSR_WRITE, v, ecx, msr_content);
 
-    switch (ecx) {
+    switch ( ecx )
+    {
     case MSR_IA32_TIME_STAMP_COUNTER:
         hvm_set_guest_time(v, msr_content);
         pt_reset(v);
@@ -2684,6 +2707,8 @@ static inline int vmx_do_msr_write(struc
     case MSR_IA32_APICBASE:
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
         break;
+    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+        goto gp_fault;
     default:
         if ( !long_mode_do_msr_write(regs) )
             wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
@@ -2691,6 +2716,10 @@ static inline int vmx_do_msr_write(struc
     }
 
     return 1;
+
+gp_fault:
+    vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+    return 0;
 }
 
 static void vmx_do_hlt(void)
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vpic.c
--- a/xen/arch/x86/hvm/vpic.c   Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vpic.c   Wed Jun 20 12:49:27 2007 -0600
@@ -499,7 +499,7 @@ void vpic_irq_negative_edge(struct domai
         vpic_update_int_output(vpic);
 }
 
-int cpu_get_pic_interrupt(struct vcpu *v, int *type)
+int cpu_get_pic_interrupt(struct vcpu *v)
 {
     int irq, vector;
     struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0];
@@ -512,6 +512,5 @@ int cpu_get_pic_interrupt(struct vcpu *v
         return -1;
 
     vector = vpic[irq >> 3].irq_base + (irq & 7);
-    *type = APIC_DM_EXTINT;
     return vector;
 }
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vpt.c    Wed Jun 20 12:49:27 2007 -0600
@@ -155,7 +155,8 @@ void pt_update_irq(struct vcpu *v)
     }
 }
 
-static struct periodic_time *is_pt_irq(struct vcpu *v, int vector, int type)
+static struct periodic_time *is_pt_irq(
+    struct vcpu *v, int vector, enum hvm_intack src)
 {
     struct list_head *head = &v->arch.hvm_vcpu.tm_list;
     struct periodic_time *pt;
@@ -174,7 +175,7 @@ static struct periodic_time *is_pt_irq(s
             return pt;
         }
 
-        vec = get_isa_irq_vector(v, pt->irq, type);
+        vec = get_isa_irq_vector(v, pt->irq, src);
 
         /* RTC irq need special care */
         if ( (vector != vec) || (pt->irq == 8 && !is_rtc_periodic_irq(rtc)) )
@@ -186,7 +187,7 @@ static struct periodic_time *is_pt_irq(s
     return NULL;
 }
 
-void pt_intr_post(struct vcpu *v, int vector, int type)
+void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src)
 {
     struct periodic_time *pt;
     time_cb *cb;
@@ -194,7 +195,7 @@ void pt_intr_post(struct vcpu *v, int ve
 
     spin_lock(&v->arch.hvm_vcpu.tm_lock);
 
-    pt = is_pt_irq(v, vector, type);
+    pt = is_pt_irq(v, vector, src);
     if ( pt == NULL )
     {
         spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -227,13 +228,10 @@ void pt_reset(struct vcpu *v)
 
     list_for_each_entry ( pt, head, list )
     {
-        if ( pt->enabled )
-        {
-            pt->pending_intr_nr = 0;
-            pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
-            pt->scheduled = NOW() + pt->period;
-            set_timer(&pt->timer, pt->scheduled);
-        }
+        pt->pending_intr_nr = 0;
+        pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
+        pt->scheduled = NOW() + pt->period;
+        set_timer(&pt->timer, pt->scheduled);
     }
 
     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -247,10 +245,7 @@ void pt_migrate(struct vcpu *v)
     spin_lock(&v->arch.hvm_vcpu.tm_lock);
 
     list_for_each_entry ( pt, head, list )
-    {
-        if ( pt->enabled )
-            migrate_timer(&pt->timer, v->processor);
-    }
+        migrate_timer(&pt->timer, v->processor);
 
     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
 }
@@ -263,8 +258,9 @@ void create_periodic_time(
 
     spin_lock(&v->arch.hvm_vcpu.tm_lock);
 
-    init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
     pt->enabled = 1;
+    pt->pending_intr_nr = 0;
+
     if ( period < 900000 ) /* < 0.9 ms */
     {
         gdprintk(XENLOG_WARNING,
@@ -283,6 +279,8 @@ void create_periodic_time(
     pt->priv = data;
 
     list_add(&pt->list, &v->arch.hvm_vcpu.tm_list);
+
+    init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
     set_timer(&pt->timer, pt->scheduled);
 
     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -295,8 +293,12 @@ void destroy_periodic_time(struct period
 
     pt_lock(pt);
     pt->enabled = 0;
-    pt->pending_intr_nr = 0;
     list_del(&pt->list);
+    pt_unlock(pt);
+
+    /*
+     * pt_timer_fn() can run until this kill_timer() returns. We must do this
+     * outside pt_lock() otherwise we can deadlock with pt_timer_fn().
+     */
     kill_timer(&pt->timer);
-    pt_unlock(pt);
-}
+}
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/mm.c Wed Jun 20 12:49:27 2007 -0600
@@ -2942,7 +2942,7 @@ long do_set_gdt(XEN_GUEST_HANDLE(ulong) 
     if ( entries > FIRST_RESERVED_GDT_ENTRY )
         return -EINVAL;
     
-    if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) )
+    if ( copy_from_guest(frames, frame_list, nr_pages) )
         return -EFAULT;
 
     LOCK_BIGLOCK(current->domain);
@@ -3123,7 +3123,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
         else if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL )
             return -ESRCH;
 
-        rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer,
+        rc = copy_from_guest(d->arch.e820, fmap.map.buffer,
                              fmap.map.nr_entries) ? -EFAULT : 0;
         d->arch.nr_e820 = fmap.map.nr_entries;
 
@@ -3144,7 +3144,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
             return -EFAULT;
 
         map.nr_entries = min(map.nr_entries, d->arch.nr_e820);
-        if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) ||
+        if ( copy_to_guest(map.buffer, d->arch.e820, map.nr_entries) ||
              copy_to_guest(arg, &map, 1) )
             return -EFAULT;
 
@@ -3168,7 +3168,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
         buffer = guest_handle_cast(memmap.buffer, e820entry_t);
 
         count = min((unsigned int)e820.nr_map, memmap.nr_entries);
-        if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
+        if ( copy_to_guest(buffer, e820.map, count) < 0 )
             return -EFAULT;
 
         memmap.nr_entries = count;
@@ -3181,7 +3181,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
 
     case XENMEM_machphys_mapping:
     {
-        struct xen_machphys_mapping mapping = {
+        static const struct xen_machphys_mapping mapping = {
             .v_start = MACH2PHYS_VIRT_START,
             .v_end   = MACH2PHYS_VIRT_END,
             .max_mfn = MACH2PHYS_NR_ENTRIES - 1
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/setup.c      Wed Jun 20 12:49:27 2007 -0600
@@ -295,14 +295,14 @@ static struct e820map __initdata boot_e8
 /* Reserve area (@s,@e) in the temporary bootstrap e820 map. */
 static void __init reserve_in_boot_e820(unsigned long s, unsigned long e)
 {
-    unsigned long rs, re;
+    uint64_t rs, re;
     int i;
 
     for ( i = 0; i < boot_e820.nr_map; i++ )
     {
         /* Have we found the e820 region that includes the specified range? */
         rs = boot_e820.map[i].addr;
-        re = boot_e820.map[i].addr + boot_e820.map[i].size;
+        re = rs + boot_e820.map[i].size;
         if ( (s < rs) || (e > re) )
             continue;
 
@@ -402,7 +402,7 @@ void init_done(void)
     startup_cpu_idle_loop();
 }
 
-void __init __start_xen(multiboot_info_t *mbi)
+void __init __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
     char __cmdline[] = "", *cmdline = __cmdline;
@@ -410,6 +410,7 @@ void __init __start_xen(multiboot_info_t
     unsigned int initrdidx = 1;
     char *_policy_start = NULL;
     unsigned long _policy_len = 0;
+    multiboot_info_t *mbi = __va(mbi_p);
     module_t *mod = (module_t *)__va(mbi->mods_addr);
     unsigned long nr_pages, modules_length;
     int i, e820_warn = 0, bytes = 0;
@@ -678,6 +679,9 @@ void __init __start_xen(multiboot_info_t
             barrier();
             move_memory(e, 0, __pa(&_end) - xen_phys_start);
 
+            /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
+            memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
+
             /* Walk initial pagetables, relocating page directory entries. */
             pl4e = __va(__pa(idle_pg_table));
             for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/traps.c      Wed Jun 20 12:49:27 2007 -0600
@@ -462,7 +462,17 @@ int rdmsr_hypervisor_regs(
     if ( idx > 0 )
         return 0;
 
-    *eax = *edx = 0;
+    switch ( idx )
+    {
+    case 0:
+    {
+        *eax = *edx = 0;
+        break;
+    }
+    default:
+        BUG();
+    }
+
     return 1;
 }
 
@@ -1130,7 +1140,7 @@ static inline int guest_io_okay(
          * read as 0xff (no access allowed).
          */
         TOGGLE_MODE();
-        switch ( __copy_from_guest_offset(&x.bytes[0], v->arch.iobmp,
+        switch ( __copy_from_guest_offset(x.bytes, v->arch.iobmp,
                                           port>>3, 2) )
         {
         default: x.bytes[0] = ~0;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c       Wed Jun 20 12:49:27 2007 -0600
@@ -513,6 +513,7 @@ static void hypercall_page_initialise_ri
 
 void hypercall_page_initialise(struct domain *d, void *hypercall_page)
 {
+    memset(hypercall_page, 0xCC, PAGE_SIZE);
     if ( is_hvm_domain(d) )
         hvm_hypercall_page_initialise(d, hypercall_page);
     else if ( supervisor_mode_kernel )
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_64/compat_kexec.S
--- a/xen/arch/x86/x86_64/compat_kexec.S        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_64/compat_kexec.S        Wed Jun 20 12:49:27 2007 -0600
@@ -1,5 +1,11 @@
 /*
  * Compatibility kexec handler.
+ */
+
+/*
+ * NOTE: We rely on Xen not relocating itself above the 4G boundary. This is
+ * currently true but if it ever changes then compat_pg_table will
+ * need to be moved back below 4G at run time.
  */
 
 #include <xen/config.h>
@@ -8,7 +14,20 @@
 #include <asm/msr.h>
 #include <asm/page.h>
 
-#define SYM_PHYS(sym)       ((sym) - __XEN_VIRT_START)
+/* The unrelocated physical address of a symbol. */
+#define SYM_PHYS(sym)          ((sym) - __XEN_VIRT_START)
+
+/* Load physical address of symbol into register and relocate it. */
+#define RELOCATE_SYM(sym,reg)  mov $SYM_PHYS(sym), reg ; \
+                               add xen_phys_start(%rip), reg
+
+/*
+ * Relocate a physical address in memory. Size of temporary register
+ * determines size of the value to relocate.
+ */
+#define RELOCATE_MEM(addr,reg) mov addr(%rip), reg ; \
+                               add xen_phys_start(%rip), reg ; \
+                               mov reg, addr(%rip)
 
         .text
 
@@ -31,20 +50,35 @@ 1:      dec %r9
         test %r9,%r9
         jnz 1b
 
-        mov $SYM_PHYS(compat_page_list),%rdx
+        RELOCATE_SYM(compat_page_list,%rdx)
+
+        /* Relocate compatibility mode entry point address. */
+        RELOCATE_MEM(compatibility_mode_far,%eax)
+
+        /* Relocate compat_pg_table. */
+        RELOCATE_MEM(compat_pg_table,     %rax)
+        RELOCATE_MEM(compat_pg_table+0x8, %rax)
+        RELOCATE_MEM(compat_pg_table+0x10,%rax)
+        RELOCATE_MEM(compat_pg_table+0x18,%rax)
 
         /*
          * Setup an identity mapped region in PML4[0] of idle page
          * table.
          */
-        lea l3_identmap(%rip),%rax
-        sub %rbx,%rax
+        RELOCATE_SYM(l3_identmap,%rax)
         or  $0x63,%rax
         mov %rax, idle_pg_table(%rip)
 
         /* Switch to idle page table. */
-        movq $SYM_PHYS(idle_pg_table), %rax
+        RELOCATE_SYM(idle_pg_table,%rax)
         movq %rax, %cr3
+
+        /* Switch to identity mapped compatibility stack. */
+        RELOCATE_SYM(compat_stack,%rax)
+        movq %rax, %rsp
+
+        /* Save xen_phys_start for 32 bit code. */
+        movq xen_phys_start(%rip), %rbx
 
         /* Jump to low identity mapping in compatibility mode. */
         ljmp *compatibility_mode_far(%rip)
@@ -54,7 +88,26 @@ compatibility_mode_far:
         .long SYM_PHYS(compatibility_mode)
         .long __HYPERVISOR_CS32
 
+        /*
+         * We use 5 words of stack for the arguments passed to the kernel. The
+         * kernel only uses 1 word before switching to its own stack. Allocate
+         * 16 words to give "plenty" of room.
+         */
+        .fill 16,4,0
+compat_stack:
+
         .code32
+
+#undef RELOCATE_SYM
+#undef RELOCATE_MEM
+
+/*
+ * Load physical address of symbol into register and relocate it. %rbx
+ * contains xen_phys_start(%rip) saved before jump to compatibility
+ * mode.
+ */
+#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \
+                              add %ebx, reg
 
 compatibility_mode:
         /* Setup some sane segments. */
@@ -78,7 +131,7 @@ compatibility_mode:
         movl %eax, %cr0
 
         /* Switch to 32 bit page table. */
-        movl  $SYM_PHYS(compat_pg_table), %eax
+        RELOCATE_SYM(compat_pg_table, %eax)
         movl  %eax, %cr3
 
         /* Clear MSR_EFER[LME], disabling long mode */
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c       Wed Jun 20 12:49:27 2007 -0600
@@ -510,6 +510,7 @@ static void hypercall_page_initialise_ri
 
 void hypercall_page_initialise(struct domain *d, void *hypercall_page)
 {
+    memset(hypercall_page, 0xCC, PAGE_SIZE);
     if ( is_hvm_domain(d) )
         hvm_hypercall_page_initialise(d, hypercall_page);
     else if ( !is_pv_32bit_domain(d) )
diff -r c20bc60f9243 -r 810885428743 xen/common/compat/memory.c
--- a/xen/common/compat/memory.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/compat/memory.c        Wed Jun 20 12:49:27 2007 -0600
@@ -258,7 +258,8 @@ int compat_memory_op(unsigned int cmd, X
                     compat_pfn_t pfn = nat.rsrv->extent_start.p[start_extent];
 
                     BUG_ON(pfn != nat.rsrv->extent_start.p[start_extent]);
-                    if ( __copy_to_compat_offset(cmp.rsrv.extent_start, start_extent, &pfn, 1) )
+                    if ( __copy_to_compat_offset(cmp.rsrv.extent_start,
+                                                 start_extent, &pfn, 1) )
                     {
                         if ( split >= 0 )
                         {
@@ -275,6 +276,10 @@ int compat_memory_op(unsigned int cmd, X
                         break;
                     }
                 }
+
+                /* Bail if there was an error. */
+                if ( (split >= 0) && (end_extent != nat.rsrv->nr_extents) )
+                    split = 0;
             }
             else
                 start_extent = end_extent;
diff -r c20bc60f9243 -r 810885428743 xen/common/domctl.c
--- a/xen/common/domctl.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/domctl.c       Wed Jun 20 12:49:27 2007 -0600
@@ -43,7 +43,7 @@ void cpumask_to_xenctl_cpumap(
 
     bitmap_long_to_byte(bytemap, cpus_addr(*cpumask), NR_CPUS);
 
-    copy_to_guest(xenctl_cpumap->bitmap, &bytemap[0], copy_bytes);
+    copy_to_guest(xenctl_cpumap->bitmap, bytemap, copy_bytes);
 
     for ( i = copy_bytes; i < guest_bytes; i++ )
         copy_to_guest_offset(xenctl_cpumap->bitmap, i, &zero, 1);
@@ -63,7 +63,7 @@ void xenctl_cpumap_to_cpumask(
     if ( guest_handle_is_null(xenctl_cpumap->bitmap) )
         return;
 
-    copy_from_guest(&bytemap[0], xenctl_cpumap->bitmap, copy_bytes);
+    copy_from_guest(bytemap, xenctl_cpumap->bitmap, copy_bytes);
 
     bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, NR_CPUS);
 }
diff -r c20bc60f9243 -r 810885428743 xen/common/grant_table.c
--- a/xen/common/grant_table.c  Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/grant_table.c  Wed Jun 20 12:49:27 2007 -0600
@@ -148,7 +148,7 @@ get_maptrack_handle(
                 return -1;
             }
 
-            memset(new_mt, 0, PAGE_SIZE);
+            clear_page(new_mt);
 
             new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
 
@@ -624,7 +624,7 @@ gnttab_grow_table(struct domain *d, unsi
     {
         if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
             goto active_alloc_failed;
-        memset(gt->active[i], 0, PAGE_SIZE);
+        clear_page(gt->active[i]);
     }
 
     /* Shared */
@@ -632,7 +632,7 @@ gnttab_grow_table(struct domain *d, unsi
     {
         if ( (gt->shared[i] = alloc_xenheap_page()) == NULL )
             goto shared_alloc_failed;
-        memset(gt->shared[i], 0, PAGE_SIZE);
+        clear_page(gt->shared[i]);
     }
 
     /* Share the new shared frames with the recipient domain */
@@ -1365,7 +1365,7 @@ grant_table_create(
     {
         if ( (t->active[i] = alloc_xenheap_page()) == NULL )
             goto no_mem_2;
-        memset(t->active[i], 0, PAGE_SIZE);
+        clear_page(t->active[i]);
     }
 
     /* Tracking of mapped foreign frames table */
@@ -1375,7 +1375,7 @@ grant_table_create(
     memset(t->maptrack, 0, max_nr_maptrack_frames() * sizeof(t->maptrack[0]));
     if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
         goto no_mem_3;
-    memset(t->maptrack[0], 0, PAGE_SIZE);
+    clear_page(t->maptrack[0]);
     t->maptrack_limit = PAGE_SIZE / sizeof(struct grant_mapping);
     for ( i = 0; i < t->maptrack_limit; i++ )
         t->maptrack[0][i].ref = i+1;
@@ -1389,7 +1389,7 @@ grant_table_create(
     {
         if ( (t->shared[i] = alloc_xenheap_page()) == NULL )
             goto no_mem_4;
-        memset(t->shared[i], 0, PAGE_SIZE);
+        clear_page(t->shared[i]);
     }
 
     for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
diff -r c20bc60f9243 -r 810885428743 xen/common/kernel.c
--- a/xen/common/kernel.c       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/kernel.c       Wed Jun 20 12:49:27 2007 -0600
@@ -142,7 +142,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
     {
         xen_extraversion_t extraversion;
         safe_strcpy(extraversion, xen_extra_version());
-        if ( copy_to_guest(arg, (char *)extraversion, sizeof(extraversion)) )
+        if ( copy_to_guest(arg, extraversion, ARRAY_SIZE(extraversion)) )
             return -EFAULT;
         return 0;
     }
@@ -167,7 +167,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
         memset(info, 0, sizeof(info));
         arch_get_xen_caps(&info);
 
-        if ( copy_to_guest(arg, (char *)info, sizeof(info)) )
+        if ( copy_to_guest(arg, info, ARRAY_SIZE(info)) )
             return -EFAULT;
         return 0;
     }
@@ -187,7 +187,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
     {
         xen_changeset_info_t chgset;
         safe_strcpy(chgset, xen_changeset());
-        if ( copy_to_guest(arg, (char *)chgset, sizeof(chgset)) )
+        if ( copy_to_guest(arg, chgset, ARRAY_SIZE(chgset)) )
             return -EFAULT;
         return 0;
     }
@@ -229,8 +229,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
 
     case XENVER_guest_handle:
     {
-        if ( copy_to_guest(arg, (char *)current->domain->handle,
-                           sizeof(current->domain->handle)) )
+        if ( copy_to_guest(arg, current->domain->handle,
+                           ARRAY_SIZE(current->domain->handle)) )
             return -EFAULT;
         return 0;
     }    
diff -r c20bc60f9243 -r 810885428743 xen/common/kexec.c
--- a/xen/common/kexec.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/kexec.c        Wed Jun 20 12:49:27 2007 -0600
@@ -169,7 +169,11 @@ static int kexec_get(reserve)(xen_kexec_
 
 static int kexec_get(xen)(xen_kexec_range_t *range)
 {
+#ifdef CONFIG_X86_64
+    range->start = xenheap_phys_start;
+#else
     range->start = virt_to_maddr(_start);
+#endif
     range->size = (unsigned long)xenheap_phys_end - (unsigned long)range->start;
     return 0;
 }
diff -r c20bc60f9243 -r 810885428743 xen/common/perfc.c
--- a/xen/common/perfc.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/perfc.c        Wed Jun 20 12:49:27 2007 -0600
@@ -227,7 +227,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
     }
     BUG_ON(v != perfc_nbr_vals);
 
-    if ( copy_to_guest(desc, (xen_sysctl_perfc_desc_t *)perfc_d, NR_PERFCTRS) )
+    if ( copy_to_guest(desc, perfc_d, NR_PERFCTRS) )
         return -EFAULT;
     if ( copy_to_guest(val, perfc_vals, perfc_nbr_vals) )
         return -EFAULT;
diff -r c20bc60f9243 -r 810885428743 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/drivers/char/console.c        Wed Jun 20 12:49:27 2007 -0600
@@ -326,7 +326,7 @@ static long guest_console_write(XEN_GUES
                 CONSOLEIO_write, count, buffer);
 
         kcount = min_t(int, count, sizeof(kbuf)-1);
-        if ( copy_from_guest((char *)kbuf, buffer, kcount) )
+        if ( copy_from_guest(kbuf, buffer, kcount) )
             return -EFAULT;
         kbuf[kcount] = '\0';
 
diff -r c20bc60f9243 -r 810885428743 xen/drivers/video/vga.c
--- a/xen/drivers/video/vga.c   Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/drivers/video/vga.c   Wed Jun 20 12:49:27 2007 -0600
@@ -32,6 +32,9 @@ static unsigned char *video;
  * 
  *   'vga=ask':
  *      display a vga menu of available modes
+ * 
+ *   'vga=current':
+ *      use the current vga mode without modification
  * 
  *   'vga=text-80x<rows>':
  *      text mode, where <rows> is one of {25,28,30,34,43,50,60}
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-ia64/guest_access.h
--- a/xen/include/asm-ia64/guest_access.h       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-ia64/guest_access.h       Wed Jun 20 12:49:27 2007 -0600
@@ -76,28 +76,31 @@ extern int xencomm_handle_is_null(void *
     __copy_field_from_guest(ptr, hnd, field)
 
 #define __copy_to_guest_offset(hnd, idx, ptr, nr) ({                    \
-    const typeof(ptr) _d = (hnd).p;                                     \
-    const typeof(ptr) _s = (ptr);                                       \
+    const typeof(*(ptr)) *_s = (ptr);                                   \
+    void *_d = (hnd).p;                                                 \
+    ((void)((hnd).p == (ptr)));                                         \
     xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
 })
 
 #define __copy_field_to_guest(hnd, ptr, field) ({                   \
-    const int _off = offsetof(typeof(*ptr), field);                 \
-    const typeof(ptr) _d = (hnd).p;                                 \
+    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
     const typeof(&(ptr)->field) _s = &(ptr)->field;                 \
+    void *_d = (hnd).p;                                             \
+    ((void)(&(hnd).p->field == &(ptr)->field));                     \
     xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off);               \
 })
 
-#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({                     \
-    const typeof(ptr) _s = (hnd).p;                                        \
-    const typeof(ptr) _d = (ptr);                                          \
-    xencomm_copy_from_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx));  \
+#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({                    \
+    const typeof(*(ptr)) *_s = (hnd).p;                                   \
+    typeof(*(ptr)) *_d = (ptr);                                           \
+    xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \
 })
 
 #define __copy_field_from_guest(ptr, hnd, field) ({                 \
-    const int _off = offsetof(typeof(*ptr), field);                 \
-    const typeof(ptr) _s = (hnd).p;                                 \
-    const typeof(&(ptr)->field) _d = &(ptr)->field;                 \
+    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
+    const void *_s = (hnd).p;                                       \
+    typeof(&(ptr)->field) _d = &(ptr)->field;                       \
+    ((void)(&(hnd).p->field == &(ptr)->field));                     \
     xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off);             \
 })
 
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h       Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/event.h       Wed Jun 20 12:49:27 2007 -0600
@@ -10,7 +10,6 @@
 #define __ASM_EVENT_H__
 
 #include <xen/shared.h>
-#include <asm/hvm/irq.h> /* cpu_has_pending_irq() */
 
 static inline void vcpu_kick(struct vcpu *v)
 {
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/guest_access.h
--- a/xen/include/asm-x86/guest_access.h        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/guest_access.h        Wed Jun 20 12:49:27 2007 -0600
@@ -32,11 +32,12 @@
  * specifying an offset into the guest array.
  */
 #define copy_to_guest_offset(hnd, off, ptr, nr) ({      \
-    typeof(ptr) _x = (hnd).p;                           \
-    const typeof(ptr) _y = (ptr);                       \
+    const typeof(*(ptr)) *_s = (ptr);                   \
+    char (*_d)[sizeof(*_s)] = (void *)(hnd).p;          \
+    ((void)((hnd).p == (ptr)));                         \
     is_hvm_vcpu(current) ?                              \
-    copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) :  \
-    copy_to_user(_x+(off), _y, sizeof(*_x)*(nr));       \
+    copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) :  \
+    copy_to_user(_d+(off), _s, sizeof(*_s)*(nr));       \
 })
 
 /*
@@ -44,29 +45,30 @@
  * specifying an offset into the guest array.
  */
 #define copy_from_guest_offset(ptr, hnd, off, nr) ({    \
-    const typeof(ptr) _x = (hnd).p;                     \
-    typeof(ptr) _y = (ptr);                             \
+    const typeof(*(ptr)) *_s = (hnd).p;                 \
+    typeof(*(ptr)) *_d = (ptr);                         \
     is_hvm_vcpu(current) ?                              \
-    copy_from_user_hvm(_y, _x+(off), sizeof(*_x)*(nr)) :\
-    copy_from_user(_y, _x+(off), sizeof(*_x)*(nr));     \
+    copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\
+    copy_from_user(_d, _s+(off), sizeof(*_d)*(nr));     \
 })
 
 /* Copy sub-field of a structure to guest context via a guest handle. */
 #define copy_field_to_guest(hnd, ptr, field) ({         \
-    typeof(&(ptr)->field) _x = &(hnd).p->field;         \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;     \
+    const typeof(&(ptr)->field) _s = &(ptr)->field;     \
+    void *_d = &(hnd).p->field;                         \
+    ((void)(&(hnd).p->field == &(ptr)->field));         \
     is_hvm_vcpu(current) ?                              \
-    copy_to_user_hvm(_x, _y, sizeof(*_x)) :             \
-    copy_to_user(_x, _y, sizeof(*_x));                  \
+    copy_to_user_hvm(_d, _s, sizeof(*_s)) :             \
+    copy_to_user(_d, _s, sizeof(*_s));                  \
 })
 
 /* Copy sub-field of a structure from guest context via a guest handle. */
 #define copy_field_from_guest(ptr, hnd, field) ({       \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;   \
-    typeof(&(ptr)->field) _y = &(ptr)->field;           \
+    const typeof(&(ptr)->field) _s = &(hnd).p->field;   \
+    typeof(&(ptr)->field) _d = &(ptr)->field;           \
     is_hvm_vcpu(current) ?                              \
-    copy_from_user_hvm(_y, _x, sizeof(*_x)) :           \
-    copy_from_user(_y, _x, sizeof(*_x));                \
+    copy_from_user_hvm(_d, _s, sizeof(*_d)) :           \
+    copy_from_user(_d, _s, sizeof(*_d));                \
 })
 
 /*
@@ -78,35 +80,37 @@
      array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
 
 #define __copy_to_guest_offset(hnd, off, ptr, nr) ({    \
-    typeof(ptr) _x = (hnd).p;                           \
-    const typeof(ptr) _y = (ptr);                       \
+    const typeof(*(ptr)) *_s = (ptr);                   \
+    char (*_d)[sizeof(*_s)] = (void *)(hnd).p;          \
+    ((void)((hnd).p == (ptr)));                         \
     is_hvm_vcpu(current) ?                              \
-    copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) :  \
-    __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr));     \
+    copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) :  \
+    __copy_to_user(_d+(off), _s, sizeof(*_s)*(nr));     \
 })
 
 #define __copy_from_guest_offset(ptr, hnd, off, nr) ({  \
-    const typeof(ptr) _x = (hnd).p;                     \
-    typeof(ptr) _y = (ptr);                             \
+    const typeof(*(ptr)) *_s = (hnd).p;                 \
+    typeof(*(ptr)) *_d = (ptr);                         \
     is_hvm_vcpu(current) ?                              \
-    copy_from_user_hvm(_y, _x+(off),sizeof(*_x)*(nr)) : \
-    __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr));   \
+    copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\
+    __copy_from_user(_d, _s+(off), sizeof(*_d)*(nr));   \
 })
 
 #define __copy_field_to_guest(hnd, ptr, field) ({       \
-    typeof(&(ptr)->field) _x = &(hnd).p->field;         \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;     \
+    const typeof(&(ptr)->field) _s = &(ptr)->field;     \
+    void *_d = &(hnd).p->field;                         \
+    ((void)(&(hnd).p->field == &(ptr)->field));         \
     is_hvm_vcpu(current) ?                              \
-    copy_to_user_hvm(_x, _y, sizeof(*_x)) :             \
-    __copy_to_user(_x, _y, sizeof(*_x));                \
+    copy_to_user_hvm(_d, _s, sizeof(*_s)) :             \
+    __copy_to_user(_d, _s, sizeof(*_s));                \
 })
 
 #define __copy_field_from_guest(ptr, hnd, field) ({     \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;   \
-    typeof(&(ptr)->field) _y = &(ptr)->field;           \
+    const typeof(&(ptr)->field) _s = &(hnd).p->field;   \
+    typeof(&(ptr)->field) _d = &(ptr)->field;           \
     is_hvm_vcpu(current) ?                              \
-    copy_from_user_hvm(_y, _x, sizeof(*_x)) :           \
-    __copy_from_user(_y, _x, sizeof(*_x));              \
+    copy_from_user_hvm(_d, _s, sizeof(*_d)) :           \
+    __copy_from_user(_d, _s, sizeof(*_d));              \
 })
 
 #endif /* __ASM_X86_GUEST_ACCESS_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h     Wed Jun 20 12:49:27 2007 -0600
@@ -55,6 +55,14 @@ typedef struct segment_register {
     u64        base;
 } __attribute__ ((packed)) segment_register_t;
 
+/* Interrupt acknowledgement sources. */
+enum hvm_intack {
+    hvm_intack_none,
+    hvm_intack_pic,
+    hvm_intack_lapic,
+    hvm_intack_nmi
+};
+
 /*
  * The hardware virtual machine (HVM) interface abstracts away from the
  * x86/x86_64 CPU virtualization assist specifics. Currently this interface
@@ -106,7 +114,7 @@ struct hvm_function_table {
     int (*long_mode_enabled)(struct vcpu *v);
     int (*pae_enabled)(struct vcpu *v);
     int (*nx_enabled)(struct vcpu *v);
-    int (*interrupts_enabled)(struct vcpu *v);
+    int (*interrupts_enabled)(struct vcpu *v, enum hvm_intack);
     int (*guest_x86_mode)(struct vcpu *v);
     unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
     unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg);
@@ -124,6 +132,13 @@ struct hvm_function_table {
     void (*update_guest_cr3)(struct vcpu *v);
 
     /*
+     * Called to ensure than all guest-specific mappings in a tagged TLB
+     * are flushed; does *not* flush Xen's TLB entries, and on
+     * processors without a tagged TLB it will be a noop.
+     */
+    void (*flush_guest_tlbs)(void);
+
+    /*
      * Reflect the virtual APIC's value in the guest's V_TPR register
      */
     void (*update_vtpr)(struct vcpu *v, unsigned long value);
@@ -148,6 +163,7 @@ struct hvm_function_table {
 };
 
 extern struct hvm_function_table hvm_funcs;
+extern int hvm_enabled;
 
 int hvm_domain_initialise(struct domain *d);
 void hvm_domain_relinquish_resources(struct domain *d);
@@ -191,16 +207,16 @@ hvm_long_mode_enabled(struct vcpu *v)
 #define hvm_long_mode_enabled(v) (v,0)
 #endif
 
- static inline int
+static inline int
 hvm_pae_enabled(struct vcpu *v)
 {
     return hvm_funcs.pae_enabled(v);
 }
 
 static inline int
-hvm_interrupts_enabled(struct vcpu *v)
-{
-    return hvm_funcs.interrupts_enabled(v);
+hvm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+    return hvm_funcs.interrupts_enabled(v, type);
 }
 
 static inline int
@@ -230,6 +246,13 @@ hvm_update_vtpr(struct vcpu *v, unsigned
 }
 
 void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3);
+
+static inline void 
+hvm_flush_guest_tlbs(void)
+{
+    if ( hvm_enabled )
+        hvm_funcs.flush_guest_tlbs();
+}
 
 void hvm_hypercall_page_initialise(struct domain *d,
                                    void *hypercall_page);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h     Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/irq.h     Wed Jun 20 12:49:27 2007 -0600
@@ -24,10 +24,10 @@
 
 #include <xen/types.h>
 #include <xen/spinlock.h>
+#include <asm/hvm/hvm.h>
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vioapic.h>
 #include <public/hvm/save.h>
-
 
 struct hvm_irq {
     /*
@@ -58,7 +58,6 @@ struct hvm_irq {
             HVMIRQ_callback_gsi,
             HVMIRQ_callback_pci_intx
         } callback_via_type;
-        uint32_t pad; /* So the next field will be aligned */
     };
     union {
         uint32_t gsi;
@@ -115,9 +114,12 @@ void hvm_set_callback_irq_level(void);
 void hvm_set_callback_irq_level(void);
 void hvm_set_callback_via(struct domain *d, uint64_t via);
 
-int cpu_get_interrupt(struct vcpu *v, int *type);
-int cpu_has_pending_irq(struct vcpu *v);
-int get_isa_irq_vector(struct vcpu *vcpu, int irq, int type);
+/* Check/Acknowledge next pending interrupt. */
+enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
+int hvm_vcpu_ack_pending_irq(
+    struct vcpu *v, enum hvm_intack type, int *vector);
+
+int get_isa_irq_vector(struct vcpu *vcpu, int irq, enum hvm_intack src);
 int is_isa_irq_masked(struct vcpu *v, int isa_irq);
 
 #endif /* __ASM_X86_HVM_IRQ_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/support.h Wed Jun 20 12:49:27 2007 -0600
@@ -215,7 +215,6 @@ int hvm_load(struct domain *d, hvm_domai
 /* End of save/restore */
 
 extern char hvm_io_bitmap[];
-extern int hvm_enabled;
 
 void hvm_enable(struct hvm_function_table *);
 void hvm_disable(void);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/svm/asid.h
--- a/xen/include/asm-x86/hvm/svm/asid.h        Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/asid.h        Wed Jun 20 12:49:27 2007 -0600
@@ -30,6 +30,7 @@ void svm_asid_init(struct cpuinfo_x86 *c
 void svm_asid_init(struct cpuinfo_x86 *c);
 void svm_asid_init_vcpu(struct vcpu *v);
 void svm_asid_inv_asid(struct vcpu *v);
+void svm_asid_inc_generation(void);
 
 /*
  * ASID related, guest triggered events.
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vcpu.h    Wed Jun 20 12:49:27 2007 -0600
@@ -30,11 +30,13 @@
 
 struct hvm_vcpu {
     unsigned long       hw_cr3;     /* value we give to HW to use */
-    unsigned long       ioflags;
     struct hvm_io_op    io_op;
     struct vlapic       vlapic;
     s64                 cache_tsc_offset;
     u64                 guest_time;
+
+    /* Is an NMI pending for delivery to this VCPU core? */
+    bool_t              nmi_pending; /* NB. integrate flag with save/restore */
 
     /* Lock and list for virtual platform timers. */
     spinlock_t          tm_lock;
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h  Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vlapic.h  Wed Jun 20 12:49:27 2007 -0600
@@ -76,7 +76,7 @@ int vlapic_find_highest_irr(struct vlapi
 int vlapic_find_highest_irr(struct vlapic *vlapic);
 
 int vlapic_has_interrupt(struct vcpu *v);
-int cpu_get_apic_interrupt(struct vcpu *v, int *mode);
+int cpu_get_apic_interrupt(struct vcpu *v);
 
 int  vlapic_init(struct vcpu *v);
 void vlapic_destroy(struct vcpu *v);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Wed Jun 20 12:49:27 2007 -0600
@@ -336,9 +336,16 @@ static inline void vmx_inject_sw_excepti
                            instruction_len);
 }
 
-static inline void vmx_inject_extint(struct vcpu *v, int trap, int error_code)
-{
-    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code, 0);
+static inline void vmx_inject_extint(struct vcpu *v, int trap)
+{
+    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR,
+                           VMX_DELIVER_NO_ERROR_CODE, 0);
+}
+
+static inline void vmx_inject_nmi(struct vcpu *v)
+{
+    __vmx_inject_exception(v, 2, INTR_TYPE_NMI,
+                           VMX_DELIVER_NO_ERROR_CODE, 0);
 }
 
 #endif /* __ASM_X86_HVM_VMX_VMX_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vpic.h
--- a/xen/include/asm-x86/hvm/vpic.h    Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vpic.h    Wed Jun 20 12:49:27 2007 -0600
@@ -32,7 +32,7 @@ void vpic_irq_positive_edge(struct domai
 void vpic_irq_positive_edge(struct domain *d, int irq);
 void vpic_irq_negative_edge(struct domain *d, int irq);
 void vpic_init(struct domain *d);
-int cpu_get_pic_interrupt(struct vcpu *v, int *type);
+int cpu_get_pic_interrupt(struct vcpu *v);
 int is_periodic_irq(struct vcpu *v, int irq, int type);
 
 #endif  /* __ASM_X86_HVM_VPIC_H__ */  
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h     Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vpt.h     Wed Jun 20 12:49:27 2007 -0600
@@ -29,6 +29,7 @@
 #include <xen/timer.h>
 #include <xen/list.h>
 #include <asm/hvm/vpic.h>
+#include <asm/hvm/irq.h>
 #include <public/hvm/save.h>
 
 struct HPETState;
@@ -119,7 +120,7 @@ void pt_freeze_time(struct vcpu *v);
 void pt_freeze_time(struct vcpu *v);
 void pt_thaw_time(struct vcpu *v);
 void pt_update_irq(struct vcpu *v);
-void pt_intr_post(struct vcpu *v, int vector, int type);
+void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src);
 void pt_reset(struct vcpu *v);
 void pt_migrate(struct vcpu *v);
 void create_periodic_time(
diff -r c20bc60f9243 -r 810885428743 xen/include/xen/compat.h
--- a/xen/include/xen/compat.h  Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/xen/compat.h  Wed Jun 20 12:49:27 2007 -0600
@@ -44,9 +44,10 @@
  * specifying an offset into the guest array.
  */
 #define copy_to_compat_offset(hnd, off, ptr, nr) ({                  \
-    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
-    const typeof(*(ptr)) *const _y = (ptr);                          \
-    copy_to_user(_x + (off), _y, sizeof(*_x) * (nr));                \
+    const typeof(*(ptr)) *_s = (ptr);                                \
+    char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c;           \
+    ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr)));     \
+    copy_to_user(_d + (off), _s, sizeof(*_s) * (nr));                \
 })
 
 /*
@@ -54,9 +55,9 @@
  * specifying an offset into the guest array.
  */
 #define copy_from_compat_offset(ptr, hnd, off, nr) ({                \
-    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
-    const typeof(ptr) _y = (ptr);                                    \
-    copy_from_user(_y, _x + (off), sizeof(*_x) * (nr));              \
+    const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
+    typeof(*(ptr)) *_d = (ptr);                                      \
+    copy_from_user(_d, _s + (off), sizeof(*_d) * (nr));              \
 })
 
 #define copy_to_compat(hnd, ptr, nr)                                 \
@@ -67,16 +68,19 @@
 
 /* Copy sub-field of a structure to guest context via a compat handle. */
 #define copy_field_to_compat(hnd, ptr, field) ({                     \
-    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
-    const typeof((ptr)->field) *const _y = &(ptr)->field;            \
-    copy_to_user(_x, _y, sizeof(*_x));                               \
+    const typeof(&(ptr)->field) _s = &(ptr)->field;                  \
+    void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;   \
+    ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field ==    \
+            &(ptr)->field));                                         \
+    copy_to_user(_d, _s, sizeof(*_s));                               \
 })
 
 /* Copy sub-field of a structure from guest context via a compat handle. */
 #define copy_field_from_compat(ptr, hnd, field) ({                   \
-    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
-    typeof((ptr)->field) *const _y = &(ptr)->field;                  \
-    copy_from_user(_y, _x, sizeof(*_x));                             \
+    const typeof(&(ptr)->field) _s =                                 \
+        &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;          \
+    typeof(&(ptr)->field) _d = &(ptr)->field;                        \
+    copy_from_user(_d, _s, sizeof(*_d));                             \
 })
 
 /*
@@ -84,18 +88,20 @@
  * Allows use of faster __copy_* functions.
  */
 #define compat_handle_okay(hnd, nr)                                  \
-    compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr), sizeof(**(hnd)._))
+    compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr),        \
+                           sizeof(**(hnd)._))
 
 #define __copy_to_compat_offset(hnd, off, ptr, nr) ({                \
-    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
-    const typeof(*(ptr)) *const _y = (ptr);                          \
-    __copy_to_user(_x + (off), _y, sizeof(*_x) * (nr));              \
+    const typeof(*(ptr)) *_s = (ptr);                                \
+    char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c;           \
+    ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr)));     \
+    __copy_to_user(_d + (off), _s, sizeof(*_s) * (nr));              \
 })
 
 #define __copy_from_compat_offset(ptr, hnd, off, nr) ({              \
-    const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
-    const typeof(ptr) _y = (ptr);                                    \
-    __copy_from_user(_y, _x + (off), sizeof(*_x) * (nr));            \
+    const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
+    typeof(*(ptr)) *_d = (ptr);                                      \
+    __copy_from_user(_d, _s + (off), sizeof(*_d) * (nr));            \
 })
 
 #define __copy_to_compat(hnd, ptr, nr)                               \
@@ -105,15 +111,18 @@
     __copy_from_compat_offset(ptr, hnd, 0, nr)
 
 #define __copy_field_to_compat(hnd, ptr, field) ({                   \
-    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
-    const typeof((ptr)->field) *const _y = &(ptr)->field;            \
-    __copy_to_user(_x, _y, sizeof(*_x));                             \
+    const typeof(&(ptr)->field) _s = &(ptr)->field;                  \
+    void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;   \
+    ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field ==    \
+            &(ptr)->field));                                         \
+    __copy_to_user(_d, _s, sizeof(*_s));                             \
 })
 
 #define __copy_field_from_compat(ptr, hnd, field) ({                 \
-    typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
-    typeof((ptr)->field) *const _y = &(ptr)->field;                  \
-    __copy_from_user(_y, _x, sizeof(*_x));                           \
+    const typeof(&(ptr)->field) _s =                                 \
+        &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field;          \
+    typeof(&(ptr)->field) _d = &(ptr)->field;                        \
+    __copy_from_user(_d, _s, sizeof(*_d));                           \
 })
 
 
@@ -169,7 +178,8 @@ int switch_compat(struct domain *);
 int switch_compat(struct domain *);
 int switch_native(struct domain *);
 
-#define BITS_PER_GUEST_LONG(d) (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)
+#define BITS_PER_GUEST_LONG(d) \
+    (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)
 
 #else
 
diff -r c20bc60f9243 -r 810885428743 xen/include/xen/xencomm.h
--- a/xen/include/xen/xencomm.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/xen/xencomm.h Wed Jun 20 12:49:27 2007 -0600
@@ -47,17 +47,17 @@ static inline unsigned long xencomm_inli
     ((hnd).p == NULL || xencomm_handle_is_null((hnd).p))
 
 /* Offset the given guest handle into the array it refers to. */
-#define guest_handle_add_offset(hnd, nr) ({         \
-    const typeof((hnd).p) _ptr;                     \
-    xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr));   \
+#define guest_handle_add_offset(hnd, nr) ({                             \
+    const typeof((hnd).p) _ptr;                                         \
+    xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr));        \
 })
 
 /* Cast a guest handle to the specified type of handle. */
 #define guest_handle_cast(hnd, type) ({         \
     type *_x = (hnd).p;                         \
-    XEN_GUEST_HANDLE(type) _y; \
-    set_xen_guest_handle(_y, _x); \
-    _y; \
+    XEN_GUEST_HANDLE(type) _y;                  \
+    set_xen_guest_handle(_y, _x);               \
+    _y;                                         \
 })
 
 /* Since we run in real mode, we can safely access all addresses. That also
@@ -87,29 +87,32 @@ static inline unsigned long xencomm_inli
     __copy_field_from_guest(ptr, hnd, field)
 
 #define __copy_to_guest_offset(hnd, idx, ptr, nr) ({                \
-    const typeof(ptr) _x = (hnd).p;                                 \
-    const typeof(ptr) _y = (ptr);                                   \
-    xencomm_copy_to_guest(_x, _y, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \
+    const typeof(*(ptr)) *_s = (ptr);                               \
+    void *_d = (hnd).p;                                             \
+    ((void)((hnd).p == (ptr)));                                     \
+    xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
 })
 
 #define __copy_field_to_guest(hnd, ptr, field) ({                   \
-    const int _off = offsetof(typeof(*ptr), field);                  \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;               \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;                 \
-    xencomm_copy_to_guest(_x, _y, sizeof(*_x), sizeof(*_x)*(_off)); \
+    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
+    const typeof(&(ptr)->field) _s = &(ptr)->field;                 \
+    void *_d = (hnd).p;                                             \
+    ((void)(&(hnd).p->field == &(ptr)->field));                     \
+    xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off);               \
 })
 
 #define __copy_from_guest_offset(ptr, hnd, idx, nr) ({              \
-    const typeof(ptr) _x = (hnd).p;                                 \
-    const typeof(ptr) _y = (ptr);                                   \
-    xencomm_copy_from_guest(_y, _x, sizeof(*_x)*(nr), sizeof(*_x)*(idx));  \
+    const typeof(*(ptr)) *_s = (hnd).p;                             \
+    typeof(*(ptr)) *_d = (ptr);                                     \
+    xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \
 })
 
 #define __copy_field_from_guest(ptr, hnd, field) ({                 \
-    const int _off = offsetof(typeof(*ptr), field);                 \
-    const typeof(&(ptr)->field) _x = &(hnd).p->field;               \
-    const typeof(&(ptr)->field) _y = &(ptr)->field;                 \
-    xencomm_copy_to_guest(_y, _x, sizeof(*_x), sizeof(*_x)*(_off)); \
+    unsigned int _off = offsetof(typeof(*(hnd).p), field);          \
+    const void *_s = (hnd).p;                                       \
+    typeof(&(ptr)->field) _d = &(ptr)->field;                       \
+    ((void)(&(hnd).p->field == &(ptr)->field));                     \
+    xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off);             \
 })
 
 #endif /* __XENCOMM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

[Prev in Thread] Current Thread [Next in Thread]