To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86 hvm: Replace old MMIO emulator with x86_emulate()-based harness.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 21 Feb 2008 07:10:49 -0800
Delivery-date: Fri, 22 Feb 2008 07:47:37 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1203518205 0
# Node ID 3f1cf03826fe642434197f898c3aac55dc81ad25
# Parent  f853c049709546b4f1fa1b4b03ddff165c163d38
x86 hvm: Replace old MMIO emulator with x86_emulate()-based harness.
Re-factor VMX real-mode emulation to use the same harness.
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
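
The new harness is driven via three entry points added in emulate.c. As a
minimal sketch (not part of this changeset; the MMIO-exit caller and the
crash-on-failure policy are assumed for illustration):

    /* Hypothetical caller of the new emulation harness. */
    static void example_handle_mmio(struct cpu_user_regs *regs)
    {
        struct hvm_emulate_ctxt ctxt;
        int rc;

        hvm_emulate_prepare(&ctxt, regs); /* latch regs; snapshot CS and SS */
        rc = hvm_emulate_one(&ctxt);      /* fetch, decode, emulate one insn */
        hvm_emulate_writeback(&ctxt);     /* commit dirtied segment registers */

        if ( rc == X86EMUL_RETRY )
            return; /* ioreq sent to device model; emulation retried later */
        if ( rc == X86EMUL_UNHANDLEABLE )
            domain_crash(current->domain); /* assumed failure policy */
    }

Pending I/O is tracked by the io_in_progress/io_completed flags in struct
hvm_vcpu, so an instruction that touches the device model returns
X86EMUL_RETRY until the completion data has been latched in io_data.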
---
 xen/arch/x86/hvm/instrlen.c          |  419 ------------
 tools/ioemu/target-i386-dm/helper2.c |  107 ---
 xen/arch/x86/hvm/Makefile            |    2 
 xen/arch/x86/hvm/emulate.c           |  755 +++++++++++++++++++++++
 xen/arch/x86/hvm/hvm.c               |   50 -
 xen/arch/x86/hvm/intercept.c         |   62 -
 xen/arch/x86/hvm/io.c                |  723 ----------------------
 xen/arch/x86/hvm/platform.c          | 1136 -----------------------------------
 xen/arch/x86/hvm/stdvga.c            |   34 -
 xen/arch/x86/hvm/svm/svm.c           |  477 ++++++++------
 xen/arch/x86/hvm/vmx/realmode.c      |  773 ++---------------------
 xen/arch/x86/hvm/vmx/vmx.c           |   58 +
 xen/arch/x86/mm/shadow/multi.c       |    6 
 xen/include/asm-x86/hvm/emulate.h    |   55 +
 xen/include/asm-x86/hvm/hvm.h        |   12 
 xen/include/asm-x86/hvm/io.h         |   18 
 xen/include/asm-x86/hvm/support.h    |    1 
 xen/include/asm-x86/hvm/vcpu.h       |    9 
 xen/include/asm-x86/hvm/vmx/vmcs.h   |    5 
 xen/include/asm-x86/hvm/vmx/vmx.h    |    8 
 xen/include/public/hvm/ioreq.h       |    6 
 21 files changed, 1314 insertions(+), 3402 deletions(-)

diff -r f853c0497095 -r 3f1cf03826fe tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Tue Feb 19 11:14:40 2008 -0700
+++ b/tools/ioemu/target-i386-dm/helper2.c      Wed Feb 20 14:36:45 2008 +0000
@@ -379,82 +379,7 @@ void cpu_ioreq_move(CPUState *env, ioreq
     }
 }
 
-void cpu_ioreq_and(CPUState *env, ioreq_t *req)
-{
-    target_ulong tmp1, tmp2;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    if (req->dir == IOREQ_WRITE) {
-        tmp2 = tmp1 & (target_ulong) req->data;
-        write_physical(req->addr, req->size, &tmp2);
-    }
-    req->data = tmp1;
-}
-
-void cpu_ioreq_add(CPUState *env, ioreq_t *req)
-{
-    target_ulong tmp1, tmp2;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    if (req->dir == IOREQ_WRITE) {
-        tmp2 = tmp1 + (target_ulong) req->data;
-        write_physical(req->addr, req->size, &tmp2);
-    }
-    req->data = tmp1;
-}
-
-void cpu_ioreq_sub(CPUState *env, ioreq_t *req)
-{
-    target_ulong tmp1, tmp2;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    if (req->dir == IOREQ_WRITE) {
-        tmp2 = tmp1 - (target_ulong) req->data;
-        write_physical(req->addr, req->size, &tmp2);
-    }
-    req->data = tmp1;
-}
-
-void cpu_ioreq_or(CPUState *env, ioreq_t *req)
-{
-    target_ulong tmp1, tmp2;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    if (req->dir == IOREQ_WRITE) {
-        tmp2 = tmp1 | (target_ulong) req->data;
-        write_physical(req->addr, req->size, &tmp2);
-    }
-    req->data = tmp1;
-}
-
-void cpu_ioreq_xor(CPUState *env, ioreq_t *req)
-{
-    target_ulong tmp1, tmp2;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    if (req->dir == IOREQ_WRITE) {
-        tmp2 = tmp1 ^ (target_ulong) req->data;
-        write_physical(req->addr, req->size, &tmp2);
-    }
-    req->data = tmp1;
-}
-
-void timeoffset_get()
+void timeoffset_get(void)
 {
     char *p;
 
@@ -481,18 +406,6 @@ void cpu_ioreq_timeoffset(CPUState *env,
     fprintf(logfile, "Time offset set %ld, added offset %ld\n", time_offset, req->data);
     sprintf(b, "%ld", time_offset);
     xenstore_vm_write(domid, "rtc/timeoffset", b);
-}
-
-void cpu_ioreq_xchg(CPUState *env, ioreq_t *req)
-{
-    unsigned long tmp1;
-
-    if (req->data_is_ptr != 0)
-        hw_error("expected scalar value");
-
-    read_physical(req->addr, req->size, &tmp1);
-    write_physical(req->addr, req->size, &req->data);
-    req->data = tmp1;
 }
 
 void __handle_ioreq(CPUState *env, ioreq_t *req)
@@ -507,24 +420,6 @@ void __handle_ioreq(CPUState *env, ioreq
         break;
     case IOREQ_TYPE_COPY:
         cpu_ioreq_move(env, req);
-        break;
-    case IOREQ_TYPE_AND:
-        cpu_ioreq_and(env, req);
-        break;
-    case IOREQ_TYPE_ADD:
-        cpu_ioreq_add(env, req);
-        break;
-    case IOREQ_TYPE_SUB:
-        cpu_ioreq_sub(env, req);
-        break;
-    case IOREQ_TYPE_OR:
-        cpu_ioreq_or(env, req);
-        break;
-    case IOREQ_TYPE_XOR:
-        cpu_ioreq_xor(env, req);
-        break;
-    case IOREQ_TYPE_XCHG:
-        cpu_ioreq_xchg(env, req);
         break;
     case IOREQ_TYPE_TIMEOFFSET:
         cpu_ioreq_timeoffset(env, req);
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/Makefile
--- a/xen/arch/x86/hvm/Makefile Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/Makefile Wed Feb 20 14:36:45 2008 +0000
@@ -1,9 +1,9 @@ subdir-y += svm
 subdir-y += svm
 subdir-y += vmx
 
+obj-y += emulate.o
 obj-y += hvm.o
 obj-y += i8254.o
-obj-y += instrlen.o
 obj-y += intercept.o
 obj-y += io.o
 obj-y += iommu.o
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/emulate.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/emulate.c        Wed Feb 20 14:36:45 2008 +0000
@@ -0,0 +1,755 @@
+/******************************************************************************
+ * hvm/emulate.c
+ * 
+ * HVM instruction emulation. Used for MMIO and VMX real mode.
+ * 
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ * 
+ * Authors:
+ *    Keir Fraser <keir.fraser@xxxxxxxxxx>
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/paging.h>
+#include <asm/event.h>
+#include <asm/hvm/emulate.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+
+/*
+ * Convert addr from linear to physical form, valid over the range
+ * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to
+ * the valid computed range. It is always >0 when X86EMUL_OKAY is returned.
+ */
+static int hvmemul_linear_to_phys(
+    unsigned long addr,
+    paddr_t *paddr,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    enum hvm_access_type access_type,
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    struct vcpu *curr = current;
+    unsigned long pfn, npfn, done, todo, i;
+    struct segment_register *sreg;
+    uint32_t pfec;
+
+    /* Clip repetitions to a sensible maximum. */
+    *reps = min_t(unsigned long, *reps, 4096);
+
+    /* With no paging it's easy: linear == physical. */
+    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
+    {
+        *paddr = addr;
+        return X86EMUL_OKAY;
+    }
+
+    *paddr = addr & ~PAGE_MASK;
+
+    /* Gather access-type information for the page walks. */
+    sreg = hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt);
+    pfec = PFEC_page_present;
+    if ( sreg->attr.fields.dpl == 3 )
+        pfec |= PFEC_user_mode;
+    if ( access_type == hvm_access_write )
+        pfec |= PFEC_write_access;
+
+    /* Get the first PFN in the range. */
+    if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
+    {
+        hvm_inject_exception(TRAP_page_fault, pfec, addr);
+        return X86EMUL_EXCEPTION;
+    }
+
+    /* If the range does not straddle a page boundary then we're done. */
+    done = PAGE_SIZE - (addr & ~PAGE_MASK);
+    todo = *reps * bytes_per_rep;
+    if ( done >= todo )
+        goto done;
+
+    addr += done;
+    for ( i = 1; done < todo; i++ )
+    {
+        /* Get the next PFN in the range. */
+        if ( (npfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
+        {
+            hvm_inject_exception(TRAP_page_fault, pfec, addr);
+            return X86EMUL_EXCEPTION;
+        }
+
+        /* Is it contiguous with the preceding PFNs? If not then we're done. */
+        if ( npfn != (pfn + i) )
+        {
+            done /= bytes_per_rep;
+            if ( done == 0 )
+                return X86EMUL_UNHANDLEABLE;
+            *reps = done;
+            break;
+        }
+
+        addr += PAGE_SIZE;
+        done += PAGE_SIZE;
+    }
+
+ done:
+    *paddr |= (paddr_t)pfn << PAGE_SHIFT;
+    return X86EMUL_OKAY;
+}
+    
+
+static int hvmemul_virtual_to_linear(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned int bytes,
+    enum hvm_access_type access_type,
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    unsigned long *paddr)
+{
+    struct segment_register *reg;
+    int okay;
+
+    if ( seg == x86_seg_none )
+    {
+        *paddr = offset;
+        return X86EMUL_OKAY;
+    }
+
+    reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+    okay = hvm_virtual_to_linear_addr(
+        seg, reg, offset, bytes, access_type,
+        hvmemul_ctxt->ctxt.addr_size, paddr);
+
+    if ( !okay )
+    {
+        hvmemul_ctxt->flags.exn_pending = 1;
+        hvmemul_ctxt->exn_vector = TRAP_gp_fault;
+        hvmemul_ctxt->exn_insn_len = 0;
+        return X86EMUL_EXCEPTION;
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static int __hvmemul_read(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned long *val,
+    unsigned int bytes,
+    enum hvm_access_type access_type,
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    unsigned long addr;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, access_type, hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    *val = 0;
+
+    rc = ((access_type == hvm_access_insn_fetch) ?
+          hvm_fetch_from_guest_virt(val, addr, bytes) :
+          hvm_copy_from_guest_virt(val, addr, bytes));
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        return X86EMUL_EXCEPTION;
+
+    if ( rc == HVMCOPY_bad_gfn_to_mfn )
+    {
+        struct vcpu *curr = current;
+        unsigned long reps = 1;
+        paddr_t gpa;
+
+        if ( access_type == hvm_access_insn_fetch )
+            return X86EMUL_UNHANDLEABLE;
+
+        rc = hvmemul_linear_to_phys(
+            addr, &gpa, bytes, &reps, access_type, hvmemul_ctxt);
+        if ( rc != X86EMUL_OKAY )
+            return rc;
+
+        if ( curr->arch.hvm_vcpu.io_in_progress )
+            return X86EMUL_UNHANDLEABLE;
+
+        if ( !curr->arch.hvm_vcpu.io_completed )
+        {
+            curr->arch.hvm_vcpu.io_in_progress = 1;
+            send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, bytes,
+                          0, IOREQ_READ, 0, 0);
+        }
+
+        if ( !curr->arch.hvm_vcpu.io_completed )
+            return X86EMUL_RETRY;
+
+        *val = curr->arch.hvm_vcpu.io_data;
+        curr->arch.hvm_vcpu.io_completed = 0;
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_read(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned long *val,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    return __hvmemul_read(
+        seg, offset, val, bytes, hvm_access_read,
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt));
+}
+
+static int hvmemul_insn_fetch(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned long *val,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    unsigned int insn_off = offset - hvmemul_ctxt->insn_buf_eip;
+
+    /* Fall back if requested bytes are not in the prefetch cache. */
+    if ( unlikely((insn_off + bytes) > hvmemul_ctxt->insn_buf_bytes) )
+        return __hvmemul_read(
+            seg, offset, val, bytes,
+            hvm_access_insn_fetch, hvmemul_ctxt);
+
+    /* Hit the cache. Simple memcpy. */
+    *val = 0;
+    memcpy(val, &hvmemul_ctxt->insn_buf[insn_off], bytes);
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned long val,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    unsigned long addr;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, hvm_access_write, hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvm_copy_to_guest_virt(addr, &val, bytes);
+    if ( rc == HVMCOPY_bad_gva_to_gfn )
+        return X86EMUL_EXCEPTION;
+
+    if ( rc == HVMCOPY_bad_gfn_to_mfn )
+    {
+        struct vcpu *curr = current;
+        unsigned long reps = 1;
+        paddr_t gpa;
+
+        rc = hvmemul_linear_to_phys(
+            addr, &gpa, bytes, &reps, hvm_access_write, hvmemul_ctxt);
+        if ( rc != X86EMUL_OKAY )
+            return rc;
+
+        if ( curr->arch.hvm_vcpu.io_in_progress )
+            return X86EMUL_UNHANDLEABLE;
+
+        curr->arch.hvm_vcpu.io_in_progress = 1;
+        send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, bytes,
+                      val, IOREQ_WRITE, 0, 0);
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_cmpxchg(
+    enum x86_segment seg,
+    unsigned long offset,
+    unsigned long old,
+    unsigned long new,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    /* Fix this in case the guest is really relying on r-m-w atomicity. */
+    return hvmemul_write(seg, offset, new, bytes, ctxt);
+}
+
+static int hvmemul_rep_ins(
+    uint16_t src_port,
+    enum x86_segment dst_seg,
+    unsigned long dst_offset,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct vcpu *curr = current;
+    unsigned long addr;
+    paddr_t gpa;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(
+        dst_seg, dst_offset, *reps * bytes_per_rep, hvm_access_write,
+        hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvmemul_linear_to_phys(
+        addr, &gpa, bytes_per_rep, reps, hvm_access_write, hvmemul_ctxt);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    if ( curr->arch.hvm_vcpu.io_in_progress )
+        return X86EMUL_UNHANDLEABLE;
+
+    if ( !curr->arch.hvm_vcpu.io_completed )
+    {
+        curr->arch.hvm_vcpu.io_in_progress = 1;
+        send_pio_req(src_port, *reps, bytes_per_rep,
+                     gpa, IOREQ_READ,
+                     !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
+    }
+
+    if ( !curr->arch.hvm_vcpu.io_completed )
+        return X86EMUL_RETRY;
+
+    curr->arch.hvm_vcpu.io_completed = 0;
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_outs(
+    enum x86_segment src_seg,
+    unsigned long src_offset,
+    uint16_t dst_port,
+    unsigned int bytes_per_rep,
+    unsigned long *reps,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct vcpu *curr = current;
+    unsigned long addr;
+    paddr_t gpa;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(
+        src_seg, src_offset, *reps * bytes_per_rep, hvm_access_read,
+        hvmemul_ctxt, &addr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvmemul_linear_to_phys(
+        addr, &gpa, bytes_per_rep, reps, hvm_access_read, hvmemul_ctxt);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    if ( curr->arch.hvm_vcpu.io_in_progress )
+        return X86EMUL_UNHANDLEABLE;
+
+    curr->arch.hvm_vcpu.io_in_progress = 1;
+    send_pio_req(dst_port, *reps, bytes_per_rep,
+                 gpa, IOREQ_WRITE,
+                 !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_rep_movs(
+   enum x86_segment src_seg,
+   unsigned long src_offset,
+   enum x86_segment dst_seg,
+   unsigned long dst_offset,
+   unsigned int bytes_per_rep,
+   unsigned long *reps,
+   struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct vcpu *curr = current;
+    unsigned long saddr, daddr;
+    paddr_t sgpa, dgpa;
+    p2m_type_t p2mt;
+    int rc;
+
+    rc = hvmemul_virtual_to_linear(
+        src_seg, src_offset, *reps * bytes_per_rep, hvm_access_read,
+        hvmemul_ctxt, &saddr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvmemul_virtual_to_linear(
+        dst_seg, dst_offset, *reps * bytes_per_rep, hvm_access_write,
+        hvmemul_ctxt, &daddr);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvmemul_linear_to_phys(
+        saddr, &sgpa, bytes_per_rep, reps, hvm_access_read, hvmemul_ctxt);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    rc = hvmemul_linear_to_phys(
+        daddr, &dgpa, bytes_per_rep, reps, hvm_access_write, hvmemul_ctxt);
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    if ( curr->arch.hvm_vcpu.io_in_progress )
+        return X86EMUL_UNHANDLEABLE;
+
+    (void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt);
+    if ( !p2m_is_ram(p2mt) )
+    {
+        if ( !curr->arch.hvm_vcpu.io_completed )
+        {
+            curr->arch.hvm_vcpu.io_in_progress = 1;
+            send_mmio_req(IOREQ_TYPE_COPY, sgpa, *reps, bytes_per_rep,
+                      dgpa, IOREQ_READ,
+                      !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
+        }
+
+        if ( !curr->arch.hvm_vcpu.io_completed )
+            return X86EMUL_RETRY;
+
+        curr->arch.hvm_vcpu.io_completed = 0;
+    }
+    else
+    {
+        (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt);
+        if ( p2m_is_ram(p2mt) )
+            return X86EMUL_UNHANDLEABLE;
+        curr->arch.hvm_vcpu.io_in_progress = 1;
+        send_mmio_req(IOREQ_TYPE_COPY, dgpa, *reps, bytes_per_rep,
+                      sgpa, IOREQ_WRITE,
+                      !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_read_segment(
+    enum x86_segment seg,
+    struct segment_register *reg,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+    memcpy(reg, sreg, sizeof(struct segment_register));
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_segment(
+    enum x86_segment seg,
+    struct segment_register *reg,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+
+    if ( seg == x86_seg_ss )
+        hvmemul_ctxt->flags.mov_ss = 1;
+
+    memcpy(sreg, reg, sizeof(struct segment_register));
+    __set_bit(seg, &hvmemul_ctxt->seg_reg_dirty);
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_read_io(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *curr = current;
+
+    if ( curr->arch.hvm_vcpu.io_in_progress )
+        return X86EMUL_UNHANDLEABLE;
+
+    if ( !curr->arch.hvm_vcpu.io_completed )
+    {
+        curr->arch.hvm_vcpu.io_in_progress = 1;
+        send_pio_req(port, 1, bytes, 0, IOREQ_READ, 0, 0);
+    }
+
+    if ( !curr->arch.hvm_vcpu.io_completed )
+        return X86EMUL_RETRY;
+
+    *val = curr->arch.hvm_vcpu.io_data;
+    curr->arch.hvm_vcpu.io_completed = 0;
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_io(
+    unsigned int port,
+    unsigned int bytes,
+    unsigned long val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct vcpu *curr = current;
+
+    if ( port == 0xe9 )
+    {
+        hvm_print_line(curr, val);
+        return X86EMUL_OKAY;
+    }
+
+    if ( curr->arch.hvm_vcpu.io_in_progress )
+        return X86EMUL_UNHANDLEABLE;
+
+    curr->arch.hvm_vcpu.io_in_progress = 1;
+    send_pio_req(port, 1, bytes, val, IOREQ_WRITE, 0, 0);
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_read_cr(
+    unsigned int reg,
+    unsigned long *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    switch ( reg )
+    {
+    case 0:
+    case 2:
+    case 3:
+    case 4:
+        *val = current->arch.hvm_vcpu.guest_cr[reg];
+        return X86EMUL_OKAY;
+    default:
+        break;
+    }
+
+    return X86EMUL_UNHANDLEABLE;
+}
+
+static int hvmemul_write_cr(
+    unsigned int reg,
+    unsigned long val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    switch ( reg )
+    {
+    case 0:
+        return hvm_set_cr0(val);
+    case 2:
+        current->arch.hvm_vcpu.guest_cr[2] = val;
+        return X86EMUL_OKAY;
+    case 3:
+        return hvm_set_cr3(val);
+    case 4:
+        return hvm_set_cr4(val);
+    default:
+        break;
+    }
+
+    return X86EMUL_UNHANDLEABLE;
+}
+
+static int hvmemul_read_msr(
+    unsigned long reg,
+    uint64_t *val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct cpu_user_regs _regs;
+    int rc;
+
+    _regs.ecx = (uint32_t)reg;
+
+    if ( (rc = hvm_funcs.msr_read_intercept(&_regs)) != 0 )
+        return rc;
+
+    *val = ((uint64_t)(uint32_t)_regs.edx << 32) | (uint32_t)_regs.eax;
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_write_msr(
+    unsigned long reg,
+    uint64_t val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct cpu_user_regs _regs;
+
+    _regs.edx = (uint32_t)(val >> 32);
+    _regs.eax = (uint32_t)val;
+    _regs.ecx = (uint32_t)reg;
+
+    return hvm_funcs.msr_write_intercept(&_regs);
+}
+
+static int hvmemul_write_rflags(
+    unsigned long val,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    if ( (val & X86_EFLAGS_IF) && !(ctxt->regs->eflags & X86_EFLAGS_IF) )
+        hvmemul_ctxt->flags.sti = 1;
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_wbinvd(
+    struct x86_emulate_ctxt *ctxt)
+{
+    hvm_funcs.wbinvd_intercept();
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_cpuid(
+    unsigned int *eax,
+    unsigned int *ebx,
+    unsigned int *ecx,
+    unsigned int *edx,
+    struct x86_emulate_ctxt *ctxt)
+{
+    hvm_funcs.cpuid_intercept(eax, ebx, ecx, edx);
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_hlt(
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    hvmemul_ctxt->flags.hlt = 1;
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_inject_hw_exception(
+    uint8_t vector,
+    uint16_t error_code,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+
+    if ( error_code != 0 )
+        return X86EMUL_UNHANDLEABLE;
+
+    hvmemul_ctxt->flags.exn_pending = 1;
+    hvmemul_ctxt->exn_vector = vector;
+    hvmemul_ctxt->exn_insn_len = 0;
+
+    return X86EMUL_OKAY;
+}
+
+static int hvmemul_inject_sw_interrupt(
+    uint8_t vector,
+    uint8_t insn_len,
+    struct x86_emulate_ctxt *ctxt)
+{
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+
+    hvmemul_ctxt->flags.exn_pending = 1;
+    hvmemul_ctxt->exn_vector = vector;
+    hvmemul_ctxt->exn_insn_len = insn_len;
+
+    return X86EMUL_OKAY;
+}
+
+static void hvmemul_load_fpu_ctxt(
+    struct x86_emulate_ctxt *ctxt)
+{
+    if ( !current->fpu_dirtied )
+        hvm_funcs.fpu_dirty_intercept();
+}
+
+static struct x86_emulate_ops hvm_emulate_ops = {
+    .read          = hvmemul_read,
+    .insn_fetch    = hvmemul_insn_fetch,
+    .write         = hvmemul_write,
+    .cmpxchg       = hvmemul_cmpxchg,
+    .rep_ins       = hvmemul_rep_ins,
+    .rep_outs      = hvmemul_rep_outs,
+    .rep_movs      = hvmemul_rep_movs,
+    .read_segment  = hvmemul_read_segment,
+    .write_segment = hvmemul_write_segment,
+    .read_io       = hvmemul_read_io,
+    .write_io      = hvmemul_write_io,
+    .read_cr       = hvmemul_read_cr,
+    .write_cr      = hvmemul_write_cr,
+    .read_msr      = hvmemul_read_msr,
+    .write_msr     = hvmemul_write_msr,
+    .write_rflags  = hvmemul_write_rflags,
+    .wbinvd        = hvmemul_wbinvd,
+    .cpuid         = hvmemul_cpuid,
+    .hlt           = hvmemul_hlt,
+    .inject_hw_exception = hvmemul_inject_hw_exception,
+    .inject_sw_interrupt = hvmemul_inject_sw_interrupt,
+    .load_fpu_ctxt = hvmemul_load_fpu_ctxt
+};
+
+int hvm_emulate_one(
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
+    unsigned long addr;
+
+    hvmemul_ctxt->ctxt.addr_size =
+        hvmemul_ctxt->seg_reg[x86_seg_cs].attr.fields.db ? 32 : 16;
+    hvmemul_ctxt->ctxt.sp_size =
+        hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.db ? 32 : 16;
+
+    hvmemul_ctxt->insn_buf_eip = regs->eip;
+    hvmemul_ctxt->insn_buf_bytes =
+        (hvm_virtual_to_linear_addr(
+            x86_seg_cs, &hvmemul_ctxt->seg_reg[x86_seg_cs],
+            regs->eip, sizeof(hvmemul_ctxt->insn_buf),
+            hvm_access_insn_fetch, hvmemul_ctxt->ctxt.addr_size, &addr) &&
+         !hvm_fetch_from_guest_virt_nofault(
+             hvmemul_ctxt->insn_buf, addr, sizeof(hvmemul_ctxt->insn_buf)))
+        ? sizeof(hvmemul_ctxt->insn_buf) : 0;
+
+    hvmemul_ctxt->flag_word = 0;
+
+    return x86_emulate(&hvmemul_ctxt->ctxt, &hvm_emulate_ops);
+}
+
+void hvm_emulate_prepare(
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    struct cpu_user_regs *regs)
+{
+    hvmemul_ctxt->ctxt.regs = regs;
+    hvmemul_ctxt->seg_reg_accessed = 0;
+    hvmemul_ctxt->seg_reg_dirty = 0;
+    hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt);
+    hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt);
+}
+
+void hvm_emulate_writeback(
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    enum x86_segment seg;
+
+    seg = find_first_bit(&hvmemul_ctxt->seg_reg_dirty,
+                         ARRAY_SIZE(hvmemul_ctxt->seg_reg));
+
+    while ( seg < ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
+    {
+        hvm_set_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
+        seg = find_next_bit(&hvmemul_ctxt->seg_reg_dirty,
+                            ARRAY_SIZE(hvmemul_ctxt->seg_reg),
+                            seg+1);
+    }
+}
+
+struct segment_register *hvmemul_get_seg_reg(
+    enum x86_segment seg,
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    if ( !__test_and_set_bit(seg, &hvmemul_ctxt->seg_reg_accessed) )
+        hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
+    return &hvmemul_ctxt->seg_reg[seg];
+}
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/hvm.c    Wed Feb 20 14:36:45 2008 +0000
@@ -729,7 +729,7 @@ int hvm_set_efer(uint64_t value)
         gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
                  "EFER: %"PRIx64"\n", value);
         hvm_inject_exception(TRAP_gp_fault, 0, 0);
-        return 0;
+        return X86EMUL_EXCEPTION;
     }
 
     if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) &&
@@ -738,14 +738,14 @@ int hvm_set_efer(uint64_t value)
         gdprintk(XENLOG_WARNING,
                  "Trying to change EFER.LME with paging enabled\n");
         hvm_inject_exception(TRAP_gp_fault, 0, 0);
-        return 0;
+        return X86EMUL_EXCEPTION;
     }
 
     value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA;
     v->arch.hvm_vcpu.guest_efer = value;
     hvm_update_guest_efer(v);
 
-    return 1;
+    return X86EMUL_OKAY;
 }
 
 extern void shadow_blow_tables_per_domain(struct domain *d);
@@ -787,8 +787,7 @@ int hvm_set_cr0(unsigned long value)
         HVM_DBG_LOG(DBG_LEVEL_1,
                     "Guest attempts to set upper 32 bits in CR0: %lx",
                     value);
-        hvm_inject_exception(TRAP_gp_fault, 0, 0);
-        return 0;
+        goto gpf;
     }
 
     value &= ~HVM_CR0_GUEST_RESERVED_BITS;
@@ -797,10 +796,7 @@ int hvm_set_cr0(unsigned long value)
     value |= X86_CR0_ET;
 
     if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
-    {
-        hvm_inject_exception(TRAP_gp_fault, 0, 0);
-        return 0;
-    }
+        goto gpf;
 
     if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
     {
@@ -809,8 +805,7 @@ int hvm_set_cr0(unsigned long value)
             if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
             {
                 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable");
-                hvm_inject_exception(TRAP_gp_fault, 0, 0);
-                return 0;
+                goto gpf;
             }
             HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
             v->arch.hvm_vcpu.guest_efer |= EFER_LMA;
@@ -828,7 +823,7 @@ int hvm_set_cr0(unsigned long value)
                 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
                          v->arch.hvm_vcpu.guest_cr[3], mfn);
                 domain_crash(v->domain);
-                return 0;
+                return X86EMUL_UNHANDLEABLE;
             }
 
             /* Now arch.guest_table points to machine physical. */
@@ -895,7 +890,11 @@ int hvm_set_cr0(unsigned long value)
     if ( (value ^ old_value) & X86_CR0_PG )
         paging_update_paging_modes(v);
 
-    return 1;
+    return X86EMUL_OKAY;
+
+ gpf:
+    hvm_inject_exception(TRAP_gp_fault, 0, 0);
+    return X86EMUL_EXCEPTION;
 }
 
 int hvm_set_cr3(unsigned long value)
@@ -922,12 +921,12 @@ int hvm_set_cr3(unsigned long value)
 
     v->arch.hvm_vcpu.guest_cr[3] = value;
     paging_update_cr3(v);
-    return 1;
+    return X86EMUL_OKAY;
 
  bad_cr3:
     gdprintk(XENLOG_ERR, "Invalid CR3\n");
     domain_crash(v->domain);
-    return 0;
+    return X86EMUL_UNHANDLEABLE;
 }
 
 int hvm_set_cr4(unsigned long value)
@@ -958,11 +957,11 @@ int hvm_set_cr4(unsigned long value)
     if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
         paging_update_paging_modes(v);
 
-    return 1;
+    return X86EMUL_OKAY;
 
  gpf:
     hvm_inject_exception(TRAP_gp_fault, 0, 0);
-    return 0;
+    return X86EMUL_EXCEPTION;
 }
 
 int hvm_virtual_to_linear_addr(
@@ -977,7 +976,15 @@ int hvm_virtual_to_linear_addr(
     unsigned long addr = offset;
     uint32_t last_byte;
 
-    if ( addr_size != 64 )
+    if ( !(current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
+    {
+        /*
+         * REAL MODE: Don't bother with segment access checks.
+         * Certain of them are not done in native real mode anyway.
+         */
+        addr = (uint32_t)(addr + reg->base);
+    }
+    else if ( addr_size != 64 )
     {
         /*
          * COMPATIBILITY MODE: Apply segment checks and add base.
@@ -1304,7 +1311,7 @@ void hvm_task_switch(
     if ( ptss == NULL )
         goto out;
 
-    if ( !hvm_set_cr3(ptss->cr3) )
+    if ( hvm_set_cr3(ptss->cr3) )
     {
         hvm_unmap(ptss);
         goto out;
@@ -1399,7 +1406,10 @@ static enum hvm_copy_result __hvm_copy(
      * VMREADs on every data access hurts emulation performance.
      * Hence we do not gather extra PFEC flags if CR0.PG == 0.
      */
-    if ( virt && (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
+    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
+        virt = 0;
+
+    if ( virt )
     {
         struct segment_register sreg;
         hvm_get_segment_register(curr, x86_seg_ss, &sreg);
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/instrlen.c
--- a/xen/arch/x86/hvm/instrlen.c       Tue Feb 19 11:14:40 2008 -0700
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,419 +0,0 @@
-/*
- * instrlen.c - calculates the instruction length for all operating modes
- * 
- * Travis Betak, travis.betak@xxxxxxx
- * Copyright (c) 2005,2006 AMD
- * Copyright (c) 2005 Keir Fraser
- *
- * Essentially a very, very stripped version of Keir Fraser's work in
- * x86_emulate.c.  Used for MMIO.
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/mm.h>
-#include <asm-x86/x86_emulate.h>
-
-/* read from guest memory */
-extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip,
-        int length);
-
-/*
- * Opcode effective-address decode tables.
- * Note that we only emulate instructions that have at least one memory
- * operand (excluding implicit stack references). We assume that stack
- * references and instruction fetches will never occur in special memory
- * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
- * not be handled.
- */
-
-/* Operand sizes: 8-bit operands or specified/overridden size. */
-#define ByteOp      (1<<0) /* 8-bit operands. */
-/* Destination operand type. */
-#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
-#define DstReg      (2<<1) /* Register operand. */
-#define DstMem      (3<<1) /* Memory operand. */
-#define DstMask     (3<<1)
-/* Source operand type. */
-#define SrcNone     (0<<3) /* No source operand. */
-#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
-#define SrcReg      (1<<3) /* Register operand. */
-#define SrcMem      (2<<3) /* Memory operand. */
-#define SrcMem16    (3<<3) /* Memory operand (16-bit). */
-#define SrcMem32    (4<<3) /* Memory operand (32-bit). */
-#define SrcImm      (5<<3) /* Immediate operand. */
-#define SrcImmByte  (6<<3) /* 8-bit sign-extended immediate operand. */
-#define SrcMask     (7<<3)
-/* Generic ModRM decode. */
-#define ModRM       (1<<6)
-/* Destination is only written; never read. */
-#define Mov         (1<<7)
-
-static uint8_t opcode_table[256] = {
-    /* 0x00 - 0x07 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x08 - 0x0F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x10 - 0x17 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x18 - 0x1F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x20 - 0x27 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x28 - 0x2F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x30 - 0x37 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x38 - 0x3F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, 0,
-    /* 0x40 - 0x4F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x50 - 0x5F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x60 - 0x6F */
-    0, 0, 0, DstReg|SrcMem32|ModRM|Mov /* movsxd (x86/64) */,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x70 - 0x7F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x80 - 0x87 */
-    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
-    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    /* 0x88 - 0x8F */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
-    0, 0, 0, DstMem|SrcNone|ModRM|Mov,
-    /* 0x90 - 0x9F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xA0 - 0xA7 */
-    ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov,
-    ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov,
-    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
-    ByteOp|ImplicitOps, ImplicitOps,
-    /* 0xA8 - 0xAF */
-    0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
-    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
-    ByteOp|ImplicitOps, ImplicitOps,
-    /* 0xB0 - 0xBF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xC0 - 0xC7 */
-    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0,
-    0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
-    /* 0xC8 - 0xCF */
-    0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xD0 - 0xD7 */
-    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 
-    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 
-    0, 0, 0, 0,
-    /* 0xD8 - 0xDF */
-    0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xE0 - 0xEF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xF0 - 0xF7 */
-    0, 0, 0, 0,
-    0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM,
-    /* 0xF8 - 0xFF */
-    0, 0, 0, 0,
-    0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
-};
-
-static uint8_t twobyte_table[256] = {
-    /* 0x00 - 0x0F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
-    /* 0x10 - 0x1F */
-    0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x20 - 0x2F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x30 - 0x3F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x40 - 0x47 */
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    /* 0x48 - 0x4F */
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
-    /* 0x50 - 0x5F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x60 - 0x6F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x70 - 0x7F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x80 - 0x8F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0x90 - 0x9F */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xA0 - 0xA7 */
-    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, 
-    /* 0xA8 - 0xAF */
-    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
-    /* 0xB0 - 0xB7 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM,
-    0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
-    /* 0xB8 - 0xBF */
-    0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM,
-    0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
-    /* 0xC0 - 0xCF */
-    0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xD0 - 0xDF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xE0 - 0xEF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    /* 0xF0 - 0xFF */
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/* 
- * insn_fetch - fetch the next byte from instruction stream
- */
-#define insn_fetch()                                                      \
-({ uint8_t _x;                                                            \
-   if ( length >= 15 )                                                    \
-       return -1;                                                         \
-   if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) {                         \
-       unsigned long err;                                                 \
-       struct segment_register ss;                                        \
-       gdprintk(XENLOG_WARNING,                                           \
-                "Cannot read from address %lx (eip %lx, mode %d)\n",      \
-                pc, org_pc, address_bytes);                               \
-       err = 0; /* Must be not-present: we don't enforce reserved bits */ \
-       if ( hvm_nx_enabled(current) )                                     \
-           err |= PFEC_insn_fetch;                                        \
-       hvm_get_segment_register(current, x86_seg_ss, &ss);                \
-       if ( ss.attr.fields.dpl == 3 )                                     \
-           err |= PFEC_user_mode;                                         \
-       hvm_inject_exception(TRAP_page_fault, err, pc);                    \
-       return -1;                                                         \
-   }                                                                      \
-   if ( buf )                                                             \
-       buf[length] = _x;                                                  \
-   length += 1;                                                           \
-   pc += 1;                                                               \
-   _x;                                                                    \
-})
-
-#define insn_skip(_n) do {                      \
-    int _i;                                     \
-    for ( _i = 0; _i < (_n); _i++) {            \
-        (void) insn_fetch();                    \
-    }                                           \
-} while (0)
-
-/**
- * hvm_instruction_fetch - read the current instruction and return its length
- *
- * @org_pc: guest instruction pointer
- * @address_bytes: guest address width
- * @buf: (optional) buffer to load actual instruction bytes into
- *
- * Doesn't increment the guest's instruction pointer, but may
- * issue faults to the guest.  Returns -1 on failure.
- */
-int hvm_instruction_fetch(unsigned long org_pc, int address_bytes,
-                          unsigned char *buf)
-{
-    uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0;
-    unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp;
-    int length = 0;
-    unsigned long pc = org_pc;
-
-    op_bytes = op_default = ad_bytes = ad_default = address_bytes;
-    if ( op_bytes == 8 )
-    {
-        op_bytes = op_default = 4;
-#ifndef __x86_64__
-        return -1;
-#endif
-    }
-
-    /* Legacy prefixes. */
-    for ( ; ; )
-    {
-        switch ( b = insn_fetch() )
-        {
-        case 0x66: /* operand-size override */
-            op_bytes = op_default ^ 6;      /* switch between 2/4 bytes */
-            break;
-        case 0x67: /* address-size override */
-            if ( ad_default == 8 )
-                ad_bytes = ad_default ^ 12; /* switch between 4/8 bytes */
-            else
-                ad_bytes = ad_default ^ 6;  /* switch between 2/4 bytes */
-            break;
-        case 0x2e: /* CS override */
-        case 0x3e: /* DS override */
-        case 0x26: /* ES override */
-        case 0x64: /* FS override */
-        case 0x65: /* GS override */
-        case 0x36: /* SS override */
-        case 0xf0: /* LOCK */
-        case 0xf3: /* REP/REPE/REPZ */
-        case 0xf2: /* REPNE/REPNZ */
-            break;
-#ifdef __x86_64__
-        case 0x40 ... 0x4f:
-            if ( ad_default == 8 )
-            {
-                rex_prefix = b;
-                continue;
-            }
-            /* FALLTHRU */
-#endif
-        default:
-            goto done_prefixes;
-        }
-        rex_prefix = 0;
-    }
-done_prefixes:
-
-    /* REX prefix. */
-    if ( rex_prefix & 8 )
-        op_bytes = 8;                   /* REX.W */
-    /* REX.B, REX.R, and REX.X do not need to be decoded. */
-
-    /* Opcode byte(s). */
-    d = opcode_table[b];
-    if ( d == 0 )
-    {
-        /* Two-byte opcode? */
-        if ( b == 0x0f )
-        {
-            twobyte = 1;
-            b = insn_fetch();
-            d = twobyte_table[b];
-        }
-
-        /* Unrecognised? */
-        if ( d == 0 )
-            goto cannot_emulate;
-    }
-
-    /* ModRM and SIB bytes. */
-    if ( d & ModRM )
-    {
-        uint8_t modrm = insn_fetch();
-        uint8_t modrm_mod = (modrm & 0xc0) >> 6;
-        uint8_t modrm_rm  = (modrm & 0x07);
-
-        modrm_reg = (modrm & 0x38) >> 3;
-        if ( modrm_mod == 3 )
-        {
-            gdprintk(XENLOG_WARNING, "Cannot parse ModRM.mod == 3.\n");
-            goto cannot_emulate;
-        }
-
-        if ( ad_bytes == 2 )
-        {
-            /* 16-bit ModR/M decode. */
-            switch ( modrm_mod )
-            {
-            case 0:
-                if ( modrm_rm == 6 ) 
-                    insn_skip(2); /* skip disp16 */
-                break;
-            case 1:
-                insn_skip(1); /* skip disp8 */
-                break;
-            case 2:
-                insn_skip(2); /* skip disp16 */
-                break;
-            }
-        }
-        else
-        {
-            /* 32/64-bit ModR/M decode. */
-            switch ( modrm_mod )
-            {
-            case 0:
-                if ( (modrm_rm == 4) && 
-                     ((insn_fetch() & 7) == 5) )
-                    insn_skip(4); /* skip disp32 specified by SIB.base */
-                else if ( modrm_rm == 5 )
-                    insn_skip(4); /* skip disp32 */
-                break;
-            case 1:
-                if ( modrm_rm == 4 )
-                    insn_skip(1);
-                insn_skip(1); /* skip disp8 */
-                break;
-            case 2:
-                if ( modrm_rm == 4 )
-                    insn_skip(1);
-                insn_skip(4); /* skip disp32 */
-                break;
-            }
-        }
-    }
-
-    /* Decode and fetch the destination operand: register or memory. */
-    switch ( d & DstMask )
-    {
-    case ImplicitOps:
-        /* Special instructions do their own operand decoding. */
-        goto done;
-    }
-
-    /* Decode and fetch the source operand: register, memory or immediate. */
-    switch ( d & SrcMask )
-    {
-    case SrcImm:
-        tmp = (d & ByteOp) ? 1 : op_bytes;
-        if ( tmp == 8 ) tmp = 4;
-        /* NB. Immediates are sign-extended as necessary. */
-        insn_skip(tmp);
-        break;
-    case SrcImmByte:
-        insn_skip(1);
-        break;
-    }
-
-    if ( twobyte )
-        goto done;
-
-    switch ( b )
-    {
-    case 0xa0 ... 0xa3: /* mov */
-        insn_skip(ad_bytes); /* skip src/dst displacement */
-        break;
-    case 0xf6 ... 0xf7: /* Grp3 */
-        switch ( modrm_reg )
-        {
-        case 0 ... 1: /* test */
-            /* Special case in Grp3: test has an immediate source operand. */
-            tmp = (d & ByteOp) ? 1 : op_bytes;
-            if ( tmp == 8 ) tmp = 4;
-            insn_skip(tmp);
-            break;
-        }
-        break;
-    }
-
-done:
-    return length < 16 ? length : -1;
-
-cannot_emulate:
-    gdprintk(XENLOG_WARNING,
-            "Cannot emulate %02x at address %lx (%lx, addr_bytes %d)\n",
-            b, pc - 1, org_pc, address_bytes);
-    return -1;
-}
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/intercept.c
--- a/xen/arch/x86/hvm/intercept.c      Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/intercept.c      Wed Feb 20 14:36:45 2008 +0000
@@ -31,7 +31,6 @@
 #include <xen/event.h>
 #include <asm/iommu.h>
 
-
 extern struct hvm_mmio_handler hpet_mmio_handler;
 extern struct hvm_mmio_handler vlapic_mmio_handler;
 extern struct hvm_mmio_handler vioapic_mmio_handler;
@@ -50,12 +49,11 @@ static inline void hvm_mmio_access(struc
                                    hvm_mmio_read_t read_handler,
                                    hvm_mmio_write_t write_handler)
 {
-    unsigned int tmp1, tmp2;
     unsigned long data;
 
-    switch ( p->type ) {
+    switch ( p->type )
+    {
     case IOREQ_TYPE_COPY:
-    {
         if ( !p->data_is_ptr ) {
             if ( p->dir == IOREQ_READ )
                 p->data = read_handler(v, p->addr, p->size);
@@ -86,62 +84,6 @@ static inline void hvm_mmio_access(struc
                 }
             }
         }
-        break;
-    }
-
-    case IOREQ_TYPE_AND:
-        tmp1 = read_handler(v, p->addr, p->size);
-        if ( p->dir == IOREQ_WRITE ) {
-            tmp2 = tmp1 & (unsigned long) p->data;
-            write_handler(v, p->addr, p->size, tmp2);
-        }
-        p->data = tmp1;
-        break;
-
-    case IOREQ_TYPE_ADD:
-        tmp1 = read_handler(v, p->addr, p->size);
-        if (p->dir == IOREQ_WRITE) {
-            tmp2 = tmp1 + (unsigned long) p->data;
-            write_handler(v, p->addr, p->size, tmp2);
-        }
-        p->data = tmp1;
-        break;
-
-    case IOREQ_TYPE_OR:
-        tmp1 = read_handler(v, p->addr, p->size);
-        if ( p->dir == IOREQ_WRITE ) {
-            tmp2 = tmp1 | (unsigned long) p->data;
-            write_handler(v, p->addr, p->size, tmp2);
-        }
-        p->data = tmp1;
-        break;
-
-    case IOREQ_TYPE_XOR:
-        tmp1 = read_handler(v, p->addr, p->size);
-        if ( p->dir == IOREQ_WRITE ) {
-            tmp2 = tmp1 ^ (unsigned long) p->data;
-            write_handler(v, p->addr, p->size, tmp2);
-        }
-        p->data = tmp1;
-        break;
-
-    case IOREQ_TYPE_XCHG:
-        /*
-         * Note that we don't need to be atomic here since VCPU is accessing
-         * its own local APIC.
-         */
-        tmp1 = read_handler(v, p->addr, p->size);
-        write_handler(v, p->addr, p->size, (unsigned long) p->data);
-        p->data = tmp1;
-        break;
-
-    case IOREQ_TYPE_SUB:
-        tmp1 = read_handler(v, p->addr, p->size);
-        if ( p->dir == IOREQ_WRITE ) {
-            tmp2 = tmp1 - (unsigned long) p->data;
-            write_handler(v, p->addr, p->size, tmp2);
-        }
-        p->data = tmp1;
         break;
 
     default:
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/io.c     Wed Feb 20 14:36:45 2008 +0000
@@ -46,379 +46,8 @@
 #include <xen/iocap.h>
 #include <public/hvm/ioreq.h>
 
-#if defined (__i386__)
-static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value)
-{
-    switch (size) {
-    case BYTE:
-        switch (index) {
-        case 0:
-            regs->eax &= 0xFFFFFF00;
-            regs->eax |= (value & 0xFF);
-            break;
-        case 1:
-            regs->ecx &= 0xFFFFFF00;
-            regs->ecx |= (value & 0xFF);
-            break;
-        case 2:
-            regs->edx &= 0xFFFFFF00;
-            regs->edx |= (value & 0xFF);
-            break;
-        case 3:
-            regs->ebx &= 0xFFFFFF00;
-            regs->ebx |= (value & 0xFF);
-            break;
-        case 4:
-            regs->eax &= 0xFFFF00FF;
-            regs->eax |= ((value & 0xFF) << 8);
-            break;
-        case 5:
-            regs->ecx &= 0xFFFF00FF;
-            regs->ecx |= ((value & 0xFF) << 8);
-            break;
-        case 6:
-            regs->edx &= 0xFFFF00FF;
-            regs->edx |= ((value & 0xFF) << 8);
-            break;
-        case 7:
-            regs->ebx &= 0xFFFF00FF;
-            regs->ebx |= ((value & 0xFF) << 8);
-            break;
-        default:
-            goto crash;
-        }
-        break;
-    case WORD:
-        switch (index) {
-        case 0:
-            regs->eax &= 0xFFFF0000;
-            regs->eax |= (value & 0xFFFF);
-            break;
-        case 1:
-            regs->ecx &= 0xFFFF0000;
-            regs->ecx |= (value & 0xFFFF);
-            break;
-        case 2:
-            regs->edx &= 0xFFFF0000;
-            regs->edx |= (value & 0xFFFF);
-            break;
-        case 3:
-            regs->ebx &= 0xFFFF0000;
-            regs->ebx |= (value & 0xFFFF);
-            break;
-        case 4:
-            regs->esp &= 0xFFFF0000;
-            regs->esp |= (value & 0xFFFF);
-            break;
-        case 5:
-            regs->ebp &= 0xFFFF0000;
-            regs->ebp |= (value & 0xFFFF);
-            break;
-        case 6:
-            regs->esi &= 0xFFFF0000;
-            regs->esi |= (value & 0xFFFF);
-            break;
-        case 7:
-            regs->edi &= 0xFFFF0000;
-            regs->edi |= (value & 0xFFFF);
-            break;
-        default:
-            goto crash;
-        }
-        break;
-    case LONG:
-        switch (index) {
-        case 0:
-            regs->eax = value;
-            break;
-        case 1:
-            regs->ecx = value;
-            break;
-        case 2:
-            regs->edx = value;
-            break;
-        case 3:
-            regs->ebx = value;
-            break;
-        case 4:
-            regs->esp = value;
-            break;
-        case 5:
-            regs->ebp = value;
-            break;
-        case 6:
-            regs->esi = value;
-            break;
-        case 7:
-            regs->edi = value;
-            break;
-        default:
-            goto crash;
-        }
-        break;
-    default:
-    crash:
-        gdprintk(XENLOG_ERR, "size:%x, index:%x are invalid!\n", size, index);
-        domain_crash_synchronous();
-    }
-}
-#else
-static inline void __set_reg_value(unsigned long *reg, int size, long value)
-{
-    switch (size) {
-    case BYTE_64:
-        *reg &= ~0xFF;
-        *reg |= (value & 0xFF);
-        break;
-    case WORD:
-        *reg &= ~0xFFFF;
-        *reg |= (value & 0xFFFF);
-        break;
-    case LONG:
-        *reg &= ~0xFFFFFFFF;
-        *reg |= (value & 0xFFFFFFFF);
-        break;
-    case QUAD:
-        *reg = value;
-        break;
-    default:
-        gdprintk(XENLOG_ERR, "size:%x is invalid\n", size);
-        domain_crash_synchronous();
-    }
-}
-
-static void set_reg_value (int size, int index, int seg, struct cpu_user_regs *regs, long value)
-{
-    if (size == BYTE) {
-        switch (index) {
-        case 0:
-            regs->rax &= ~0xFF;
-            regs->rax |= (value & 0xFF);
-            break;
-        case 1:
-            regs->rcx &= ~0xFF;
-            regs->rcx |= (value & 0xFF);
-            break;
-        case 2:
-            regs->rdx &= ~0xFF;
-            regs->rdx |= (value & 0xFF);
-            break;
-        case 3:
-            regs->rbx &= ~0xFF;
-            regs->rbx |= (value & 0xFF);
-            break;
-        case 4:
-            regs->rax &= 0xFFFFFFFFFFFF00FF;
-            regs->rax |= ((value & 0xFF) << 8);
-            break;
-        case 5:
-            regs->rcx &= 0xFFFFFFFFFFFF00FF;
-            regs->rcx |= ((value & 0xFF) << 8);
-            break;
-        case 6:
-            regs->rdx &= 0xFFFFFFFFFFFF00FF;
-            regs->rdx |= ((value & 0xFF) << 8);
-            break;
-        case 7:
-            regs->rbx &= 0xFFFFFFFFFFFF00FF;
-            regs->rbx |= ((value & 0xFF) << 8);
-            break;
-        default:
-            gdprintk(XENLOG_ERR, "size:%x, index:%x are invalid!\n",
-                     size, index);
-            domain_crash_synchronous();
-            break;
-        }
-        return;
-    }
-
-    switch (index) {
-    case 0:
-        __set_reg_value(&regs->rax, size, value);
-        break;
-    case 1:
-        __set_reg_value(&regs->rcx, size, value);
-        break;
-    case 2:
-        __set_reg_value(&regs->rdx, size, value);
-        break;
-    case 3:
-        __set_reg_value(&regs->rbx, size, value);
-        break;
-    case 4:
-        __set_reg_value(&regs->rsp, size, value);
-        break;
-    case 5:
-        __set_reg_value(&regs->rbp, size, value);
-        break;
-    case 6:
-        __set_reg_value(&regs->rsi, size, value);
-        break;
-    case 7:
-        __set_reg_value(&regs->rdi, size, value);
-        break;
-    case 8:
-        __set_reg_value(&regs->r8, size, value);
-        break;
-    case 9:
-        __set_reg_value(&regs->r9, size, value);
-        break;
-    case 10:
-        __set_reg_value(&regs->r10, size, value);
-        break;
-    case 11:
-        __set_reg_value(&regs->r11, size, value);
-        break;
-    case 12:
-        __set_reg_value(&regs->r12, size, value);
-        break;
-    case 13:
-        __set_reg_value(&regs->r13, size, value);
-        break;
-    case 14:
-        __set_reg_value(&regs->r14, size, value);
-        break;
-    case 15:
-        __set_reg_value(&regs->r15, size, value);
-        break;
-    default:
-        gdprintk(XENLOG_ERR, "Invalid index\n");
-        domain_crash_synchronous();
-    }
-    return;
-}
-#endif
-
-long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs);
-
-static inline void set_eflags_CF(int size,
-                                 unsigned int instr,
-                                 unsigned long result,
-                                 unsigned long src,
-                                 unsigned long dst,
-                                 struct cpu_user_regs *regs)
-{
-    unsigned long mask;
-
-    if ( size == BYTE_64 )
-        size = BYTE;
-    ASSERT((size <= sizeof(mask)) && (size > 0));
-
-    mask = ~0UL >> (8 * (sizeof(mask) - size));
-
-    if ( instr == INSTR_ADD )
-    {
-        /* CF=1 <==> result is less than both the augend and the addend */
-        if ( (result & mask) < (dst & mask) )
-        {
-            ASSERT((result & mask) < (src & mask));
-            regs->eflags |= X86_EFLAGS_CF;
-        }
-    }
-    else
-    {
-        ASSERT( instr == INSTR_CMP || instr == INSTR_SUB );
-        if ( (src & mask) > (dst & mask) )
-            regs->eflags |= X86_EFLAGS_CF;
-    }
-}
-
-static inline void set_eflags_OF(int size,
-                                 unsigned int instr,
-                                 unsigned long result,
-                                 unsigned long src,
-                                 unsigned long dst,
-                                 struct cpu_user_regs *regs)
-{
-    unsigned long mask;
-
-    if ( size == BYTE_64 )
-        size = BYTE;
-    ASSERT((size <= sizeof(mask)) && (size > 0));
-
-    mask =  1UL << ((8*size) - 1);
-
-    if ( instr == INSTR_ADD )
-    {
-        if ((src ^ result) & (dst ^ result) & mask)
-            regs->eflags |= X86_EFLAGS_OF;
-    }
-    else
-    {
-        ASSERT(instr == INSTR_CMP || instr == INSTR_SUB);
-        if ((dst ^ src) & (dst ^ result) & mask)
-            regs->eflags |= X86_EFLAGS_OF;
-    }
-}
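The overflow test above relies on the standard sign-bit identity: for
addition, OF is set exactly when both operands have the sign that the
result lacks. A one-byte sanity check, purely illustrative and not part
of this patch:

    /* 0x7F + 0x01 = 0x80 overflows signed 8-bit arithmetic. */
    unsigned long src = 0x01, dst = 0x7F, result = 0x80;
    unsigned long mask = 1UL << 7;  /* sign bit for size == BYTE */
    /* (src^result) & (dst^result) & mask = 0x81 & 0xFF & 0x80 != 0 */
    int of = ((src ^ result) & (dst ^ result) & mask) != 0;  /* of == 1 */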
-
-static inline void set_eflags_AF(int size,
-                                 unsigned long result,
-                                 unsigned long src,
-                                 unsigned long dst,
-                                 struct cpu_user_regs *regs)
-{
-    if ((result ^ src ^ dst) & 0x10)
-        regs->eflags |= X86_EFLAGS_AF;
-}
-
-static inline void set_eflags_ZF(int size, unsigned long result,
-                                 struct cpu_user_regs *regs)
-{
-    unsigned long mask;
-
-    if ( size == BYTE_64 )
-        size = BYTE;
-    ASSERT((size <= sizeof(mask)) && (size > 0));
-
-    mask = ~0UL >> (8 * (sizeof(mask) - size));
-
-    if ((result & mask) == 0)
-        regs->eflags |= X86_EFLAGS_ZF;
-}
-
-static inline void set_eflags_SF(int size, unsigned long result,
-                                 struct cpu_user_regs *regs)
-{
-    unsigned long mask;
-
-    if ( size == BYTE_64 )
-        size = BYTE;
-    ASSERT((size <= sizeof(mask)) && (size > 0));
-
-    mask = 1UL << ((8*size) - 1);
-
-    if (result & mask)
-        regs->eflags |= X86_EFLAGS_SF;
-}
-
-static char parity_table[256] = {
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
-};
-
-static inline void set_eflags_PF(int size, unsigned long result,
-                                 struct cpu_user_regs *regs)
-{
-    if (parity_table[result & 0xFF])
-        regs->eflags |= X86_EFLAGS_PF;
-}
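The 256-entry table above encodes even parity of the low result byte
(x86 PF is set when that byte contains an even number of 1 bits). A
throwaway generator that reproduces it, shown only as a cross-check:

    /* Sketch: parity_table[i] == 1 iff popcount(i) is even. */
    static void build_parity_table(char table[256])
    {
        int i, n, bits;
        for ( i = 0; i < 256; i++ )
        {
            for ( bits = 0, n = i; n != 0; n >>= 1 )
                bits += n & 1;
            table[i] = !(bits & 1);
        }
    }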
-
-static void hvm_pio_assist(struct cpu_user_regs *regs, ioreq_t *p,
-                           struct hvm_io_op *pio_opp)
+static void hvm_pio_assist(
+    struct cpu_user_regs *regs, ioreq_t *p, struct hvm_io_op *pio_opp)
 {
     if ( p->data_is_ptr || (pio_opp->flags & OVERLAP) )
     {
@@ -472,335 +101,6 @@ static void hvm_pio_assist(struct cpu_us
     }
 }
 
-static void hvm_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p,
-                            struct hvm_io_op *mmio_opp)
-{
-    int sign = p->df ? -1 : 1;
-    int size = -1, index = -1;
-    unsigned long value = 0, result = 0;
-    unsigned long src, dst;
-
-    src = mmio_opp->operand[0];
-    dst = mmio_opp->operand[1];
-    size = operand_size(src);
-
-    HVMTRACE_1D(MMIO_ASSIST, current, p->data);
-        
-    switch (mmio_opp->instr) {
-    case INSTR_MOV:
-        if (dst & REGISTER) {
-            index = operand_index(dst);
-            set_reg_value(size, index, 0, regs, p->data);
-        }
-        break;
-
-    case INSTR_MOVZX:
-        if (dst & REGISTER) {
-            switch (size) {
-            case BYTE:
-                p->data &= 0xFFULL;
-                break;
-
-            case WORD:
-                p->data &= 0xFFFFULL;
-                break;
-
-            case LONG:
-                p->data &= 0xFFFFFFFFULL;
-                break;
-
-            default:
-                printk("Impossible source operand size of movzx instr: %d\n", 
size);
-                domain_crash_synchronous();
-            }
-            index = operand_index(dst);
-            set_reg_value(operand_size(dst), index, 0, regs, p->data);
-        }
-        break;
-
-    case INSTR_MOVSX:
-        if (dst & REGISTER) {
-            switch (size) {
-            case BYTE:
-                p->data &= 0xFFULL;
-                if ( p->data & 0x80ULL )
-                    p->data |= 0xFFFFFFFFFFFFFF00ULL;
-                break;
-
-            case WORD:
-                p->data &= 0xFFFFULL;
-                if ( p->data & 0x8000ULL )
-                    p->data |= 0xFFFFFFFFFFFF0000ULL;
-                break;
-
-            case LONG:
-                p->data &= 0xFFFFFFFFULL;
-                if ( p->data & 0x80000000ULL )
-                    p->data |= 0xFFFFFFFF00000000ULL;
-                break;
-
-            default:
-                printk("Impossible source operand size of movsx instr: %d\n", 
size);
-                domain_crash_synchronous();
-            }
-            index = operand_index(dst);
-            set_reg_value(operand_size(dst), index, 0, regs, p->data);
-        }
-        break;
-
-    case INSTR_MOVS:
-        sign = p->df ? -1 : 1;
-
-        if (mmio_opp->flags & REPZ)
-            regs->ecx -= p->count;
-
-        if ((mmio_opp->flags & OVERLAP) && p->dir == IOREQ_READ) {
-            unsigned long addr = mmio_opp->addr;
-
-            if (hvm_paging_enabled(current))
-            {
-                int rv = hvm_copy_to_guest_virt(addr, &p->data, p->size);
-                if ( rv == HVMCOPY_bad_gva_to_gfn )
-                    return; /* exception already injected */
-            }
-            else
-                (void)hvm_copy_to_guest_phys(addr, &p->data, p->size);
-        }
-
-        regs->esi += sign * p->count * p->size;
-        regs->edi += sign * p->count * p->size;
-
-        break;
-
-    case INSTR_STOS:
-        sign = p->df ? -1 : 1;
-        regs->edi += sign * p->count * p->size;
-        if (mmio_opp->flags & REPZ)
-            regs->ecx -= p->count;
-        break;
-
-    case INSTR_LODS:
-        set_reg_value(size, 0, 0, regs, p->data);
-        sign = p->df ? -1 : 1;
-        regs->esi += sign * p->count * p->size;
-        if (mmio_opp->flags & REPZ)
-            regs->ecx -= p->count;
-        break;
-
-    case INSTR_AND:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data & value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            result = (unsigned long) p->data & value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data & value;
-            set_reg_value(size, index, 0, regs, result);
-        }
-
-        /*
-         * The OF and CF flags are cleared; the SF, ZF, and PF
-         * flags are set according to the result. The state of
-         * the AF flag is undefined.
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_ADD:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data + value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            result = (unsigned long) p->data + value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data + value;
-            set_reg_value(size, index, 0, regs, result);
-        }
-
-        /*
-         * The CF, OF, SF, ZF, AF, and PF flags are set according
-         * to the result
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_CF(size, mmio_opp->instr, result, value,
-                      (unsigned long) p->data, regs);
-        set_eflags_OF(size, mmio_opp->instr, result, value,
-                      (unsigned long) p->data, regs);
-        set_eflags_AF(size, result, value, (unsigned long) p->data, regs);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_OR:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data | value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            result = (unsigned long) p->data | value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data | value;
-            set_reg_value(size, index, 0, regs, result);
-        }
-
-        /*
-         * The OF and CF flags are cleared; the SF, ZF, and PF
-         * flags are set according to the result. The state of
-         * the AF flag is undefined.
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_XOR:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data ^ value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            result = (unsigned long) p->data ^ value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data ^ value;
-            set_reg_value(size, index, 0, regs, result);
-        }
-
-        /*
-         * The OF and CF flags are cleared; the SF, ZF, and PF
-         * flags are set according to the result. The state of
-         * the AF flag is undefined.
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_CMP:
-    case INSTR_SUB:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            result = (unsigned long) p->data - value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            result = (unsigned long) p->data - value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            result = value - (unsigned long) p->data;
-            if ( mmio_opp->instr == INSTR_SUB )
-                set_reg_value(size, index, 0, regs, result);
-        }
-
-        /*
-         * The CF, OF, SF, ZF, AF, and PF flags are set according
-         * to the result
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        if ( src & (REGISTER | IMMEDIATE) )
-        {
-            set_eflags_CF(size, mmio_opp->instr, result, value,
-                          (unsigned long) p->data, regs);
-            set_eflags_OF(size, mmio_opp->instr, result, value,
-                          (unsigned long) p->data, regs);
-        }
-        else
-        {
-            set_eflags_CF(size, mmio_opp->instr, result,
-                          (unsigned long) p->data, value, regs);
-            set_eflags_OF(size, mmio_opp->instr, result,
-                          (unsigned long) p->data, value, regs);
-        }
-        set_eflags_AF(size, result, value, (unsigned long) p->data, regs);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_TEST:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-        }
-        result = (unsigned long) p->data & value;
-
-        /*
-         * Sets the SF, ZF, and PF status flags. CF and OF are set to 0
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, result, regs);
-        set_eflags_SF(size, result, regs);
-        set_eflags_PF(size, result, regs);
-        break;
-
-    case INSTR_BT:
-        if ( src & REGISTER )
-        {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-        }
-        else if ( src & IMMEDIATE )
-            value = mmio_opp->immediate;
-        if (p->data & (1 << (value & ((1 << 5) - 1))))
-            regs->eflags |= X86_EFLAGS_CF;
-        else
-            regs->eflags &= ~X86_EFLAGS_CF;
-
-        break;
-
-    case INSTR_XCHG:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            set_reg_value(size, index, 0, regs, p->data);
-        } else {
-            index = operand_index(dst);
-            set_reg_value(size, index, 0, regs, p->data);
-        }
-        break;
-
-    case INSTR_PUSH:
-        mmio_opp->addr += hvm_get_segment_base(current, x86_seg_ss);
-        {
-            unsigned long addr = mmio_opp->addr;
-            int rv = hvm_copy_to_guest_virt(addr, &p->data, size);
-            if ( rv == HVMCOPY_bad_gva_to_gfn )
-                return; /* exception already injected */
-        }
-        break;
-    }
-}
-
 void hvm_io_assist(void)
 {
     vcpu_iodata_t *vio;
@@ -825,8 +125,18 @@ void hvm_io_assist(void)
 
     p->state = STATE_IOREQ_NONE;
 
-    if ( v->arch.hvm_vcpu.io_complete && v->arch.hvm_vcpu.io_complete() )
-        goto out;
+    if ( v->arch.hvm_vcpu.io_in_progress )
+    {
+        v->arch.hvm_vcpu.io_in_progress = 0;
+        if ( p->dir == IOREQ_READ )
+        {
+            v->arch.hvm_vcpu.io_completed = 1;
+            v->arch.hvm_vcpu.io_data = p->data;
+        }
+        if ( v->arch.hvm_vcpu.mmio_in_progress )
+            (void)handle_mmio();
+        goto out;
+    }
 
     switch ( p->type )
     {
@@ -836,8 +146,9 @@ void hvm_io_assist(void)
         hvm_pio_assist(regs, p, io_opp);
         break;
     default:
-        hvm_mmio_assist(regs, p, io_opp);
-        break;
+        gdprintk(XENLOG_ERR, "Unexpected HVM iorequest state %d.\n", p->state);
+        domain_crash(v->domain);
+        goto out;
     }
 
     /* Copy register changes back into current guest state. */
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c       Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/platform.c       Wed Feb 20 14:36:45 2008 +0000
@@ -33,801 +33,9 @@
 #include <asm/hvm/support.h>
 #include <asm/hvm/io.h>
 #include <public/hvm/ioreq.h>
-
 #include <xen/lib.h>
 #include <xen/sched.h>
-#include <asm/current.h>
-
-#define DECODE_success  1
-#define DECODE_failure  0
-
-#define mk_operand(size_reg, index, seg, flag) \
-    (((size_reg) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
-
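mk_operand() packs a size, a register index, a segment and an operand
class into one word; the operand_size()/operand_index() accessors used
throughout this file presumably just unpack the top two bytes again.
For example:

    /* LONG-sized REGISTER operand, index 3 (%ebx), no segment override. */
    unsigned long op = mk_operand(LONG, 3, 0, REGISTER);
    /* operand_size(op) == LONG, operand_index(op) == 3, op & REGISTER */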
-#if defined (__x86_64__)
-static inline long __get_reg_value(unsigned long reg, int size)
-{
-    switch ( size ) {
-    case BYTE_64:
-        return (char)(reg & 0xFF);
-    case WORD:
-        return (short)(reg & 0xFFFF);
-    case LONG:
-        return (int)(reg & 0xFFFFFFFF);
-    case QUAD:
-        return (long)(reg);
-    default:
-        printk("Error: (__get_reg_value) Invalid reg size\n");
-        domain_crash_synchronous();
-    }
-}
-
-long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
-{
-    if ( size == BYTE ) {
-        switch ( index ) {
-        case 0: /* %al */
-            return (char)(regs->rax & 0xFF);
-        case 1: /* %cl */
-            return (char)(regs->rcx & 0xFF);
-        case 2: /* %dl */
-            return (char)(regs->rdx & 0xFF);
-        case 3: /* %bl */
-            return (char)(regs->rbx & 0xFF);
-        case 4: /* %ah */
-            return (char)((regs->rax & 0xFF00) >> 8);
-        case 5: /* %ch */
-            return (char)((regs->rcx & 0xFF00) >> 8);
-        case 6: /* %dh */
-            return (char)((regs->rdx & 0xFF00) >> 8);
-        case 7: /* %bh */
-            return (char)((regs->rbx & 0xFF00) >> 8);
-        default:
-            printk("Error: (get_reg_value) Invalid index value\n");
-            domain_crash_synchronous();
-        }
-        /* NOTREACHED */
-    }
-
-    switch ( index ) {
-    case 0: return __get_reg_value(regs->rax, size);
-    case 1: return __get_reg_value(regs->rcx, size);
-    case 2: return __get_reg_value(regs->rdx, size);
-    case 3: return __get_reg_value(regs->rbx, size);
-    case 4: return __get_reg_value(regs->rsp, size);
-    case 5: return __get_reg_value(regs->rbp, size);
-    case 6: return __get_reg_value(regs->rsi, size);
-    case 7: return __get_reg_value(regs->rdi, size);
-    case 8: return __get_reg_value(regs->r8, size);
-    case 9: return __get_reg_value(regs->r9, size);
-    case 10: return __get_reg_value(regs->r10, size);
-    case 11: return __get_reg_value(regs->r11, size);
-    case 12: return __get_reg_value(regs->r12, size);
-    case 13: return __get_reg_value(regs->r13, size);
-    case 14: return __get_reg_value(regs->r14, size);
-    case 15: return __get_reg_value(regs->r15, size);
-    default:
-        printk("Error: (get_reg_value) Invalid index value\n");
-        domain_crash_synchronous();
-    }
-}
-#elif defined (__i386__)
-static inline long __get_reg_value(unsigned long reg, int size)
-{
-    switch ( size ) {
-    case WORD:
-        return (short)(reg & 0xFFFF);
-    case LONG:
-        return (int)(reg & 0xFFFFFFFF);
-    default:
-        printk("Error: (__get_reg_value) Invalid reg size\n");
-        domain_crash_synchronous();
-    }
-}
-
-long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
-{
-    if ( size == BYTE ) {
-        switch ( index ) {
-        case 0: /* %al */
-            return (char)(regs->eax & 0xFF);
-        case 1: /* %cl */
-            return (char)(regs->ecx & 0xFF);
-        case 2: /* %dl */
-            return (char)(regs->edx & 0xFF);
-        case 3: /* %bl */
-            return (char)(regs->ebx & 0xFF);
-        case 4: /* %ah */
-            return (char)((regs->eax & 0xFF00) >> 8);
-        case 5: /* %ch */
-            return (char)((regs->ecx & 0xFF00) >> 8);
-        case 6: /* %dh */
-            return (char)((regs->edx & 0xFF00) >> 8);
-        case 7: /* %bh */
-            return (char)((regs->ebx & 0xFF00) >> 8);
-        default:
-            printk("Error: (get_reg_value) Invalid index value\n");
-            domain_crash_synchronous();
-        }
-    }
-
-    switch ( index ) {
-    case 0: return __get_reg_value(regs->eax, size);
-    case 1: return __get_reg_value(regs->ecx, size);
-    case 2: return __get_reg_value(regs->edx, size);
-    case 3: return __get_reg_value(regs->ebx, size);
-    case 4: return __get_reg_value(regs->esp, size);
-    case 5: return __get_reg_value(regs->ebp, size);
-    case 6: return __get_reg_value(regs->esi, size);
-    case 7: return __get_reg_value(regs->edi, size);
-    default:
-        printk("Error: (get_reg_value) Invalid index value\n");
-        domain_crash_synchronous();
-    }
-}
-#endif
-
-static inline unsigned char *check_prefix(unsigned char *inst,
-                                          struct hvm_io_op *mmio_op,
-                                          unsigned char *ad_size,
-                                          unsigned char *op_size,
-                                          unsigned char *seg_sel,
-                                          unsigned char *rex_p)
-{
-    while ( 1 ) {
-        switch ( *inst ) {
-            /* rex prefix for em64t instructions */
-        case 0x40 ... 0x4f:
-            *rex_p = *inst;
-            break;
-        case 0xf3: /* REPZ */
-            mmio_op->flags = REPZ;
-            break;
-        case 0xf2: /* REPNZ */
-            mmio_op->flags = REPNZ;
-            break;
-        case 0xf0: /* LOCK */
-            break;
-        case 0x2e: /* CS */
-        case 0x36: /* SS */
-        case 0x3e: /* DS */
-        case 0x26: /* ES */
-        case 0x64: /* FS */
-        case 0x65: /* GS */
-            *seg_sel = *inst;
-            break;
-        case 0x66: /* 32bit->16bit */
-            *op_size = WORD;
-            break;
-        case 0x67:
-            *ad_size = WORD;
-            break;
-        default:
-            return inst;
-        }
-        inst++;
-    }
-}
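Fed the bytes 66 f3 a5 (rep movsw), for instance, the scanner records
the operand-size override and the REP prefix, then returns a pointer to
the a5 opcode byte. A sketch, assuming a scratch struct hvm_io_op:

    struct hvm_io_op op_ctx = { 0 };
    unsigned char insn[] = { 0x66, 0xf3, 0xa5 };   /* rep movsw */
    unsigned char ad_size = 0, op_size = 0, seg_sel = 0, rex = 0;
    unsigned char *opc = check_prefix(insn, &op_ctx, &ad_size,
                                      &op_size, &seg_sel, &rex);
    /* opc == &insn[2], op_size == WORD, op_ctx.flags == REPZ */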
-
-static inline unsigned long get_immediate(int ad_size, const unsigned char *inst, int op_size)
-{
-    int mod, reg, rm;
-    unsigned long val = 0;
-    int i;
-
-    mod = (*inst >> 6) & 3;
-    reg = (*inst >> 3) & 7;
-    rm = *inst & 7;
-
-    inst++; //skip ModR/M byte
-    if ( ad_size != WORD && mod != 3 && rm == 4 ) {
-        rm = *inst & 7;
-        inst++; //skip SIB byte
-    }
-
-    switch ( mod ) {
-    case 0:
-        if ( ad_size == WORD ) {
-            if ( rm == 6 )
-                inst = inst + 2; //disp16, skip 2 bytes
-        }
-        else {
-            if ( rm == 5 )
-                inst = inst + 4; //disp32, skip 4 bytes
-        }
-        break;
-    case 1:
-        inst++; //disp8, skip 1 byte
-        break;
-    case 2:
-        if ( ad_size == WORD )
-            inst = inst + 2; //disp16, skip 2 bytes
-        else
-            inst = inst + 4; //disp32, skip 4 bytes
-        break;
-    }
-
-    if ( op_size == QUAD )
-        op_size = LONG;
-
-    for ( i = 0; i < op_size; i++ ) {
-        val |= (*inst++ & 0xff) << (8 * i);
-    }
-
-    return val;
-}
-
-static inline unsigned long get_immediate_sign_ext(
-    int ad_size, const unsigned char *inst, int op_size)
-{
-    unsigned long result = get_immediate(ad_size, inst, op_size);
-    if ( op_size == BYTE )
-        return (int8_t)result;
-    if ( op_size == WORD )
-        return (int16_t)result;
-    return (int32_t)result;
-}
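To see how the ModR/M and displacement skipping above lines up with a
real instruction, take addl $0x10, 4(%eax) in a 32-bit code segment,
i.e. the byte sequence 83 40 04 10. A sketch of the call, assuming the
usual BYTE/WORD/LONG byte-count constants:

    unsigned char bytes[] = { 0x83, 0x40, 0x04, 0x10 };
    /* ModR/M 0x40: mod=1, reg=0 (/0 => ADD), rm=0 (%eax); one disp8
     * byte (0x04) is skipped, then the imm8 byte 0x10 is fetched. */
    unsigned long imm = get_immediate_sign_ext(LONG, bytes + 1, BYTE);
    /* imm == 0x10 */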
-
-static inline int get_index(const unsigned char *inst, unsigned char rex)
-{
-    int mod, reg, rm;
-    int rex_r, rex_b;
-
-    mod = (*inst >> 6) & 3;
-    reg = (*inst >> 3) & 7;
-    rm = *inst & 7;
-
-    rex_r = (rex >> 2) & 1;
-    rex_b = rex & 1;
-
-    // Only one operand in the instruction is a register
-    if ( mod == 3 ) {
-        return (rm + (rex_b << 3));
-    } else {
-        return (reg + (rex_r << 3));
-    }
-    return 0;
-}
-
-static void init_instruction(struct hvm_io_op *mmio_op)
-{
-    mmio_op->instr = 0;
-
-    mmio_op->flags = 0;
-
-    mmio_op->operand[0] = 0;
-    mmio_op->operand[1] = 0;
-    mmio_op->immediate = 0;
-}
-
-#define GET_OP_SIZE_FOR_BYTE(size_reg)      \
-    do {                                    \
-        if ( rex )                          \
-            (size_reg) = BYTE_64;           \
-        else                                \
-            (size_reg) = BYTE;              \
-    } while( 0 )
-
-#define GET_OP_SIZE_FOR_NONEBYTE(op_size)   \
-    do {                                    \
-        if ( rex & 0x8 )                    \
-            (op_size) = QUAD;               \
-        else if ( (op_size) != WORD )       \
-            (op_size) = LONG;               \
-    } while( 0 )
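With a REX.W prefix (rex = 0x48), for instance, the two macros select
the 64-bit forms:

    unsigned char rex = 0x48;           /* REX.W */
    unsigned char size_reg, op_size = 0;
    GET_OP_SIZE_FOR_BYTE(size_reg);     /* any REX => size_reg = BYTE_64 */
    GET_OP_SIZE_FOR_NONEBYTE(op_size);  /* rex & 0x8 => op_size = QUAD */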
-
-
-/*
- * Decode mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax)
- */
-static inline int mem_acc(unsigned char size, struct hvm_io_op *mmio)
-{
-    mmio->operand[0] = mk_operand(size, 0, 0, MEMORY);
-    mmio->operand[1] = mk_operand(size, 0, 0, REGISTER);
-    return DECODE_success;
-}
-
-/*
- * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32)
- */
-static inline int acc_mem(unsigned char size, struct hvm_io_op *mmio)
-{
-    mmio->operand[0] = mk_operand(size, 0, 0, REGISTER);
-    mmio->operand[1] = mk_operand(size, 0, 0, MEMORY);
-    return DECODE_success;
-}
-
-/*
- * Decode mem,reg operands (as in <opcode> r32/16, m32/16)
- */
-static int mem_reg(unsigned char size, unsigned char *opcode,
-                   struct hvm_io_op *mmio_op, unsigned char rex)
-{
-    int index = get_index(opcode + 1, rex);
-
-    mmio_op->operand[0] = mk_operand(size, 0, 0, MEMORY);
-    mmio_op->operand[1] = mk_operand(size, index, 0, REGISTER);
-    return DECODE_success;
-}
-
-/*
- * Decode reg,mem operands (as in <opcode> m32/16, r32/16)
- */
-static int reg_mem(unsigned char size, unsigned char *opcode,
-                   struct hvm_io_op *mmio_op, unsigned char rex)
-{
-    int index = get_index(opcode + 1, rex);
-
-    mmio_op->operand[0] = mk_operand(size, index, 0, REGISTER);
-    mmio_op->operand[1] = mk_operand(size, 0, 0, MEMORY);
-    return DECODE_success;
-}
-
-static int mmio_decode(int address_bytes, unsigned char *opcode,
-                       struct hvm_io_op *mmio_op,
-                       unsigned char *ad_size, unsigned char *op_size,
-                       unsigned char *seg_sel)
-{
-    unsigned char size_reg = 0;
-    unsigned char rex = 0;
-    int index;
-
-    *ad_size = 0;
-    *op_size = 0;
-    *seg_sel = 0;
-    init_instruction(mmio_op);
-
-    opcode = check_prefix(opcode, mmio_op, ad_size, op_size, seg_sel, &rex);
-
-    switch ( address_bytes )
-    {
-    case 2:
-        if ( *op_size == WORD )
-            *op_size = LONG;
-        else if ( *op_size == LONG )
-            *op_size = WORD;
-        else if ( *op_size == 0 )
-            *op_size = WORD;
-        if ( *ad_size == WORD )
-            *ad_size = LONG;
-        else if ( *ad_size == LONG )
-            *ad_size = WORD;
-        else if ( *ad_size == 0 )
-            *ad_size = WORD;
-        break;
-    case 4:
-        if ( *op_size == 0 )
-            *op_size = LONG;
-        if ( *ad_size == 0 )
-            *ad_size = LONG;
-        break;
-#ifdef __x86_64__
-    case 8:
-        if ( *op_size == 0 )
-            *op_size = rex & 0x8 ? QUAD : LONG;
-        if ( *ad_size == WORD )
-            *ad_size = LONG;
-        else if ( *ad_size == 0 )
-            *ad_size = QUAD;
-        break;
-#endif
-    }
-
-    /* the operands order in comments conforms to AT&T convention */
-
-    switch ( *opcode ) {
-
-    case 0x00: /* add r8, m8 */
-        mmio_op->instr = INSTR_ADD;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x03: /* add m32/16, r32/16 */
-        mmio_op->instr = INSTR_ADD;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0x08: /* or r8, m8 */ 
-        mmio_op->instr = INSTR_OR;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x09: /* or r32/16, m32/16 */
-        mmio_op->instr = INSTR_OR;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x0A: /* or m8, r8 */
-        mmio_op->instr = INSTR_OR;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, mmio_op, rex);
-
-    case 0x0B: /* or m32/16, r32/16 */
-        mmio_op->instr = INSTR_OR;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0x20: /* and r8, m8 */
-        mmio_op->instr = INSTR_AND;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x21: /* and r32/16, m32/16 */
-        mmio_op->instr = INSTR_AND;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x22: /* and m8, r8 */
-        mmio_op->instr = INSTR_AND;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, mmio_op, rex);
-
-    case 0x23: /* and m32/16, r32/16 */
-        mmio_op->instr = INSTR_AND;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0x2B: /* sub m32/16, r32/16 */
-        mmio_op->instr = INSTR_SUB;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0x30: /* xor r8, m8 */
-        mmio_op->instr = INSTR_XOR;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x31: /* xor r32/16, m32/16 */
-        mmio_op->instr = INSTR_XOR;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x32: /* xor m8, r8 */
-        mmio_op->instr = INSTR_XOR;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, mmio_op, rex);
-
-    case 0x38: /* cmp r8, m8 */
-        mmio_op->instr = INSTR_CMP;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x39: /* cmp r32/16, m32/16 */
-        mmio_op->instr = INSTR_CMP;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x3A: /* cmp m8, r8 */
-        mmio_op->instr = INSTR_CMP;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, mmio_op, rex);
-
-    case 0x3B: /* cmp m32/16, r32/16 */
-        mmio_op->instr = INSTR_CMP;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0x80:
-    case 0x81:
-    case 0x83:
-    {
-        unsigned char ins_subtype = (opcode[1] >> 3) & 7;
-
-        if ( opcode[0] == 0x80 ) {
-            *op_size = BYTE;
-            GET_OP_SIZE_FOR_BYTE(size_reg);
-        } else {
-            GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-            size_reg = *op_size;
-        }
-
-        /* opcode 0x83 always has a single byte operand */
-        if ( opcode[0] == 0x83 )
-            mmio_op->immediate =
-                get_immediate_sign_ext(*ad_size, opcode + 1, BYTE);
-        else
-            mmio_op->immediate =
-                get_immediate_sign_ext(*ad_size, opcode + 1, *op_size);
-
-        mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
-        mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
-
-        switch ( ins_subtype ) {
-        case 0: /* add $imm, m32/16 */
-            mmio_op->instr = INSTR_ADD;
-            return DECODE_success;
-
-        case 1: /* or $imm, m32/16 */
-            mmio_op->instr = INSTR_OR;
-            return DECODE_success;
-
-        case 4: /* and $imm, m32/16 */
-            mmio_op->instr = INSTR_AND;
-            return DECODE_success;
-
-        case 5: /* sub $imm, m32/16 */
-            mmio_op->instr = INSTR_SUB;
-            return DECODE_success;
-
-        case 6: /* xor $imm, m32/16 */
-            mmio_op->instr = INSTR_XOR;
-            return DECODE_success;
-
-        case 7: /* cmp $imm, m32/16 */
-            mmio_op->instr = INSTR_CMP;
-            return DECODE_success;
-
-        default:
-            printk("%x/%x, This opcode isn't handled yet!\n",
-                   *opcode, ins_subtype);
-            return DECODE_failure;
-        }
-    }
-
-    case 0x84:  /* test r8, m8 */
-        mmio_op->instr = INSTR_TEST;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x85: /* test r16/32, m16/32 */
-        mmio_op->instr = INSTR_TEST;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x86:  /* xchg m8, r8 */
-        mmio_op->instr = INSTR_XCHG;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x87:  /* xchg m16/32, r16/32 */
-        mmio_op->instr = INSTR_XCHG;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x88: /* mov r8, m8 */
-        mmio_op->instr = INSTR_MOV;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, mmio_op, rex);
-
-    case 0x89: /* mov r32/16, m32/16 */
-        mmio_op->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return reg_mem(*op_size, opcode, mmio_op, rex);
-
-    case 0x8A: /* mov m8, r8 */
-        mmio_op->instr = INSTR_MOV;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, mmio_op, rex);
-
-    case 0x8B: /* mov m32/16, r32/16 */
-        mmio_op->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_reg(*op_size, opcode, mmio_op, rex);
-
-    case 0xA0: /* mov <addr>, al */
-        mmio_op->instr = INSTR_MOV;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_acc(size_reg, mmio_op);
-
-    case 0xA1: /* mov <addr>, ax/eax */
-        mmio_op->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return mem_acc(*op_size, mmio_op);
-
-    case 0xA2: /* mov al, <addr> */
-        mmio_op->instr = INSTR_MOV;
-        *op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return acc_mem(size_reg, mmio_op);
-
-    case 0xA3: /* mov ax/eax, <addr> */
-        mmio_op->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return acc_mem(*op_size, mmio_op);
-
-    case 0xA4: /* movsb */
-        mmio_op->instr = INSTR_MOVS;
-        *op_size = BYTE;
-        return DECODE_success;
-
-    case 0xA5: /* movsw/movsl */
-        mmio_op->instr = INSTR_MOVS;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return DECODE_success;
-
-    case 0xAA: /* stosb */
-        mmio_op->instr = INSTR_STOS;
-        *op_size = BYTE;
-        return DECODE_success;
-
-    case 0xAB: /* stosw/stosl */
-        mmio_op->instr = INSTR_STOS;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return DECODE_success;
-
-    case 0xAC: /* lodsb */
-        mmio_op->instr = INSTR_LODS;
-        *op_size = BYTE;
-        return DECODE_success;
-
-    case 0xAD: /* lodsw/lodsl */
-        mmio_op->instr = INSTR_LODS;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        return DECODE_success;
-
-    case 0xC6:
-        if ( ((opcode[1] >> 3) & 7) == 0 ) { /* mov $imm8, m8 */
-            mmio_op->instr = INSTR_MOV;
-            *op_size = BYTE;
-
-            mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE);
-            mmio_op->immediate  =
-                    get_immediate(*ad_size, opcode + 1, *op_size);
-            mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
-
-            return DECODE_success;
-        } else
-            return DECODE_failure;
-
-    case 0xC7:
-        if ( ((opcode[1] >> 3) & 7) == 0 ) { /* mov $imm16/32, m16/32 */
-            mmio_op->instr = INSTR_MOV;
-            GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-
-            mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE);
-            mmio_op->immediate =
-                    get_immediate_sign_ext(*ad_size, opcode + 1, *op_size);
-            mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
-
-            return DECODE_success;
-        } else
-            return DECODE_failure;
-
-    case 0xF6:
-    case 0xF7:
-        if ( ((opcode[1] >> 3) & 7) == 0 ) { /* test $imm8/16/32, m8/16/32 */
-            mmio_op->instr = INSTR_TEST;
-
-            if ( opcode[0] == 0xF6 ) {
-                *op_size = BYTE;
-                GET_OP_SIZE_FOR_BYTE(size_reg);
-            } else {
-                GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-                size_reg = *op_size;
-            }
-
-            mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
-            mmio_op->immediate =
-                    get_immediate_sign_ext(*ad_size, opcode + 1, *op_size);
-            mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
-
-            return DECODE_success;
-        } else
-            return DECODE_failure;
-
-    case 0xFE:
-    case 0xFF:
-    {
-        unsigned char ins_subtype = (opcode[1] >> 3) & 7;
-
-        if ( opcode[0] == 0xFE ) {
-            *op_size = BYTE;
-            GET_OP_SIZE_FOR_BYTE(size_reg);
-        } else {
-            GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-            size_reg = *op_size;
-        }
-
-        mmio_op->immediate = 1;
-        mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
-        mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
-
-        switch ( ins_subtype ) {
-        case 0: /* inc */
-            mmio_op->instr = INSTR_ADD;
-            return DECODE_success;
-
-        case 1: /* dec */
-            mmio_op->instr = INSTR_SUB;
-            return DECODE_success;
-
-        case 6: /* push */
-            mmio_op->instr = INSTR_PUSH;
-            mmio_op->operand[0] = mmio_op->operand[1];
-            return DECODE_success;
-
-        default:
-            printk("%x/%x, This opcode isn't handled yet!\n",
-                   *opcode, ins_subtype);
-            return DECODE_failure;
-        }
-    }
-
-    case 0x0F:
-        break;
-
-    default:
-        printk("%x, This opcode isn't handled yet!\n", *opcode);
-        return DECODE_failure;
-    }
-
-    switch ( *++opcode ) {
-    case 0xB6: /* movzx m8, r16/r32/r64 */
-        mmio_op->instr = INSTR_MOVZX;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        index = get_index(opcode + 1, rex);
-        mmio_op->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
-        mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xB7: /* movzx m16, r32/r64 */
-        mmio_op->instr = INSTR_MOVZX;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        index = get_index(opcode + 1, rex);
-        mmio_op->operand[0] = mk_operand(WORD, 0, 0, MEMORY);
-        mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xBE: /* movsx m8, r16/r32/r64 */
-        mmio_op->instr = INSTR_MOVSX;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        index = get_index(opcode + 1, rex);
-        mmio_op->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
-        mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xBF: /* movsx m16, r32/r64 */
-        mmio_op->instr = INSTR_MOVSX;
-        GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-        index = get_index(opcode + 1, rex);
-        mmio_op->operand[0] = mk_operand(WORD, 0, 0, MEMORY);
-        mmio_op->operand[1] = mk_operand(*op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xA3: /* bt r32, m32 */
-        mmio_op->instr = INSTR_BT;
-        index = get_index(opcode + 1, rex);
-        *op_size = LONG;
-        mmio_op->operand[0] = mk_operand(*op_size, index, 0, REGISTER);
-        mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
-        return DECODE_success;
-
-    case 0xBA:
-        if ( ((opcode[1] >> 3) & 7) == 4 ) /* BT $imm8, m16/32/64 */
-        {
-            mmio_op->instr = INSTR_BT;
-            GET_OP_SIZE_FOR_NONEBYTE(*op_size);
-            mmio_op->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
-            mmio_op->immediate =
-                    (signed char)get_immediate(*ad_size, opcode + 1, BYTE);
-            mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
-            return DECODE_success;
-        }
-        else
-        {
-            printk("0f %x, This opcode subtype isn't handled yet\n", *opcode);
-            return DECODE_failure;
-        }
-
-    default:
-        printk("0f %x, This opcode isn't handled yet\n", *opcode);
-        return DECODE_failure;
-    }
-}
+#include <asm/hvm/emulate.h>
 
 int inst_copy_from_guest(
     unsigned char *buf, unsigned long guest_eip, int inst_len)
@@ -984,323 +192,41 @@ void send_invalidate_req(void)
     hvm_send_assist_req(v);
 }
 
-static void mmio_operands(int type, unsigned long gpa,
-                          struct hvm_io_op *mmio_op,
-                          unsigned char op_size)
-{
-    unsigned long value = 0;
-    int df, index, size_reg;
-    struct cpu_user_regs *regs = &mmio_op->io_context;
-
-    df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
-
-    size_reg = operand_size(mmio_op->operand[0]);
-
-    if ( mmio_op->operand[0] & REGISTER ) {            /* dest is memory */
-        index = operand_index(mmio_op->operand[0]);
-        value = get_reg_value(size_reg, index, 0, regs);
-        send_mmio_req(type, gpa, 1, op_size, value, IOREQ_WRITE, df, 0);
-    } else if ( mmio_op->operand[0] & IMMEDIATE ) {    /* dest is memory */
-        value = mmio_op->immediate;
-        send_mmio_req(type, gpa, 1, op_size, value, IOREQ_WRITE, df, 0);
-    } else if ( mmio_op->operand[0] & MEMORY ) {       /* dest is register */
-        /* send the request and wait for the value */
-        if ( (mmio_op->instr == INSTR_MOVZX) ||
-             (mmio_op->instr == INSTR_MOVSX) )
-            send_mmio_req(type, gpa, 1, size_reg, 0, IOREQ_READ, df, 0);
-        else
-            send_mmio_req(type, gpa, 1, op_size, 0, IOREQ_READ, df, 0);
-    } else {
-        printk("%s: invalid dest mode.\n", __func__);
-        domain_crash_synchronous();
-    }
-}
-
-#define GET_REPEAT_COUNT() \
-     (mmio_op->flags & REPZ ? (ad_size == WORD ? regs->ecx & 0xFFFF : regs->ecx) : 1)
-
-
-void handle_mmio(paddr_t gpa)
-{
-    unsigned long inst_addr;
-    struct hvm_io_op *mmio_op;
-    struct cpu_user_regs *regs;
-    unsigned char inst[MAX_INST_LEN], ad_size, op_size, seg_sel;
-    int i, address_bytes, df, inst_len;
-    struct vcpu *v = current;
-
-    mmio_op = &v->arch.hvm_vcpu.io_op;
-    regs = &mmio_op->io_context;
-
-    /* Copy current guest state into io instruction state structure. */
-    memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
-
-    df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
-
-    address_bytes = hvm_guest_x86_mode(v);
-    if (address_bytes < 2)
-        /* real or vm86 modes */
-        address_bytes = 2;
-    inst_addr = hvm_get_segment_base(v, x86_seg_cs) + regs->eip;
-    memset(inst, 0, MAX_INST_LEN);
-    inst_len = hvm_instruction_fetch(inst_addr, address_bytes, inst);
-    if ( inst_len <= 0 )
-    {
-        gdprintk(XENLOG_DEBUG, "handle_mmio: failed to get instruction\n");
-        /* hvm_instruction_fetch() will have injected a #PF; get out now */
-        return;
-    }
-
-    if ( mmio_decode(address_bytes, inst, mmio_op, &ad_size,
-                     &op_size, &seg_sel) == DECODE_failure )
-    {
+int handle_mmio(void)
+{
+    struct hvm_emulate_ctxt ctxt;
+    struct vcpu *curr = current;
+    int rc;
+
+    hvm_emulate_prepare(&ctxt, guest_cpu_user_regs());
+
+    rc = hvm_emulate_one(&ctxt);
+
+    switch ( rc )
+    {
+    case X86EMUL_UNHANDLEABLE:
         gdprintk(XENLOG_WARNING,
-                 "handle_mmio: failed to decode instruction\n");
-        gdprintk(XENLOG_WARNING,
-                 "mmio opcode: gpa 0x%"PRIpaddr", len %d:", gpa, inst_len);
-        for ( i = 0; i < inst_len; i++ )
-            printk(" %02x", inst[i] & 0xFF);
-        printk("\n");
-
-        hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
-        return;
-    }
-
-    regs->eip += inst_len; /* advance %eip */
-
-    switch ( mmio_op->instr ) {
-    case INSTR_MOV:
-        mmio_operands(IOREQ_TYPE_COPY, gpa, mmio_op, op_size);
+                 "MMIO emulation failed @ %04x:%lx: "
+                 "%02x %02x %02x %02x %02x %02x\n",
+                 hvmemul_get_seg_reg(x86_seg_cs, &ctxt)->sel,
+                 ctxt.insn_buf_eip,
+                 ctxt.insn_buf[0], ctxt.insn_buf[1],
+                 ctxt.insn_buf[2], ctxt.insn_buf[3],
+                 ctxt.insn_buf[4], ctxt.insn_buf[5]);
+        return 0;
+    case X86EMUL_EXCEPTION:
+        if ( ctxt.flags.exn_pending )
+            hvm_inject_exception(ctxt.exn_vector, 0, 0);
         break;
-
-    case INSTR_MOVS:
-    {
-        struct segment_register sreg;
-        unsigned long count = GET_REPEAT_COUNT();
-        int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
-        unsigned long addr, gfn; 
-        paddr_t paddr;
-        int dir, size = op_size;
-        uint32_t pfec;
-
-        ASSERT(count);
-
-        /* determine non-MMIO address */
-        addr = regs->edi;
-        if ( ad_size == WORD )
-            addr &= 0xFFFF;
-        addr += hvm_get_segment_base(v, x86_seg_es);        
-        pfec = PFEC_page_present | PFEC_write_access;
-        hvm_get_segment_register(v, x86_seg_ss, &sreg);
-        if ( sreg.attr.fields.dpl == 3 )
-            pfec |= PFEC_user_mode;
-        gfn = paging_gva_to_gfn(v, addr, &pfec);
-        paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
-        if ( paddr == gpa )
-        {
-            enum x86_segment seg;
-
-            dir = IOREQ_WRITE;
-            addr = regs->esi;
-            if ( ad_size == WORD )
-                addr &= 0xFFFF;
-            switch ( seg_sel )
-            {
-            case 0x26: seg = x86_seg_es; break;
-            case 0x2e: seg = x86_seg_cs; break;
-            case 0x36: seg = x86_seg_ss; break;
-            case 0:
-            case 0x3e: seg = x86_seg_ds; break;
-            case 0x64: seg = x86_seg_fs; break;
-            case 0x65: seg = x86_seg_gs; break;
-            default: domain_crash_synchronous();
-            }
-            addr += hvm_get_segment_base(v, seg);
-            pfec &= ~PFEC_write_access;
-            gfn = paging_gva_to_gfn(v, addr, &pfec);
-            paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
-        }
-        else
-            dir = IOREQ_READ;
-
-        if ( gfn == INVALID_GFN ) 
-        {
-            /* The guest does not have the non-mmio address mapped. 
-             * Need to send in a page fault */
-            regs->eip -= inst_len; /* do not advance %eip */
-            hvm_inject_exception(TRAP_page_fault, pfec, addr);
-            return;
-        }
-
-        /*
-         * In case of a movs spanning multiple pages, we break the accesses
-         * up into multiple pages (the device model works with non-contiguous
-         * physical guest pages). To copy just one page, we adjust %ecx and
-         * do not advance %eip so that the next rep;movs copies the next page.
-         * Unaligned accesses, for example movsl starting at PGSZ-2, are
-         * turned into a single copy where we handle the overlapping memory
-         * copy ourselves. After this copy succeeds, "rep movs" is executed
-         * again.
-         */
-        if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) {
-            unsigned long value = 0;
-
-            mmio_op->flags |= OVERLAP;
-
-            if ( dir == IOREQ_WRITE ) {
-                if ( hvm_paging_enabled(v) )
-                {
-                    int rv = hvm_copy_from_guest_virt(&value, addr, size);
-                    if ( rv == HVMCOPY_bad_gva_to_gfn ) 
-                        return; /* exception already injected */
-                }
-                else
-                    (void)hvm_copy_from_guest_phys(&value, addr, size);
-            } else /* dir != IOREQ_WRITE */
-                /* Remember where to write the result, as a *VA*.
-                 * Must be a VA so we can handle the page overlap 
-                 * correctly in hvm_mmio_assist() */
-                mmio_op->addr = addr;
-
-            if ( count != 1 )
-                regs->eip -= inst_len; /* do not advance %eip */
-
-            send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, df, 0);
-        } else {
-            unsigned long last_addr = sign > 0 ? addr + count * size - 1
-                                               : addr - (count - 1) * size;
-
-            if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) )
-            {
-                regs->eip -= inst_len; /* do not advance %eip */
-
-                if ( sign > 0 )
-                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
-                else
-                    count = (addr & ~PAGE_MASK) / size + 1;
-            }
-
-            ASSERT(count);
-
-            send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size,
-                          paddr, dir, df, 1);
-        }
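Concretely, for the forward case of the clipping above: a rep movsl
(size 4) whose non-MMIO address sits 8 bytes short of a 4K page
boundary, with %ecx == 10, is clipped to the two dwords that still fit
on the page; %eip is not advanced, so the guest re-executes rep movs
for the remaining eight elements.

    /* Sketch: forward rep movsl, addr page offset 0xFF8, %ecx == 10. */
    unsigned long addr = 0xFF8, size = 4, count = 10;
    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;   /* count == 2 */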
+    default:
         break;
     }
 
-    case INSTR_MOVZX:
-    case INSTR_MOVSX:
-        mmio_operands(IOREQ_TYPE_COPY, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_STOS:
-        /*
-         * Since the destination is always in (contiguous) mmio space we don't
-         * need to break it up into pages.
-         */
-        send_mmio_req(IOREQ_TYPE_COPY, gpa,
-                      GET_REPEAT_COUNT(), op_size, regs->eax, IOREQ_WRITE, df, 0);
-        break;
-
-    case INSTR_LODS:
-        /*
-         * Since the source is always in (contiguous) mmio space we don't
-         * need to break it up into pages.
-         */
-        mmio_op->operand[0] = mk_operand(op_size, 0, 0, REGISTER);
-        send_mmio_req(IOREQ_TYPE_COPY, gpa,
-                      GET_REPEAT_COUNT(), op_size, 0, IOREQ_READ, df, 0);
-        break;
-
-    case INSTR_OR:
-        mmio_operands(IOREQ_TYPE_OR, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_AND:
-        mmio_operands(IOREQ_TYPE_AND, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_ADD:
-        mmio_operands(IOREQ_TYPE_ADD, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_SUB:
-        mmio_operands(IOREQ_TYPE_SUB, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_XOR:
-        mmio_operands(IOREQ_TYPE_XOR, gpa, mmio_op, op_size);
-        break;
-
-    case INSTR_PUSH:
-        if ( ad_size == WORD )
-        {
-            mmio_op->addr = (uint16_t)(regs->esp - op_size);
-            regs->esp = mmio_op->addr | (regs->esp & ~0xffff);
-        }
-        else
-        {
-            regs->esp -= op_size;
-            mmio_op->addr = regs->esp;
-        }
-        /* send the request and wait for the value */
-        send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, op_size, 0, IOREQ_READ, df, 0);
-        break;
-
-    case INSTR_CMP:        /* Pass through */
-    case INSTR_TEST:
-        /* send the request and wait for the value */
-        send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, op_size, 0, IOREQ_READ, df, 0);
-        break;
-
-    case INSTR_BT:
-    {
-        unsigned long value = 0;
-        int index, size;
-
-        if ( mmio_op->operand[0] & REGISTER )
-        {
-            index = operand_index(mmio_op->operand[0]);
-            size = operand_size(mmio_op->operand[0]);
-            value = get_reg_value(size, index, 0, regs);
-        }
-        else if ( mmio_op->operand[0] & IMMEDIATE )
-        {
-            value = mmio_op->immediate;
-        }
-        send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1,
-                      op_size, 0, IOREQ_READ, df, 0);
-        break;
-    }
-
-    case INSTR_XCHG:
-        if ( mmio_op->operand[0] & REGISTER ) {
-            long value;
-            unsigned long operand = mmio_op->operand[0];
-            value = get_reg_value(operand_size(operand),
-                                  operand_index(operand), 0,
-                                  regs);
-            /* send the request and wait for the value */
-            send_mmio_req(IOREQ_TYPE_XCHG, gpa, 1,
-                          op_size, value, IOREQ_WRITE, df, 0);
-        } else {
-            /* the destination is a register */
-            long value;
-            unsigned long operand = mmio_op->operand[1];
-            value = get_reg_value(operand_size(operand),
-                                  operand_index(operand), 0,
-                                  regs);
-            /* send the request and wait for the value */
-            send_mmio_req(IOREQ_TYPE_XCHG, gpa, 1,
-                          op_size, value, IOREQ_WRITE, df, 0);
-        }
-        break;
-
-    default:
-        printk("Unhandled MMIO instruction\n");
-        domain_crash_synchronous();
-    }
+    hvm_emulate_writeback(&ctxt);
+
+    curr->arch.hvm_vcpu.mmio_in_progress = curr->arch.hvm_vcpu.io_in_progress;
+
+    return 1;
 }
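Callers of handle_mmio() now test the return value and inject a fault
themselves on failure, rather than passing in a physical address; the
SVM nested-page-fault hunk later in this patch shows the pattern:

    if ( !handle_mmio() )
        hvm_inject_exception(TRAP_gp_fault, 0, 0);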
 
 DEFINE_PER_CPU(int, guest_handles_in_xen_space);
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/stdvga.c
--- a/xen/arch/x86/hvm/stdvga.c Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/stdvga.c Wed Feb 20 14:36:45 2008 +0000
@@ -458,33 +458,6 @@ static int mmio_move(struct hvm_hw_stdvg
     return 1;
 }
 
-static uint32_t op_and(uint32_t a, uint32_t b) { return a & b; }
-static uint32_t op_or (uint32_t a, uint32_t b) { return a | b; }
-static uint32_t op_xor(uint32_t a, uint32_t b) { return a ^ b; }
-static uint32_t op_add(uint32_t a, uint32_t b) { return a + b; }
-static uint32_t op_sub(uint32_t a, uint32_t b) { return a - b; }
-static uint32_t (*op_array[])(uint32_t, uint32_t) = {
-    [IOREQ_TYPE_AND] = op_and,
-    [IOREQ_TYPE_OR ] = op_or,
-    [IOREQ_TYPE_XOR] = op_xor,
-    [IOREQ_TYPE_ADD] = op_add,
-    [IOREQ_TYPE_SUB] = op_sub
-};
-
-static int mmio_op(struct hvm_hw_stdvga *s, ioreq_t *p)
-{
-    uint32_t orig, mod = 0;
-    orig = stdvga_mem_read(p->addr, p->size);
-
-    if ( p->dir == IOREQ_WRITE )
-    {
-        mod = (op_array[p->type])(orig, p->data);
-        stdvga_mem_write(p->addr, mod, p->size);
-    }
-
-    return 0; /* Don't try to buffer these operations */
-}
-
 int stdvga_intercept_mmio(ioreq_t *p)
 {
     struct domain *d = current->domain;
@@ -505,13 +478,6 @@ int stdvga_intercept_mmio(ioreq_t *p)
         {
         case IOREQ_TYPE_COPY:
             buf = mmio_move(s, p);
-            break;
-        case IOREQ_TYPE_AND:
-        case IOREQ_TYPE_OR:
-        case IOREQ_TYPE_XOR:
-        case IOREQ_TYPE_ADD:
-        case IOREQ_TYPE_SUB:
-            buf = mmio_op(s, p);
             break;
         default:
             gdprintk(XENLOG_WARNING, "unsupported mmio request type:%d "
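
With the read-modify-write ioreq types retired, stdvga (and any other device
model) only ever sees plain IOREQ_TYPE_COPY transactions: x86_emulate()
performs the arithmetic inside Xen and issues separate read and write
requests. Conceptually, where mmio_read()/mmio_write() are illustrative names
rather than functions from this patch:

    uint32_t old, result;

    old = mmio_read(addr, size);      /* IOREQ_TYPE_COPY, IOREQ_READ  */
    result = old & operand;           /* computed by x86_emulate()    */
    mmio_write(addr, size, result);   /* IOREQ_TYPE_COPY, IOREQ_WRITE */
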
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/svm/svm.c        Wed Feb 20 14:36:45 2008 +0000
@@ -66,6 +66,13 @@ static void svm_update_guest_efer(struct
 static void svm_update_guest_efer(struct vcpu *v);
 static void svm_inject_exception(
     unsigned int trapnr, int errcode, unsigned long cr2);
+static void svm_cpuid_intercept(
+    unsigned int *eax, unsigned int *ebx,
+    unsigned int *ecx, unsigned int *edx);
+static void svm_wbinvd_intercept(void);
+static void svm_fpu_dirty_intercept(void);
+static int svm_msr_read_intercept(struct cpu_user_regs *regs);
+static int svm_msr_write_intercept(struct cpu_user_regs *regs);
 
 /* va of hardware host save area     */
 static void *hsa[NR_CPUS] __read_mostly;
@@ -112,7 +119,7 @@ static enum handler_return long_mode_do_
     switch ( ecx )
     {
     case MSR_EFER:
-        if ( !hvm_set_efer(msr_content) )
+        if ( hvm_set_efer(msr_content) )
             return HNDL_exception_raised;
         break;
 
@@ -808,7 +815,12 @@ static struct hvm_function_table svm_fun
     .inject_exception     = svm_inject_exception,
     .init_hypercall_page  = svm_init_hypercall_page,
     .event_pending        = svm_event_pending,
-    .do_pmu_interrupt     = svm_do_pmu_interrupt
+    .do_pmu_interrupt     = svm_do_pmu_interrupt,
+    .cpuid_intercept      = svm_cpuid_intercept,
+    .wbinvd_intercept     = svm_wbinvd_intercept,
+    .fpu_dirty_intercept  = svm_fpu_dirty_intercept,
+    .msr_read_intercept   = svm_msr_read_intercept,
+    .msr_write_intercept  = svm_msr_write_intercept
 };
 
 int start_svm(struct cpuinfo_x86 *c)
@@ -873,7 +885,8 @@ static void svm_do_nested_pgfault(paddr_
     mfn = gfn_to_mfn_current(gfn, &p2mt);
     if ( p2mt == p2m_mmio_dm )
     {
-        handle_mmio(gpa);
+        if ( !handle_mmio() )
+            hvm_inject_exception(TRAP_gp_fault, 0, 0);
         return;
     }
 
@@ -882,9 +895,10 @@ static void svm_do_nested_pgfault(paddr_
     p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
 }
 
-static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
+static void svm_fpu_dirty_intercept(void)
 {
     struct vcpu *curr = current;
+    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
 
     svm_fpu_enter(curr);
 
@@ -893,72 +907,83 @@ static void svm_do_no_device_fault(struc
 }
 
 #define bitmaskof(idx)  (1U << ((idx) & 31))
-static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb,
-                                struct cpu_user_regs *regs)
-{
-    unsigned long input = regs->eax;
-    unsigned int eax, ebx, ecx, edx;
+static void svm_cpuid_intercept(
+    unsigned int *eax, unsigned int *ebx,
+    unsigned int *ecx, unsigned int *edx)
+{
+    unsigned int input = *eax;
     struct vcpu *v = current;
-    int inst_len;
-
-    hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
+
+    hvm_cpuid(input, eax, ebx, ecx, edx);
 
     switch ( input )
     {
     case 0x00000001:
         /* Mask Intel-only features. */
-        ecx &= ~(bitmaskof(X86_FEATURE_SSSE3) |
-                 bitmaskof(X86_FEATURE_SSE4_1) |
-                 bitmaskof(X86_FEATURE_SSE4_2));
+        *ecx &= ~(bitmaskof(X86_FEATURE_SSSE3) |
+                  bitmaskof(X86_FEATURE_SSE4_1) |
+                  bitmaskof(X86_FEATURE_SSE4_2));
         break;
 
     case 0x80000001:
         /* Filter features which are shared with 0x00000001:EDX. */
         if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
-            __clear_bit(X86_FEATURE_APIC & 31, &edx);
+            __clear_bit(X86_FEATURE_APIC & 31, edx);
 #if CONFIG_PAGING_LEVELS >= 3
         if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
 #endif
-            __clear_bit(X86_FEATURE_PAE & 31, &edx);
-        __clear_bit(X86_FEATURE_PSE36 & 31, &edx);
+            __clear_bit(X86_FEATURE_PAE & 31, edx);
+        __clear_bit(X86_FEATURE_PSE36 & 31, edx);
 
         /* Filter all other features according to a whitelist. */
-        ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) |
-                bitmaskof(X86_FEATURE_ALTMOVCR) |
-                bitmaskof(X86_FEATURE_ABM) |
-                bitmaskof(X86_FEATURE_SSE4A) |
-                bitmaskof(X86_FEATURE_MISALIGNSSE) |
-                bitmaskof(X86_FEATURE_3DNOWPF));
-        edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
-                bitmaskof(X86_FEATURE_NX) |
-                bitmaskof(X86_FEATURE_LM) |
-                bitmaskof(X86_FEATURE_SYSCALL) |
-                bitmaskof(X86_FEATURE_MP) |
-                bitmaskof(X86_FEATURE_MMXEXT) |
-                bitmaskof(X86_FEATURE_FFXSR));
+        *ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) |
+                 bitmaskof(X86_FEATURE_ALTMOVCR) |
+                 bitmaskof(X86_FEATURE_ABM) |
+                 bitmaskof(X86_FEATURE_SSE4A) |
+                 bitmaskof(X86_FEATURE_MISALIGNSSE) |
+                 bitmaskof(X86_FEATURE_3DNOWPF));
+        *edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
+                 bitmaskof(X86_FEATURE_NX) |
+                 bitmaskof(X86_FEATURE_LM) |
+                 bitmaskof(X86_FEATURE_SYSCALL) |
+                 bitmaskof(X86_FEATURE_MP) |
+                 bitmaskof(X86_FEATURE_MMXEXT) |
+                 bitmaskof(X86_FEATURE_FFXSR));
         break;
 
     case 0x80000007:
     case 0x8000000A:
         /* Mask out features of power management and SVM extension. */
-        eax = ebx = ecx = edx = 0;
+        *eax = *ebx = *ecx = *edx = 0;
         break;
 
     case 0x80000008:
         /* Make sure the number of CPU cores is 1 when HTT=0 */
-        ecx &= 0xFFFFFF00;
-        break;
-    }
+        *ecx &= 0xFFFFFF00;
+        break;
+    }
+
+    HVMTRACE_3D(CPUID, v, input,
+                ((uint64_t)*eax << 32) | *ebx, ((uint64_t)*ecx << 32) | *edx);
+}
+
+static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
+{
+    unsigned int eax, ebx, ecx, edx, inst_len;
+
+    eax = regs->eax;
+    ebx = regs->ebx;
+    ecx = regs->ecx;
+    edx = regs->edx;
+
+    svm_cpuid_intercept(&eax, &ebx, &ecx, &edx);
 
     regs->eax = eax;
     regs->ebx = ebx;
     regs->ecx = ecx;
     regs->edx = edx;
 
-    HVMTRACE_3D(CPUID, v, input,
-                ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
-
-    inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
+    inst_len = __get_instruction_length(current, INSTR_CPUID, NULL);
     __update_guest_eip(regs, inst_len);
 }
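
Splitting the old svm_vmexit_do_cpuid() into a register-pointer intercept plus
a thin vmexit wrapper means the same feature masking runs no matter who issues
the CPUID: the direct intercept above, or common emulator code calling through
the hvm_function_table hooks added later in this patch. A hypothetical
emulator-side call, for illustration only:

    unsigned int eax = 0x80000001, ebx = 0, ecx = 0, edx = 0;

    hvm_funcs.cpuid_intercept(&eax, &ebx, &ecx, &edx);
    /* eax..edx now hold the filtered AMD feature leaf. */
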
 
@@ -1484,11 +1509,11 @@ static int mov_to_cr(int gpreg, int cr, 
     switch ( cr )
     {
     case 0: 
-        return hvm_set_cr0(value);
+        return !hvm_set_cr0(value);
     case 3:
-        return hvm_set_cr3(value);
+        return !hvm_set_cr3(value);
     case 4:
-        return hvm_set_cr4(value);
+        return !hvm_set_cr4(value);
     default:
         gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
         domain_crash(v->domain);
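
The inverted returns in mov_to_cr() are easy to misread: hvm_set_cr0/cr3/cr4
(and hvm_set_efer) now return X86EMUL codes rather than a boolean, and the
negation only works because X86EMUL_OKAY is numerically zero. Callers that
still want nonzero-on-success therefore write:

    /* Before: nonzero meant success. After: X86EMUL_OKAY (== 0) does. */
    return !hvm_set_cr0(value);   /* 1 == handled without raising a fault */
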
@@ -1564,7 +1589,7 @@ static void svm_cr_access(
         gpreg = decode_src_reg(prefix, buffer[index+2]);
         value = get_reg(gpreg, regs, vmcb) & 0xF;
         value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value;
-        result = hvm_set_cr0(value);
+        result = !hvm_set_cr0(value);
         HVMTRACE_1D(LMSW, current, value);
         break;
 
@@ -1635,176 +1660,197 @@ static void svm_cr_access(
         __update_guest_eip(regs, inst_len);
 }
 
-static void svm_do_msr_access(
-    struct vcpu *v, struct cpu_user_regs *regs)
-{
-    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-    int  inst_len;
-    u64 msr_content=0;
+static int svm_msr_read_intercept(struct cpu_user_regs *regs)
+{
+    u64 msr_content = 0;
     u32 ecx = regs->ecx, eax, edx;
-
-    HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x, exitinfo = %lx",
-                ecx, (u32)regs->eax, (u32)regs->edx,
-                (unsigned long)vmcb->exitinfo1);
-
-    /* is it a read? */
-    if (vmcb->exitinfo1 == 0)
-    {
-        switch (ecx) {
-        case MSR_IA32_TSC:
-            msr_content = hvm_get_guest_time(v);
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    switch ( ecx )
+    {
+    case MSR_IA32_TSC:
+        msr_content = hvm_get_guest_time(v);
+        break;
+
+    case MSR_IA32_APICBASE:
+        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+        break;
+
+    case MSR_EFER:
+        msr_content = v->arch.hvm_vcpu.guest_efer;
+        break;
+
+    case MSR_IA32_MC4_MISC: /* Threshold register */
+    case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
+        /*
+         * MCA/MCE: We report that the threshold register is unavailable
+         * for OS use (locked by the BIOS).
+         */
+        msr_content = 1ULL << 61; /* MC4_MISC.Locked */
+        break;
+
+    case MSR_IA32_EBC_FREQUENCY_ID:
+        /*
+         * This Intel-only register may be accessed if this HVM guest
+         * has been migrated from an Intel host. The value zero is not
+         * particularly meaningful, but at least avoids the guest crashing!
+         */
+        msr_content = 0;
+        break;
+
+    case MSR_K8_VM_HSAVE_PA:
+        goto gpf;
+
+    case MSR_IA32_MCG_CAP:
+    case MSR_IA32_MCG_STATUS:
+    case MSR_IA32_MC0_STATUS:
+    case MSR_IA32_MC1_STATUS:
+    case MSR_IA32_MC2_STATUS:
+    case MSR_IA32_MC3_STATUS:
+    case MSR_IA32_MC4_STATUS:
+    case MSR_IA32_MC5_STATUS:
+        /* No point in letting the guest see real MCEs */
+        msr_content = 0;
+        break;
+
+    case MSR_IA32_DEBUGCTLMSR:
+        msr_content = vmcb->debugctlmsr;
+        break;
+
+    case MSR_IA32_LASTBRANCHFROMIP:
+        msr_content = vmcb->lastbranchfromip;
+        break;
+
+    case MSR_IA32_LASTBRANCHTOIP:
+        msr_content = vmcb->lastbranchtoip;
+        break;
+
+    case MSR_IA32_LASTINTFROMIP:
+        msr_content = vmcb->lastintfromip;
+        break;
+
+    case MSR_IA32_LASTINTTOIP:
+        msr_content = vmcb->lastinttoip;
+        break;
+
+    default:
+        if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
+             rdmsr_safe(ecx, eax, edx) == 0 )
+        {
+            regs->eax = eax;
+            regs->edx = edx;
+            goto done;
+        }
+        goto gpf;
+    }
+    regs->eax = msr_content & 0xFFFFFFFF;
+    regs->edx = msr_content >> 32;
+
+ done:
+    hvmtrace_msr_read(v, ecx, msr_content);
+    HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
+                ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
+    return X86EMUL_OKAY;
+
+ gpf:
+    svm_inject_exception(TRAP_gp_fault, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+static int svm_msr_write_intercept(struct cpu_user_regs *regs)
+{
+    u64 msr_content = 0;
+    u32 ecx = regs->ecx;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
+
+    hvmtrace_msr_write(v, ecx, msr_content);
+
+    switch ( ecx )
+    {
+    case MSR_IA32_TSC:
+        hvm_set_guest_time(v, msr_content);
+        pt_reset(v);
+        break;
+
+    case MSR_IA32_APICBASE:
+        vlapic_msr_set(vcpu_vlapic(v), msr_content);
+        break;
+
+    case MSR_K8_VM_HSAVE_PA:
+        goto gpf;
+
+    case MSR_IA32_DEBUGCTLMSR:
+        vmcb->debugctlmsr = msr_content;
+        if ( !msr_content || !cpu_has_svm_lbrv )
             break;
-
-        case MSR_IA32_APICBASE:
-            msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+        vmcb->lbr_control.fields.enable = 1;
+        svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
+        svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
+        svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
+        svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
+        svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
+        break;
+
+    case MSR_IA32_LASTBRANCHFROMIP:
+        vmcb->lastbranchfromip = msr_content;
+        break;
+
+    case MSR_IA32_LASTBRANCHTOIP:
+        vmcb->lastbranchtoip = msr_content;
+        break;
+
+    case MSR_IA32_LASTINTFROMIP:
+        vmcb->lastintfromip = msr_content;
+        break;
+
+    case MSR_IA32_LASTINTTOIP:
+        vmcb->lastinttoip = msr_content;
+        break;
+
+    default:
+        switch ( long_mode_do_msr_write(regs) )
+        {
+        case HNDL_unhandled:
+            wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
             break;
-
-        case MSR_EFER:
-            msr_content = v->arch.hvm_vcpu.guest_efer;
+        case HNDL_exception_raised:
+            return X86EMUL_EXCEPTION;
+        case HNDL_done:
             break;
-
-        case MSR_IA32_MC4_MISC: /* Threshold register */
-        case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
-            /*
-             * MCA/MCE: We report that the threshold register is unavailable
-             * for OS use (locked by the BIOS).
-             */
-            msr_content = 1ULL << 61; /* MC4_MISC.Locked */
-            break;
-
-        case MSR_IA32_EBC_FREQUENCY_ID:
-            /*
-             * This Intel-only register may be accessed if this HVM guest
-             * has been migrated from an Intel host. The value zero is not
-             * particularly meaningful, but at least avoids the guest crashing!
-             */
-            msr_content = 0;
-            break;
-
-        case MSR_K8_VM_HSAVE_PA:
-            svm_inject_exception(TRAP_gp_fault, 0, 0);
-            break;
-
-        case MSR_IA32_MCG_CAP:
-        case MSR_IA32_MCG_STATUS:
-        case MSR_IA32_MC0_STATUS:
-        case MSR_IA32_MC1_STATUS:
-        case MSR_IA32_MC2_STATUS:
-        case MSR_IA32_MC3_STATUS:
-        case MSR_IA32_MC4_STATUS:
-        case MSR_IA32_MC5_STATUS:
-            /* No point in letting the guest see real MCEs */
-            msr_content = 0;
-            break;
-
-        case MSR_IA32_DEBUGCTLMSR:
-            msr_content = vmcb->debugctlmsr;
-            break;
-
-        case MSR_IA32_LASTBRANCHFROMIP:
-            msr_content = vmcb->lastbranchfromip;
-            break;
-
-        case MSR_IA32_LASTBRANCHTOIP:
-            msr_content = vmcb->lastbranchtoip;
-            break;
-
-        case MSR_IA32_LASTINTFROMIP:
-            msr_content = vmcb->lastintfromip;
-            break;
-
-        case MSR_IA32_LASTINTTOIP:
-            msr_content = vmcb->lastinttoip;
-            break;
-
-        default:
-            if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
-                 rdmsr_safe(ecx, eax, edx) == 0 )
-            {
-                regs->eax = eax;
-                regs->edx = edx;
-                goto done;
-            }
-            svm_inject_exception(TRAP_gp_fault, 0, 0);
-            return;
-        }
-        regs->eax = msr_content & 0xFFFFFFFF;
-        regs->edx = msr_content >> 32;
-
- done:
-        hvmtrace_msr_read(v, ecx, msr_content);
-        HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
-                    ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
-
+        }
+        break;
+    }
+
+    return X86EMUL_OKAY;
+
+ gpf:
+    svm_inject_exception(TRAP_gp_fault, 0, 0);
+    return X86EMUL_EXCEPTION;
+}
+
+static void svm_do_msr_access(struct cpu_user_regs *regs)
+{
+    int rc, inst_len;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    if ( vmcb->exitinfo1 == 0 )
+    {
+        rc = svm_msr_read_intercept(regs);
         inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
     }
     else
     {
-        msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
-
-        hvmtrace_msr_write(v, ecx, msr_content);
-
-        switch (ecx)
-        {
-        case MSR_IA32_TSC:
-            hvm_set_guest_time(v, msr_content);
-            pt_reset(v);
-            break;
-
-        case MSR_IA32_APICBASE:
-            vlapic_msr_set(vcpu_vlapic(v), msr_content);
-            break;
-
-        case MSR_K8_VM_HSAVE_PA:
-            svm_inject_exception(TRAP_gp_fault, 0, 0);
-            break;
-
-        case MSR_IA32_DEBUGCTLMSR:
-            vmcb->debugctlmsr = msr_content;
-            if ( !msr_content || !cpu_has_svm_lbrv )
-                break;
-            vmcb->lbr_control.fields.enable = 1;
-            svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
-            svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
-            svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
-            svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
-            svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
-            break;
-
-        case MSR_IA32_LASTBRANCHFROMIP:
-            vmcb->lastbranchfromip = msr_content;
-            break;
-
-        case MSR_IA32_LASTBRANCHTOIP:
-            vmcb->lastbranchtoip = msr_content;
-            break;
-
-        case MSR_IA32_LASTINTFROMIP:
-            vmcb->lastintfromip = msr_content;
-            break;
-
-        case MSR_IA32_LASTINTTOIP:
-            vmcb->lastinttoip = msr_content;
-            break;
-
-        default:
-            switch ( long_mode_do_msr_write(regs) )
-            {
-            case HNDL_unhandled:
-                wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
-                break;
-            case HNDL_exception_raised:
-                return;
-            case HNDL_done:
-                break;
-            }
-            break;
-        }
-
+        rc = svm_msr_write_intercept(regs);
         inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
     }
 
-    __update_guest_eip(regs, inst_len);
+    if ( rc == X86EMUL_OKAY )
+        __update_guest_eip(regs, inst_len);
 }
 
 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
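
Note that the rewritten svm_do_msr_access() advances rIP only on X86EMUL_OKAY.
On X86EMUL_EXCEPTION the #GP has already been queued by
svm_msr_read_intercept()/svm_msr_write_intercept(), and x86 fault semantics
require delivery with rIP still pointing at the faulting RDMSR/WRMSR:

    if ( rc == X86EMUL_OKAY )
        __update_guest_eip(regs, inst_len);  /* retire the instruction */
    /* else: fault already queued; leave rIP on the trapping instruction */
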
@@ -1830,21 +1876,26 @@ static void svm_vmexit_do_hlt(struct vmc
     hvm_hlt(regs->eflags);
 }
 
+static void wbinvd_ipi(void *info)
+{
+    wbinvd();
+}
+
+static void svm_wbinvd_intercept(void)
+{
+    if ( !list_empty(&(domain_hvm_iommu(current->domain)->pdev_list)) )
+        on_each_cpu(wbinvd_ipi, NULL, 1, 1);
+}
+
 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs)
 {
     enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD };
-    struct vcpu *curr = current;
-    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
     int inst_len;
 
-    if ( !list_empty(&(domain_hvm_iommu(curr->domain)->pdev_list)) )
-    {
-        vmcb->general2_intercepts &= ~GENERAL2_INTERCEPT_WBINVD;
-        wbinvd();
-    }
+    svm_wbinvd_intercept();
 
     inst_len = __get_instruction_length_from_list(
-        curr, list, ARRAY_SIZE(list), NULL, NULL);
+        current, list, ARRAY_SIZE(list), NULL, NULL);
     __update_guest_eip(regs, inst_len);
 }
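
svm_wbinvd_intercept() performs a real, machine-wide writeback only when the
domain has a passthrough device on its pdev_list: without device DMA there is
no agent that can observe stale cache lines, so the guest's WBINVD can safely
be dropped. The IPI broadcast is needed because the vCPU may have dirtied
lines on any host CPU it previously ran on. Restated:

    /* Flush for real only if non-coherent DMA could observe the caches. */
    if ( !list_empty(&(domain_hvm_iommu(current->domain)->pdev_list)) )
        on_each_cpu(wbinvd_ipi, NULL, 1, 1);

The VMX side (vmx_wbinvd_intercept(), later in this patch) applies the same
test with the condition inverted, returning early when pdev_list is empty.
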
 
@@ -1982,7 +2033,7 @@ asmlinkage void svm_vmexit_handler(struc
         break;
 
     case VMEXIT_EXCEPTION_NM:
-        svm_do_no_device_fault(vmcb);
+        svm_fpu_dirty_intercept();
         break;  
 
     case VMEXIT_EXCEPTION_PF: {
@@ -2036,7 +2087,7 @@ asmlinkage void svm_vmexit_handler(struc
     }
 
     case VMEXIT_CPUID:
-        svm_vmexit_do_cpuid(vmcb, regs);
+        svm_vmexit_do_cpuid(regs);
         break;
 
     case VMEXIT_HLT:
@@ -2083,7 +2134,7 @@ asmlinkage void svm_vmexit_handler(struc
         break;
 
     case VMEXIT_MSR:
-        svm_do_msr_access(v, regs);
+        svm_do_msr_access(regs);
         break;
 
     case VMEXIT_SHUTDOWN:
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/vmx/realmode.c
--- a/xen/arch/x86/hvm/vmx/realmode.c   Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/vmx/realmode.c   Wed Feb 20 14:36:45 2008 +0000
@@ -3,7 +3,7 @@
  * 
  * Real-mode emulation for VMX.
  * 
- * Copyright (c) 2007 Citrix Systems, Inc.
+ * Copyright (c) 2007-2008 Citrix Systems, Inc.
  * 
  * Authors:
  *    Keir Fraser <keir.fraser@xxxxxxxxxx>
@@ -15,33 +15,14 @@
 #include <xen/sched.h>
 #include <xen/paging.h>
 #include <asm/event.h>
+#include <asm/hvm/emulate.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/vmx/vmcs.h>
-#include <asm/x86_emulate.h>
 
 struct realmode_emulate_ctxt {
-    struct x86_emulate_ctxt ctxt;
-
-    /* Cache of 16 bytes of instruction. */
-    uint8_t insn_buf[16];
-    unsigned long insn_buf_eip;
-
-    struct segment_register seg_reg[10];
-
-    union {
-        struct {
-            unsigned int hlt:1;
-            unsigned int mov_ss:1;
-            unsigned int sti:1;
-        } flags;
-        unsigned int flag_word;
-    };
-
-    uint8_t exn_vector;
-    uint8_t exn_insn_len;
-
+    struct hvm_emulate_ctxt hvm;
     uint32_t intr_shadow;
 };
 
@@ -50,12 +31,15 @@ static void realmode_deliver_exception(
     unsigned int insn_len,
     struct realmode_emulate_ctxt *rm_ctxt)
 {
-    struct segment_register *idtr = &rm_ctxt->seg_reg[x86_seg_idtr];
-    struct segment_register *csr = &rm_ctxt->seg_reg[x86_seg_cs];
-    struct cpu_user_regs *regs = rm_ctxt->ctxt.regs;
+    struct segment_register *idtr, *csr;
+    struct cpu_user_regs *regs = rm_ctxt->hvm.ctxt.regs;
     uint32_t cs_eip, pstk;
     uint16_t frame[3];
     unsigned int last_byte;
+
+    idtr = hvmemul_get_seg_reg(x86_seg_idtr, &rm_ctxt->hvm);
+    csr  = hvmemul_get_seg_reg(x86_seg_cs,   &rm_ctxt->hvm);
+    __set_bit(x86_seg_cs, &rm_ctxt->hvm.seg_reg_dirty);
 
  again:
     last_byte = (vector * 4) + 3;
@@ -90,7 +74,7 @@ static void realmode_deliver_exception(
     frame[1] = csr->sel;
     frame[2] = regs->eflags & ~X86_EFLAGS_RF;
 
-    if ( rm_ctxt->ctxt.addr_size == 32 )
+    if ( rm_ctxt->hvm.ctxt.addr_size == 32 )
     {
         regs->esp -= 6;
         pstk = regs->esp;
@@ -102,7 +86,7 @@ static void realmode_deliver_exception(
         regs->esp |= pstk;
     }
 
-    pstk += rm_ctxt->seg_reg[x86_seg_ss].base;
+    pstk += hvmemul_get_seg_reg(x86_seg_ss, &rm_ctxt->hvm)->base;
     (void)hvm_copy_to_guest_phys(pstk, frame, sizeof(frame));
 
     csr->sel  = cs_eip >> 16;
@@ -118,597 +102,34 @@ static void realmode_deliver_exception(
     }
 }
 
-static uint32_t virtual_to_linear(
-    enum x86_segment seg,
-    uint32_t offset,
-    struct realmode_emulate_ctxt *rm_ctxt)
-{
-    uint32_t addr = offset;
-    if ( seg == x86_seg_none )
-        return addr;
-    ASSERT(is_x86_user_segment(seg));
-    return addr + rm_ctxt->seg_reg[seg].base;
-}
-
-static int
-realmode_read(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long *val,
-    unsigned int bytes,
-    enum hvm_access_type access_type,
-    struct realmode_emulate_ctxt *rm_ctxt)
-{
-    uint32_t addr = virtual_to_linear(seg, offset, rm_ctxt);
-
-    *val = 0;
-
-    if ( hvm_copy_from_guest_virt_nofault(val, addr, bytes) )
-    {
-        struct vcpu *curr = current;
-
-        if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-            return X86EMUL_UNHANDLEABLE;
-
-        if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-            return X86EMUL_UNHANDLEABLE;
-
-        if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-        {
-            curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-            send_mmio_req(IOREQ_TYPE_COPY, addr, 1, bytes,
-                          0, IOREQ_READ, 0, 0);
-        }
-
-        if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-            return X86EMUL_RETRY;
-
-        *val = curr->arch.hvm_vmx.real_mode_io_data;
-        curr->arch.hvm_vmx.real_mode_io_completed = 0;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_emulate_read(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long *val,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    return realmode_read(
-        seg, offset, val, bytes, hvm_access_read,
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt));
-}
-
-static int
-realmode_emulate_insn_fetch(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long *val,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    unsigned int insn_off = offset - rm_ctxt->insn_buf_eip;
-
-    /* Fall back if requested bytes are not in the prefetch cache. */
-    if ( unlikely((insn_off + bytes) > sizeof(rm_ctxt->insn_buf)) )
-        return realmode_read(
-            seg, offset, val, bytes,
-            hvm_access_insn_fetch, rm_ctxt);
-
-    /* Hit the cache. Simple memcpy. */
-    *val = 0;
-    memcpy(val, &rm_ctxt->insn_buf[insn_off], bytes);
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_emulate_write(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long val,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    uint32_t addr = virtual_to_linear(seg, offset, rm_ctxt);
-
-    if ( hvm_copy_to_guest_virt_nofault(addr, &val, bytes) )
-    {
-        struct vcpu *curr = current;
-
-        if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-            return X86EMUL_UNHANDLEABLE;
-
-        if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-            return X86EMUL_UNHANDLEABLE;
-
-        curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-        send_mmio_req(IOREQ_TYPE_COPY, addr, 1, bytes,
-                      val, IOREQ_WRITE, 0, 0);
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int 
-realmode_emulate_cmpxchg(
-    enum x86_segment seg,
-    unsigned long offset,
-    unsigned long old,
-    unsigned long new,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    /* Fix this in case the guest is really relying on r-m-w atomicity. */
-    return realmode_emulate_write(seg, offset, new, bytes, ctxt);
-}
-
-static int 
-realmode_rep_ins(
-    uint16_t src_port,
-    enum x86_segment dst_seg,
-    unsigned long dst_offset,
-    unsigned int bytes_per_rep,
-    unsigned long *reps,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
+static void realmode_emulate_one(struct realmode_emulate_ctxt *rm_ctxt)
+{
+    struct cpu_user_regs *regs = rm_ctxt->hvm.ctxt.regs;
     struct vcpu *curr = current;
-    uint32_t paddr = virtual_to_linear(dst_seg, dst_offset, rm_ctxt);
-
-    if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-        return X86EMUL_UNHANDLEABLE;
-
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return X86EMUL_UNHANDLEABLE;
-
-    if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-    {
-        curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-        send_pio_req(src_port, *reps, bytes_per_rep,
-                     paddr, IOREQ_READ,
-                     !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
-    }
-
-    if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-        return X86EMUL_RETRY;
-
-    curr->arch.hvm_vmx.real_mode_io_completed = 0;
-
-    return X86EMUL_OKAY;
-}
-
-static int 
-realmode_rep_outs(
-    enum x86_segment src_seg,
-    unsigned long src_offset,
-    uint16_t dst_port,
-    unsigned int bytes_per_rep,
-    unsigned long *reps,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    struct vcpu *curr = current;
-    uint32_t paddr = virtual_to_linear(src_seg, src_offset, rm_ctxt);
-
-    if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-        return X86EMUL_UNHANDLEABLE;
-
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return X86EMUL_UNHANDLEABLE;
-
-    curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-    send_pio_req(dst_port, *reps, bytes_per_rep,
-                 paddr, IOREQ_WRITE,
-                 !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
-
-    return X86EMUL_OKAY;
-}
-
-static int 
-realmode_rep_movs(
-   enum x86_segment src_seg,
-   unsigned long src_offset,
-   enum x86_segment dst_seg,
-   unsigned long dst_offset,
-   unsigned int bytes_per_rep,
-   unsigned long *reps,
-   struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    struct vcpu *curr = current;
-    uint32_t saddr = virtual_to_linear(src_seg, src_offset, rm_ctxt);
-    uint32_t daddr = virtual_to_linear(dst_seg, dst_offset, rm_ctxt);
-    p2m_type_t p2mt;
-
-    if ( (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
-         curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return X86EMUL_UNHANDLEABLE;
-
-    mfn_x(gfn_to_mfn_current(saddr >> PAGE_SHIFT, &p2mt));
-    if ( !p2m_is_ram(p2mt) )
-    {
-        if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-        {
-            curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-            send_mmio_req(IOREQ_TYPE_COPY, saddr, *reps, bytes_per_rep,
-                      daddr, IOREQ_READ,
-                      !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
-        }
-
-        if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-            return X86EMUL_RETRY;
-
-        curr->arch.hvm_vmx.real_mode_io_completed = 0;
-    }
-    else
-    {
-        mfn_x(gfn_to_mfn_current(daddr >> PAGE_SHIFT, &p2mt));
-        if ( p2m_is_ram(p2mt) )
-            return X86EMUL_UNHANDLEABLE;
-        curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-        send_mmio_req(IOREQ_TYPE_COPY, daddr, *reps, bytes_per_rep,
-                      saddr, IOREQ_WRITE,
-                      !!(ctxt->regs->eflags & X86_EFLAGS_DF), 1);
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_read_segment(
-    enum x86_segment seg,
-    struct segment_register *reg,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    memcpy(reg, &rm_ctxt->seg_reg[seg], sizeof(struct segment_register));
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_write_segment(
-    enum x86_segment seg,
-    struct segment_register *reg,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    struct vcpu *curr = current;
-
-    if ( seg == x86_seg_cs )
-    {
-        if ( reg->attr.fields.dpl != 0 )
-            return X86EMUL_UNHANDLEABLE;
+    unsigned long seg_reg_dirty;
+    uint32_t new_intr_shadow, intr_info;
+    int rc;
+
+    seg_reg_dirty = rm_ctxt->hvm.seg_reg_dirty;
+    rm_ctxt->hvm.seg_reg_dirty = 0;
+
+    rc = hvm_emulate_one(&rm_ctxt->hvm);
+
+    if ( test_bit(x86_seg_cs, &rm_ctxt->hvm.seg_reg_dirty) )
+    {
         curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_CS;
-        if ( reg->sel & 3 )
+        if ( hvmemul_get_seg_reg(x86_seg_cs, &rm_ctxt->hvm)->sel & 3 )
             curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_CS;
     }
 
-    if ( seg == x86_seg_ss )
-    {
-        if ( reg->attr.fields.dpl != 0 )
-            return X86EMUL_UNHANDLEABLE;
+    if ( test_bit(x86_seg_ss, &rm_ctxt->hvm.seg_reg_dirty) )
+    {
         curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_SS;
-        if ( reg->sel & 3 )
+        if ( hvmemul_get_seg_reg(x86_seg_ss, &rm_ctxt->hvm)->sel & 3 )
             curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_SS;
-        rm_ctxt->flags.mov_ss = 1;
-    }
-
-    memcpy(&rm_ctxt->seg_reg[seg], reg, sizeof(struct segment_register));
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_read_io(
-    unsigned int port,
-    unsigned int bytes,
-    unsigned long *val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct vcpu *curr = current;
-
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return X86EMUL_UNHANDLEABLE;
-
-    if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-    {
-        curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-        send_pio_req(port, 1, bytes, 0, IOREQ_READ, 0, 0);
-    }
-
-    if ( !curr->arch.hvm_vmx.real_mode_io_completed )
-        return X86EMUL_RETRY;
-
-    *val = curr->arch.hvm_vmx.real_mode_io_data;
-    curr->arch.hvm_vmx.real_mode_io_completed = 0;
-
-    return X86EMUL_OKAY;
-}
-
-static int realmode_write_io(
-    unsigned int port,
-    unsigned int bytes,
-    unsigned long val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct vcpu *curr = current;
-
-    if ( port == 0xe9 )
-    {
-        hvm_print_line(curr, val);
-        return X86EMUL_OKAY;
-    }
-
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return X86EMUL_UNHANDLEABLE;
-
-    curr->arch.hvm_vmx.real_mode_io_in_progress = 1;
-    send_pio_req(port, 1, bytes, val, IOREQ_WRITE, 0, 0);
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_read_cr(
-    unsigned int reg,
-    unsigned long *val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    switch ( reg )
-    {
-    case 0:
-    case 2:
-    case 3:
-    case 4:
-        *val = current->arch.hvm_vcpu.guest_cr[reg];
-        break;
-    default:
-        return X86EMUL_UNHANDLEABLE;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_write_cr(
-    unsigned int reg,
-    unsigned long val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    switch ( reg )
-    {
-    case 0:
-        if ( !hvm_set_cr0(val) )
-            return X86EMUL_UNHANDLEABLE;
-        break;
-    case 2:
-        current->arch.hvm_vcpu.guest_cr[2] = val;
-        break;
-    case 3:
-        if ( !hvm_set_cr3(val) )
-            return X86EMUL_UNHANDLEABLE;
-        break;
-    case 4:
-        if ( !hvm_set_cr4(val) )
-            return X86EMUL_UNHANDLEABLE;
-        break;
-    default:
-        return X86EMUL_UNHANDLEABLE;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_read_msr(
-    unsigned long reg,
-    uint64_t *val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct cpu_user_regs _regs;
-
-    _regs.ecx = (uint32_t)reg;
-
-    if ( !vmx_msr_read_intercept(&_regs) )
-    {
-        struct realmode_emulate_ctxt *rm_ctxt =
-            container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-        rm_ctxt->exn_vector = (uint8_t)__vmread(VM_ENTRY_INTR_INFO);
-        rm_ctxt->exn_insn_len = 0;
-        __vmwrite(VM_ENTRY_INTR_INFO, 0);
-        return X86EMUL_EXCEPTION;
-    }
-
-    *val = ((uint64_t)(uint32_t)_regs.edx << 32) || (uint32_t)_regs.eax;
-    return X86EMUL_OKAY;
-}
-
-static int
-realmode_write_msr(
-    unsigned long reg,
-    uint64_t val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct cpu_user_regs _regs;
-
-    _regs.edx = (uint32_t)(val >> 32);
-    _regs.eax = (uint32_t)val;
-    _regs.ecx = (uint32_t)reg;
-
-    if ( !vmx_msr_write_intercept(&_regs) )
-    {
-        struct realmode_emulate_ctxt *rm_ctxt =
-            container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-        rm_ctxt->exn_vector = (uint8_t)__vmread(VM_ENTRY_INTR_INFO);
-        rm_ctxt->exn_insn_len = 0;
-        __vmwrite(VM_ENTRY_INTR_INFO, 0);
-        return X86EMUL_EXCEPTION;
-    }
-
-    return X86EMUL_OKAY;
-}
-
-static int realmode_write_rflags(
-    unsigned long val,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    if ( (val & X86_EFLAGS_IF) && !(ctxt->regs->eflags & X86_EFLAGS_IF) )
-        rm_ctxt->flags.sti = 1;
-    return X86EMUL_OKAY;
-}
-
-static int realmode_wbinvd(
-    struct x86_emulate_ctxt *ctxt)
-{
-    vmx_wbinvd_intercept();
-    return X86EMUL_OKAY;
-}
-
-static int realmode_cpuid(
-    unsigned int *eax,
-    unsigned int *ebx,
-    unsigned int *ecx,
-    unsigned int *edx,
-    struct x86_emulate_ctxt *ctxt)
-{
-    vmx_cpuid_intercept(eax, ebx, ecx, edx);
-    return X86EMUL_OKAY;
-}
-
-static int realmode_hlt(
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-    rm_ctxt->flags.hlt = 1;
-    return X86EMUL_OKAY;
-}
-
-static int realmode_inject_hw_exception(
-    uint8_t vector,
-    uint16_t error_code,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-
-    /* We don't emulate protected-mode exception delivery. */
-    if ( current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-        return X86EMUL_UNHANDLEABLE;
-
-    if ( error_code != 0 )
-        return X86EMUL_UNHANDLEABLE;
-
-    rm_ctxt->exn_vector = vector;
-    rm_ctxt->exn_insn_len = 0;
-
-    return X86EMUL_OKAY;
-}
-
-static int realmode_inject_sw_interrupt(
-    uint8_t vector,
-    uint8_t insn_len,
-    struct x86_emulate_ctxt *ctxt)
-{
-    struct realmode_emulate_ctxt *rm_ctxt =
-        container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
-
-    /* We don't emulate protected-mode exception delivery. */
-    if ( current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
-        return X86EMUL_UNHANDLEABLE;
-
-    rm_ctxt->exn_vector = vector;
-    rm_ctxt->exn_insn_len = insn_len;
-
-    return X86EMUL_OKAY;
-}
-
-static void realmode_load_fpu_ctxt(
-    struct x86_emulate_ctxt *ctxt)
-{
-    if ( !current->fpu_dirtied )
-        vmx_do_no_device_fault();
-}
-
-static struct x86_emulate_ops realmode_emulator_ops = {
-    .read          = realmode_emulate_read,
-    .insn_fetch    = realmode_emulate_insn_fetch,
-    .write         = realmode_emulate_write,
-    .cmpxchg       = realmode_emulate_cmpxchg,
-    .rep_ins       = realmode_rep_ins,
-    .rep_outs      = realmode_rep_outs,
-    .rep_movs      = realmode_rep_movs,
-    .read_segment  = realmode_read_segment,
-    .write_segment = realmode_write_segment,
-    .read_io       = realmode_read_io,
-    .write_io      = realmode_write_io,
-    .read_cr       = realmode_read_cr,
-    .write_cr      = realmode_write_cr,
-    .read_msr      = realmode_read_msr,
-    .write_msr     = realmode_write_msr,
-    .write_rflags  = realmode_write_rflags,
-    .wbinvd        = realmode_wbinvd,
-    .cpuid         = realmode_cpuid,
-    .hlt           = realmode_hlt,
-    .inject_hw_exception = realmode_inject_hw_exception,
-    .inject_sw_interrupt = realmode_inject_sw_interrupt,
-    .load_fpu_ctxt = realmode_load_fpu_ctxt
-};
-
-static void realmode_emulate_one(struct realmode_emulate_ctxt *rm_ctxt)
-{
-    struct cpu_user_regs *regs = rm_ctxt->ctxt.regs;
-    struct vcpu *curr = current;
-    u32 new_intr_shadow;
-    int rc, io_completed;
-    unsigned long addr;
-
-    rm_ctxt->ctxt.addr_size =
-        rm_ctxt->seg_reg[x86_seg_cs].attr.fields.db ? 32 : 16;
-    rm_ctxt->ctxt.sp_size =
-        rm_ctxt->seg_reg[x86_seg_ss].attr.fields.db ? 32 : 16;
-
-    rm_ctxt->insn_buf_eip = (uint32_t)regs->eip;
-    addr = virtual_to_linear(x86_seg_cs, regs->eip, rm_ctxt);
-    if ( hvm_fetch_from_guest_virt_nofault(rm_ctxt->insn_buf, addr,
-                                           sizeof(rm_ctxt->insn_buf))
-         != HVMCOPY_okay )
-    {
-        gdprintk(XENLOG_ERR, "Failed to pre-fetch instruction bytes.\n");
-        goto fail;
-    }
-
-    rm_ctxt->flag_word = 0;
-
-    io_completed = curr->arch.hvm_vmx.real_mode_io_completed;
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
-    {
-        gdprintk(XENLOG_ERR, "I/O in progress before insn is emulated.\n");
-        goto fail;
-    }
-
-    rc = x86_emulate(&rm_ctxt->ctxt, &realmode_emulator_ops);
-
-    if ( curr->arch.hvm_vmx.real_mode_io_completed )
-    {
-        gdprintk(XENLOG_ERR, "I/O completion after insn is emulated.\n");
-        goto fail;
-    }
+    }
+
+    rm_ctxt->hvm.seg_reg_dirty |= seg_reg_dirty;
 
     if ( rc == X86EMUL_UNHANDLEABLE )
     {
@@ -717,31 +138,18 @@ static void realmode_emulate_one(struct 
     }
 
     if ( rc == X86EMUL_RETRY )
-    {
-        BUG_ON(!curr->arch.hvm_vmx.real_mode_io_in_progress);
-        if ( !io_completed )
-            return;
-        gdprintk(XENLOG_ERR, "Multiple I/O reads in a single insn.\n");
-        goto fail;
-    }
-
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress &&
-         (get_ioreq(curr)->vp_ioreq.dir == IOREQ_READ) )
-    {
-        gdprintk(XENLOG_ERR, "I/O read in progress but insn is retired.\n");
-        goto fail;
-    }
+        return;
 
     new_intr_shadow = rm_ctxt->intr_shadow;
 
     /* MOV-SS instruction toggles MOV-SS shadow, else we just clear it. */
-    if ( rm_ctxt->flags.mov_ss )
+    if ( rm_ctxt->hvm.flags.mov_ss )
         new_intr_shadow ^= VMX_INTR_SHADOW_MOV_SS;
     else
         new_intr_shadow &= ~VMX_INTR_SHADOW_MOV_SS;
 
     /* STI instruction toggles STI shadow, else we just clear it. */
-    if ( rm_ctxt->flags.sti )
+    if ( rm_ctxt->hvm.flags.sti )
         new_intr_shadow ^= VMX_INTR_SHADOW_STI;
     else
         new_intr_shadow &= ~VMX_INTR_SHADOW_STI;
@@ -755,10 +163,30 @@ static void realmode_emulate_one(struct 
 
     if ( rc == X86EMUL_EXCEPTION )
     {
+        if ( !rm_ctxt->hvm.flags.exn_pending )
+        {
+            intr_info = __vmread(VM_ENTRY_INTR_INFO);
+            __vmwrite(VM_ENTRY_INTR_INFO, 0);
+            if ( !(intr_info & INTR_INFO_VALID_MASK) )
+            {
+                gdprintk(XENLOG_ERR, "Exception pending but no info.\n");
+                goto fail;
+            }
+            rm_ctxt->hvm.exn_vector = (uint8_t)intr_info;
+            rm_ctxt->hvm.exn_insn_len = 0;
+        }
+
+        if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
+        {
+            gdprintk(XENLOG_ERR, "Exception %02x in protected mode.\n",
+                     rm_ctxt->hvm.exn_vector);
+            goto fail;
+        }
+
         realmode_deliver_exception(
-            rm_ctxt->exn_vector, rm_ctxt->exn_insn_len, rm_ctxt);
-    }
-    else if ( rm_ctxt->flags.hlt && !hvm_local_events_need_delivery(curr) )
+            rm_ctxt->hvm.exn_vector, rm_ctxt->hvm.exn_insn_len, rm_ctxt);
+    }
+    else if ( rm_ctxt->hvm.flags.hlt && !hvm_local_events_need_delivery(curr) )
     {
         hvm_hlt(regs->eflags);
     }
@@ -769,10 +197,11 @@ static void realmode_emulate_one(struct 
     gdprintk(XENLOG_ERR,
              "Real-mode emulation failed @ %04x:%08lx: "
              "%02x %02x %02x %02x %02x %02x\n",
-             rm_ctxt->seg_reg[x86_seg_cs].sel, rm_ctxt->insn_buf_eip,
-             rm_ctxt->insn_buf[0], rm_ctxt->insn_buf[1],
-             rm_ctxt->insn_buf[2], rm_ctxt->insn_buf[3],
-             rm_ctxt->insn_buf[4], rm_ctxt->insn_buf[5]);
+             hvmemul_get_seg_reg(x86_seg_cs, &rm_ctxt->hvm)->sel,
+             rm_ctxt->hvm.insn_buf_eip,
+             rm_ctxt->hvm.insn_buf[0], rm_ctxt->hvm.insn_buf[1],
+             rm_ctxt->hvm.insn_buf[2], rm_ctxt->hvm.insn_buf[3],
+             rm_ctxt->hvm.insn_buf[4], rm_ctxt->hvm.insn_buf[5]);
     domain_crash_synchronous();
 }
 
@@ -780,18 +209,20 @@ void vmx_realmode(struct cpu_user_regs *
 {
     struct vcpu *curr = current;
     struct realmode_emulate_ctxt rm_ctxt;
-    unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
-    unsigned int i, emulations = 0;
-
-    rm_ctxt.ctxt.regs = regs;
-
-    for ( i = 0; i < 10; i++ )
-        hvm_get_segment_register(curr, i, &rm_ctxt.seg_reg[i]);
-
+    struct segment_register *sreg;
+    unsigned long intr_info;
+    unsigned int emulations = 0;
+
+    /* Get-and-clear VM_ENTRY_INTR_INFO. */
+    intr_info = __vmread(VM_ENTRY_INTR_INFO);
+    if ( intr_info & INTR_INFO_VALID_MASK )
+        __vmwrite(VM_ENTRY_INTR_INFO, 0);
+
+    hvm_emulate_prepare(&rm_ctxt.hvm, regs);
     rm_ctxt.intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
 
-    if ( curr->arch.hvm_vmx.real_mode_io_in_progress ||
-         curr->arch.hvm_vmx.real_mode_io_completed )
+    if ( curr->arch.hvm_vcpu.io_in_progress ||
+         curr->arch.hvm_vcpu.io_completed )
         realmode_emulate_one(&rm_ctxt);
 
     /* Only deliver interrupts into emulated real mode. */
@@ -799,12 +230,12 @@ void vmx_realmode(struct cpu_user_regs *
          (intr_info & INTR_INFO_VALID_MASK) )
     {
         realmode_deliver_exception((uint8_t)intr_info, 0, &rm_ctxt);
-        __vmwrite(VM_ENTRY_INTR_INFO, 0);
+        intr_info = 0;
     }
 
     while ( curr->arch.hvm_vmx.vmxemul &&
             !softirq_pending(smp_processor_id()) &&
-            !curr->arch.hvm_vmx.real_mode_io_in_progress )
+            !curr->arch.hvm_vcpu.io_in_progress )
     {
         /*
          * Check for pending interrupts only every 16 instructions, because
@@ -825,34 +256,22 @@ void vmx_realmode(struct cpu_user_regs *
          * At this point CS.RPL == SS.RPL == CS.DPL == SS.DPL == 0. For
          * DS, ES, FS and GS the least invasive trick is to set DPL == RPL.
          */
-        rm_ctxt.seg_reg[x86_seg_ds].attr.fields.dpl =
-            rm_ctxt.seg_reg[x86_seg_ds].sel & 3;
-        rm_ctxt.seg_reg[x86_seg_es].attr.fields.dpl =
-            rm_ctxt.seg_reg[x86_seg_es].sel & 3;
-        rm_ctxt.seg_reg[x86_seg_fs].attr.fields.dpl =
-            rm_ctxt.seg_reg[x86_seg_fs].sel & 3;
-        rm_ctxt.seg_reg[x86_seg_gs].attr.fields.dpl =
-            rm_ctxt.seg_reg[x86_seg_gs].sel & 3;
-    }
-
-    for ( i = 0; i < 10; i++ )
-        hvm_set_segment_register(curr, i, &rm_ctxt.seg_reg[i]);
-}
-
-int vmx_realmode_io_complete(void)
-{
-    struct vcpu *curr = current;
-    ioreq_t *p = &get_ioreq(curr)->vp_ioreq;
-
-    if ( !curr->arch.hvm_vmx.real_mode_io_in_progress )
-        return 0;
-
-    curr->arch.hvm_vmx.real_mode_io_in_progress = 0;
-    if ( p->dir == IOREQ_READ )
-    {
-        curr->arch.hvm_vmx.real_mode_io_completed = 1;
-        curr->arch.hvm_vmx.real_mode_io_data = p->data;
-    }
-
-    return 1;
-}
+        sreg = hvmemul_get_seg_reg(x86_seg_ds, &rm_ctxt.hvm);
+        sreg->attr.fields.dpl = sreg->sel & 3;
+        sreg = hvmemul_get_seg_reg(x86_seg_es, &rm_ctxt.hvm);
+        sreg->attr.fields.dpl = sreg->sel & 3;
+        sreg = hvmemul_get_seg_reg(x86_seg_fs, &rm_ctxt.hvm);
+        sreg->attr.fields.dpl = sreg->sel & 3;
+        sreg = hvmemul_get_seg_reg(x86_seg_gs, &rm_ctxt.hvm);
+        sreg->attr.fields.dpl = sreg->sel & 3;
+        rm_ctxt.hvm.seg_reg_dirty |=
+            (1ul << x86_seg_ds) | (1ul << x86_seg_es) |
+            (1ul << x86_seg_fs) | (1ul << x86_seg_gs);
+    }
+
+    hvm_emulate_writeback(&rm_ctxt.hvm);
+
+    /* Re-instate VM_ENTRY_INTR_INFO if we did not discharge it. */
+    if ( intr_info & INTR_INFO_VALID_MASK )
+        __vmwrite(VM_ENTRY_INTR_INFO, intr_info);
+}
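
The VM_ENTRY_INTR_INFO handling is the subtle part of the realmode rewrite, so
here is the control flow condensed; every line below paraphrases the hunk
above rather than introducing new behaviour:

    intr_info = __vmread(VM_ENTRY_INTR_INFO);      /* snapshot ...        */
    if ( intr_info & INTR_INFO_VALID_MASK )
        __vmwrite(VM_ENTRY_INTR_INFO, 0);          /* ... and clear       */

    /* Still in real mode?  Deliver the pending event by hand. */
    if ( intr_info & INTR_INFO_VALID_MASK )
    {
        realmode_deliver_exception((uint8_t)intr_info, 0, &rm_ctxt);
        intr_info = 0;                             /* discharged          */
    }

    /* ... emulation loop ... */

    if ( intr_info & INTR_INFO_VALID_MASK )        /* not discharged      */
        __vmwrite(VM_ENTRY_INTR_INFO, intr_info);  /* re-arm for VM entry */

Clearing the field up front stops the hardware from injecting the event while
Xen is still single-stepping the guest through the emulator.
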
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Wed Feb 20 14:36:45 2008 +0000
@@ -60,6 +60,13 @@ static void vmx_install_vlapic_mapping(s
 static void vmx_install_vlapic_mapping(struct vcpu *v);
 static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
 static void vmx_update_guest_efer(struct vcpu *v);
+static void vmx_cpuid_intercept(
+    unsigned int *eax, unsigned int *ebx,
+    unsigned int *ecx, unsigned int *edx);
+static void vmx_wbinvd_intercept(void);
+static void vmx_fpu_dirty_intercept(void);
+static int vmx_msr_read_intercept(struct cpu_user_regs *regs);
+static int vmx_msr_write_intercept(struct cpu_user_regs *regs);
 
 static int vmx_domain_initialise(struct domain *d)
 {
@@ -96,7 +103,6 @@ static int vmx_vcpu_initialise(struct vc
     /* %eax == 1 signals full real-mode support to the guest loader. */
     if ( v->vcpu_id == 0 )
         v->arch.guest_context.user_regs.eax = 1;
-    v->arch.hvm_vcpu.io_complete = vmx_realmode_io_complete;
 
     return 0;
 }
@@ -204,7 +210,7 @@ static enum handler_return long_mode_do_
     switch ( ecx )
     {
     case MSR_EFER:
-        if ( !hvm_set_efer(msr_content) )
+        if ( hvm_set_efer(msr_content) )
             goto exception_raised;
         break;
 
@@ -375,7 +381,7 @@ static enum handler_return long_mode_do_
     switch ( regs->ecx )
     {
     case MSR_EFER:
-        if ( !hvm_set_efer(msr_content) )
+        if ( hvm_set_efer(msr_content) )
             return HNDL_exception_raised;
         break;
 
@@ -1076,6 +1082,11 @@ static struct hvm_function_table vmx_fun
     .do_pmu_interrupt     = vmx_do_pmu_interrupt,
     .cpu_up               = vmx_cpu_up,
     .cpu_down             = vmx_cpu_down,
+    .cpuid_intercept      = vmx_cpuid_intercept,
+    .wbinvd_intercept     = vmx_wbinvd_intercept,
+    .fpu_dirty_intercept  = vmx_fpu_dirty_intercept,
+    .msr_read_intercept   = vmx_msr_read_intercept,
+    .msr_write_intercept  = vmx_msr_write_intercept
 };
 
 void start_vmx(void)
@@ -1147,7 +1158,7 @@ static void __update_guest_eip(unsigned 
         vmx_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
 }
 
-void vmx_do_no_device_fault(void)
+static void vmx_fpu_dirty_intercept(void)
 {
     struct vcpu *curr = current;
 
@@ -1162,7 +1173,7 @@ void vmx_do_no_device_fault(void)
 }
 
 #define bitmaskof(idx)  (1U << ((idx) & 31))
-void vmx_cpuid_intercept(
+static void vmx_cpuid_intercept(
     unsigned int *eax, unsigned int *ebx,
     unsigned int *ecx, unsigned int *edx)
 {
@@ -1751,13 +1762,13 @@ static int mov_to_cr(int gp, int cr, str
     switch ( cr )
     {
     case 0:
-        return hvm_set_cr0(value);
+        return !hvm_set_cr0(value);
 
     case 3:
-        return hvm_set_cr3(value);
+        return !hvm_set_cr3(value);
 
     case 4:
-        return hvm_set_cr4(value);
+        return !hvm_set_cr4(value);
 
     case 8:
         vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
@@ -1848,7 +1859,7 @@ static int vmx_cr_access(unsigned long e
         value = (value & ~0xF) |
             (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
         HVMTRACE_1D(LMSW, current, value);
-        return hvm_set_cr0(value);
+        return !hvm_set_cr0(value);
     default:
         BUG();
     }
@@ -1932,7 +1943,7 @@ static int is_last_branch_msr(u32 ecx)
     return 0;
 }
 
-int vmx_msr_read_intercept(struct cpu_user_regs *regs)
+static int vmx_msr_read_intercept(struct cpu_user_regs *regs)
 {
     u64 msr_content = 0;
     u32 ecx = regs->ecx, eax, edx;
@@ -2017,7 +2028,7 @@ int vmx_msr_read_intercept(struct cpu_us
             case HNDL_unhandled:
                 break;
             case HNDL_exception_raised:
-                return 0;
+                return X86EMUL_EXCEPTION;
             case HNDL_done:
                 goto done;
         }
@@ -2050,11 +2061,11 @@ done:
     HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
                 ecx, (unsigned long)regs->eax,
                 (unsigned long)regs->edx);
-    return 1;
+    return X86EMUL_OKAY;
 
 gp_fault:
     vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
-    return 0;
+    return X86EMUL_EXCEPTION;
 }
 
 static int vmx_alloc_vlapic_mapping(struct domain *d)
@@ -2124,7 +2135,7 @@ extern bool_t mtrr_def_type_msr_set(stru
 extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
 extern bool_t pat_msr_set(u64 *pat, u64 msr);
 
-int vmx_msr_write_intercept(struct cpu_user_regs *regs)
+static int vmx_msr_write_intercept(struct cpu_user_regs *regs)
 {
     u32 ecx = regs->ecx;
     u64 msr_content;
@@ -2219,7 +2230,7 @@ int vmx_msr_write_intercept(struct cpu_u
         goto gp_fault;
     default:
         if ( vpmu_do_wrmsr(regs) )
-            return 1;
+            return X86EMUL_OKAY;
         switch ( long_mode_do_msr_write(regs) )
         {
             case HNDL_unhandled:
@@ -2228,18 +2239,18 @@ int vmx_msr_write_intercept(struct cpu_u
                     wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
                 break;
             case HNDL_exception_raised:
-                return 0;
+                return X86EMUL_EXCEPTION;
             case HNDL_done:
                 break;
         }
         break;
     }
 
-    return 1;
+    return X86EMUL_OKAY;
 
 gp_fault:
     vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
-    return 0;
+    return X86EMUL_EXCEPTION;
 }
 
 static void vmx_do_hlt(struct cpu_user_regs *regs)
@@ -2320,7 +2331,7 @@ static void wbinvd_ipi(void *info)
     wbinvd();
 }
 
-void vmx_wbinvd_intercept(void)
+static void vmx_wbinvd_intercept(void)
 {
     if ( list_empty(&(domain_hvm_iommu(current->domain)->pdev_list)) )
         return;
@@ -2447,7 +2458,7 @@ asmlinkage void vmx_vmexit_handler(struc
             domain_pause_for_debugger();
             break;
         case TRAP_no_device:
-            vmx_do_no_device_fault();
+            vmx_fpu_dirty_intercept();
             break;
         case TRAP_page_fault:
             exit_qualification = __vmread(EXIT_QUALIFICATION);
@@ -2566,12 +2577,12 @@ asmlinkage void vmx_vmexit_handler(struc
         break;
     case EXIT_REASON_MSR_READ:
         inst_len = __get_instruction_length(); /* Safe: RDMSR */
-        if ( vmx_msr_read_intercept(regs) )
+        if ( vmx_msr_read_intercept(regs) == X86EMUL_OKAY )
             __update_guest_eip(inst_len);
         break;
     case EXIT_REASON_MSR_WRITE:
         inst_len = __get_instruction_length(); /* Safe: WRMSR */
-        if ( vmx_msr_write_intercept(regs) )
+        if ( vmx_msr_write_intercept(regs) == X86EMUL_OKAY )
             __update_guest_eip(inst_len);
         break;
 
@@ -2597,7 +2608,8 @@ asmlinkage void vmx_vmexit_handler(struc
         unsigned long offset;
         exit_qualification = __vmread(EXIT_QUALIFICATION);
         offset = exit_qualification & 0x0fffUL;
-        handle_mmio(APIC_DEFAULT_PHYS_BASE | offset);
+        if ( !handle_mmio() )
+            hvm_inject_exception(TRAP_gp_fault, 0, 0);
         break;
     }
 
diff -r f853c0497095 -r 3f1cf03826fe xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/arch/x86/mm/shadow/multi.c    Wed Feb 20 14:36:45 2008 +0000
@@ -2816,8 +2816,7 @@ static int sh_page_fault(struct vcpu *v,
             perfc_incr(shadow_fault_fast_mmio);
             SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
             reset_early_unshadow(v);
-            handle_mmio(gpa);
-            return EXCRET_fault_fixed;
+            return handle_mmio() ? EXCRET_fault_fixed : 0;
         }
         else
         {
@@ -3117,8 +3116,7 @@ static int sh_page_fault(struct vcpu *v,
     shadow_audit_tables(v);
     reset_early_unshadow(v);
     shadow_unlock(d);
-    handle_mmio(gpa);
-    return EXCRET_fault_fixed;
+    return handle_mmio() ? EXCRET_fault_fixed : 0;
 
  not_a_shadow_fault:
     sh_audit_gw(v, &gw);
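
Both shadow fast paths now propagate handle_mmio()'s result instead of
unconditionally claiming the fault was fixed; a zero return falls through to
the normal fault-handling exit. The vmexit handlers in svm.c and vmx.c adopt
the complementary pattern seen earlier in this patch:

    if ( !handle_mmio() )
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
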
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/emulate.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/emulate.h Wed Feb 20 14:36:45 2008 +0000
@@ -0,0 +1,55 @@
+/******************************************************************************
+ * hvm/emulate.h
+ * 
+ * HVM instruction emulation. Used for MMIO and VMX real mode.
+ * 
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ * 
+ * Authors:
+ *    Keir Fraser <keir.fraser@xxxxxxxxxx>
+ */
+
+#ifndef __ASM_X86_HVM_EMULATE_H__
+#define __ASM_X86_HVM_EMULATE_H__
+
+#include <xen/config.h>
+#include <asm/x86_emulate.h>
+
+struct hvm_emulate_ctxt {
+    struct x86_emulate_ctxt ctxt;
+
+    /* Cache of 16 bytes of instruction. */
+    uint8_t insn_buf[16];
+    unsigned long insn_buf_eip;
+    unsigned int insn_buf_bytes;
+
+    struct segment_register seg_reg[10];
+    unsigned long seg_reg_accessed;
+    unsigned long seg_reg_dirty;
+
+    union {
+        struct {
+            unsigned int hlt:1;
+            unsigned int mov_ss:1;
+            unsigned int sti:1;
+            unsigned int exn_pending:1;
+        } flags;
+        unsigned int flag_word;
+    };
+
+    uint8_t exn_vector;
+    uint8_t exn_insn_len;
+};
+
+int hvm_emulate_one(
+    struct hvm_emulate_ctxt *hvmemul_ctxt);
+void hvm_emulate_prepare(
+    struct hvm_emulate_ctxt *hvmemul_ctxt,
+    struct cpu_user_regs *regs);
+void hvm_emulate_writeback(
+    struct hvm_emulate_ctxt *hvmemul_ctxt);
+struct segment_register *hvmemul_get_seg_reg(
+    enum x86_segment seg,
+    struct hvm_emulate_ctxt *hvmemul_ctxt);
+
+#endif /* __ASM_X86_HVM_EMULATE_H__ */
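
hvmemul_get_seg_reg() is the only exported accessor for the seg_reg[] cache:
callers (realmode.c above, for instance) fetch segment state through it and
set bits in seg_reg_dirty when they modify a register, which tells
hvm_emulate_writeback() what to flush. A plausible minimal implementation,
consistent with the fields above (the real one lives in
xen/arch/x86/hvm/emulate.c, added earlier in this changeset):

    struct segment_register *hvmemul_get_seg_reg(
        enum x86_segment seg,
        struct hvm_emulate_ctxt *hvmemul_ctxt)
    {
        /* Populate the cache slot lazily, on first access only. */
        if ( !__test_and_set_bit(seg, &hvmemul_ctxt->seg_reg_accessed) )
            hvm_get_segment_register(
                current, seg, &hvmemul_ctxt->seg_reg[seg]);
        return &hvmemul_ctxt->seg_reg[seg];
    }
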
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/hvm.h     Wed Feb 20 14:36:45 2008 +0000
@@ -117,6 +117,15 @@ struct hvm_function_table {
 
     int  (*cpu_up)(void);
     void (*cpu_down)(void);
+
+    /* Instruction intercepts: non-void return values are X86EMUL codes. */
+    void (*cpuid_intercept)(
+        unsigned int *eax, unsigned int *ebx,
+        unsigned int *ecx, unsigned int *edx);
+    void (*wbinvd_intercept)(void);
+    void (*fpu_dirty_intercept)(void);
+    int (*msr_read_intercept)(struct cpu_user_regs *regs);
+    int (*msr_write_intercept)(struct cpu_user_regs *regs);
 };
 
 extern struct hvm_function_table hvm_funcs;
@@ -162,9 +171,6 @@ hvm_guest_x86_mode(struct vcpu *v)
     ASSERT(v == current);
     return hvm_funcs.guest_x86_mode(v);
 }
-
-int hvm_instruction_fetch(unsigned long pc, int address_bytes,
-                          unsigned char *buf);
 
 static inline void
 hvm_update_host_cr3(struct vcpu *v)
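
These hooks let the common emulator reach vendor-specific intercept logic
without knowing whether it runs on VMX or SVM. A sketch of the vendor-neutral
wrappers one would layer on top (the wrapper names here are illustrative, not
part of this patch; hvm_funcs is populated by the VMX/SVM setup code):

    /* Returns an X86EMUL_* code, per the comment in the function table. */
    static inline int hvm_msr_read_intercept(struct cpu_user_regs *regs)
    {
        return hvm_funcs.msr_read_intercept(regs);
    }

    static inline void hvm_cpuid_intercept(
        unsigned int *eax, unsigned int *ebx,
        unsigned int *ecx, unsigned int *edx)
    {
        hvm_funcs.cpuid_intercept(eax, ebx, ecx, edx);
    }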
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/io.h
--- a/xen/include/asm-x86/hvm/io.h      Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/io.h      Wed Feb 20 14:36:45 2008 +0000
@@ -120,8 +120,8 @@ struct hvm_mmio_handler {
 };
 
 /* global io interception point in HV */
-extern int hvm_io_intercept(ioreq_t *p, int type);
-extern int register_io_handler(
+int hvm_io_intercept(ioreq_t *p, int type);
+int register_io_handler(
     struct domain *d, unsigned long addr, unsigned long size,
     void *action, int type);
 
@@ -135,8 +135,8 @@ static inline int hvm_buffered_io_interc
     return hvm_io_intercept(p, HVM_BUFFERED_IO);
 }
 
-extern int hvm_mmio_intercept(ioreq_t *p);
-extern int hvm_buffered_io_send(ioreq_t *p);
+int hvm_mmio_intercept(ioreq_t *p);
+int hvm_buffered_io_send(ioreq_t *p);
 
 static inline int register_portio_handler(
     struct domain *d, unsigned long addr,
@@ -159,11 +159,11 @@ void send_pio_req(unsigned long port, un
                   paddr_t value, int dir, int df, int value_is_ptr);
 void send_timeoffset_req(unsigned long timeoff);
 void send_invalidate_req(void);
-extern void handle_mmio(paddr_t gpa);
-extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
-extern void hvm_io_assist(void);
-extern void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
-                         union vioapic_redir_entry *ent);
+int handle_mmio(void);
+void hvm_interrupt_post(struct vcpu *v, int vector, int type);
+void hvm_io_assist(void);
+void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
+                  union vioapic_redir_entry *ent);
 
 struct hvm_hw_stdvga {
     uint8_t sr_index;
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/support.h Wed Feb 20 14:36:45 2008 +0000
@@ -138,6 +138,7 @@ void hvm_hlt(unsigned long rflags);
 void hvm_hlt(unsigned long rflags);
 void hvm_triple_fault(void);
 
+/* These functions all return X86EMUL return codes. */
 int hvm_set_efer(uint64_t value);
 int hvm_set_cr0(unsigned long value);
 int hvm_set_cr3(unsigned long value);
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h    Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/vcpu.h    Wed Feb 20 14:36:45 2008 +0000
@@ -59,9 +59,6 @@ struct hvm_vcpu {
     bool_t              flag_dr_dirty;
     bool_t              debug_state_latch;
 
-    /* Callback function for I/O completion. */
-    int                 (*io_complete)(void);
-
     union {
         struct arch_vmx_struct vmx;
         struct arch_svm_struct svm;
@@ -72,6 +69,12 @@ struct hvm_vcpu {
 
     /* Which cache mode is this VCPU in (CR0:CD/NW)? */
     u8                  cache_mode;
+
+    /* I/O request in flight to device model. */
+    bool_t              mmio_in_progress;
+    bool_t              io_in_progress;
+    bool_t              io_completed;
+    unsigned long       io_data;
 };
 
 #define ARCH_HVM_IO_WAIT         1   /* Waiting for I/O completion */
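
The three booleans and io_data replace the VMX-only real_mode_io_* fields
removed from vmcs.h below, making the in-flight-I/O state common to all HVM
emulation paths. A rough sketch of how an emulator callback might use them
for a port read (illustrative only; the authoritative logic is in
xen/arch/x86/hvm/emulate.c in this changeset):

    struct hvm_vcpu *hv = &current->arch.hvm_vcpu;

    if ( hv->io_completed )
    {
        /* Device model has replied: consume the buffered result. */
        *val = hv->io_data;
        hv->io_completed = 0;
        return X86EMUL_OKAY;
    }

    if ( hv->io_in_progress )
        return X86EMUL_UNHANDLEABLE;  /* request already outstanding */

    /* Post the request; re-enter emulation once hvm_io_assist() runs. */
    hv->io_in_progress = 1;
    send_pio_req(port, 1, bytes, 0, IOREQ_READ, 0, 0);
    return X86EMUL_RETRY;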
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h        Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h        Wed Feb 20 14:36:45 2008 +0000
@@ -94,11 +94,6 @@ struct arch_vmx_struct {
 #define VMXEMUL_BAD_CS   2  /* Yes, because CS.RPL != CPL */
 #define VMXEMUL_BAD_SS   4  /* Yes, because SS.RPL != CPL */
     uint8_t              vmxemul;
-
-    /* I/O request in flight to device model. */
-    bool_t               real_mode_io_in_progress;
-    bool_t               real_mode_io_completed;
-    unsigned long        real_mode_io_data;
 };
 
 int vmx_create_vmcs(struct vcpu *v);
diff -r f853c0497095 -r 3f1cf03826fe xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Wed Feb 20 14:36:45 2008 +0000
@@ -33,15 +33,7 @@ void vmx_do_resume(struct vcpu *);
 void vmx_do_resume(struct vcpu *);
 void set_guest_time(struct vcpu *v, u64 gtime);
 void vmx_vlapic_msr_changed(struct vcpu *v);
-void vmx_do_no_device_fault(void);
-void vmx_cpuid_intercept(
-    unsigned int *eax, unsigned int *ebx,
-    unsigned int *ecx, unsigned int *edx);
-int vmx_msr_read_intercept(struct cpu_user_regs *regs);
-int vmx_msr_write_intercept(struct cpu_user_regs *regs);
-void vmx_wbinvd_intercept(void);
 void vmx_realmode(struct cpu_user_regs *regs);
-int vmx_realmode_io_complete(void);
 
 /*
  * Exit Reasons
diff -r f853c0497095 -r 3f1cf03826fe xen/include/public/hvm/ioreq.h
--- a/xen/include/public/hvm/ioreq.h    Tue Feb 19 11:14:40 2008 -0700
+++ b/xen/include/public/hvm/ioreq.h    Wed Feb 20 14:36:45 2008 +0000
@@ -34,14 +34,8 @@
 
 #define IOREQ_TYPE_PIO          0 /* pio */
 #define IOREQ_TYPE_COPY         1 /* mmio ops */
-#define IOREQ_TYPE_AND          2
-#define IOREQ_TYPE_OR           3
-#define IOREQ_TYPE_XOR          4
-#define IOREQ_TYPE_XCHG         5
-#define IOREQ_TYPE_ADD          6
 #define IOREQ_TYPE_TIMEOFFSET   7
 #define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
-#define IOREQ_TYPE_SUB          9
 
 /*
  * VMExit dispatcher should cooperate with instruction decoder to

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
