To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] [IA64] Build new infrastructure for fast fault handling path.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 20 May 2008 08:30:34 -0700
Delivery-date: Tue, 20 May 2008 08:31:46 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1210830828 -32400
# Node ID ef290f39ae6b8e3c58c224ec469ab09522cc7e0b
# Parent  b03e24f9c1d85e50fcd9cb07d8a517e7f6df2b9a
[IA64] Build new infrastructure for fast fault handling path.

1. Use a jump table to dispatch virtualization faults (a standalone C sketch
   of this dispatch idea follows the list).
2. For virtualization faults, the handler is executed with psr.i=0, psr.ic=0,
   psr.bn=0, so less context needs to be switched.
3. Use the register stack instead of the memory stack to switch context.
4. Use C code to handle faults where possible, to reduce maintenance effort;
   remove the assembly handlers for rsm, ssm, mov to psr, mov to rr.
5. Add fast-path C handlers for rsm, ssm, mov to psr and rfi.
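
As an illustration of the dispatch scheme in point 1, here is a small,
standalone C sketch of a jump-table fault dispatcher. It is not code from
this changeset: the real dispatch is done in assembly via
virtualization_fault_table in optvfault.S, and every identifier below
(fake_vcpu, fast_handler_t, fast_handlers, handle_rsm_fast, slow_path,
dispatch_fault, NR_FAULT_CAUSES) is hypothetical and exists only for this
example.

#include <stdio.h>

#define NR_FAULT_CAUSES 64

/* Stand-in for the real VCPU structure; purely illustrative. */
struct fake_vcpu {
    unsigned long vpsr;
};

typedef void (*fast_handler_t)(struct fake_vcpu *v, unsigned long insn);

/* Toy fast handler: clear the imm24 bits of vpsr, roughly what rsm does. */
static void handle_rsm_fast(struct fake_vcpu *v, unsigned long insn)
{
    v->vpsr &= ~(insn & 0xffffffUL);
}

/* Toy slow path: stands in for the full context save + generic C handler. */
static void slow_path(struct fake_vcpu *v, unsigned long cause)
{
    (void)v;
    printf("cause %lu: full context save, generic handler\n", cause);
}

/* One slot per fault cause; a NULL slot means "take the slow path". */
static fast_handler_t fast_handlers[NR_FAULT_CAUSES] = {
    [25] = handle_rsm_fast,    /* rsm is entry 25 in the real assembly table */
};

static void dispatch_fault(struct fake_vcpu *v, unsigned long cause,
                           unsigned long insn)
{
    fast_handler_t h = fast_handlers[cause & (NR_FAULT_CAUSES - 1)];

    if (h)
        h(v, insn);            /* fast path: minimal state saved */
    else
        slow_path(v, cause);   /* everything else keeps the old behaviour */
}

int main(void)
{
    struct fake_vcpu v = { .vpsr = ~0UL };

    dispatch_fault(&v, 25, 0x4000);  /* hits the fast rsm handler */
    dispatch_fault(&v, 4, 0);        /* no fast handler -> slow path
                                        (entry 4 is BACK_TO_SLOW_PATH below, too) */
    return 0;
}

In the patch itself the same idea is expressed by virtualization_fault_table
in optvfault.S: the causes with fast handlers (entries 3, 6, 10, 18, 24, 25,
31 and 37) branch directly to them, and every other entry is
BACK_TO_SLOW_PATH.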

Signed-off-by: Anthony Xu <anthony.xu@xxxxxxxxx>
---
 xen/arch/ia64/asm-offsets.c         |    5 
 xen/arch/ia64/vmx/optvfault.S       | 1196 +++++++++++++++++++-----------------
 xen/arch/ia64/vmx/vmx_ivt.S         |   39 -
 xen/arch/ia64/vmx/vmx_phy_mode.c    |   27 
 xen/arch/ia64/vmx/vmx_vcpu.c        |  162 ++++
 xen/include/asm-ia64/vmx_phy_mode.h |    3 
 xen/include/asm-ia64/vmx_vcpu.h     |    2 
 7 files changed, 862 insertions(+), 572 deletions(-)

diff -r b03e24f9c1d8 -r ef290f39ae6b xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Thu May 15 14:18:38 2008 +0900
+++ b/xen/arch/ia64/asm-offsets.c       Thu May 15 14:53:48 2008 +0900
@@ -204,6 +204,11 @@ void foo(void)
 
        DEFINE(IA64_VPD_BASE_OFFSET, offsetof (struct vcpu, arch.privregs));
        DEFINE(IA64_VPD_VIFS_OFFSET, offsetof (mapped_regs_t, ifs));
+       DEFINE(IA64_VPD_VHPI_OFFSET, offsetof (mapped_regs_t, vhpi));
+       DEFINE(IA64_VPD_VB1REG_OFFSET, offsetof (mapped_regs_t, bank1_regs[0]));
+       DEFINE(IA64_VPD_VB0REG_OFFSET, offsetof (mapped_regs_t, bank0_regs[0]));
+       DEFINE(IA64_VPD_VB1NAT_OFFSET, offsetof (mapped_regs_t, vnat));
+       DEFINE(IA64_VPD_VB0NAT_OFFSET, offsetof (mapped_regs_t, vbnat));
        DEFINE(IA64_VLSAPIC_INSVC_BASE_OFFSET, offsetof (struct vcpu, arch.insvc[0]));
        DEFINE(IA64_VPD_VPTA_OFFSET, offsetof (struct mapped_regs, pta));
        DEFINE(XXX_THASH_SIZE, sizeof (thash_data_t));
diff -r b03e24f9c1d8 -r ef290f39ae6b xen/arch/ia64/vmx/optvfault.S
--- a/xen/arch/ia64/vmx/optvfault.S     Thu May 15 14:18:38 2008 +0900
+++ b/xen/arch/ia64/vmx/optvfault.S     Thu May 15 14:53:48 2008 +0900
@@ -3,10 +3,10 @@
  * optimize virtualization fault handler
  *
  * Copyright (C) 2006 Intel Co
- *     Xuefei Xu (Anthony Xu) <anthony.xu@xxxxxxxxx>
+ * Xuefei Xu (Anthony Xu) <anthony.xu@xxxxxxxxx>
  */
 
-#include <linux/config.h>      
+#include <linux/config.h>
 #include <asm/config.h>
 #include <asm/pgtable.h>
 #include <asm/asmmacro.h>
@@ -20,26 +20,230 @@
 #include <asm/virt_event.h>
 #include <asm-ia64/vmx_mm_def.h>
 #include <asm-ia64/vmx_phy_mode.h>
-
-#define ACCE_MOV_FROM_AR
-#define ACCE_MOV_FROM_RR
-#define ACCE_MOV_TO_RR
-#define ACCE_RSM
-#define ACCE_SSM
-#define ACCE_MOV_TO_PSR
-#define ACCE_THASH
+#include "entry.h"
+
+// r21 : current
+// r23 : b0
+// r31 : pr
+
+#define VMX_VIRT_SAVE                                                       \
+    mov r27=ar.rsc;     /* M */                                             \
+    ;;                                                                      \
+    cover;              /* B;; (or nothing) */                              \
+    ;;                                                                      \
+    /* switch from user to kernel RBS: */                                   \
+    invala;             /* M */                                             \
+    ;;                                                                      \
+    mov ar.rsc=0;       /* set enforced lazy mode  */                       \
+    ;;                                                                      \
+    mov.m r26=ar.rnat;                                                      \
+    movl r28=IA64_RBS_OFFSET;        /* compute base of RBS */              \
+    ;;                                                                      \
+    mov r22=ar.bspstore;             /* save ar.bspstore */                 \
+    add r28=r28,r21;                                                        \
+    ;;                                                                      \
+    mov ar.bspstore=r28;    /* switch to kernel RBS */                      \
+    ;;                                                                      \
+    mov r18=ar.bsp;                                                         \
+    mov ar.rsc=0x3;         /* set eager mode */                            \
+    ;;                                                                      \
+    alloc r32=ar.pfs,24,0,3,0    /* save pfs */                             \
+    ;;                                                                      \
+    sub r18=r18,r28;    /* r18=RSE.ndirty*8 */                              \
+    ;;                                                                      \
+    shl r33=r18,16;     /* save loadrs */                                   \
+    mov r35=b6;         /* save b6 */                                       \
+    mov r36=b7;         /* save b7 */                                       \
+    mov r37=ar.csd;     /* save ar.csd */                                   \
+    mov r38=ar.ssd;     /* save ar.ssd */                                   \
+    mov r39=r8;         /* save r8 */                               \
+    mov r40=r9;         /* save r9 */                               \
+    mov r41=r10;        /* save r10 */                              \
+    mov r42=r11;        /* save r11 */                              \
+    mov r43=r27;        /* save ar.rsc */                           \
+    mov r44=r26;        /* save ar.rnat */                          \
+    mov r45=r22;        /* save ar.bspstore */                      \
+    mov r46=r31;        /* save pr */                               \
+    mov r47=r23;        /* save b0 */                               \
+    mov r48=r1;         /* save r1 */                               \
+    mov r49=r12;        /* save r12 */                              \
+    mov r50=r13;        /* save r13 */                              \
+    mov r51=r15;        /* save r15 */                              \
+    mov r52=r14;        /* save r14 */                              \
+    mov r53=r2;         /* save r2 */                               \
+    mov r54=r3;         /* save r3 */                               \
+    mov r34=ar.ccv;     /* save ar.ccv */                           \
+    ;;                                                              \
+    movl r1=__gp;                                                   \
+    movl r29=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16;                  \
+    ;;                                                              \
+    add r12=r29,r21;   /* compute base of memory stack */           \
+    mov r13=r21;                                                    \
+    ;;                                                              \
+{ .mii;       /* call vps sync read */                              \
+    add r25=IA64_VPD_BASE_OFFSET, r21;                              \
+    nop 0x0;                                                        \
+    mov r24=ip;                                                     \
+    ;;                                                              \
+};                                                                  \
+{ .mmb;                                                             \
+    add r24 = 0x20, r24;                                            \
+    ld8 r25=[r25];          /* read vpd base */                     \
+    br.cond.sptk vmx_vps_sync_read;   /*  call the service */       \
+    ;;                                                              \
+};
+
+
+ENTRY(ia64_leave_hypervisor_virt)
+    invala              /* M */
+    ;;
+    mov r21=r13         /* get current */
+    mov b6=r35          /* restore b6 */
+    mov b7=r36          /* restore b7 */
+    mov ar.csd=r37      /* restore ar.csd */
+    mov ar.ssd=r38      /* restore ar.ssd */
+    mov r8=r39          /* restore r8 */
+    mov r9=r40          /* restore r9 */
+    mov r10=r41         /* restore r10 */
+    mov r11=r42         /* restore r11 */
+    mov ar.pfs=r32      /* restore ar.pfs */
+    mov r27=r43         /* restore ar.rsc */
+    mov r26=r44         /* restore ar.rnat */
+    mov r25=r45         /* restore ar.bspstore */
+    mov r23=r46         /* restore predicates */
+    mov r22=r47         /* restore b0 */
+    mov r1=r48          /* restore r1 */
+    mov r12=r49         /* restore r12 */
+    mov r13=r50         /* restore r13 */
+    mov r15=r51         /* restore r15 */
+    mov r14=r52         /* restore r14 */
+    mov r2=r53          /* restore r2 */
+    mov r3=r54          /* restore r3 */
+    mov ar.ccv=r34      /* restore ar.ccv */
+    mov ar.rsc=r33      /* load ar.rsc to be used for "loadrs" */
+    ;;
+    alloc r16=ar.pfs,0,0,0,0    /* drop current register frame */
+    ;;
+    loadrs
+    ;;
+    mov ar.bspstore=r25
+    ;;
+    mov ar.rnat=r26
+    ;;
+    mov ar.rsc=r27
+    adds r18=IA64_VPD_BASE_OFFSET,r21
+    ;;
+    ld8 r25=[r18]       // load vpd
+    mov r17=r0
+    ;;
+//vsa_sync_write_start
+    ;;
+    movl r24=ia64_leave_hypervisor_virt_1   // calculate return address
+    br.cond.sptk vmx_vps_sync_write         // call the service
+    ;;
+ia64_leave_hypervisor_virt_1:
+    mov r24=r22
+    mov r31=r23
+    br.cond.sptk vmx_resume_to_guest
+END(ia64_leave_hypervisor_virt)
+
+
 
 // Inputs are: r21 (= current), r24 (= cause), r25 (= insn), r31 (=saved pr)
+
+#define BACK_TO_SLOW_PATH                   \
+{;                                          \
+    nop.m 0x0;                              \
+    mov b0=r23;                             \
+    br.many vmx_virtualization_fault_back;  \
+};                                          \
+
+GLOBAL_ENTRY(virtualization_fault_table)
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+{   /* Entry 3 */
+    cmp.eq p2,p0=r0,r0
+    mov b0=r23
+    br.many vmx_asm_mov_from_ar
+}
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+{   /* Entry 6 */
+    cmp.eq p2,p0=r0,r0
+    mov b0=r23
+    br.many vmx_asm_mov_to_psr
+}
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+{   /* Entry 10 */
+    cmp.eq p2,p0=r0,r0
+    mov b0=r23
+    br.many vmx_asm_mov_to_rr
+}
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+{   /* Entry 18 */
+    cmp.eq p2,p0=r0,r0
+    mov b0=r23
+    br.many vmx_asm_mov_from_rr
+}
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+{   /* Entry 24 */
+    cmp.eq p2,p0=r0,r0
+    mov b0=r23
+    br.many vmx_asm_ssm
+}
+{   /* Entry 25 */
+    cmp.eq p2,p0=r0,r0
+    mov b0=r23
+    br.many vmx_asm_rsm
+}
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+{   /* Entry 31 */
+    cmp.eq p2,p0=r0,r0
+    mov b0=r23
+    br.many vmx_asm_thash
+}
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+{   /* Entry 37 */
+    cmp.ne p2,p0=r0,r0
+    mov b0=r23
+    br.many vmx_asm_rfi
+}
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+    BACK_TO_SLOW_PATH
+END(virtualization_fault_table)
+
 
 ENTRY(vmx_dummy_function)
     br.sptk.many vmx_dummy_function
 END(vmx_dummy_function)
 
 /*
- *     Inputs:
- *             r24 : return address
- *     r25 : vpd
- *             r29 : scratch
+ *  Inputs:
+ *  r24 : return address
+ *  r25 : vpd
+ *  r29 : scratch
  *
  */
 GLOBAL_ENTRY(vmx_vps_sync_read)
@@ -50,11 +254,10 @@ END(vmx_vps_sync_read)
 END(vmx_vps_sync_read)
 
 /*
- *     Inputs:
- *             r24 : return address
- *     r25 : vpd
- *             r29 : scratch
- *
+ *  Inputs:
+ *  r24 : return address
+ *  r25 : vpd
+ *  r29 : scratch
  */
 GLOBAL_ENTRY(vmx_vps_sync_write)
     movl r29 = vmx_dummy_function
@@ -64,11 +267,10 @@ END(vmx_vps_sync_write)
 END(vmx_vps_sync_write)
 
 /*
- *     Inputs:
- *             r23 : pr
- *             r24 : guest b0
- *     r25 : vpd
- *
+ *  Inputs:
+ *  r23 : pr
+ *  r24 : guest b0
+ *  r25 : vpd
  */
 GLOBAL_ENTRY(vmx_vps_resume_normal)
     movl r29 = vmx_dummy_function
@@ -79,11 +281,11 @@ END(vmx_vps_resume_normal)
 END(vmx_vps_resume_normal)
 
 /*
- *     Inputs:
- *             r23 : pr
- *             r24 : guest b0
- *     r25 : vpd
- *             r17 : isr
+ *  Inputs:
+ *  r23 : pr
+ *  r24 : guest b0
+ *  r25 : vpd
+ *  r17 : isr
  */
 GLOBAL_ENTRY(vmx_vps_resume_handler)
     movl r29 = vmx_dummy_function
@@ -97,12 +299,203 @@ GLOBAL_ENTRY(vmx_vps_resume_handler)
     br.sptk.many b0
 END(vmx_vps_resume_handler)
 
+//r13 ->vcpu
+//call with psr.bn = 0
+GLOBAL_ENTRY(vmx_asm_bsw0)
+    mov r15=ar.unat
+    ;;
+    adds r14=IA64_VPD_BASE_OFFSET,r13
+    ;;
+    ld8 r14=[r14]
+    bsw.1
+    ;;
+    adds r2=IA64_VPD_VB1REG_OFFSET, r14
+    adds r3=IA64_VPD_VB1REG_OFFSET+8, r14
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r16,16
+    .mem.offset 8,0; st8.spill [r3]=r17,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r18,16
+    .mem.offset 8,0; st8.spill [r3]=r19,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r20,16
+    .mem.offset 8,0; st8.spill [r3]=r21,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r22,16
+    .mem.offset 8,0; st8.spill [r3]=r23,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r24,16
+    .mem.offset 8,0; st8.spill [r3]=r25,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r26,16
+    .mem.offset 8,0; st8.spill [r3]=r27,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r28,16
+    .mem.offset 8,0; st8.spill [r3]=r29,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r30,16
+    .mem.offset 8,0; st8.spill [r3]=r31,16
+    ;;
+    mov r9=ar.unat
+    adds r8=IA64_VPD_VB1NAT_OFFSET, r14
+    ;;
+    st8 [r8]=r9
+    adds r8=IA64_VPD_VB0NAT_OFFSET, r14
+    ;;
+    ld8 r9=[r8]
+    adds r2= IA64_VPD_VB0REG_OFFSET, r14
+    adds r3= IA64_VPD_VB0REG_OFFSET+8, r14
+    ;;
+    mov ar.unat=r9
+    ;;
+    ld8.fill r16=[r2],16
+    ld8.fill r17=[r3],16
+    ;;
+    ld8.fill r18=[r2],16
+    ld8.fill r19=[r3],16
+    ;;
+    ld8.fill r20=[r2],16
+    ld8.fill r21=[r3],16
+    ;;
+    ld8.fill r22=[r2],16
+    ld8.fill r23=[r3],16
+    ;;
+    ld8.fill r24=[r2],16
+    ld8.fill r25=[r3],16
+    ;;
+    ld8.fill r26=[r2],16
+    ld8.fill r27=[r3],16
+    ;;
+    ld8.fill r28=[r2],16
+    ld8.fill r29=[r3],16
+    ;;
+    ld8.fill r30=[r2],16
+    ld8.fill r31=[r3],16
+    ;;
+    mov ar.unat=r15
+    ;;
+    bsw.0
+    ;;
+    br.ret.sptk.many b0
+END(vmx_asm_bsw0)
+
+//r13 ->vcpu
+//call with psr.bn = 0
+GLOBAL_ENTRY(vmx_asm_bsw1)
+    mov r15=ar.unat
+    ;;
+    adds r14=IA64_VPD_BASE_OFFSET,r13
+    ;;
+    ld8 r14=[r14]
+    bsw.1
+    ;;
+    adds r2=IA64_VPD_VB0REG_OFFSET, r14
+    adds r3=IA64_VPD_VB0REG_OFFSET+8, r14
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r16,16
+    .mem.offset 8,0; st8.spill [r3]=r17,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r18,16
+    .mem.offset 8,0; st8.spill [r3]=r19,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r20,16
+    .mem.offset 8,0; st8.spill [r3]=r21,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r22,16
+    .mem.offset 8,0; st8.spill [r3]=r23,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r24,16
+    .mem.offset 8,0; st8.spill [r3]=r25,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r26,16
+    .mem.offset 8,0; st8.spill [r3]=r27,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r28,16
+    .mem.offset 8,0; st8.spill [r3]=r29,16
+    ;;
+    .mem.offset 0,0; st8.spill [r2]=r30,16
+    .mem.offset 8,0; st8.spill [r3]=r31,16
+    ;;
+    mov r9=ar.unat
+    adds r8=IA64_VPD_VB0NAT_OFFSET, r14
+    ;;
+    st8 [r8]=r9
+    adds r8=IA64_VPD_VB1NAT_OFFSET, r14
+    ;;
+    ld8 r9=[r8]
+    adds r2=IA64_VPD_VB1REG_OFFSET, r14
+    adds r3=IA64_VPD_VB1REG_OFFSET+8, r14
+    ;;
+    mov ar.unat=r9
+    ;;
+    ld8.fill r16=[r2],16
+    ld8.fill r17=[r3],16
+    ;;
+    ld8.fill r18=[r2],16
+    ld8.fill r19=[r3],16
+    ;;
+    ld8.fill r20=[r2],16
+    ld8.fill r21=[r3],16
+    ;;
+    ld8.fill r22=[r2],16
+    ld8.fill r23=[r3],16
+    ;;
+    ld8.fill r24=[r2],16
+    ld8.fill r25=[r3],16
+    ;;
+    ld8.fill r26=[r2],16
+    ld8.fill r27=[r3],16
+    ;;
+    ld8.fill r28=[r2],16
+    ld8.fill r29=[r3],16
+    ;;
+    ld8.fill r30=[r2],16
+    ld8.fill r31=[r3],16
+    ;;
+    mov ar.unat=r15
+    ;;
+    bsw.0
+    ;;
+    br.ret.sptk.many b0
+END(vmx_asm_bsw1)
+
+
+// rfi
+ENTRY(vmx_asm_rfi)
+    adds r18=IA64_VPD_BASE_OFFSET,r21
+    ;;
+    ld8 r18=[r18]
+    ;;
+    adds r26=IA64_VPD_VIFS_OFFSET,r18
+    ;;
+    ld8 r26=[r26]
+    ;;
+    tbit.z p6,p0=r26,63
+    (p6) br.cond.dptk.few vmx_asm_rfi_1
+    ;;
+    //if vifs.v=1 desert current register frame
+    alloc r27=ar.pfs,0,0,0,0
+    ;;
+vmx_asm_rfi_1:
+    adds r26=IA64_VPD_VHPI_OFFSET,r18
+    ;;
+    ld8 r26=[r26]
+    ;;
+    cmp.ne p6,p0=r26,r0
+    (p6) br.cond.dpnt.many vmx_virtualization_fault_back
+    ;;
+    VMX_VIRT_SAVE
+    ;;
+    mov out0=r21
+    movl r14=ia64_leave_hypervisor_virt
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_vcpu_rfi_fast
+END(vmx_asm_rfi)
+
 
 //mov r1=ar3 (only itc is virtualized)
-GLOBAL_ENTRY(vmx_asm_mov_from_ar)
-#ifndef ACCE_MOV_FROM_AR
-    br.many vmx_virtualization_fault_back
-#endif
+ENTRY(vmx_asm_mov_from_ar)
     add r18=VCPU_VTM_OFFSET_OFS,r21
     add r16=VCPU_VTM_LAST_ITC_OFS,r21
     extr.u r17=r25,6,7
@@ -127,10 +520,7 @@ END(vmx_asm_mov_from_ar)
 
 
 // mov r1=rr[r3]
-GLOBAL_ENTRY(vmx_asm_mov_from_rr)
-#ifndef ACCE_MOV_FROM_RR
-    br.many vmx_virtualization_fault_back
-#endif
+ENTRY(vmx_asm_mov_from_rr)
     extr.u r16=r25,20,7
     extr.u r17=r25,6,7
     movl r20=asm_mov_from_reg
@@ -142,8 +532,8 @@ GLOBAL_ENTRY(vmx_asm_mov_from_rr)
     add r27=VCPU_VRR0_OFS,r21
     mov b0=r16
     br.many b0
-    ;;   
-vmx_asm_mov_from_rr_back_1:  
+    ;;
+vmx_asm_mov_from_rr_back_1:
     adds r30=vmx_resume_to_guest-asm_mov_from_reg,r20
     adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
     shr.u r26=r19,61
@@ -158,475 +548,204 @@ END(vmx_asm_mov_from_rr)
 
 
 // mov rr[r3]=r2
-GLOBAL_ENTRY(vmx_asm_mov_to_rr)
-#ifndef ACCE_MOV_TO_RR
-    br.many vmx_virtualization_fault_back
-#endif
-    add r22=IA64_VCPU_RID_BITS_OFFSET,r21
-    extr.u r16=r25,20,7                // r3
-    extr.u r17=r25,13,7                // r2
-    ;;
+ENTRY(vmx_asm_mov_to_rr)
+    extr.u r16=r25,20,7         // r3
+    extr.u r17=r25,13,7         // r2
     movl r20=asm_mov_from_reg
     ;;
     adds r30=vmx_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
-    shladd r16=r16,4,r20       // get r3
-    mov r18=b0                 // save b0
-    ;;
-    add r27=VCPU_VRR0_OFS,r21
+    shladd r16=r16,4,r20        // get r3
+    ;;
     mov b0=r16
     br.many b0
-    ;;   
+    ;;
 vmx_asm_mov_to_rr_back_1:
     adds r30=vmx_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
-    shr.u r23=r19,61           // get RR #
-    shladd r17=r17,4,r20       // get r2
+    shr.u r16=r19,61            // get RR #
     ;;
     //if rr7, go back
-    cmp.eq p6,p0=7,r23
-    mov b0=r18                 // restore b0
+    cmp.eq p6,p0=7,r16
+    mov b0=r23// restore b0
     (p6) br.cond.dpnt.many vmx_virtualization_fault_back
     ;;
-    mov r28=r19                        // save r3
+    mov r16=r19
+    shladd r17=r17,4,r20        // get r2
+    ;;
     mov b0=r17
     br.many b0
-vmx_asm_mov_to_rr_back_2: 
-    adds r30=vmx_resume_to_guest-asm_mov_from_reg,r20
-    shladd r27=r23,3,r27       // address of VRR
-    ;;
-    ld1 r22=[r22]              // Load rid_bits from domain
-    mov b0=r18                 // restore b0
-    adds r16=IA64_VCPU_STARTING_RID_OFFSET,r21
-    ;;
-    ld4 r16=[r16]              // load starting_rid
-    extr.u r17=r19,8,24                // Extract RID
-    ;;
-    shr r17=r17,r22            // Shift out used bits
-    shl r16=r16,8
-    ;;
-    add r20=r19,r16
-    cmp.ne p6,p0=0,r17 // If reserved RID bits are set, use C fall back.
+vmx_asm_mov_to_rr_back_2:
+    mov r17=r19                 // get value
+    ;;
+    // if invalid value , go back
+    adds r26=IA64_VCPU_RID_BITS_OFFSET,r21
+    mov r27=r0
+    ;;
+    ld1 r27=[r26]
+    ;;
+    shr r19=r19,r27
+    ;;
+    cmp.ne p6,p0=r19,r0
+    mov b0=r23// restore b0
     (p6) br.cond.dpnt.many vmx_virtualization_fault_back
-    ;; //mangling rid 1 and 3
-    extr.u r16=r20,8,8
-    extr.u r17=r20,24,8
-    mov r24=r18                // saved b0 for resume
-    ;;
-    extr.u r18=r20,2,6 // page size
-    dep r20=r16,r20,24,8
-    mov b0=r30
-    ;;
-    dep r20=r17,r20,8,8
-    ;; //set ve 1
-    dep r20=-1,r20,0,1
-    // If ps > PAGE_SHIFT, use PAGE_SHIFT
-    cmp.lt p6,p0=PAGE_SHIFT,r18
-    ;;
-    (p6) mov r18=PAGE_SHIFT
-    ;;
-    (p6) dep r20=r18,r20,2,6
-    ;; 
-    st8 [r27]=r19      // Write to vrr.
-    // Write to save_rr if rr=0 or rr=4.
-    cmp.eq p6,p0=0,r23
-    ;;
-    cmp.eq.or p6,p0=4,r23
-    ;;
-    adds r16=IA64_VCPU_MMU_MODE_OFFSET,r21
-    (p6) adds r17=IA64_VCPU_META_SAVED_RR0_OFFSET,r21
-    ;;
-    ld1 r16=[r16]
-    cmp.eq p7,p0=r0,r0
-    (p6) shladd r17=r23,1,r17
-    ;;
-    (p6) st8 [r17]=r20
-    (p6) cmp.eq p7,p0=VMX_MMU_VIRTUAL,r16 // Set physical rr if in virt mode
-    ;;
-    (p7) mov rr[r28]=r20
-    br.many b0
+    ;;
+    VMX_VIRT_SAVE
+    ;;
+    mov out0=r21
+    mov out1=r16
+    mov out2=r17
+    movl r14=ia64_leave_hypervisor_virt
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_vcpu_set_rr_fast
 END(vmx_asm_mov_to_rr)
 
 
-//rsm 
-GLOBAL_ENTRY(vmx_asm_rsm)
-#ifndef ACCE_RSM
-    br.many vmx_virtualization_fault_back
-#endif
-    mov r23=r31
-    add r16=IA64_VPD_BASE_OFFSET,r21
+//rsm 25
+ENTRY(vmx_asm_rsm)
     extr.u r26=r25,6,21 // Imm21
     extr.u r27=r25,31,2 // I2d
     ;;
-    ld8 r16=[r16]
     extr.u r28=r25,36,1 // I
     dep r26=r27,r26,21,2
     ;;
-    add r17=VPD_VPSR_START_OFFSET,r16
     //r18 is imm24
-    dep r18=r28,r26,23,1
-    ;;
-    //sync read
-    mov r25=r16
-    movl r24=vmx_asm_rsm_sync_read_return
-    mov r20=b0
-    br.sptk.many vmx_vps_sync_read
-    ;;
-vmx_asm_rsm_sync_read_return:
-    ld8 r26=[r17]
-    // xenoprof
-    // Don't change mPSR.pp.
-    // It is manipulated by xenoprof.
-    movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_PP
-
-    sub r27=-1,r18 // ~imm24
-    ;;
-    or r28=r27,r28 // Keep IC,I,DT,SI
-    and r19=r26,r27 // Update vpsr
-    ;;
-    st8 [r17]=r19
-    mov r24=cr.ipsr
-    ;;
-    and r24=r24,r28 // Update ipsr
-    adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-    ;;
-    ld8 r27=[r27]
-    ;;
-    tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
-    ;;
-    (p8) dep r24=-1,r24,IA64_PSR_DFH_BIT,1  // Keep dfh
-    ;;
-    mov cr.ipsr=r24
-    //sync write
-    mov r25=r16
-    movl r24=vmx_asm_rsm_sync_write_return
-    br.sptk.many vmx_vps_sync_write
-    ;;
-vmx_asm_rsm_sync_write_return:
-    add r29=IA64_VCPU_MMU_MODE_OFFSET,r21
-    ;;
-    ld1 r27=[r29]
-    ;;
-    cmp.ne p6,p0=VMX_MMU_VIRTUAL,r27
-    ;;
-    tbit.z.or p6,p0=r18,IA64_PSR_DT_BIT
-    (p6) br.dptk vmx_asm_rsm_out
-    // DT not cleared or already in phy mode
-    ;;
-    // Switch to meta physical mode D.
-    add r26=IA64_VCPU_META_RID_D_OFFSET,r21
-    mov r27=VMX_MMU_PHY_D
+    dep r16=r28,r26,23,1
+    ;;
+    VMX_VIRT_SAVE
+    ;;
+    mov out0=r21
+    mov out1=r16
+    movl r14=ia64_leave_hypervisor_virt
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_vcpu_rsm_fast
+END(vmx_asm_rsm)
+
+
+//ssm 24
+ENTRY(vmx_asm_ssm)
+    adds r18=IA64_VPD_BASE_OFFSET,r21
+    ;;
+    ld8 r18=[r18]
+    ;;
+    adds r26=IA64_VPD_VHPI_OFFSET,r18
     ;;
     ld8 r26=[r26]
-    st1 [r29]=r27 
-    dep.z r28=4,61,3
-    ;;
-    mov rr[r0]=r26
-    ;;
-    mov rr[r28]=r26
-    ;;
-    srlz.d
-vmx_asm_rsm_out:       
-    mov r31=r23
-    mov r24=r20
-    br.many vmx_resume_to_guest
-END(vmx_asm_rsm)
-
-
-//ssm 
-GLOBAL_ENTRY(vmx_asm_ssm)
-#ifndef ACCE_SSM
-    br.many vmx_virtualization_fault_back
-#endif
-    mov r23=r31
-    add r16=IA64_VPD_BASE_OFFSET,r21
+    ;;
+    cmp.ne p6,p0=r26,r0
+    (p6) br.cond.dpnt.many vmx_virtualization_fault_back
+    ;;
     extr.u r26=r25,6,21
     extr.u r27=r25,31,2
     ;;
-    ld8 r16=[r16]
     extr.u r28=r25,36,1
     dep r26=r27,r26,21,2
     ;;  //r18 is imm24
-    dep r18=r28,r26,23,1
-    ;;  
-    //sync read
-    mov r25=r16
-    movl r24=vmx_asm_ssm_sync_read_return
-    mov r20=b0
-    br.sptk.many vmx_vps_sync_read
-    ;;
-vmx_asm_ssm_sync_read_return:
-    add r27=VPD_VPSR_START_OFFSET,r16
-    ;;
-    ld8 r17=[r27]              //r17 old vpsr
-    dep r28=0,r18,IA64_PSR_PP_BIT,1 // For xenoprof
-                                    // Don't change mPSR.pp
-                                    // It is maintained by xenoprof.
-    ;;
-    or r19=r17,r18             //r19 new vpsr
-    ;;
-    st8 [r27]=r19 // update vpsr
-    mov r24=cr.ipsr
-    ;;
-    or r24=r24,r28
-    ;;
-    mov cr.ipsr=r24
-    //sync_write
-    mov r25=r16
-    movl r24=vmx_asm_ssm_sync_write_return
-    br.sptk.many vmx_vps_sync_write
-    ;;
-vmx_asm_ssm_sync_write_return: 
-    add r29=IA64_VCPU_MMU_MODE_OFFSET,r21
-    movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-    ;;
-    ld1 r30=[r29] // mmu_mode
-    ;;
-    and r27=r28,r19
-    cmp.eq p6,p0=VMX_MMU_VIRTUAL,r30
-    ;;
-    cmp.ne.or p6,p0=r28,r27 // (vpsr & (it+dt+rt)) /= (it+dt+rt) ie stay in phy
-    (p6) br.dptk vmx_asm_ssm_1
-    ;;
-    add r26=IA64_VCPU_META_SAVED_RR0_OFFSET,r21
-    add r27=IA64_VCPU_META_SAVED_RR0_OFFSET+8,r21
-    mov r30=VMX_MMU_VIRTUAL
-    ;;
-    ld8 r26=[r26]
-    ld8 r27=[r27]
-    st1 [r29]=r30
-    dep.z r28=4,61,3
-    ;;
-    mov rr[r0]=r26
-    ;;
-    mov rr[r28]=r27
-    ;;
-    srlz.d
-    ;;
-vmx_asm_ssm_1:
-    tbit.nz p6,p0=r17,IA64_PSR_I_BIT
-    ;;
-    tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
-    (p6) br.dptk vmx_asm_ssm_out
-    ;;
-    add r29=VPD_VTPR_START_OFFSET,r16
-    add r30=VPD_VHPI_START_OFFSET,r16
-    ;;
-    ld8 r29=[r29]
-    ld8 r30=[r30]
-    ;;
-    extr.u r17=r29,4,4
-    extr.u r18=r29,16,1
-    ;;
-    dep r17=r18,r17,4,1
-    mov r31=r23
-    mov b0=r20
-    ;;
-    cmp.gt p6,p0=r30,r17
-    (p6) br.dpnt.few vmx_asm_dispatch_vexirq
-vmx_asm_ssm_out:       
-    mov r31=r23
-    mov r24=r20
-    br.many vmx_resume_to_guest
+    dep r16=r28,r26,23,1
+    ;;
+    VMX_VIRT_SAVE
+    ;;
+    mov out0=r21
+    mov out1=r16
+    movl r14=ia64_leave_hypervisor_virt
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_vcpu_ssm_fast
 END(vmx_asm_ssm)
 
 
-//mov psr.l=r2 
-GLOBAL_ENTRY(vmx_asm_mov_to_psr)
-#ifndef ACCE_MOV_TO_PSR
-    br.many vmx_virtualization_fault_back
-#endif
-    mov r23=r31
-    add r16=IA64_VPD_BASE_OFFSET,r21
+//mov psr.l=r2
+ENTRY(vmx_asm_mov_to_psr)
     extr.u r26=r25,13,7 //r2
-    ;;
-    ld8 r16=[r16]
-    movl r24=asm_mov_from_reg
-    ;;
-    adds r30=vmx_asm_mov_to_psr_back-asm_mov_from_reg,r24
-    shladd r26=r26,4,r24
-    mov r20=b0
+    movl r27=asm_mov_from_reg
+    ;;
+    adds r30=vmx_asm_mov_to_psr_back-asm_mov_from_reg,r27
+    shladd r26=r26,4,r27
     ;;
     mov b0=r26
     br.many b0
-    ;;   
+    ;;
 vmx_asm_mov_to_psr_back:
-    //sync read
-    mov r25=r16
-    movl r24=vmx_asm_mov_to_psr_sync_read_return
-    br.sptk.many vmx_vps_sync_read
-    ;;
-vmx_asm_mov_to_psr_sync_read_return:
-    add r27=VPD_VPSR_START_OFFSET,r16
-    ;;
-    ld8 r17=[r27] // r17 old vpsr
-    dep r19=0,r19,32,32 // Clear bits 32-63
-    ;;   
-    dep r18=0,r17,0,32
-    ;; 
-    or r18=r18,r19 //r18 new vpsr
-    ;;
-    st8 [r27]=r18 // set vpsr
-    //sync write
-    mov r25=r16
-    movl r24=vmx_asm_mov_to_psr_sync_write_return
-    br.sptk.many vmx_vps_sync_write
-    ;;
-vmx_asm_mov_to_psr_sync_write_return:
-    add r22=IA64_VCPU_MMU_MODE_OFFSET,r21
-    movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-    ;;
-    and r27=r28,r18
-    and r29=r28,r17
-    ;;
-    cmp.eq p5,p0=r29,r27 // (old_vpsr & (dt+rt+it)) == (new_vpsr & (dt+rt+it))
-    cmp.eq p6,p7=r28,r27 // (new_vpsr & (dt+rt+it)) == (dt+rt+it)
-    (p5) br.many vmx_asm_mov_to_psr_1 // no change
-    ;;
-    //virtual to physical D
-    (p7) add r26=IA64_VCPU_META_RID_D_OFFSET,r21
-    (p7) add r27=IA64_VCPU_META_RID_D_OFFSET,r21
-    (p7) mov r30=VMX_MMU_PHY_D
-    ;;
-    //physical to virtual
-    (p6) add r26=IA64_VCPU_META_SAVED_RR0_OFFSET,r21
-    (p6) add r27=IA64_VCPU_META_SAVED_RR0_OFFSET+8,r21
-    (p6) mov r30=VMX_MMU_VIRTUAL
+    adds r18=IA64_VPD_BASE_OFFSET,r21
+    tbit.nz p6,p0 = r19, IA64_PSR_I_BIT
+    ;;
+    ld8 r18=[r18]
+    ;;
+    adds r26=IA64_VPD_VHPI_OFFSET,r18
     ;;
     ld8 r26=[r26]
-    ld8 r27=[r27]
-    st1 [r22]=r30
-    dep.z r28=4,61,3
-    ;;
-    mov rr[r0]=r26
-    ;;
-    mov rr[r28]=r27
-    ;;
-    srlz.d
-    ;;
-vmx_asm_mov_to_psr_1:
-    mov r24=cr.ipsr
-    movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
-    ;;
-    tbit.nz p7,p0=r24,IA64_PSR_PP_BIT           // For xenoprof
-    or r27=r19,r28
-    dep r24=0,r24,0,32
-    ;;
-    add r24=r27,r24
-    ;;
-    adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-    (p7) dep r24=-1,r24,IA64_PSR_PP_BIT,1       // For xenoprof
-                                                // Dom't change mPSR.pp
-                                                // It is maintaned by xenoprof
-    ;;
-    ld8 r27=[r27]
-    ;;
-    tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
-    ;;
-    (p8) dep r24=-1,r24,IA64_PSR_DFH_BIT,1
-    ;;
-    mov cr.ipsr=r24
-    tbit.nz p6,p0=r17,IA64_PSR_I_BIT
-    ;;
-    tbit.z.or p6,p0=r18,IA64_PSR_I_BIT
-    (p6) br.dpnt.few vmx_asm_mov_to_psr_out
-    ;;
-    add r29=VPD_VTPR_START_OFFSET,r16
-    add r30=VPD_VHPI_START_OFFSET,r16
-    ;;
-    ld8 r29=[r29]
-    ld8 r30=[r30]
-    ;;
-    extr.u r17=r29,4,4
-    extr.u r18=r29,16,1
-    ;;
-    dep r17=r18,r17,4,1
-    mov r31=r23
-    mov b0=r20
-    ;;
-    cmp.gt p6,p0=r30,r17
-    (p6) br.dpnt.few vmx_asm_dispatch_vexirq
-vmx_asm_mov_to_psr_out:
-    mov r31=r23
-    mov r24=r20
-    br.many vmx_resume_to_guest
+    ;;
+    // if enable interrupt and vhpi has value, return
+    cmp.ne.and p6,p0=r26,r0
+    (p6) br.cond.dpnt.many vmx_virtualization_fault_back
+    ;;
+    mov r16=r19
+    ;;
+    VMX_VIRT_SAVE
+    ;;
+    mov out0=r21
+    mov out1=r16
+    movl r14=ia64_leave_hypervisor_virt
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=vmx_vcpu_mov_to_psr_fast
 END(vmx_asm_mov_to_psr)
 
-
-ENTRY(vmx_asm_dispatch_vexirq)
-//increment iip
-    mov r16=cr.ipsr
-    ;;
-    extr.u r17=r16,IA64_PSR_RI_BIT,2
-    tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-    ;; 
-    (p6) mov r18=cr.iip
-    (p6) mov r17=r0
-    (p7) add r17=1,r17
-    ;;    
-    (p6) add r18=0x10,r18
-    dep r16=r17,r16,IA64_PSR_RI_BIT,2
-    ;;         
-    (p6) mov cr.iip=r18
-    mov cr.ipsr=r16
-    br.many vmx_dispatch_vexirq
-END(vmx_asm_dispatch_vexirq)
 
 // thash r1=r3
 // TODO: add support when pta.vf = 1
-GLOBAL_ENTRY(vmx_asm_thash)
-#ifndef ACCE_THASH
-    br.many vmx_virtualization_fault_back
-#endif
-    extr.u r17=r25,20,7                // get r3 from opcode in r25 
-    extr.u r18=r25,6,7         // get r1 from opcode in r25
+ENTRY(vmx_asm_thash)
+    extr.u r17=r25,20,7                 // get r3 from opcode in r25
+    extr.u r18=r25,6,7                  // get r1 from opcode in r25
     movl r20=asm_mov_from_reg
     ;;
     adds r30=vmx_asm_thash_back1-asm_mov_from_reg,r20
-    shladd r17=r17,4,r20       // get addr of MOVE_FROM_REG(r17)
-    adds r16=IA64_VPD_BASE_OFFSET,r21  // get vcpu.arch.priveregs
-    mov r24=b0                 // save b0
-    ;;
-    ld8 r16=[r16]              // get VPD addr
+    shladd r17=r17,4,r20                // get addr of MOVE_FROM_REG(r17)
+    adds r16=IA64_VPD_BASE_OFFSET,r21   // get vcpu.arch.priveregs
+    mov r24=b0                          // save b0
+    ;;
+    ld8 r16=[r16]                       // get VPD addr
     mov b0=r17
-    br.many b0                 // r19 return value
-    ;;                                                     
+    br.many b0                          // r19 return value
+    ;;
 vmx_asm_thash_back1:
-    shr.u r23=r19,61           // get RR number
-    adds r28=VCPU_VRR0_OFS,r21 // get vcpu->arch.arch_vmx.vrr[0]'s addr
-    adds r16=IA64_VPD_VPTA_OFFSET,r16  // get virtual pta 
-    ;;
-    shladd r27=r23,3,r28       // get vcpu->arch.arch_vmx.vrr[r23]'s addr
-    ld8 r17=[r16]              // get virtual PTA
+    shr.u r23=r19,61                    // get RR number
+    adds r28=VCPU_VRR0_OFS,r21  // get vcpu->arch.arch_vmx.vrr[0]'s addr
+    adds r16=IA64_VPD_VPTA_OFFSET,r16   // get virtual pta
+    ;;
+    shladd r27=r23,3,r28        // get vcpu->arch.arch_vmx.vrr[r23]'s addr
+    ld8 r17=[r16]               // get virtual PTA
     mov r26=1
     ;;
-    extr.u r29=r17,2,6         // get pta.size
-    ld8 r28=[r27]              // get vcpu->arch.arch_vmx.vrr[r23]'s value
+    extr.u r29=r17,2,6// get pta.size
+    ld8 r28=[r27]               // get vcpu->arch.arch_vmx.vrr[r23]'s value
     ;;
     // Fall-back to C if VF (long format) is set
     tbit.nz p6,p0=r17,8
     mov b0=r24
     ;;
-(p6) mov r24=EVENT_THASH
-(p6) br.cond.dpnt.many vmx_virtualization_fault_back
-    extr.u r28=r28,2,6         // get rr.ps
-    shl r22=r26,r29            // 1UL << pta.size
-    ;;
-    shr.u r23=r19,r28          // vaddr >> rr.ps
-    adds r26=3,r29             // pta.size + 3 
-    shl r27=r17,3              // pta << 3 
-    ;;
-    shl r23=r23,3              // (vaddr >> rr.ps) << 3
-    shr.u r27=r27,r26          // (pta << 3) >> (pta.size+3)
+    (p6) mov r24=EVENT_THASH
+    (p6) br.cond.dpnt.many vmx_virtualization_fault_back
+    extr.u r28=r28,2,6      // get rr.ps
+    shl r22=r26,r29         // 1UL << pta.size
+    ;;
+    shr.u r23=r19,r28       // vaddr >> rr.ps
+    adds r26=3,r29          // pta.size + 3
+    shl r27=r17,3           // pta << 3
+    ;;
+    shl r23=r23,3           // (vaddr >> rr.ps) << 3
+    shr.u r27=r27,r26       // (pta << 3) >> (pta.size+3)
     movl r16=VRN_MASK
     ;;
-    adds r22=-1,r22            // (1UL << pta.size) - 1
-    shl r27=r27,r29            // ((pta<<3)>>(pta.size+3))<<pta.size
-    and r19=r19,r16            // vaddr & VRN_MASK
-    ;;
-    and r22=r22,r23            // vhpt_offset 
-    or r19=r19,r27             // (vadr&VRN_MASK) |(((pta<<3)>>(pta.size + 3))<<pta.size) 
+    adds r22=-1,r22         // (1UL << pta.size) - 1
+    shl r27=r27,r29         // ((pta<<3)>>(pta.size+3))<<pta.size
+    and r19=r19,r16         // vaddr & VRN_MASK
+    ;;
+    and r22=r22,r23         // vhpt_offset
+    or r19=r19,r27          // (vadr&VRN_MASK) |(((pta<<3)>>(pta.size + 3))<<pta.size)
     adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
     ;;
-    or r19=r19,r22             // calc pval
+    or r19=r19,r22          // calc pval
     shladd r17=r18,4,r26
     adds r30=vmx_resume_to_guest-asm_mov_from_reg,r20
     ;;
@@ -634,99 +753,101 @@ vmx_asm_thash_back1:
     br.many b0
 END(vmx_asm_thash)
 
-#define MOV_TO_REG0    \
-{;                     \
-    nop.b 0x0;         \
-    nop.b 0x0;         \
-    nop.b 0x0;         \
-    ;;                 \
+
+
+#define MOV_TO_REG0     \
+{;                      \
+    nop.b 0x0;          \
+    nop.b 0x0;          \
+    nop.b 0x0;          \
+    ;;                  \
 };
 
 
-#define MOV_TO_REG(n)  \
-{;                     \
-    mov r##n##=r19;    \
-    mov b0=r30;                \
-    br.sptk.many b0;   \
-    ;;                 \
+#define MOV_TO_REG(n)   \
+{;                      \
+    mov r##n##=r19;     \
+    mov b0=r30;         \
+    br.sptk.many b0;    \
+    ;;                  \
 };
 
 
-#define MOV_FROM_REG(n)        \
-{;                     \
-    mov r19=r##n##;    \
-    mov b0=r30;                \
-    br.sptk.many b0;   \
-    ;;                 \
+#define MOV_FROM_REG(n) \
+{;                      \
+    mov r19=r##n##;     \
+    mov b0=r30;         \
+    br.sptk.many b0;    \
+    ;;                  \
 };
 
 
-#define MOV_TO_BANK0_REG(n)                    \
-ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);    \
-{;                                             \
-    mov r26=r2;                                        \
-    mov r2=r19;                                        \
-    bsw.1;                                     \
-    ;;                                         \
-};                                             \
-{;                                             \
-    mov r##n##=r2;                             \
-    nop.b 0x0;                                 \
-    bsw.0;                                     \
-    ;;                                         \
-};                                             \
-{;                                             \
-    mov r2=r26;                                        \
-    mov b0=r30;                                        \
-    br.sptk.many b0;                           \
-    ;;                                         \
-};                                             \
+#define MOV_TO_BANK0_REG(n)                 \
+ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \
+{;                                          \
+    mov r26=r2;                             \
+    mov r2=r19;                             \
+    bsw.1;                                  \
+    ;;                                      \
+};                                          \
+{;                                          \
+    mov r##n##=r2;                          \
+    nop.b 0x0;                              \
+    bsw.0;                                  \
+    ;;                                      \
+};                                          \
+{;                                          \
+    mov r2=r26;                             \
+    mov b0=r30;                             \
+    br.sptk.many b0;                        \
+    ;;                                      \
+};                                          \
 END(asm_mov_to_bank0_reg##n##)
 
 
-#define MOV_FROM_BANK0_REG(n)                  \
-ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);  \
-{;                                             \
-    mov r26=r2;                                        \
-    nop.b 0x0;                                 \
-    bsw.1;                                     \
-    ;;                                         \
-};                                             \
-{;                                             \
-    mov r2=r##n##;                             \
-    nop.b 0x0;                                 \
-    bsw.0;                                     \
-    ;;                                         \
-};                                             \
-{;                                             \
-    mov r19=r2;                                        \
-    mov r2=r26;                                        \
-    mov b0=r30;                                        \
-};                                             \
-{;                                             \
-    nop.b 0x0;                                 \
-    nop.b 0x0;                                 \
-    br.sptk.many b0;                           \
-    ;;                                         \
-};                                             \
+#define MOV_FROM_BANK0_REG(n)                   \
+ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);   \
+{;                                              \
+    mov r26=r2;                                 \
+    nop.b 0x0;                                  \
+    bsw.1;                                      \
+    ;;                                          \
+};                                              \
+{;                                              \
+    mov r2=r##n##;                              \
+    nop.b 0x0;                                  \
+    bsw.0;                                      \
+    ;;                                          \
+};                                              \
+{;                                              \
+    mov r19=r2;                                 \
+    mov r2=r26;                                 \
+    mov b0=r30;                                 \
+};                                              \
+{;                                              \
+    nop.b 0x0;                                  \
+    nop.b 0x0;                                  \
+    br.sptk.many b0;                            \
+    ;;                                          \
+};                                              \
 END(asm_mov_from_bank0_reg##n##)
 
 
-#define JMP_TO_MOV_TO_BANK0_REG(n)             \
-{;                                             \
-    nop.b 0x0;                                 \
-    nop.b 0x0;                                 \
-    br.sptk.many asm_mov_to_bank0_reg##n##;    \
-    ;;                                         \
-}    
-
-
-#define JMP_TO_MOV_FROM_BANK0_REG(n)           \
-{;                                             \
-    nop.b 0x0;                                 \
-    nop.b 0x0;                                 \
-    br.sptk.many asm_mov_from_bank0_reg##n##;  \
-    ;;                                         \
+#define JMP_TO_MOV_TO_BANK0_REG(n)              \
+{;                                              \
+    nop.b 0x0;                                  \
+    nop.b 0x0;                                  \
+    br.sptk.many asm_mov_to_bank0_reg##n##;     \
+    ;;                                          \
+}
+
+
+#define JMP_TO_MOV_FROM_BANK0_REG(n)            \
+{;                                              \
+    nop.b 0x0;                                  \
+    nop.b 0x0;                                  \
+    br.sptk.many asm_mov_from_bank0_reg##n##;   \
+    ;;                                          \
 }
 
 
@@ -749,7 +870,7 @@ MOV_FROM_BANK0_REG(31)
 
 
 // mov from reg table
-// r19:        value, r30: return address
+// r19:value, r30: return address
 // r26 may be destroyed
 ENTRY(asm_mov_from_reg)
     MOV_FROM_REG(0)
@@ -884,29 +1005,30 @@ END(asm_mov_from_reg)
 
 
 /* must be in bank 0
- * parameter:
- * r31: pr
- * r24: b0
+ *  parameter:
+ *  r31: pr
+ *  r24: b0
+ *  p2: whether increase IP
+ *  p3: whether check vpsr.ic
  */
 ENTRY(vmx_resume_to_guest)
-    mov r16=cr.ipsr
-    ;;
+    // ip ++
+    (p2) mov r16=cr.ipsr
+    (p2)dep.z r30=1,IA64_PSR_RI_BIT,1
     adds r19=IA64_VPD_BASE_OFFSET,r21
-    extr.u r17=r16,IA64_PSR_RI_BIT,2
     ;;
     ld8 r25=[r19]
-    add r17=1,r17
-    ;;
+    (p2) add r16=r30,r16
+    ;;
+    (p2) mov cr.ipsr=r16
     adds r19= VPD_VPSR_START_OFFSET,r25
-    dep r16=r17,r16,IA64_PSR_RI_BIT,2
-    ;;
-    mov cr.ipsr=r16
+    ;;
     ld8 r19=[r19]
     ;;
     mov r23=r31
     mov r17=r0
     //vps_resume_normal/handler
-    tbit.z p6,p7 = r19,IA64_PSR_IC_BIT         // p1=vpsr.ic
+    tbit.z p6,p7 = r19,IA64_PSR_IC_BIT  // p7=vpsr.ic
     (p6) br.cond.sptk.many vmx_vps_resume_handler
     (p7) br.cond.sptk.few vmx_vps_resume_normal
 END(vmx_resume_to_guest)
@@ -931,7 +1053,7 @@ MOV_TO_BANK0_REG(31)
 
 
 // mov to reg table
-// r19:        value, r30: return address
+// r19:value, r30: return address
 ENTRY(asm_mov_to_reg)
     MOV_TO_REG0
     MOV_TO_REG(1)
diff -r b03e24f9c1d8 -r ef290f39ae6b xen/arch/ia64/vmx/vmx_ivt.S
--- a/xen/arch/ia64/vmx/vmx_ivt.S       Thu May 15 14:18:38 2008 +0900
+++ b/xen/arch/ia64/vmx/vmx_ivt.S       Thu May 15 14:53:48 2008 +0900
@@ -967,21 +967,13 @@ ENTRY(vmx_virtualization_fault)
 ENTRY(vmx_virtualization_fault)
 //    VMX_DBG_FAULT(37)
     mov r31=pr
-    ;;
-    cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
-    cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
-    cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
-    cmp.eq p9,p0=EVENT_RSM,r24
-    cmp.eq p10,p0=EVENT_SSM,r24
-    cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
-    cmp.eq p12,p0=EVENT_THASH,r24 
-    (p6) br.dptk.many vmx_asm_mov_from_ar
-    (p7) br.dptk.many vmx_asm_mov_from_rr
-    (p8) br.dptk.many vmx_asm_mov_to_rr
-    (p9) br.dptk.many vmx_asm_rsm
-    (p10) br.dptk.many vmx_asm_ssm
-    (p11) br.dptk.many vmx_asm_mov_to_psr
-    (p12) br.dptk.many vmx_asm_thash
+    movl r30 = virtualization_fault_table
+    mov r23=b0
+    ;;
+    shladd r30=r24,4,r30
+    ;;
+    mov b0=r30
+    br.sptk.many b0
     ;;
 vmx_virtualization_fault_back:
     mov r19=37
@@ -990,23 +982,6 @@ vmx_virtualization_fault_back:
     ;;
     st8 [r16] = r24
     st8 [r17] = r25
-    ;;
-    cmp.ne p6,p0=EVENT_RFI, r24
-    (p6) br.sptk vmx_dispatch_virtualization_fault
-    ;;
-    adds r18=IA64_VPD_BASE_OFFSET,r21
-    ;;
-    ld8 r18=[r18]
-    ;;
-    adds r18=IA64_VPD_VIFS_OFFSET,r18
-    ;;
-    ld8 r18=[r18]
-    ;;
-    tbit.z p6,p0=r18,63
-    (p6) br.sptk vmx_dispatch_virtualization_fault
-    ;;
-    //if vifs.v=1 desert current register frame
-    alloc r18=ar.pfs,0,0,0,0
     br.sptk vmx_dispatch_virtualization_fault
 END(vmx_virtualization_fault)
 
diff -r b03e24f9c1d8 -r ef290f39ae6b xen/arch/ia64/vmx/vmx_phy_mode.c
--- a/xen/arch/ia64/vmx/vmx_phy_mode.c  Thu May 15 14:18:38 2008 +0900
+++ b/xen/arch/ia64/vmx/vmx_phy_mode.c  Thu May 15 14:53:48 2008 +0900
@@ -228,6 +228,33 @@ static int mm_switch_action(IA64_PSR ops
     return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
 }
 
+/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
+ * so that no tlb miss is allowed.
+ */
+void
+switch_mm_mode_fast(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr)
+{
+    int act;
+    act = mm_switch_action(old_psr, new_psr);
+    switch (act) {
+    case SW_2P_DT:
+        vcpu->arch.arch_vmx.mmu_mode = VMX_MMU_PHY_DT;
+        switch_to_physical_rid(vcpu);
+        break;
+    case SW_2P_D:
+        vcpu->arch.arch_vmx.mmu_mode = VMX_MMU_PHY_D;
+        switch_to_physical_rid(vcpu);
+        break;
+    case SW_2V:
+        vcpu->arch.arch_vmx.mmu_mode = VMX_MMU_VIRTUAL;
+        switch_to_virtual_rid(vcpu);
+        break;
+    default:
+        break;
+    }
+    return;
+}
+
 void
 switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr)
 {
diff -r b03e24f9c1d8 -r ef290f39ae6b xen/arch/ia64/vmx/vmx_vcpu.c
--- a/xen/arch/ia64/vmx/vmx_vcpu.c      Thu May 15 14:18:38 2008 +0900
+++ b/xen/arch/ia64/vmx/vmx_vcpu.c      Thu May 15 14:53:48 2008 +0900
@@ -168,6 +168,34 @@ IA64FAULT vmx_vcpu_cover(VCPU *vcpu)
     return (IA64_NO_FAULT);
 }
 
+/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
+ * so that no tlb miss is allowed.
+ */
+void vmx_vcpu_set_rr_fast(VCPU *vcpu, u64 reg, u64 val)
+{
+    u64 rrval;
+
+    VMX(vcpu, vrr[reg >> VRN_SHIFT]) = val;
+    switch((u64)(reg >> VRN_SHIFT)) {
+    case VRN4:
+        rrval = vrrtomrr(vcpu, val);
+        vcpu->arch.metaphysical_saved_rr4 = rrval;
+        if (is_virtual_mode(vcpu) && likely(vcpu == current))
+            ia64_set_rr(reg, rrval);
+        break;
+    case VRN0:
+        rrval = vrrtomrr(vcpu, val);
+        vcpu->arch.metaphysical_saved_rr0 = rrval;
+        if (is_virtual_mode(vcpu) && likely(vcpu == current))
+            ia64_set_rr(reg, rrval);
+        break;
+    default:
+        if (likely(vcpu == current))
+            ia64_set_rr(reg, vrrtomrr(vcpu, val));
+        break;
+    }
+}
+
 IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, u64 reg, u64 val)
 {
     u64 rrval;
@@ -246,8 +274,138 @@ u64 vmx_vcpu_get_itir_on_fault(VCPU *vcp
     return (rr1.rrval);
 }
 
-
-
+/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
+ * so that no tlb miss is allowed.
+ */
+void vmx_vcpu_mov_to_psr_fast(VCPU *vcpu, u64 value)
+{
+    /* TODO: Only allowed for current vcpu */
+    u64 old_vpsr, new_vpsr, mipsr, mask;
+    old_vpsr = VCPU(vcpu, vpsr);
+
+    new_vpsr = (old_vpsr & 0xffffffff00000000) | (value & 0xffffffff);
+    VCPU(vcpu, vpsr) = new_vpsr;
+
+    mipsr = ia64_getreg(_IA64_REG_CR_IPSR);
+
+    /* xenoprof:
+     * don't change psr.pp.
+     * It is manipulated by xenoprof.
+     */
+    mask = 0xffffffff00000000 | IA64_PSR_IC | IA64_PSR_I 
+        | IA64_PSR_DT  | IA64_PSR_PP | IA64_PSR_SI | IA64_PSR_RT;
+
+    mipsr = (mipsr & mask) | (value & (~mask));
+
+    if (FP_PSR(vcpu) & IA64_PSR_DFH)
+         mipsr |= IA64_PSR_DFH;
+
+    ia64_setreg(_IA64_REG_CR_IPSR, mipsr);
+
+    switch_mm_mode_fast(vcpu, (IA64_PSR)old_vpsr, (IA64_PSR)new_vpsr);
+}
+
+#define IA64_PSR_MMU_VIRT (IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_IT)
+/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
+ * so that no tlb miss is allowed.
+ */
+void vmx_vcpu_rfi_fast(VCPU *vcpu)
+{
+    /* TODO: Only allowed for current vcpu */
+    u64 vifs, vipsr, vpsr, mipsr, mask;
+    vipsr = VCPU(vcpu, ipsr);
+    vpsr = VCPU(vcpu, vpsr);
+    vifs = VCPU(vcpu, ifs);
+    if (vipsr & IA64_PSR_BN) {
+        if(!(vpsr & IA64_PSR_BN))
+             vmx_asm_bsw1();
+    } else if (vpsr & IA64_PSR_BN)
+             vmx_asm_bsw0();
+
+    /*
+     *  For those IA64_PSR bits: id/da/dd/ss/ed/ia
+     *  Since these bits will become 0, after success execution of each
+     *  instruction, we will change set them to mIA64_PSR
+     */
+    VCPU(vcpu, vpsr) = vipsr & (~ (IA64_PSR_ID |IA64_PSR_DA 
+                | IA64_PSR_DD | IA64_PSR_ED | IA64_PSR_IA));    
+
+    /*
+     * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
+     * , except for the following bits:
+     * ic/i/dt/si/rt/mc/it/bn/vm
+     */
+    /* xenoprof */
+    mask = (IA64_PSR_IC | IA64_PSR_I | IA64_PSR_DT | IA64_PSR_SI |
+            IA64_PSR_RT | IA64_PSR_MC | IA64_PSR_IT | IA64_PSR_BN |
+            IA64_PSR_VM | IA64_PSR_PP);
+    mipsr = ia64_getreg(_IA64_REG_CR_IPSR);
+    mipsr = (mipsr & mask) | (vipsr & (~mask));
+
+    if (FP_PSR(vcpu) & IA64_PSR_DFH)
+         mipsr |= IA64_PSR_DFH;
+
+    ia64_setreg(_IA64_REG_CR_IPSR, mipsr);
+    vmx_ia64_set_dcr(vcpu);
+
+    if(vifs >> 63)
+        ia64_setreg(_IA64_REG_CR_IFS, vifs);
+
+    ia64_setreg(_IA64_REG_CR_IIP, VCPU(vcpu, iip));
+
+    switch_mm_mode_fast(vcpu, (IA64_PSR)vpsr, (IA64_PSR)vipsr);
+}
+
+/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
+ * so that no tlb miss is allowed.
+ */
+void vmx_vcpu_ssm_fast(VCPU *vcpu, u64 imm24)
+{
+    u64  old_vpsr, new_vpsr, mipsr;
+
+    old_vpsr = VCPU(vcpu, vpsr);
+    new_vpsr = old_vpsr | imm24;
+
+    VCPU(vcpu, vpsr) = new_vpsr;
+
+    mipsr = ia64_getreg(_IA64_REG_CR_IPSR);
+    /* xenoprof:
+     * don't change psr.pp.
+     * It is manipulated by xenoprof.
+     */
+    mipsr |= imm24 & (~IA64_PSR_PP);
+    ia64_setreg(_IA64_REG_CR_IPSR, mipsr);
+
+    switch_mm_mode_fast(vcpu, (IA64_PSR)old_vpsr, (IA64_PSR)new_vpsr);
+}
+
+/* In fast path, psr.ic = 0, psr.i = 0, psr.bn = 0
+ * so that no tlb miss is allowed.
+ */
+void vmx_vcpu_rsm_fast(VCPU *vcpu, u64 imm24)
+{
+    u64  old_vpsr, new_vpsr, mipsr;
+
+    old_vpsr = VCPU(vcpu, vpsr);
+    new_vpsr = old_vpsr & ~imm24;
+
+    VCPU(vcpu, vpsr) = new_vpsr;
+
+    mipsr = ia64_getreg(_IA64_REG_CR_IPSR);
+    /* xenoprof:
+     * don't change psr.pp.
+     * It is manipulated by xenoprof.
+     */
+    mipsr &= (~imm24) | IA64_PSR_PP;
+    mipsr |= IA64_PSR_IC | IA64_PSR_I | IA64_PSR_DT | IA64_PSR_SI;
+
+    if (FP_PSR(vcpu) & IA64_PSR_DFH)
+         mipsr |= IA64_PSR_DFH;
+
+    ia64_setreg(_IA64_REG_CR_IPSR, mipsr);
+
+    switch_mm_mode_fast(vcpu, (IA64_PSR)old_vpsr, (IA64_PSR)new_vpsr);
+}
 
 IA64FAULT vmx_vcpu_rfi(VCPU *vcpu)
 {
diff -r b03e24f9c1d8 -r ef290f39ae6b xen/include/asm-ia64/vmx_phy_mode.h
--- a/xen/include/asm-ia64/vmx_phy_mode.h       Thu May 15 14:18:38 2008 +0900
+++ b/xen/include/asm-ia64/vmx_phy_mode.h       Thu May 15 14:53:48 2008 +0900
@@ -79,7 +79,8 @@ extern void switch_to_physical_rid(VCPU 
 extern void switch_to_physical_rid(VCPU *);
 extern void switch_to_virtual_rid(VCPU *vcpu);
 extern void switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr);
-extern void check_mm_mode_switch (VCPU *vcpu,  IA64_PSR old_psr, IA64_PSR new_psr);
+extern void switch_mm_mode_fast(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr);
+extern void check_mm_mode_switch(VCPU *vcpu,  IA64_PSR old_psr, IA64_PSR new_psr);
 extern void prepare_if_physical_mode(VCPU *vcpu);
 extern void recover_if_physical_mode(VCPU *vcpu);
 extern void vmx_init_all_rr(VCPU *vcpu);
diff -r b03e24f9c1d8 -r ef290f39ae6b xen/include/asm-ia64/vmx_vcpu.h
--- a/xen/include/asm-ia64/vmx_vcpu.h   Thu May 15 14:18:38 2008 +0900
+++ b/xen/include/asm-ia64/vmx_vcpu.h   Thu May 15 14:53:48 2008 +0900
@@ -106,6 +106,8 @@ extern void vmx_switch_rr7(unsigned long
 extern void vmx_switch_rr7(unsigned long, void *, void *, void *);
 extern void vmx_ia64_set_dcr(VCPU * v);
 extern void inject_guest_interruption(struct vcpu *vcpu, u64 vec);
+extern void vmx_asm_bsw0(void);
+extern void vmx_asm_bsw1(void);
 
 /**************************************************************************
  VCPU control register access routines
