WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Add fast path for thash hyperprivop

# HG changeset patch
# User djm@xxxxxxxxxxxxxxx
# Node ID 89d92ce1092462f1999221d2615a9976d78bd17b
# Parent  0e774127646895866311f9f617b38577891bf9b9

Add fast path for thash hyperprivop
Add fast path for ptc.ga hyperprivop
Add fast path for iaccess/daccess reflect

diff -r 0e7741276468 -r 89d92ce10924 xen/arch/ia64/hyperprivop.S
--- a/xen/arch/ia64/hyperprivop.S       Sat Jul  9 14:36:13 2005
+++ b/xen/arch/ia64/hyperprivop.S       Sat Jul  9 14:37:13 2005
@@ -124,6 +124,10 @@
        // HYPERPRIVOP_ITC_I?
        cmp.eq p7,p6=XEN_HYPER_ITC_I,r17
 (p7)   br.sptk.many hyper_itc_i;;
+
+       // HYPERPRIVOP_THASH?
+       cmp.eq p7,p6=XEN_HYPER_THASH,r17
+(p7)   br.sptk.many hyper_thash;;
 
        // if not one of the above, give up for now and do it the slow way
        br.sptk.many dispatch_break_fault ;;
@@ -440,7 +444,6 @@
 END(fast_tick_reflect)
 
 // reflect domain breaks directly to domain
-// FIXME: DOES NOT WORK YET
 //     r16 == cr.isr
 //     r17 == cr.iim
 //     r18 == XSI_PSR_IC
@@ -471,15 +474,30 @@
        cmp.eq p7,p0=r22,r17;
 (p7)   br.spnt.few dispatch_break_fault ;;
 #endif
-#ifdef FAST_REFLECT_CNT
-       movl r20=fast_reflect_count+((0x2c00>>8)*8);;
-       ld8 r21=[r20];;
-       adds r21=1,r21;;
-       st8 [r20]=r21;;
-#endif
+       movl r20=0x2c00;
        // save iim in shared_info
        adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r17;;
+       // fall through
+
+
+// reflect to domain ivt+r20
+// sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
+//     r16 == cr.isr
+//     r18 == XSI_PSR_IC
+//     r20 == offset into ivt
+//     r29 == iip
+//     r30 == ipsr
+//     r31 == pr
+ENTRY(fast_reflect)
+#ifdef FAST_REFLECT_CNT
+       movl r22=fast_reflect_count;
+       shr r23=r20,5;;
+       add r22=r22,r23;;
+       ld8 r21=[r22];;
+       adds r21=1,r21;;
+       st8 [r22]=r21;;
+#endif
        // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
        adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r29;;
@@ -504,9 +522,9 @@
        or r30=r30,r28;;
        and r30=r30,r27;;
        // also set shared_mem ipsr.i and ipsr.ic appropriately
-       ld8 r20=[r18];;
-       extr.u r22=r20,32,32
-       cmp4.eq p6,p7=r20,r0;;
+       ld8 r24=[r18];;
+       extr.u r22=r24,32,32
+       cmp4.eq p6,p7=r24,r0;;
 (p6)   dep r30=0,r30,IA64_PSR_IC_BIT,1
 (p7)   dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;;
        cmp4.eq p6,p7=r22,r0;;
@@ -520,13 +538,13 @@
        // cover and set shared_mem precover_ifs to cr.ifs
        // set shared_mem ifs and incomplete_regframe to 0
        cover ;;
-       mov r20=cr.ifs;;
+       mov r24=cr.ifs;;
        adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;;
        st4 [r21]=r0 ;;
        adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r0 ;;
        adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
-       st8 [r21]=r20 ;;
+       st8 [r21]=r24 ;;
        // vpsr.i = vpsr.ic = 0 on delivery of interruption
        st8 [r18]=r0;;
        // FIXME: need to save iipa and isr to be arch-compliant
@@ -534,22 +552,30 @@
        mov r22=IA64_KR(CURRENT);;
        adds r22=IA64_VCPU_IVA_OFFSET,r22;;
        ld8 r23=[r22];;
-       movl r24=0x2c00;;
-       add r24=r24,r23;;
-       mov cr.iip=r24;;
+       add r20=r20,r23;;
+       mov cr.iip=r20;;
        // OK, now all set to go except for switch to virtual bank0
        mov r30=r2; mov r29=r3;;
        adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18;
        adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;;
        bsw.1;;
-       st8 [r2]=r16,16; st8 [r3]=r17,16 ;;
-       st8 [r2]=r18,16; st8 [r3]=r19,16 ;;
-       st8 [r2]=r20,16; st8 [r3]=r21,16 ;;
-       st8 [r2]=r22,16; st8 [r3]=r23,16 ;;
-       st8 [r2]=r24,16; st8 [r3]=r25,16 ;;
-       st8 [r2]=r26,16; st8 [r3]=r27,16 ;;
-       st8 [r2]=r28,16; st8 [r3]=r29,16 ;;
-       st8 [r2]=r30,16; st8 [r3]=r31,16 ;;
+       // FIXME: need to handle ar.unat!
+       .mem.offset 0,0; st8.spill [r2]=r16,16;
+       .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r18,16;
+       .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r20,16;
+       .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r22,16;
+       .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r24,16;
+       .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r26,16;
+       .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r28,16;
+       .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
+       .mem.offset 0,0; st8.spill [r2]=r30,16;
+       .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
        movl r31=XSI_IPSR;;
        bsw.0 ;;
        mov r2=r30; mov r3=r29;;
@@ -558,6 +584,41 @@
        mov pr=r31,-1 ;;
        rfi
        ;;
+
+// reflect access faults (0x2400,0x2800,0x5300) directly to domain
+//     r16 == isr
+//     r17 == ifa
+//     r19 == reflect number (only pass-thru to dispatch_reflection)
+//     r20 == offset into ivt
+//     r31 == pr
+GLOBAL_ENTRY(fast_access_reflect)
+       mov r30=cr.ipsr;;
+       mov r29=cr.iip;;
+       extr.u r21=r30,IA64_PSR_BE_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       extr.u r21=r30,IA64_PSR_PP_BIT,1 ;;
+       cmp.ne p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
+       cmp.eq p7,p0=r21,r0
+(p7)   br.spnt.few dispatch_reflection ;;
+       movl r18=XSI_PSR_IC;;
+       ld8 r21=[r18];;
+       cmp.eq p7,p0=r0,r21
+(p7)   br.spnt.few dispatch_reflection ;;
+       // set shared_mem ifa, FIXME: should we validate it?
+       mov r17=cr.ifa;;
+       adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; 
+       st8 [r21]=r17 ;;
+       // get rr[ifa] and save to itir in shared memory (extra bits ignored)
+       shr.u r22=r17,61
+       adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 
+       adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
+       shladd r22=r22,3,r21;;
+       ld8 r22=[r22];;
+       st8 [r23]=r22;;
+       br.cond.sptk.many fast_reflect;;
 
 
 // ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
@@ -1312,8 +1373,146 @@
        ;;
 END(hyper_set_rr)
 
+// this routine was derived from optimized assembly output from
+// vcpu_thash so it is dense and difficult to read but it works
+// On entry:
+//     r18 == XSI_PSR_IC
+//     r31 == pr
+GLOBAL_ENTRY(hyper_thash)
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       shr.u r20 = r8, 61
+       addl r25 = 1, r0
+       movl r17 = 0xe000000000000000
+       ;;
+       and r21 = r17, r8               // VHPT_Addr1
+       ;;
+       shladd r28 = r20, 3, r18
+       adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
+       ;;
+       adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
+       addl r28 = 32767, r0
+       ld8 r24 = [r19]                 // pta
+       ;;
+       ld8 r23 = [r27]                 // rrs[vadr>>61]
+       extr.u r26 = r24, 2, 6
+       ;;
+       extr.u r22 = r23, 2, 6
+       shl r30 = r25, r26
+       ;;
+       shr.u r19 = r8, r22
+       shr.u r29 = r24, 15
+       ;;
+       adds r17 = -1, r30
+       ;;
+       shladd r27 = r19, 3, r0
+       extr.u r26 = r17, 15, 46
+       ;;
+       andcm r24 = r29, r26
+       and r19 = r28, r27
+       shr.u r25 = r27, 15
+       ;;
+       and r23 = r26, r25
+       ;;
+       or r22 = r24, r23
+       ;;
+       dep.z r20 = r22, 15, 46
+       ;;
+       or r16 = r20, r21
+       ;;
+       or r8 = r19, r16
+       // done, update iip/ipsr to next instruction
+       mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       extr.u r26=r24,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_thash)
+
 ENTRY(hyper_ptc_ga)
-       br.spnt.many dispatch_break_fault ;;
+#ifdef CONFIG_SMP
+FIXME: ptc.ga instruction requires spinlock for SMP
+#endif
+       // FIXME: validate not flushing Xen addresses
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       mov r28=r8
+       extr.u r19=r9,2,6               // addr_range=1<<((r9&0xfc)>>2)
+       mov r20=1
+       shr.u r24=r8,61
+       addl r27=56,r0                  // PAGE_SHIFT<<2 (for ptc.ga)
+       movl r26=0x8000000000000000     // INVALID_TI_TAG
+       mov r30=ar.lc
+       ;;
+       shl r19=r20,r19
+       cmp.eq p7,p0=7,r24
+(p7)   br.spnt.many dispatch_break_fault ;;    // slow way for rr7
+       ;;
+       cmp.le p7,p0=r19,r0             // skip flush if size<=0
+(p7)   br.cond.dpnt 2f ;;
+       extr.u r24=r19,0,PAGE_SHIFT
+       shr.u r23=r19,PAGE_SHIFT ;;     // repeat loop for n pages
+       cmp.ne p7,p0=r24,r0 ;;
+(p7)   adds r23=1,r23 ;;               // n_pages<size<n_pages+1? extra iter
+       mov ar.lc=r23
+       movl r29=PAGE_SIZE;;
+1:
+       thash r25=r28 ;;
+       adds r25=16,r25 ;;
+       ld8 r24=[r25] ;;
+       // FIXME: should check if tag matches, not just blow it away
+       or r24=r26,r24 ;;               // vhpt_entry->ti_tag = 1
+       st8 [r25]=r24
+       ptc.ga r28,r27 ;;
+       srlz.i ;;
+       add r28=r29,r28
+       br.cloop.sptk.few 1b
+       ;;
+2:
+       mov ar.lc=r30 ;;
+       mov r29=cr.ipsr
+       mov r30=cr.iip;;
+       mov r27=IA64_KR(CURRENT);;
+       adds r25=IA64_VCPU_DTLB_OFFSET,r27
+       adds r26=IA64_VCPU_ITLB_OFFSET,r27;;
+       ld8 r24=[r25]
+       ld8 r27=[r26] ;;
+       and r24=-2,r24
+       and r27=-2,r27 ;;
+       st8 [r25]=r24                   // set 1-entry i/dtlb as not present
+       st8 [r26]=r27 ;;
+       // increment to point to next instruction
+       extr.u r26=r29,41,2 ;;
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r30=16,r30
+(p7)   adds r26=1,r26
+       ;;
+       dep r29=r26,r29,41,2
+       ;;
+       mov cr.ipsr=r29
+       mov cr.iip=r30
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
 END(hyper_ptc_ga)
 
 ENTRY(hyper_itc_d)
@@ -1323,5 +1522,3 @@
 ENTRY(hyper_itc_i)
        br.spnt.many dispatch_break_fault ;;
 END(hyper_itc_i)
-
-// ignore me
diff -r 0e7741276468 -r 89d92ce10924 xen/arch/ia64/ivt.S
--- a/xen/arch/ia64/ivt.S       Sat Jul  9 14:36:13 2005
+++ b/xen/arch/ia64/ivt.S       Sat Jul  9 14:37:13 2005
@@ -666,7 +666,12 @@
 // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
 ENTRY(iaccess_bit)
 #ifdef XEN
-       REFLECT(9)
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=9
+       movl r20=0x2400
+       br.sptk.many fast_access_reflect;;
 #endif
        DBG_FAULT(9)
        // Like Entry 8, except for instruction access
@@ -734,7 +739,12 @@
 // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
 ENTRY(daccess_bit)
 #ifdef XEN
-       REFLECT(10)
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=10
+       movl r20=0x2800
+       br.sptk.many fast_access_reflect;;
 #endif
        DBG_FAULT(10)
        // Like Entry 8, except for data access
@@ -1395,7 +1405,12 @@
 // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
 ENTRY(daccess_rights)
 #ifdef XEN
-       REFLECT(23)
+       mov r31=pr;
+       mov r16=cr.isr
+       mov r17=cr.ifa
+       mov r19=23
+       movl r20=0x5300
+       br.sptk.many fast_access_reflect;;
 #endif
        DBG_FAULT(23)
        mov r16=cr.ifa
@@ -1821,7 +1836,7 @@
 
 #ifdef XEN
        .org ia64_ivt+0x8000
-ENTRY(dispatch_reflection)
+GLOBAL_ENTRY(dispatch_reflection)
        /*
         * Input:
         *      psr.ic: off
diff -r 0e7741276468 -r 89d92ce10924 xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Sat Jul  9 14:36:13 2005
+++ b/xen/arch/ia64/asm-offsets.c       Sat Jul  9 14:37:13 2005
@@ -46,6 +46,8 @@
        DEFINE(XSI_PSR_IC, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, 
arch.interrupt_collection_enabled)));
        DEFINE(XSI_PSR_I_OFS, offsetof(vcpu_info_t, 
arch.interrupt_delivery_enabled));
        DEFINE(XSI_IIP_OFS, offsetof(vcpu_info_t, arch.iip));
+       DEFINE(XSI_IFA_OFS, offsetof(vcpu_info_t, arch.ifa));
+       DEFINE(XSI_ITIR_OFS, offsetof(vcpu_info_t, arch.itir));
        DEFINE(XSI_IPSR, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.ipsr)));
        DEFINE(XSI_IPSR_OFS, offsetof(vcpu_info_t, arch.ipsr));
        DEFINE(XSI_IFS_OFS, offsetof(vcpu_info_t, arch.ifs));
@@ -61,6 +63,7 @@
        DEFINE(XSI_PEND_OFS, offsetof(vcpu_info_t, arch.pending_interruption));
        DEFINE(XSI_RR0_OFS, offsetof(vcpu_info_t, arch.rrs[0]));
        DEFINE(XSI_TPR_OFS, offsetof(vcpu_info_t, arch.tpr));
+       DEFINE(XSI_PTA_OFS, offsetof (vcpu_info_t, arch.pta));
        DEFINE(XSI_ITV_OFS, offsetof(vcpu_info_t, arch.itv));
        //DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, 
blocked));
        //DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, 
clear_child_tid));
@@ -85,6 +88,8 @@
        DEFINE(IA64_VCPU_ENDING_RID_OFFSET, offsetof (struct vcpu, 
arch.ending_rid));
        DEFINE(IA64_VCPU_DOMAIN_ITM_OFFSET, offsetof (struct vcpu, 
arch.domain_itm));
        DEFINE(IA64_VCPU_DOMAIN_ITM_LAST_OFFSET, offsetof (struct vcpu, 
arch.domain_itm_last));
+       DEFINE(IA64_VCPU_ITLB_OFFSET, offsetof (struct vcpu, arch.itlb));
+       DEFINE(IA64_VCPU_DTLB_OFFSET, offsetof (struct vcpu, arch.dtlb));
 
        BLANK();
        DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, 
itm_next));

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

[Prev in Thread] Current Thread [Next in Thread]
  • [Xen-changelog] Add fast path for thash hyperprivop, Xen patchbot -unstable <=