# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Date 1185767134 -32400
# Node ID 82cecfd99f1b9a418c109f7e47a0d6ba4e1ff4a6
# Parent 4492a0285bae734ee18f6acbb6b3f9c80f153be7
# Review notes (annotations only; no code token of the patch is changed):
#  - This copy of the patch had its line breaks collapsed. The unified-diff
#    layout below is reconstructed so that every hunk matches the line
#    counts in its @@ header; indentation (tabs) is a best-effort guess.
#  - The patch adds HYPERPRIVOP_SET_RR0_TO_RR4 (HYPERPRIVOP_START + 0x19).
#    It takes five values in r8, r9, r10, r11 and r14 and installs them as
#    region registers rr0..rr4 in a single hyperprivop.
#  - hyperprivop.S gains the assembly fast path hyper_set_rr0_to_rr4;
#    privop.c routes the same number to the C routine vcpu_set_rr0_to_rr4().
#  - The dispatch compares in fast_hyperprivop now write their second
#    predicate result to p0 instead of p6.
#  - Explanatory comments were added only at the end of '+' lines, so the
#    hunk line counts are unaffected.
implement set_rr0_to_rr4 hyperprivop to reduce linux context switch hyperprivop

PATCHNAME: set_rr0_to_rr4_hyperprivop

Signed-off-by: Isaku Yamahata

diff -r 4492a0285bae -r 82cecfd99f1b xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S	Fri Jul 27 08:15:16 2007 -0600
+++ b/xen/arch/ia64/xen/hyperprivop.S	Mon Jul 30 12:45:34 2007 +0900
@@ -41,6 +41,7 @@
 # define FAST_SSM_I
 # define FAST_PTC_GA
 # undef RFI_TO_INTERRUPT // not working yet
+# define FAST_SET_RR0_TO_RR4	// enable the assembly fast path in hyper_set_rr0_to_rr4
 #endif
 
 #ifdef CONFIG_SMP
@@ -76,7 +77,7 @@
 //	r16 == cr.isr
 //	r17 == cr.iim
 //	r18 == XSI_PSR_IC_OFS
-//	r19 == vpsr.ic
+//	r19 == ipsr.cpl
 //	r31 == pr
 GLOBAL_ENTRY(fast_hyperprivop)
 	// HYPERPRIVOP_SSM_I?
@@ -108,62 +109,67 @@ GLOBAL_ENTRY(fast_hyperprivop)
 	;;
 
 	// HYPERPRIVOP_COVER?
-	cmp.eq p7,p6=HYPERPRIVOP_COVER,r17
+	cmp.eq p7,p0=HYPERPRIVOP_COVER,r17
 (p7)	br.sptk.many hyper_cover
 	;;
 
 	// HYPERPRIVOP_SSM_DT?
-	cmp.eq p7,p6=HYPERPRIVOP_SSM_DT,r17
+	cmp.eq p7,p0=HYPERPRIVOP_SSM_DT,r17
 (p7)	br.sptk.many hyper_ssm_dt
 	;;
 
 	// HYPERPRIVOP_RSM_DT?
-	cmp.eq p7,p6=HYPERPRIVOP_RSM_DT,r17
+	cmp.eq p7,p0=HYPERPRIVOP_RSM_DT,r17
 (p7)	br.sptk.many hyper_rsm_dt
 	;;
 
 	// HYPERPRIVOP_SET_ITM?
-	cmp.eq p7,p6=HYPERPRIVOP_SET_ITM,r17
+	cmp.eq p7,p0=HYPERPRIVOP_SET_ITM,r17
 (p7)	br.sptk.many hyper_set_itm
 	;;
 
+	// HYPERPRIVOP_SET_RR0_TO_RR4?
+	cmp.eq p7,p0=HYPERPRIVOP_SET_RR0_TO_RR4,r17	// r17 == cr.iim
+(p7)	br.sptk.many hyper_set_rr0_to_rr4
+	;;
+
 	// HYPERPRIVOP_SET_RR?
-	cmp.eq p7,p6=HYPERPRIVOP_SET_RR,r17
+	cmp.eq p7,p0=HYPERPRIVOP_SET_RR,r17
 (p7)	br.sptk.many hyper_set_rr
 	;;
 
 	// HYPERPRIVOP_GET_RR?
-	cmp.eq p7,p6=HYPERPRIVOP_GET_RR,r17
+	cmp.eq p7,p0=HYPERPRIVOP_GET_RR,r17
 (p7)	br.sptk.many hyper_get_rr
 	;;
 
 	// HYPERPRIVOP_GET_PSR?
-	cmp.eq p7,p6=HYPERPRIVOP_GET_PSR,r17
+	cmp.eq p7,p0=HYPERPRIVOP_GET_PSR,r17
 (p7)	br.sptk.many hyper_get_psr
 	;;
 
 	// HYPERPRIVOP_PTC_GA?
-	cmp.eq p7,p6=HYPERPRIVOP_PTC_GA,r17
+	cmp.eq p7,p0=HYPERPRIVOP_PTC_GA,r17
 (p7)	br.sptk.many hyper_ptc_ga
 	;;
 
 	// HYPERPRIVOP_ITC_D?
-	cmp.eq p7,p6=HYPERPRIVOP_ITC_D,r17
+	cmp.eq p7,p0=HYPERPRIVOP_ITC_D,r17
 (p7)	br.sptk.many hyper_itc_d
 	;;
 
 	// HYPERPRIVOP_ITC_I?
-	cmp.eq p7,p6=HYPERPRIVOP_ITC_I,r17
+	cmp.eq p7,p0=HYPERPRIVOP_ITC_I,r17
 (p7)	br.sptk.many hyper_itc_i
 	;;
 
 	// HYPERPRIVOP_THASH?
-	cmp.eq p7,p6=HYPERPRIVOP_THASH,r17
+	cmp.eq p7,p0=HYPERPRIVOP_THASH,r17
 (p7)	br.sptk.many hyper_thash
 	;;
 
 	// HYPERPRIVOP_SET_KR?
-	cmp.eq p7,p6=HYPERPRIVOP_SET_KR,r17
+	cmp.eq p7,p0=HYPERPRIVOP_SET_KR,r17
 (p7)	br.sptk.many hyper_set_kr
 	;;
 
@@ -208,7 +214,7 @@ END(fast_hyperprivop)
 //	r16 == cr.isr
 //	r17 == cr.iim
 //	r18 == XSI_PSR_IC
-//	r19 == vpsr.ic
+//	r19 == ipsr.cpl
 //	r31 == pr
 ENTRY(hyper_ssm_i)
 #ifndef FAST_SSM_I
@@ -545,7 +551,7 @@ END(fast_tick_reflect)
 //	r16 == cr.isr
 //	r17 == cr.iim
 //	r18 == XSI_PSR_IC
-//	r19 == vpsr.ic
+//	r19 == ipsr.cpl
 //	r31 == pr
 GLOBAL_ENTRY(fast_break_reflect)
 #ifndef FAST_BREAK // see beginning of file
@@ -1643,6 +1649,244 @@ 1: mov r24=cr.ipsr
 	rfi
 	;;
 END(hyper_set_rr)
+
+// r8 = val0
+// r9 = val1
+// r10 = val2
+// r11 = val3
+// r14 = val4
+// mov rr[0x0000000000000000UL] = r8
+// mov rr[0x2000000000000000UL] = r9
+// mov rr[0x4000000000000000UL] = r10
+// mov rr[0x6000000000000000UL] = r11
+// mov rr[0x8000000000000000UL] = r14
+ENTRY(hyper_set_rr0_to_rr4)
+#ifndef FAST_SET_RR0_TO_RR4
+	br.spnt.few dispatch_break_fault ;;	// fast path compiled out: hand off to dispatch_break_fault
+#endif
+#ifdef FAST_HYPERPRIVOP_CNT
+	movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_RR0_TO_RR4);;	// r20 = address of the counter for this hyperprivop
+	ld4 r21=[r20];;
+	adds r21=1,r21;;	// counter++
+	st4 [r20]=r21;;
+#endif
+	movl r17=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
+	ld8 r17=[r17];;	// r17 = current
+
+	adds r21=IA64_VCPU_STARTING_RID_OFFSET,r17	// r21 = &current->starting_rid
+	adds r25=IA64_VCPU_ENDING_RID_OFFSET,r17	// r25 = &current->ending_rid
+	;;
+	ld4 r22=[r21]	// r22 = current->starting_rid
+	extr.u r26=r8,8,24	// r26 = r8.rid
+	extr.u r27=r9,8,24	// r27 = r9.rid
+	ld4 r23=[r25]	// r23 = current->ending_rid
+	extr.u r28=r10,8,24	// r28 = r10.rid
+	extr.u r29=r11,8,24	// r29 = r11.rid
+	adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r17	// r24 = address for the "save for metaphysical" store below
+	extr.u r30=r14,8,24	// r30 = r14.rid
+	;;
+	add r16=r26,r22	// r16 = r8.rid + starting_rid
+	add r17=r27,r22	// r17 = r9.rid + starting_rid (current is no longer needed)
+	add r19=r28,r22	// r19 = r10.rid + starting_rid
+	add r20=r29,r22	// r20 = r11.rid + starting_rid
+	add r21=r30,r22	// r21 = r14.rid + starting_rid
+	;;
+	cmp.geu p6,p0=r16,r23	// if r8.rid + starting_rid >= ending_rid
+	cmp.geu p7,p0=r17,r23	// if r9.rid + starting_rid >= ending_rid
+	cmp.geu p8,p0=r19,r23	// if r10.rid + starting_rid >= ending_rid
+(p6)	br.cond.spnt.few 1f	// this is an error, but just ignore/return
+(p7)	br.cond.spnt.few 1f	// this is an error, but just ignore/return
+	cmp.geu p9,p0=r20,r23	// if r11.rid + starting_rid >= ending_rid
+(p8)	br.cond.spnt.few 1f	// this is an error, but just ignore/return
+(p9)	br.cond.spnt.few 1f	// this is an error, but just ignore/return
+	cmp.geu p10,p0=r21,r23	// if r14.rid + starting_rid >= ending_rid
+(p10)	br.cond.spnt.few 1f	// this is an error, but just ignore/return
+
+	mov r25=1
+	adds r22=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18	// r22 = &current->rrs[0] (r18 == XSI_PSR_IC)
+	;;
+	shl r30=r25,61	// r30 = 0x2000000000000000
+
+#if 0	// straight-line reference version, compiled out
+	// simple plain version
+	// rr0
+	st8 [r22]=r8, 8	// current->rrs[0] = r8
+
+	mov r26=0	// r26=0x0000000000000000
+	extr.u r27=r16,0,8
+	extr.u r28=r16,8,8
+	extr.u r29=r16,16,8;;
+	dep.z r23=PAGE_SHIFT,2,6;;
+	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
+	dep r23=r27,r23,24,8;;
+	dep r23=r28,r23,16,8;;
+	dep r23=r29,r23,8,8;;
+	st8 [r24]=r23	// save for metaphysical
+	mov rr[r26]=r23
+	dv_serialize_data
+
+	// rr1
+	st8 [r22]=r9, 8	// current->rrs[1] = r9
+	add r26=r26,r30	// r26 = 0x2000000000000000
+	extr.u r27=r17,0,8
+	extr.u r28=r17,8,8
+	extr.u r29=r17,16,8;;
+	dep.z r23=PAGE_SHIFT,2,6;;
+	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
+	dep r23=r27,r23,24,8;;
+	dep r23=r28,r23,16,8;;
+	dep r23=r29,r23,8,8;;
+	mov rr[r26]=r23
+	dv_serialize_data
+
+	// rr2
+	st8 [r22]=r10, 8	// current->rrs[2] = r10
+	add r26=r26,r30	// r26 = 0x4000000000000000
+	extr.u r27=r19,0,8
+	extr.u r28=r19,8,8
+	extr.u r29=r19,16,8;;
+	dep.z r23=PAGE_SHIFT,2,6;;
+	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
+	dep r23=r27,r23,24,8;;
+	dep r23=r28,r23,16,8;;
+	dep r23=r29,r23,8,8;;
+	mov rr[r26]=r23
+	dv_serialize_data
+
+	// rr3
+	st8 [r22]=r11, 8	// current->rrs[3] = r11
+
+	add r26=r26,r30	// r26 = 0x6000000000000000
+	extr.u r27=r20,0,8
+	extr.u r28=r20,8,8
+	extr.u r29=r20,16,8;;
+	dep.z r23=PAGE_SHIFT,2,6;;
+	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
+	dep r23=r27,r23,24,8;;
+	dep r23=r28,r23,16,8;;
+	dep r23=r29,r23,8,8;;
+	mov rr[r26]=r23
+	dv_serialize_data
+
+	// rr4
+	st8 [r22]=r14	// current->rrs[4] = r14
+
+	add r26=r26,r30	// r26 = 0x8000000000000000
+	extr.u r27=r21,0,8
+	extr.u r28=r21,8,8
+	extr.u r29=r21,16,8;;
+	dep.z r23=PAGE_SHIFT,2,6;;
+	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
+	dep r23=r27,r23,24,8;;
+	dep r23=r28,r23,16,8;;
+	dep r23=r29,r23,8,8;;
+	mov rr[r26]=r23
+	dv_serialize_data
+#else	// same five register writes as above, with the work for adjacent regions interleaved
+	// shuffled version
+	// rr0
+	// uses r27, r28, r29 for mangling
+	// r23 for mangled value
+	st8 [r22]=r8, 8	// current->rrs[0] = r8
+	mov r26=0	// r26=0x0000000000000000
+	extr.u r27=r16,0,8	// r27 = byte 0 of (r8.rid + starting_rid)
+	extr.u r28=r16,8,8	// r28 = byte 1
+	extr.u r29=r16,16,8	// r29 = byte 2
+	dep.z r23=PAGE_SHIFT,2,6;;	// r23 bits 2-7 = PAGE_SHIFT, all other bits 0
+	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
+	extr.u r25=r17,0,8	// (for rr1) r25 = byte 0 of (r9.rid + starting_rid)
+	dep r23=r27,r23,24,8;;	// byte 0 -> bits 24-31
+	dep r23=r28,r23,16,8;;	// byte 1 -> bits 16-23
+	dep r23=r29,r23,8,8;;	// byte 2 -> bits 8-15
+	st8 [r24]=r23	// save for metaphysical
+	mov rr[r26]=r23	// rr0 = mangled value
+	dv_serialize_data
+
+	// r16, r24, r25 is usable.
+	// rr1
+	// uses r25, r28, r29 for mangling
+	// r23 for mangled value
+	extr.u r28=r17,8,8	// r28 = byte 1 of (r9.rid + starting_rid)
+	st8 [r22]=r9, 8	// current->rrs[1] = r9
+	extr.u r29=r17,16,8 ;;	// r29 = byte 2
+	dep.z r23=PAGE_SHIFT,2,6;;
+	add r26=r26,r30	// r26 = 0x2000000000000000
+	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
+	extr.u r24=r19,8,8	// (for rr2) r24 = byte 1 of (r10.rid + starting_rid)
+	extr.u r16=r19,0,8	// (for rr2) r16 = byte 0
+	dep r23=r25,r23,24,8;;
+	dep r23=r28,r23,16,8;;
+	dep r23=r29,r23,8,8;;
+	mov rr[r26]=r23	// rr1 = mangled value
+	dv_serialize_data
+
+	// r16, r17, r24, r25 is usable
+	// rr2
+	// uses r16, r24, r29 for mangling
+	// r17 for mangled value
+	extr.u r29=r19,16,8	// r29 = byte 2 of (r10.rid + starting_rid)
+	extr.u r27=r20,0,8	// (for rr3) r27 = byte 0 of (r11.rid + starting_rid)
+	st8 [r22]=r10, 8	// current->rrs[2] = r10
+	dep.z r17=PAGE_SHIFT,2,6;;
+	add r26=r26,r30	// r26 = 0x4000000000000000
+	dep r17=-1,r17,0,1;;	// mangling is swapping bytes 1 & 3
+	dep r17=r16,r17,24,8;;
+	dep r17=r24,r17,16,8;;
+	dep r17=r29,r17,8,8;;
+	mov rr[r26]=r17	// rr2 = mangled value
+	dv_serialize_data
+
+	// r16, r17, r19, r24, r25 is usable
+	// rr3
+	// uses r27, r28, r29 for mangling
+	// r23 for mangled value
+	extr.u r28=r20,8,8	// r28 = byte 1 of (r11.rid + starting_rid)
+	extr.u r29=r20,16,8	// r29 = byte 2
+	st8 [r22]=r11, 8	// current->rrs[3] = r11
+	extr.u r16=r21,0,8	// (for rr4) r16 = byte 0 of (r14.rid + starting_rid)
+	dep.z r23=PAGE_SHIFT,2,6;;
+	add r26=r26,r30	// r26 = 0x6000000000000000
+	dep r23=-1,r23,0,1;;	// mangling is swapping bytes 1 & 3
+	dep r23=r27,r23,24,8;;
+	dep r23=r28,r23,16,8;;
+	dep r23=r29,r23,8,8;;
+	mov rr[r26]=r23	// rr3 = mangled value
+	dv_serialize_data
+
+	// r16, r17, r19, r20, r24, r25
+	// rr4
+	// uses r16, r17, r24 for mangling
+	// r25 for mangled value
+	extr.u r17=r21,8,8	// r17 = byte 1 of (r14.rid + starting_rid)
+	extr.u r24=r21,16,8	// r24 = byte 2
+	st8 [r22]=r14	// current->rrs[4] = r14
+	dep.z r25=PAGE_SHIFT,2,6;;
+	add r26=r26,r30	// r26 = 0x8000000000000000
+	dep r25=-1,r25,0,1;;	// mangling is swapping bytes 1 & 3
+	dep r25=r16,r25,24,8;;
+	dep r25=r17,r25,16,8;;
+	dep r25=r24,r25,8,8;;
+	mov rr[r26]=r25	// rr4 = mangled value
+	dv_serialize_data
+#endif
+
+	// done, mosey on back
+1:	mov r24=cr.ipsr
+	mov r25=cr.iip;;
+	extr.u r26=r24,41,2 ;;	// r26 = ipsr bits 41-42 (instruction slot number, ipsr.ri)
+	cmp.eq p6,p7=2,r26 ;;	// was it the last slot of the bundle?
+(p6)	mov r26=0	// yes: resume at slot 0 ...
+(p6)	adds r25=16,r25	// ... of the next 16-byte bundle
+(p7)	adds r26=1,r26	// no: resume at the next slot
+	;;
+	dep r24=r26,r24,41,2	// write the new slot number back into ipsr
+	;;
+	mov cr.ipsr=r24
+	mov cr.iip=r25
+	mov pr=r31,-1 ;;	// restore predicates (r31 == pr on entry)
+	rfi
+	;;
+END(hyper_set_rr0_to_rr4)
 
 ENTRY(hyper_set_kr)
 	extr.u r25=r8,3,61;;
diff -r 4492a0285bae -r 82cecfd99f1b xen/arch/ia64/xen/privop.c
--- a/xen/arch/ia64/xen/privop.c	Fri Jul 27 08:15:16 2007 -0600
+++ b/xen/arch/ia64/xen/privop.c	Mon Jul 30 12:45:34 2007 +0900
@@ -895,6 +895,10 @@ int ia64_hyperprivop(unsigned long iim, 
 		vcpu_get_psr_masked(v, &val);
 		regs->r8 = val;
 		return 1;
+	case HYPERPRIVOP_SET_RR0_TO_RR4:	/* C counterpart of hyper_set_rr0_to_rr4 */
+		vcpu_set_rr0_to_rr4(v, regs->r8, regs->r9, regs->r10,
+				    regs->r11, regs->r14);	/* return value is not checked */
+		return 1;
 	}
 	return 0;
 }
diff -r 4492a0285bae -r 82cecfd99f1b xen/arch/ia64/xen/vcpu.c
--- a/xen/arch/ia64/xen/vcpu.c	Fri Jul 27 08:15:16 2007 -0600
+++ b/xen/arch/ia64/xen/vcpu.c	Mon Jul 30 12:45:34 2007 +0900
@@ -2051,6 +2051,30 @@ IA64FAULT vcpu_get_rr(VCPU * vcpu, u64 r
 	return IA64_NO_FAULT;
 }
 
+IA64FAULT vcpu_set_rr0_to_rr4(VCPU * vcpu, u64 val0, u64 val1, u64 val2,
+			      u64 val3, u64 val4)
+{	/* store val0..val4 into rrs[0..4] of vcpu; always returns IA64_NO_FAULT */
+	u64 reg0 = 0x0000000000000000UL;	/* reg0 >> 61 == 0 */
+	u64 reg1 = 0x2000000000000000UL;	/* reg1 >> 61 == 1 */
+	u64 reg2 = 0x4000000000000000UL;	/* reg2 >> 61 == 2 */
+	u64 reg3 = 0x6000000000000000UL;	/* reg3 >> 61 == 3 */
+	u64 reg4 = 0x8000000000000000UL;	/* reg4 >> 61 == 4 */
+
+	PSCB(vcpu, rrs)[reg0 >> 61] = val0;	/* record the values exactly as passed in */
+	PSCB(vcpu, rrs)[reg1 >> 61] = val1;
+	PSCB(vcpu, rrs)[reg2 >> 61] = val2;
+	PSCB(vcpu, rrs)[reg3 >> 61] = val3;
+	PSCB(vcpu, rrs)[reg4 >> 61] = val4;
+	if (vcpu == current) {	/* set_one_rr() is called only for the current vcpu */
+		set_one_rr(reg0, val0);	/* NOTE(review): result ignored - confirm set_one_rr() cannot fail here */
+		set_one_rr(reg1, val1);
+		set_one_rr(reg2, val2);
+		set_one_rr(reg3, val3);
+		set_one_rr(reg4, val4);
+	}
+	return IA64_NO_FAULT;
+}
+
 /**************************************************************************
  VCPU protection key register access routines
 **************************************************************************/
diff -r 4492a0285bae -r 82cecfd99f1b xen/include/asm-ia64/vcpu.h
--- a/xen/include/asm-ia64/vcpu.h	Fri Jul 27 08:15:16 2007 -0600
+++ b/xen/include/asm-ia64/vcpu.h	Mon Jul 30 12:45:34 2007 +0900
@@ -124,6 +124,8 @@ extern IA64FAULT vcpu_set_rr(VCPU * vcpu
 extern IA64FAULT vcpu_set_rr(VCPU * vcpu, u64 reg, u64 val);
 extern IA64FAULT vcpu_get_rr(VCPU * vcpu, u64 reg, u64 * pval);
 extern IA64FAULT vcpu_get_rr_ve(VCPU * vcpu, u64 vadr);
+extern IA64FAULT vcpu_set_rr0_to_rr4(VCPU * vcpu, u64 val0, u64 val1,
+				     u64 val2, u64 val3, u64 val4);	/* valN is the new value for region register N */
 /* protection key registers */
 extern IA64FAULT vcpu_get_pkr(VCPU * vcpu, u64 reg, u64 * pval);
 extern IA64FAULT vcpu_set_pkr(VCPU * vcpu, u64 reg, u64 val);
diff -r 4492a0285bae -r 82cecfd99f1b xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h	Fri Jul 27 08:15:16 2007 -0600
+++ b/xen/include/public/arch-ia64.h	Mon Jul 30 12:45:34 2007 +0900
@@ -544,7 +544,8 @@ struct xen_ia64_boot_param {
 #define HYPERPRIVOP_SET_EFLAG		(HYPERPRIVOP_START + 0x16)
 #define HYPERPRIVOP_RSM_BE		(HYPERPRIVOP_START + 0x17)
 #define HYPERPRIVOP_GET_PSR		(HYPERPRIVOP_START + 0x18)
-#define HYPERPRIVOP_MAX			(0x19)
+#define HYPERPRIVOP_SET_RR0_TO_RR4	(HYPERPRIVOP_START + 0x19)	/* values in r8, r9, r10, r11, r14 */
+#define HYPERPRIVOP_MAX			(0x1a)	/* one more than the highest offset above */
 
 /* Fast and light hypercalls. */
 #define __HYPERVISOR_ia64_fast_eoi	__HYPERVISOR_arch_1