[Xen-changelog] [xen-unstable] x86/asm: allow some unlikely taken branches to be statically predicted this way

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86/asm: allow some unlikely taken branches to be statically predicted this way
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 23 Dec 2010 05:35:05 -0800
Delivery-date: Thu, 23 Dec 2010 05:47:00 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1292524640 0
# Node ID 8dc27840025c966cb57aaa1d56e61c69d5178189
# Parent  774908fc38221ccc106254c9a046ef3131482c4a
x86/asm: allow some unlikely taken branches to be statically predicted this way

... by moving the respective code out of line (into sub-section 1 of
the particular section). A few other branches could be eliminated
altogether.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/x86/x86_32/entry.S            |   41 ++++++++++++++++-----------------
 xen/arch/x86/x86_64/compat/entry.S     |    9 ++++---
 xen/arch/x86/x86_64/entry.S            |   17 ++++++++-----
 xen/include/asm-x86/asm_defns.h        |   14 +++++++++++
 xen/include/asm-x86/x86_32/asm_defns.h |    5 +++-
 xen/include/xen/stringify.h            |   12 +++++++++
 6 files changed, 66 insertions(+), 32 deletions(-)
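Before the per-file hunks: the new UNLIKELY_START/UNLIKELY_END macros (defined
in the xen/include/asm-x86/asm_defns.h hunk below) rely on GNU as subsections.
Hand-expanded for illustration only, a guarded region such as the hypercall
trace check assembles to roughly the following; the label spellings follow the
macro definitions:

        cmpb  $0,tb_init_done
        jne   .Lunlikely.trace      # forward branch, statically predicted not-taken
        .subsection 1               # emit the rare body out of line
.Lunlikely.trace:
        call  trace_hypercall       # cold (unlikely) path
        jmp   .Llikely.trace        # rejoin the hot path when done
        .subsection 0               # resume assembling the hot path
.Llikely.trace:

Because the conditional jump becomes a forward branch that is almost never
taken, CPUs falling back on static prediction guess it correctly, and the hot
path stays contiguous in the instruction stream.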

diff -r 774908fc3822 -r 8dc27840025c xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Thu Dec 16 18:25:33 2010 +0000
+++ b/xen/arch/x86/x86_32/entry.S       Thu Dec 16 18:37:20 2010 +0000
@@ -146,7 +146,7 @@ ENTRY(hypercall)
 ENTRY(hypercall)
         subl $4,%esp
         FIXUP_RING0_GUEST_STACK
-        SAVE_ALL(1f,1f)
+        SAVE_ALL(,1f)
 1:      sti
         GET_CURRENT(%ebx)
         cmpl  $NR_hypercalls,%eax
@@ -186,12 +186,14 @@ 1:      sti
 #define SHADOW_BYTES 24 /* 6 shadow parameters */
 #endif
         cmpb  $0,tb_init_done
-        je    1f
+UNLIKELY_START(ne, trace)
         call  trace_hypercall
         /* Now restore all the registers that trace_hypercall clobbered */
         movl  UREGS_eax+SHADOW_BYTES(%esp),%eax /* Hypercall # */
+UNLIKELY_END(trace)
+        call *hypercall_table(,%eax,4)
+        movl  %eax,UREGS_eax+SHADOW_BYTES(%esp) # save the return value
 #undef SHADOW_BYTES
-1:      call *hypercall_table(,%eax,4)
         addl  $24,%esp     # Discard the shadow parameters
 #ifndef NDEBUG
         /* Deliberately corrupt real parameter regs used by this hypercall. */
@@ -201,13 +203,10 @@ 1:      call *hypercall_table(,%eax,4)
         jne   skip_clobber # If EIP has changed then don't clobber
         movzb hypercall_args_table(,%ecx,1),%ecx
         movl  %esp,%edi
-        movl  %eax,%esi
         movl  $0xDEADBEEF,%eax
         rep   stosl
-        movl  %esi,%eax
 skip_clobber:
 #endif
-        movl %eax,UREGS_eax(%esp)       # save the return value
 
 test_all_events:
         xorl %ecx,%ecx
@@ -297,8 +296,8 @@ create_bounce_frame:
         jz   ring1 /* jump if returning to an existing ring-1 activation */
         movl VCPU_kernel_sp(%ebx),%esi
 .Lft6:  mov  VCPU_kernel_ss(%ebx),%gs
-        testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
-        jz   .Lnvm86_1
+        testl $X86_EFLAGS_VM,%ecx
+UNLIKELY_START(nz, bounce_vm86_1)
         subl $16,%esi       /* push ES/DS/FS/GS (VM86 stack frame) */
         movl UREGS_es+4(%esp),%eax
 .Lft7:  movl %eax,%gs:(%esi)
@@ -308,7 +307,7 @@ create_bounce_frame:
 .Lft9:  movl %eax,%gs:8(%esi)
         movl UREGS_gs+4(%esp),%eax
 .Lft10: movl %eax,%gs:12(%esi)
-.Lnvm86_1:
+UNLIKELY_END(bounce_vm86_1)
         subl $8,%esi        /* push SS/ESP (inter-priv iret) */
         movl UREGS_esp+4(%esp),%eax
 .Lft11: movl %eax,%gs:(%esi)
@@ -350,17 +349,10 @@ 1:      /* Construct a stack frame: EFLA
         movl TRAPBOUNCE_error_code(%edx),%eax
 .Lft17: movl %eax,%gs:(%esi)
 1:      testb $TBF_FAILSAFE,%cl
-        jz   2f
+UNLIKELY_START(nz, bounce_failsafe)
         subl $16,%esi                # add DS/ES/FS/GS to failsafe stack frame
         testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
-        jz   .Lnvm86_2
-        xorl %eax,%eax               # VM86: we write zero selector values
-.Lft18: movl %eax,%gs:(%esi)
-.Lft19: movl %eax,%gs:4(%esi)
-.Lft20: movl %eax,%gs:8(%esi)
-.Lft21: movl %eax,%gs:12(%esi)
-        jmp  2f
-.Lnvm86_2:
+        jnz  .Lvm86_2
         movl UREGS_ds+4(%esp),%eax   # non-VM86: write real selector values
 .Lft22: movl %eax,%gs:(%esi)
         movl UREGS_es+4(%esp),%eax
@@ -369,13 +361,22 @@ 1:      testb $TBF_FAILSAFE,%cl
 .Lft24: movl %eax,%gs:8(%esi)
         movl UREGS_gs+4(%esp),%eax
 .Lft25: movl %eax,%gs:12(%esi)
-2:      testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
-        jz   .Lnvm86_3
+        jmp  .Lnvm86_3
+.Lvm86_2:
+        xorl %eax,%eax               # VM86: we write zero selector values
+.Lft18: movl %eax,%gs:(%esi)
+.Lft19: movl %eax,%gs:4(%esi)
+.Lft20: movl %eax,%gs:8(%esi)
+.Lft21: movl %eax,%gs:12(%esi)
+UNLIKELY_END(bounce_failsafe)
+        testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
+UNLIKELY_START(nz, bounce_vm86_3)
         xorl %eax,%eax      /* zero DS-GS, just as a real CPU would */
         movl %eax,UREGS_ds+4(%esp)
         movl %eax,UREGS_es+4(%esp)
         movl %eax,UREGS_fs+4(%esp)
         movl %eax,UREGS_gs+4(%esp)
+UNLIKELY_END(bounce_vm86_3)
 .Lnvm86_3:
         /* Rewrite our stack frame and return to ring 1. */
         /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
diff -r 774908fc3822 -r 8dc27840025c xen/arch/x86/x86_64/compat/entry.S
--- a/xen/arch/x86/x86_64/compat/entry.S        Thu Dec 16 18:25:33 2010 +0000
+++ b/xen/arch/x86/x86_64/compat/entry.S        Thu Dec 16 18:37:20 2010 +0000
@@ -49,7 +49,7 @@ ENTRY(compat_hypercall)
 #define SHADOW_BYTES 0  /* No on-stack shadow state */
 #endif
         cmpb  $0,tb_init_done(%rip)
-        je    1f
+UNLIKELY_START(ne, compat_trace)
         call  trace_hypercall
         /* Now restore all the registers that trace_hypercall clobbered */
         movl  UREGS_rax+SHADOW_BYTES(%rsp),%eax   /* Hypercall #  */
@@ -60,7 +60,8 @@ ENTRY(compat_hypercall)
         movl  UREGS_rdi+SHADOW_BYTES(%rsp),%r8d   /* Arg 5        */
         movl  UREGS_rbp+SHADOW_BYTES(%rsp),%r9d   /* Arg 6        */
 #undef SHADOW_BYTES
-1:      leaq  compat_hypercall_table(%rip),%r10
+UNLIKELY_END(compat_trace)
+        leaq  compat_hypercall_table(%rip),%r10
         PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
@@ -299,7 +300,7 @@ 2:
 .Lft8:  movl  %eax,%fs:(%rsi)           # ERROR CODE
 1:
         testb $TBF_FAILSAFE,%cl
-        jz    2f
+UNLIKELY_START(nz, compat_bounce_failsafe)
         subl  $4*4,%esi
         movl  %gs,%eax
 .Lft9:  movl  %eax,%fs:3*4(%rsi)        # GS
@@ -308,7 +309,7 @@ 1:
 .Lft11: movl  %eax,%fs:1*4(%rsi)        # ES
         movl  %ds,%eax
 .Lft12: movl  %eax,%fs:0*4(%rsi)        # DS
-2:
+UNLIKELY_END(compat_bounce_failsafe)
         /* Rewrite our stack frame and return to guest-OS mode. */
         /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
         andl  $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\
diff -r 774908fc3822 -r 8dc27840025c xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Thu Dec 16 18:25:33 2010 +0000
+++ b/xen/arch/x86/x86_64/entry.S       Thu Dec 16 18:37:20 2010 +0000
@@ -152,7 +152,7 @@ ENTRY(syscall_enter)
 #define SHADOW_BYTES 0  /* No on-stack shadow state */
 #endif
         cmpb  $0,tb_init_done(%rip)
-        je    1f
+UNLIKELY_START(ne, trace)
         call  trace_hypercall
         /* Now restore all the registers that trace_hypercall clobbered */
         movq  UREGS_rax+SHADOW_BYTES(%rsp),%rax   /* Hypercall #  */
@@ -163,7 +163,8 @@ ENTRY(syscall_enter)
         movq  UREGS_r8 +SHADOW_BYTES(%rsp),%r8    /* Arg 5        */
         movq  UREGS_r9 +SHADOW_BYTES(%rsp),%r9    /* Arg 6        */
 #undef SHADOW_BYTES
-1:      leaq  hypercall_table(%rip),%r10
+UNLIKELY_END(trace)
+        leaq  hypercall_table(%rip),%r10
         PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
@@ -345,11 +346,12 @@ 2:      andq  $~0xf,%rsi                
 2:      andq  $~0xf,%rsi                # Stack frames are 16-byte aligned.
         movq  $HYPERVISOR_VIRT_START,%rax
         cmpq  %rax,%rsi
-        jb    1f                        # In +ve address space? Then okay.
         movq  $HYPERVISOR_VIRT_END+60,%rax
+        sbb   %ecx,%ecx                 # In +ve address space? Then okay.
         cmpq  %rax,%rsi
-        jb    domain_crash_synchronous  # Above Xen private area? Then okay.
-1:      movb  TRAPBOUNCE_flags(%rdx),%cl
+        adc   %ecx,%ecx                 # Above Xen private area? Then okay.
+        jg    domain_crash_synchronous
+        movb  TRAPBOUNCE_flags(%rdx),%cl
         subq  $40,%rsi
         movq  UREGS_ss+8(%rsp),%rax
 .Lft2:  movq  %rax,32(%rsi)             # SS
@@ -380,7 +382,7 @@ 1:      movb  TRAPBOUNCE_flags(%rdx),%cl
         movl  TRAPBOUNCE_error_code(%rdx),%eax
 .Lft7:  movq  %rax,(%rsi)               # ERROR CODE
 1:      testb $TBF_FAILSAFE,%cl
-        jz    2f
+UNLIKELY_START(nz, bounce_failsafe)
         subq  $32,%rsi
         movl  %gs,%eax
 .Lft8:  movq  %rax,24(%rsi)             # GS
@@ -390,7 +392,8 @@ 1:      testb $TBF_FAILSAFE,%cl
 .Lft10: movq  %rax,8(%rsi)              # ES
         movl  %ds,%eax
 .Lft11: movq  %rax,(%rsi)               # DS
-2:      subq  $16,%rsi
+UNLIKELY_END(bounce_failsafe)
+        subq  $16,%rsi
         movq  UREGS_r11+8(%rsp),%rax
 .Lft12: movq  %rax,8(%rsi)              # R11
         movq  UREGS_rcx+8(%rsp),%rax
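The stack-bounds hunk above is one of the branches "eliminated altogether":
the two conditional jumps of the old range check become carry-flag arithmetic.
An annotated reading of the new sequence (illustrative only; movq does not
alter flags, so each sbb/adc still consumes the carry from the preceding
cmpq):

        movq  $HYPERVISOR_VIRT_START,%rax
        cmpq  %rax,%rsi                     # CF = (%rsi < VIRT_START)
        movq  $HYPERVISOR_VIRT_END+60,%rax
        sbb   %ecx,%ecx                     # %ecx = -1 if below the Xen area, else 0
        cmpq  %rax,%rsi                     # CF = (%rsi < VIRT_END+60)
        adc   %ecx,%ecx                     # %ecx = 2*%ecx + CF
        jg    domain_crash_synchronous      # positive only for the bad range

A frame pointer below the Xen area leaves %ecx at -1, one above it at 0, and
only one inside the hypervisor's private range yields +1, so a single signed
jg replaces both of the old jb branches.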
diff -r 774908fc3822 -r 8dc27840025c xen/include/asm-x86/asm_defns.h
--- a/xen/include/asm-x86/asm_defns.h   Thu Dec 16 18:25:33 2010 +0000
+++ b/xen/include/asm-x86/asm_defns.h   Thu Dec 16 18:37:20 2010 +0000
@@ -12,4 +12,18 @@
 #include <asm/x86_32/asm_defns.h>
 #endif
 
+#ifdef __ASSEMBLY__
+
+#define UNLIKELY_START(cond, tag) \
+        j##cond .Lunlikely.tag;   \
+        .subsection 1;            \
+        .Lunlikely.tag:
+
+#define UNLIKELY_END(tag)         \
+        jmp .Llikely.tag;         \
+        .subsection 0;            \
+        .Llikely.tag:
+
+#endif
+
 #endif /* __X86_ASM_DEFNS_H__ */
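For reference, a minimal sketch of how new code would use the pair; the flag
and helper names here are hypothetical, not part of the patch:

        cmpb  $0,some_rare_flag(%rip)   # hypothetical condition, almost never set
UNLIKELY_START(ne, rare_path)
        call  handle_rare_case          # hypothetical out-of-line helper
UNLIKELY_END(rare_path)
        # hot path continues here

The tag must be unique within the file, since it is spliced into the
.Lunlikely/.Llikely label names.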
diff -r 774908fc3822 -r 8dc27840025c xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h    Thu Dec 16 18:25:33 2010 +0000
+++ b/xen/include/asm-x86/x86_32/asm_defns.h    Thu Dec 16 18:37:20 2010 +0000
@@ -1,6 +1,7 @@
 #ifndef __X86_32_ASM_DEFNS_H__
 #define __X86_32_ASM_DEFNS_H__
 
+#include <xen/stringify.h>
 #include <asm/percpu.h>
 
 #ifdef CONFIG_FRAME_POINTER
@@ -53,12 +54,14 @@ 1:      addl  $4,%esp;
         mov   %es,%esi;                                 \
         mov   $(__HYPERVISOR_DS),%ecx;                  \
         jnz   86f;                                      \
-        .text 1;                                        \
+        .subsection 1;                                  \
         86:   call setup_vm86_frame;                    \
         jmp   vm86_lbl;                                 \
         .previous;                                      \
+        .ifnes __stringify(xen_lbl), "";                \
         testb $3,UREGS_cs(%esp);                        \
         jz    xen_lbl;                                  \
+        .endif;                                         \
         /*                                              \
          * We are the outermost Xen context, but our    \
          * life is complicated by NMIs and MCEs. These  \
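The .ifnes/__stringify() guard above is what lets a caller omit the label
argument entirely, as the hypercall entry now does with SAVE_ALL(,1f) in the
first hunk. Roughly, under GNU as semantics, the two expansions look like:

        .ifnes "xen_lbl", ""            # non-empty argument: guard passes,
        testb $3,UREGS_cs(%esp)         # test and jump are assembled
        jz    xen_lbl
        .endif

        .ifnes "", ""                   # empty argument: "" equals "", so the
        testb $3,UREGS_cs(%esp)         # guarded lines (including the jump
        jz                              # with a now-empty target) are skipped
        .endif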
diff -r 774908fc3822 -r 8dc27840025c xen/include/xen/stringify.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/xen/stringify.h       Thu Dec 16 18:37:20 2010 +0000
@@ -0,0 +1,12 @@
+#ifndef __XEN_STRINGIFY_H
+#define __XEN_STRINGIFY_H
+
+/* Indirect stringification.  Doing two levels allows the parameter to be a
+ * macro itself.  For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ */
+
+#define __stringify_1(x...) #x
+#define __stringify(x...)   __stringify_1(x)
+
+#endif /* !__XEN_STRINGIFY_H */
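The indirection matters because # stringizes its argument before macro
expansion; the extra level forces expansion first. A standalone illustration,
mirroring the header's own FOO example (e.g. built with -DFOO=bar):

    #define __stringify_1(x...) #x
    #define __stringify(x...)   __stringify_1(x)

    const char *direct   = __stringify_1(FOO); /* "FOO": stringized as written */
    const char *indirect = __stringify(FOO);   /* "bar": FOO expands first */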

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
