[Xen-devel] [PATCH, RFC 4/5] x86: avoid unlikely taken forward branches

To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH, RFC 4/5] x86: avoid unlikely taken forward branches
From: "Jan Beulich" <JBeulich@xxxxxxxxxx>
Date: Wed, 22 Dec 2010 12:17:42 +0000
... since most CPUs statically predict forward branches as not taken, so a
forward branch that is taken on the common path gets mispredicted, and the
unlikely code it jumps over inflates the cache footprint of the hot path.
This mostly concerns hypercall tracing and vm86 mode handling.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
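
For illustration, here is roughly what the new UNLIKELY_START()/UNLIKELY_END()
macros (defined in asm_defns.h below) expand to, using the 32-bit hypercall
tracing check as the example. The unlikely body is assembled into .subsection
1, so the common case becomes a straight fall-through and the guarding forward
branch is normally not taken:

        cmpb    $0,tb_init_done
        jne     .Lunlikely.trace        # forward branch, not taken normally
        .subsection 1                   # unlikely body assembled out of line
.Lunlikely.trace:
        call    trace_hypercall
        movl    UREGS_eax+SHADOW_BYTES(%esp),%eax
        jmp     .Llikely.trace          # rejoin the main path
        .subsection 0
.Llikely.trace:                         # common path continues here
        call    *hypercall_table(,%eax,4)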

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1330,7 +1330,7 @@ asmlinkage void do_page_fault(struct cpu
  * during early boot (an issue was seen once, but was most likely a hardware 
  * problem).
  */
-asmlinkage void do_early_page_fault(struct cpu_user_regs *regs)
+asmlinkage void __init do_early_page_fault(struct cpu_user_regs *regs)
 {
     static int stuck;
     static unsigned long prev_eip, prev_cr2;
--- a/xen/arch/x86/x86_32/entry.S
+++ b/xen/arch/x86/x86_32/entry.S
@@ -142,7 +142,7 @@ restore_all_xen:
 ENTRY(hypercall)
         subl $4,%esp
         FIXUP_RING0_GUEST_STACK
-        SAVE_ALL(1f,1f)
+        SAVE_ALL(,1f)
 1:      sti
         GET_CURRENT(%ebx)
         cmpl  $NR_hypercalls,%eax
@@ -182,12 +182,14 @@ ENTRY(hypercall)
 #define SHADOW_BYTES 24 /* 6 shadow parameters */
 #endif
         cmpb  $0,tb_init_done
-        je    1f
+UNLIKELY_START(ne, trace)
         call  trace_hypercall
         /* Now restore all the registers that trace_hypercall clobbered */
         movl  UREGS_eax+SHADOW_BYTES(%esp),%eax /* Hypercall # */
+UNLIKELY_END(trace)
+        call *hypercall_table(,%eax,4)
+        movl  %eax,UREGS_eax+SHADOW_BYTES(%esp) # save the return value
 #undef SHADOW_BYTES
-1:      call *hypercall_table(,%eax,4)
         addl  $24,%esp     # Discard the shadow parameters
 #ifndef NDEBUG
         /* Deliberately corrupt real parameter regs used by this hypercall. */
@@ -197,13 +199,10 @@ ENTRY(hypercall)
         jne   skip_clobber # If EIP has changed then don't clobber
         movzb hypercall_args_table(,%ecx,1),%ecx
         movl  %esp,%edi
-        movl  %eax,%esi
         movl  $0xDEADBEEF,%eax
         rep   stosl
-        movl  %esi,%eax
 skip_clobber:
 #endif
-        movl %eax,UREGS_eax(%esp)       # save the return value
 
 test_all_events:
         xorl %ecx,%ecx
@@ -293,8 +292,8 @@ create_bounce_frame:
         jz   ring1 /* jump if returning to an existing ring-1 activation */
         movl VCPU_kernel_sp(%ebx),%esi
 .Lft6:  mov  VCPU_kernel_ss(%ebx),%gs
-        testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
-        jz   .Lnvm86_1
+        testl $X86_EFLAGS_VM,%ecx
+UNLIKELY_START(nz, bounce_vm86_1)
         subl $16,%esi       /* push ES/DS/FS/GS (VM86 stack frame) */
         movl UREGS_es+4(%esp),%eax
 .Lft7:  movl %eax,%gs:(%esi)
@@ -304,7 +303,7 @@ create_bounce_frame:
 .Lft9:  movl %eax,%gs:8(%esi)
         movl UREGS_gs+4(%esp),%eax
 .Lft10: movl %eax,%gs:12(%esi)
-.Lnvm86_1:
+UNLIKELY_END(bounce_vm86_1)
         subl $8,%esi        /* push SS/ESP (inter-priv iret) */
         movl UREGS_esp+4(%esp),%eax
 .Lft11: movl %eax,%gs:(%esi)
@@ -346,17 +345,10 @@ ring1:  /* obtain ss/esp from oldss/olde
         movl TRAPBOUNCE_error_code(%edx),%eax
 .Lft17: movl %eax,%gs:(%esi)
 1:      testb $TBF_FAILSAFE,%cl
-        jz   2f
+UNLIKELY_START(nz, bounce_failsafe)
         subl $16,%esi                # add DS/ES/FS/GS to failsafe stack frame
         testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
-        jz   .Lnvm86_2
-        xorl %eax,%eax               # VM86: we write zero selector values
-.Lft18: movl %eax,%gs:(%esi)
-.Lft19: movl %eax,%gs:4(%esi)
-.Lft20: movl %eax,%gs:8(%esi)
-.Lft21: movl %eax,%gs:12(%esi)
-        jmp  2f
-.Lnvm86_2:
+        jnz  .Lvm86_2
         movl UREGS_ds+4(%esp),%eax   # non-VM86: write real selector values
 .Lft22: movl %eax,%gs:(%esi)
         movl UREGS_es+4(%esp),%eax
@@ -365,13 +357,22 @@ ring1:  /* obtain ss/esp from oldss/olde
 .Lft24: movl %eax,%gs:8(%esi)
         movl UREGS_gs+4(%esp),%eax
 .Lft25: movl %eax,%gs:12(%esi)
-2:      testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
-        jz   .Lnvm86_3
+        jmp  .Lnvm86_3
+.Lvm86_2:
+        xorl %eax,%eax               # VM86: we write zero selector values
+.Lft18: movl %eax,%gs:(%esi)
+.Lft19: movl %eax,%gs:4(%esi)
+.Lft20: movl %eax,%gs:8(%esi)
+.Lft21: movl %eax,%gs:12(%esi)
+UNLIKELY_END(bounce_failsafe)
+        testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
+UNLIKELY_START(nz, bounce_vm86_3)
         xorl %eax,%eax      /* zero DS-GS, just as a real CPU would */
         movl %eax,UREGS_ds+4(%esp)
         movl %eax,UREGS_es+4(%esp)
         movl %eax,UREGS_fs+4(%esp)
         movl %eax,UREGS_gs+4(%esp)
+UNLIKELY_END(bounce_vm86_3)
 .Lnvm86_3:
         /* Rewrite our stack frame and return to ring 1. */
         /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
@@ -564,6 +565,7 @@ ENTRY(spurious_interrupt_bug)
         pushl $TRAP_spurious_int<<16
         jmp   handle_exception
 
+       .pushsection .init.text, "ax", @progbits
 ENTRY(early_page_fault)
         SAVE_ALL(1f,1f)
 1:      movl  %esp,%eax
@@ -571,6 +573,7 @@ ENTRY(early_page_fault)
         call  do_early_page_fault
         addl  $4,%esp
         jmp   restore_all_xen
+       .popsection
 
 handle_nmi_mce:
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -49,7 +49,7 @@ ENTRY(compat_hypercall)
 #define SHADOW_BYTES 0  /* No on-stack shadow state */
 #endif
         cmpb  $0,tb_init_done(%rip)
-        je    1f
+UNLIKELY_START(ne, compat_trace)
         call  trace_hypercall
         /* Now restore all the registers that trace_hypercall clobbered */
         movl  UREGS_rax+SHADOW_BYTES(%rsp),%eax   /* Hypercall #  */
@@ -60,7 +60,8 @@ ENTRY(compat_hypercall)
         movl  UREGS_rdi+SHADOW_BYTES(%rsp),%r8d   /* Arg 5        */
         movl  UREGS_rbp+SHADOW_BYTES(%rsp),%r9d   /* Arg 6        */
 #undef SHADOW_BYTES
-1:      leaq  compat_hypercall_table(%rip),%r10
+UNLIKELY_END(compat_trace)
+        leaq  compat_hypercall_table(%rip),%r10
         PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
@@ -295,7 +296,7 @@ compat_create_bounce_frame:
 .Lft8:  movl  %eax,%fs:(%rsi)           # ERROR CODE
 1:
         testb $TBF_FAILSAFE,%cl
-        jz    2f
+UNLIKELY_START(nz, compat_bounce_failsafe)
         subl  $4*4,%esi
         movl  %gs,%eax
 .Lft9:  movl  %eax,%fs:3*4(%rsi)        # GS
@@ -304,7 +305,7 @@ compat_create_bounce_frame:
 .Lft11: movl  %eax,%fs:1*4(%rsi)        # ES
         movl  %ds,%eax
 .Lft12: movl  %eax,%fs:0*4(%rsi)        # DS
-2:
+UNLIKELY_END(compat_bounce_failsafe)
         /* Rewrite our stack frame and return to guest-OS mode. */
         /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
         andl  $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -148,7 +148,7 @@ ENTRY(syscall_enter)
 #define SHADOW_BYTES 0  /* No on-stack shadow state */
 #endif
         cmpb  $0,tb_init_done(%rip)
-        je    1f
+UNLIKELY_START(ne, trace)
         call  trace_hypercall
         /* Now restore all the registers that trace_hypercall clobbered */
         movq  UREGS_rax+SHADOW_BYTES(%rsp),%rax   /* Hypercall #  */
@@ -159,7 +159,8 @@ ENTRY(syscall_enter)
         movq  UREGS_r8 +SHADOW_BYTES(%rsp),%r8    /* Arg 5        */
         movq  UREGS_r9 +SHADOW_BYTES(%rsp),%r9    /* Arg 6        */
 #undef SHADOW_BYTES
-1:      leaq  hypercall_table(%rip),%r10
+UNLIKELY_END(trace)
+        leaq  hypercall_table(%rip),%r10
         PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
@@ -341,11 +342,12 @@ create_bounce_frame:
 2:      andq  $~0xf,%rsi                # Stack frames are 16-byte aligned.
         movq  $HYPERVISOR_VIRT_START,%rax
         cmpq  %rax,%rsi
-        jb    1f                        # In +ve address space? Then okay.
         movq  $HYPERVISOR_VIRT_END+60,%rax
+        sbb   %ecx,%ecx                 # In +ve address space? Then okay.
         cmpq  %rax,%rsi
-        jb    domain_crash_synchronous  # Above Xen private area? Then okay.
-1:      movb  TRAPBOUNCE_flags(%rdx),%cl
+        adc   %ecx,%ecx                 # Above Xen private area? Then okay.
+        jg    domain_crash_synchronous
+        movb  TRAPBOUNCE_flags(%rdx),%cl
         subq  $40,%rsi
         movq  UREGS_ss+8(%rsp),%rax
 .Lft2:  movq  %rax,32(%rsi)             # SS
@@ -376,7 +378,7 @@ create_bounce_frame:
         movl  TRAPBOUNCE_error_code(%rdx),%eax
 .Lft7:  movq  %rax,(%rsi)               # ERROR CODE
 1:      testb $TBF_FAILSAFE,%cl
-        jz    2f
+UNLIKELY_START(nz, bounce_failsafe)
         subq  $32,%rsi
         movl  %gs,%eax
 .Lft8:  movq  %rax,24(%rsi)             # GS
@@ -386,7 +388,8 @@ create_bounce_frame:
 .Lft10: movq  %rax,8(%rsi)              # ES
         movl  %ds,%eax
 .Lft11: movq  %rax,(%rsi)               # DS
-2:      subq  $16,%rsi
+UNLIKELY_END(bounce_failsafe)
+        subq  $16,%rsi
         movq  UREGS_r11+8(%rsp),%rax
 .Lft12: movq  %rax,8(%rsi)              # R11
         movq  UREGS_rcx+8(%rsp),%rax
@@ -601,11 +604,13 @@ ENTRY(double_fault)
         call  do_double_fault
         ud2
 
+       .pushsection .init.text, "ax", @progbits
 ENTRY(early_page_fault)
         SAVE_ALL
         movq  %rsp,%rdi
         call  do_early_page_fault
         jmp   restore_all_xen
+       .popsection
 
 handle_ist_exception:
         SAVE_ALL
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -32,4 +32,18 @@
 #define _ASM_EXTABLE(from, to)     _ASM__EXTABLE(, from, to)
 #define _ASM_PRE_EXTABLE(from, to) _ASM__EXTABLE(.pre, from, to)
 
+#ifdef __ASSEMBLY__
+
+#define UNLIKELY_START(cond, tag)                      \
+       j##cond .Lunlikely.tag;                         \
+       .subsection 1;                                  \
+       .Lunlikely.tag:
+
+#define UNLIKELY_END(tag)                              \
+       jmp .Llikely.tag;                               \
+       .subsection 0;                                  \
+       .Llikely.tag:
+
+#endif
+
 #endif /* __X86_ASM_DEFNS_H__ */
--- a/xen/include/asm-x86/x86_32/asm_defns.h
+++ b/xen/include/asm-x86/x86_32/asm_defns.h
@@ -1,6 +1,7 @@
 #ifndef __X86_32_ASM_DEFNS_H__
 #define __X86_32_ASM_DEFNS_H__
 
+#include <xen/stringify.h>
 #include <asm/percpu.h>
 
 #ifdef CONFIG_FRAME_POINTER
@@ -53,12 +54,14 @@
         mov   %es,%esi;                                 \
         mov   $(__HYPERVISOR_DS),%ecx;                  \
         jnz   86f;                                      \
-        .text 1;                                        \
+        .subsection 1;                                  \
         86:   call setup_vm86_frame;                    \
         jmp   vm86_lbl;                                 \
         .previous;                                      \
+        .ifnes __stringify(xen_lbl), "";                \
         testb $3,UREGS_cs(%esp);                        \
         jz    xen_lbl;                                  \
+        .endif;                                         \
         /*                                              \
          * We are the outermost Xen context, but our    \
          * life is complicated by NMIs and MCEs. These  \
--- /dev/null
+++ b/xen/include/xen/stringify.h
@@ -0,0 +1,12 @@
+#ifndef __XEN_STRINGIFY_H
+#define __XEN_STRINGIFY_H
+
+/* Indirect stringification.  Doing two levels allows the parameter to be a
+ * macro itself.  For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ */
+
+#define __stringify_1(x...)    #x
+#define __stringify(x...)      __stringify_1(x)
+
+#endif /* !__XEN_STRINGIFY_H */
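
A note on the create_bounce_frame change in x86_64/entry.S, since the
transformation is not obvious: the two conditional branches of the old range
check are folded into carry arithmetic, leaving a single (normally not taken)
conditional jump. Annotated:

        cmpq  %rax,%rsi      # %rax = HYPERVISOR_VIRT_START
        sbb   %ecx,%ecx      # %ecx = -1 if %rsi is below Xen space, else 0
        cmpq  %rax,%rsi      # %rax = HYPERVISOR_VIRT_END+60
        adc   %ecx,%ecx      # %ecx = 2*%ecx + carry, leaving
                             #   -2/-1: below HYPERVISOR_VIRT_START -> okay
                             #       1: inside the Xen private area -> crash
                             #       0: above HYPERVISOR_VIRT_END+60 -> okay
        jg    domain_crash_synchronous  # only the "inside" case is positive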
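
Likewise, the .ifnes/__stringify() change to the 32-bit SAVE_ALL makes the
first macro argument optional, which is what permits the new SAVE_ALL(,1f)
invocation at the hypercall entry point: stringifying an omitted argument
yields the empty string, so the Xen-context test and its extra branch are
only assembled when a label is actually passed.

        SAVE_ALL(1f,1f)   # xen_lbl present: the "testb $3,UREGS_cs(%esp);
                          # jz 1f" pair is assembled
        SAVE_ALL(,1f)     # xen_lbl empty: __stringify(xen_lbl) compares
                          # equal to "", so the test and branch are elided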


Attachment: x86-forward-branches.patch
Description: Text document
