# HG changeset patch
# User Keir Fraser <keir@xxxxxxx>
# Date 1292524640 0
# Node ID 8dc27840025c966cb57aaa1d56e61c69d5178189
# Parent 774908fc38221ccc106254c9a046ef3131482c4a
x86/asm: allow some unlikely-taken branches to be statically predicted this way
... by moving the respective code out of line (into sub-section 1 of
the particular section). A few other branches could be eliminated
altogether.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
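For background (not part of the patch): in the absence of branch history,
x86 static prediction assumes forward conditional branches are not taken.
Emitting the rarely executed body into sub-section 1 makes the assembler
place it after all of the section's sub-section 0 code, so the conditional
jump to it is a forward jump that falls through on the hot path, and the
cold bytes stop occupying hot-path I-cache lines. A minimal sketch of the
pattern (flag and rare_handler are hypothetical names):

        cmpb    $0, flag(%rip)
        jne     .Lunlikely.demo     # forward jump, statically not-taken
        .subsection 1               # what follows assembles out of line
.Lunlikely.demo:
        call    rare_handler        # cold code, away from the hot path
        jmp     .Llikely.demo       # rejoin the fast path
        .subsection 0               # back in line
.Llikely.demo:
        # hot path continues here, with no cold bytes in between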
xen/arch/x86/x86_32/entry.S | 41 ++++++++++++++++-----------------
xen/arch/x86/x86_64/compat/entry.S | 9 ++++---
xen/arch/x86/x86_64/entry.S | 17 ++++++++-----
xen/include/asm-x86/asm_defns.h | 14 +++++++++++
xen/include/asm-x86/x86_32/asm_defns.h | 5 +++-
xen/include/xen/stringify.h | 12 +++++++++
6 files changed, 66 insertions(+), 32 deletions(-)
diff -r 774908fc3822 -r 8dc27840025c xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S Thu Dec 16 18:25:33 2010 +0000
+++ b/xen/arch/x86/x86_32/entry.S Thu Dec 16 18:37:20 2010 +0000
@@ -146,7 +146,7 @@ ENTRY(hypercall)
ENTRY(hypercall)
subl $4,%esp
FIXUP_RING0_GUEST_STACK
- SAVE_ALL(1f,1f)
+ SAVE_ALL(,1f)
1: sti
GET_CURRENT(%ebx)
cmpl $NR_hypercalls,%eax
@@ -186,12 +186,14 @@ 1: sti
#define SHADOW_BYTES 24 /* 6 shadow parameters */
#endif
cmpb $0,tb_init_done
- je 1f
+UNLIKELY_START(ne, trace)
call trace_hypercall
/* Now restore all the registers that trace_hypercall clobbered */
movl UREGS_eax+SHADOW_BYTES(%esp),%eax /* Hypercall # */
+UNLIKELY_END(trace)
+ call *hypercall_table(,%eax,4)
+ movl %eax,UREGS_eax+SHADOW_BYTES(%esp) # save the return value
#undef SHADOW_BYTES
-1: call *hypercall_table(,%eax,4)
addl $24,%esp # Discard the shadow parameters
#ifndef NDEBUG
/* Deliberately corrupt real parameter regs used by this hypercall. */
@@ -201,13 +203,10 @@ 1: call *hypercall_table(,%eax,4)
jne skip_clobber # If EIP has changed then don't clobber
movzb hypercall_args_table(,%ecx,1),%ecx
movl %esp,%edi
- movl %eax,%esi
movl $0xDEADBEEF,%eax
rep stosl
- movl %esi,%eax
skip_clobber:
#endif
- movl %eax,UREGS_eax(%esp) # save the return value
test_all_events:
xorl %ecx,%ecx
@@ -297,8 +296,8 @@ create_bounce_frame:
jz ring1 /* jump if returning to an existing ring-1 activation */
movl VCPU_kernel_sp(%ebx),%esi
.Lft6: mov VCPU_kernel_ss(%ebx),%gs
- testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
- jz .Lnvm86_1
+ testl $X86_EFLAGS_VM,%ecx
+UNLIKELY_START(nz, bounce_vm86_1)
subl $16,%esi /* push ES/DS/FS/GS (VM86 stack frame) */
movl UREGS_es+4(%esp),%eax
.Lft7: movl %eax,%gs:(%esi)
@@ -308,7 +307,7 @@ create_bounce_frame:
.Lft9: movl %eax,%gs:8(%esi)
movl UREGS_gs+4(%esp),%eax
.Lft10: movl %eax,%gs:12(%esi)
-.Lnvm86_1:
+UNLIKELY_END(bounce_vm86_1)
subl $8,%esi /* push SS/ESP (inter-priv iret) */
movl UREGS_esp+4(%esp),%eax
.Lft11: movl %eax,%gs:(%esi)
@@ -350,17 +349,10 @@ 1: /* Construct a stack frame: EFLA
movl TRAPBOUNCE_error_code(%edx),%eax
.Lft17: movl %eax,%gs:(%esi)
1: testb $TBF_FAILSAFE,%cl
- jz 2f
+UNLIKELY_START(nz, bounce_failsafe)
subl $16,%esi # add DS/ES/FS/GS to failsafe stack frame
testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
- jz .Lnvm86_2
- xorl %eax,%eax # VM86: we write zero selector values
-.Lft18: movl %eax,%gs:(%esi)
-.Lft19: movl %eax,%gs:4(%esi)
-.Lft20: movl %eax,%gs:8(%esi)
-.Lft21: movl %eax,%gs:12(%esi)
- jmp 2f
-.Lnvm86_2:
+ jnz .Lvm86_2
movl UREGS_ds+4(%esp),%eax # non-VM86: write real selector values
.Lft22: movl %eax,%gs:(%esi)
movl UREGS_es+4(%esp),%eax
@@ -369,13 +361,22 @@ 1: testb $TBF_FAILSAFE,%cl
.Lft24: movl %eax,%gs:8(%esi)
movl UREGS_gs+4(%esp),%eax
.Lft25: movl %eax,%gs:12(%esi)
-2: testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
- jz .Lnvm86_3
+ jmp .Lnvm86_3
+.Lvm86_2:
+ xorl %eax,%eax # VM86: we write zero selector values
+.Lft18: movl %eax,%gs:(%esi)
+.Lft19: movl %eax,%gs:4(%esi)
+.Lft20: movl %eax,%gs:8(%esi)
+.Lft21: movl %eax,%gs:12(%esi)
+UNLIKELY_END(bounce_failsafe)
+ testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
+UNLIKELY_START(nz, bounce_vm86_3)
xorl %eax,%eax /* zero DS-GS, just as a real CPU would */
movl %eax,UREGS_ds+4(%esp)
movl %eax,UREGS_es+4(%esp)
movl %eax,UREGS_fs+4(%esp)
movl %eax,UREGS_gs+4(%esp)
+UNLIKELY_END(bounce_vm86_3)
.Lnvm86_3:
/* Rewrite our stack frame and return to ring 1. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
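Aside (a reconstruction, not part of the patch): storing the hypercall's
return value into the frame right after the indirect call is what lets the
NDEBUG clobber path above drop its two %esi moves. The rep stosl fill uses
%eax for the 0xDEADBEEF pattern, so the return value previously had to be
parked in %esi across it; now it is already safe on the stack:

        call    *hypercall_table(,%eax,4)
        movl    %eax,UREGS_eax+SHADOW_BYTES(%esp)  # return value saved, so...
        ...
        movl    $0xDEADBEEF,%eax                   # ...%eax is free to hold
        rep     stosl                              # the fill pattern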
diff -r 774908fc3822 -r 8dc27840025c xen/arch/x86/x86_64/compat/entry.S
--- a/xen/arch/x86/x86_64/compat/entry.S Thu Dec 16 18:25:33 2010 +0000
+++ b/xen/arch/x86/x86_64/compat/entry.S Thu Dec 16 18:37:20 2010 +0000
@@ -49,7 +49,7 @@ ENTRY(compat_hypercall)
#define SHADOW_BYTES 0 /* No on-stack shadow state */
#endif
cmpb $0,tb_init_done(%rip)
- je 1f
+UNLIKELY_START(ne, compat_trace)
call trace_hypercall
/* Now restore all the registers that trace_hypercall clobbered */
movl UREGS_rax+SHADOW_BYTES(%rsp),%eax /* Hypercall # */
@@ -60,7 +60,8 @@ ENTRY(compat_hypercall)
movl UREGS_rdi+SHADOW_BYTES(%rsp),%r8d /* Arg 5 */
movl UREGS_rbp+SHADOW_BYTES(%rsp),%r9d /* Arg 6 */
#undef SHADOW_BYTES
-1: leaq compat_hypercall_table(%rip),%r10
+UNLIKELY_END(compat_trace)
+ leaq compat_hypercall_table(%rip),%r10
PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
callq *(%r10,%rax,8)
#ifndef NDEBUG
@@ -299,7 +300,7 @@ 2:
.Lft8: movl %eax,%fs:(%rsi) # ERROR CODE
1:
testb $TBF_FAILSAFE,%cl
- jz 2f
+UNLIKELY_START(nz, compat_bounce_failsafe)
subl $4*4,%esi
movl %gs,%eax
.Lft9: movl %eax,%fs:3*4(%rsi) # GS
@@ -308,7 +309,7 @@ 1:
.Lft11: movl %eax,%fs:1*4(%rsi) # ES
movl %ds,%eax
.Lft12: movl %eax,%fs:0*4(%rsi) # DS
-2:
+UNLIKELY_END(compat_bounce_failsafe)
/* Rewrite our stack frame and return to guest-OS mode. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
andl $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\
diff -r 774908fc3822 -r 8dc27840025c xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Thu Dec 16 18:25:33 2010 +0000
+++ b/xen/arch/x86/x86_64/entry.S Thu Dec 16 18:37:20 2010 +0000
@@ -152,7 +152,7 @@ ENTRY(syscall_enter)
#define SHADOW_BYTES 0 /* No on-stack shadow state */
#endif
cmpb $0,tb_init_done(%rip)
- je 1f
+UNLIKELY_START(ne, trace)
call trace_hypercall
/* Now restore all the registers that trace_hypercall clobbered */
movq UREGS_rax+SHADOW_BYTES(%rsp),%rax /* Hypercall # */
@@ -163,7 +163,8 @@ ENTRY(syscall_enter)
movq UREGS_r8 +SHADOW_BYTES(%rsp),%r8 /* Arg 5 */
movq UREGS_r9 +SHADOW_BYTES(%rsp),%r9 /* Arg 6 */
#undef SHADOW_BYTES
-1: leaq hypercall_table(%rip),%r10
+UNLIKELY_END(trace)
+ leaq hypercall_table(%rip),%r10
PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
callq *(%r10,%rax,8)
#ifndef NDEBUG
@@ -345,11 +346,12 @@ 2: andq $~0xf,%rsi
2: andq $~0xf,%rsi # Stack frames are 16-byte aligned.
movq $HYPERVISOR_VIRT_START,%rax
cmpq %rax,%rsi
- jb 1f # In +ve address space? Then okay.
movq $HYPERVISOR_VIRT_END+60,%rax
+ sbb %ecx,%ecx # In +ve address space? Then okay.
cmpq %rax,%rsi
- jb domain_crash_synchronous # Above Xen private area? Then okay.
-1: movb TRAPBOUNCE_flags(%rdx),%cl
+ adc %ecx,%ecx # Above Xen private area? Then okay.
+ jg domain_crash_synchronous
+ movb TRAPBOUNCE_flags(%rdx),%cl
subq $40,%rsi
movq UREGS_ss+8(%rsp),%rax
.Lft2: movq %rax,32(%rsi) # SS
@@ -380,7 +382,7 @@ 1: movb TRAPBOUNCE_flags(%rdx),%cl
movl TRAPBOUNCE_error_code(%rdx),%eax
.Lft7: movq %rax,(%rsi) # ERROR CODE
1: testb $TBF_FAILSAFE,%cl
- jz 2f
+UNLIKELY_START(nz, bounce_failsafe)
subq $32,%rsi
movl %gs,%eax
.Lft8: movq %rax,24(%rsi) # GS
@@ -390,7 +392,8 @@ 1: testb $TBF_FAILSAFE,%cl
.Lft10: movq %rax,8(%rsi) # ES
movl %ds,%eax
.Lft11: movq %rax,(%rsi) # DS
-2: subq $16,%rsi
+UNLIKELY_END(bounce_failsafe)
+ subq $16,%rsi
movq UREGS_r11+8(%rsp),%rax
.Lft12: movq %rax,8(%rsi) # R11
movq UREGS_rcx+8(%rsp),%rax
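A worked reading of the branchless range check introduced above (the same
instructions, with the flag arithmetic spelled out):

        movq    $HYPERVISOR_VIRT_START,%rax
        cmpq    %rax,%rsi       # CF=1 iff %rsi < VIRT_START
        movq    $HYPERVISOR_VIRT_END+60,%rax
        sbb     %ecx,%ecx       # %ecx = 0 - CF: -1 if below VIRT_START, else 0
        cmpq    %rax,%rsi       # CF=1 iff %rsi < VIRT_END+60
        adc     %ecx,%ecx       # %ecx = 2*%ecx + CF:
                                #   %rsi < VIRT_START            -> -1 (ok)
                                #   VIRT_START <= %rsi < END+60  ->  1 (bad)
                                #   %rsi >= VIRT_END+60          ->  0 (ok)
        jg      domain_crash_synchronous  # signed > 0 only in the bad case

This folds the two jb branches of the old code into a single jg that is
taken only when %rsi points into Xen's private address range.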
diff -r 774908fc3822 -r 8dc27840025c xen/include/asm-x86/asm_defns.h
--- a/xen/include/asm-x86/asm_defns.h Thu Dec 16 18:25:33 2010 +0000
+++ b/xen/include/asm-x86/asm_defns.h Thu Dec 16 18:37:20 2010 +0000
@@ -12,4 +12,18 @@
#include <asm/x86_32/asm_defns.h>
#endif
+#ifdef __ASSEMBLY__
+
+#define UNLIKELY_START(cond, tag) \
+ j##cond .Lunlikely.tag; \
+ .subsection 1; \
+ .Lunlikely.tag:
+
+#define UNLIKELY_END(tag) \
+ jmp .Llikely.tag; \
+ .subsection 0; \
+ .Llikely.tag:
+
+#endif
+
#endif /* __X86_ASM_DEFNS_H__ */
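For illustration, UNLIKELY_START(ne, trace) ... UNLIKELY_END(trace) as used
in the hypercall paths above expands to:

        jne     .Lunlikely.trace    # branch to the out-of-line body
        .subsection 1
.Lunlikely.trace:
        # ...body placed between the two macro invocations...
        jmp     .Llikely.trace      # rejoin the likely path
        .subsection 0
.Llikely.trace:
        # fall-through (likely) path continues here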
diff -r 774908fc3822 -r 8dc27840025c xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h Thu Dec 16 18:25:33 2010 +0000
+++ b/xen/include/asm-x86/x86_32/asm_defns.h Thu Dec 16 18:37:20 2010 +0000
@@ -1,6 +1,7 @@
#ifndef __X86_32_ASM_DEFNS_H__
#define __X86_32_ASM_DEFNS_H__
+#include <xen/stringify.h>
#include <asm/percpu.h>
#ifdef CONFIG_FRAME_POINTER
@@ -53,12 +54,14 @@ 1: addl $4,%esp;
mov %es,%esi; \
mov $(__HYPERVISOR_DS),%ecx; \
jnz 86f; \
- .text 1; \
+ .subsection 1; \
86: call setup_vm86_frame; \
jmp vm86_lbl; \
.previous; \
+ .ifnes __stringify(xen_lbl), ""; \
testb $3,UREGS_cs(%esp); \
jz xen_lbl; \
+ .endif; \
/* \
* We are the outermost Xen context, but our \
* life is complicated by NMIs and MCEs. These \
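Sketch of what the new .ifnes guard buys, using SAVE_ALL(,1f) from the
hypercall entry above: stringifying an omitted macro argument yields "", so
the guard assembles as

        .ifnes  "", ""              # false: the assembler skips the block,
        testb   $3,UREGS_cs(%esp)   # eliminating this test and branch (the
        jz                          # empty label is only scanned, not
        .endif                      # assembled, so it cannot fault)

whereas a non-empty xen_lbl stringifies to its own text, the .ifnes is true,
and the test/branch pair is emitted exactly as before.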
diff -r 774908fc3822 -r 8dc27840025c xen/include/xen/stringify.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/xen/stringify.h Thu Dec 16 18:37:20 2010 +0000
@@ -0,0 +1,12 @@
+#ifndef __XEN_STRINGIFY_H
+#define __XEN_STRINGIFY_H
+
+/* Indirect stringification. Doing two levels allows the parameter to be a
+ * macro itself. For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ */
+
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#endif /* !__XEN_STRINGIFY_H */
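A quick demonstration of why the two levels matter (FOO here is
hypothetical, as in the header's own comment):

#include <stdio.h>

#define __stringify_1(x...) #x
#define __stringify(x...)   __stringify_1(x)

#define FOO bar

int main(void)
{
    printf("%s\n", __stringify_1(FOO)); /* "FOO": # applies before expansion */
    printf("%s\n", __stringify(FOO));   /* "bar": FOO expands at the outer level */
    printf("[%s]\n", __stringify());    /* "[]": an empty argument yields "",
                                           which the .ifnes test in SAVE_ALL
                                           relies on */
    return 0;
}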