... since these get statically mispredicted by most CPUs and needlessly
enlarge the fast path's instruction-cache footprint. This mostly concerns
hypercall tracing and vm86 mode handling.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
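
For readers unfamiliar with the technique, a minimal sketch (illustration
only, not part of the patch) of what the conversion does to a branch such
as the hypercall-trace check: most CPUs statically predict forward
conditional branches as not taken, so branching around rarely executed
code mispredicts in the common case and keeps the cold instructions on the
hot path's cache lines. The new macros invert the condition and assemble
the cold code into a separate subsection:

        /* Before: the common case takes the forward branch. */
        cmpb    $0,tb_init_done
        je      1f                      # almost always taken => mispredicted
        call    trace_hypercall         # cold code, inline on the hot path
1:      ...

        /* After: UNLIKELY_START(ne, trace) ... UNLIKELY_END(trace)
         * expand to roughly the following. */
        cmpb    $0,tb_init_done
        jne     .Lunlikely.trace        # almost never taken => predicted correctly
        .subsection 1                   # cold code, emitted out of line
.Lunlikely.trace:
        call    trace_hypercall
        jmp     .Llikely.trace          # rejoin the hot path
        .subsection 0
.Llikely.trace:
        ...                             # hot path continues here
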
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1330,7 +1330,7 @@ asmlinkage void do_page_fault(struct cpu
* during early boot (an issue was seen once, but was most likely a hardware
* problem).
*/
-asmlinkage void do_early_page_fault(struct cpu_user_regs *regs)
+asmlinkage void __init do_early_page_fault(struct cpu_user_regs *regs)
{
static int stuck;
static unsigned long prev_eip, prev_cr2;
--- a/xen/arch/x86/x86_32/entry.S
+++ b/xen/arch/x86/x86_32/entry.S
@@ -142,7 +142,7 @@ restore_all_xen:
ENTRY(hypercall)
subl $4,%esp
FIXUP_RING0_GUEST_STACK
- SAVE_ALL(1f,1f)
+ SAVE_ALL(,1f)
1: sti
GET_CURRENT(%ebx)
cmpl $NR_hypercalls,%eax
@@ -182,12 +182,14 @@ ENTRY(hypercall)
#define SHADOW_BYTES 24 /* 6 shadow parameters */
#endif
cmpb $0,tb_init_done
- je 1f
+UNLIKELY_START(ne, trace)
call trace_hypercall
/* Now restore all the registers that trace_hypercall clobbered */
movl UREGS_eax+SHADOW_BYTES(%esp),%eax /* Hypercall # */
+UNLIKELY_END(trace)
+ call *hypercall_table(,%eax,4)
+ movl %eax,UREGS_eax+SHADOW_BYTES(%esp) # save the return value
#undef SHADOW_BYTES
-1: call *hypercall_table(,%eax,4)
addl $24,%esp # Discard the shadow parameters
#ifndef NDEBUG
/* Deliberately corrupt real parameter regs used by this hypercall. */
@@ -197,13 +199,10 @@ ENTRY(hypercall)
jne skip_clobber # If EIP has changed then don't clobber
movzb hypercall_args_table(,%ecx,1),%ecx
movl %esp,%edi
- movl %eax,%esi
movl $0xDEADBEEF,%eax
rep stosl
- movl %esi,%eax
skip_clobber:
#endif
- movl %eax,UREGS_eax(%esp) # save the return value
test_all_events:
xorl %ecx,%ecx
@@ -293,8 +292,8 @@ create_bounce_frame:
jz ring1 /* jump if returning to an existing ring-1 activation */
movl VCPU_kernel_sp(%ebx),%esi
.Lft6: mov VCPU_kernel_ss(%ebx),%gs
- testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
- jz .Lnvm86_1
+ testl $X86_EFLAGS_VM,%ecx
+UNLIKELY_START(nz, bounce_vm86_1)
subl $16,%esi /* push ES/DS/FS/GS (VM86 stack frame) */
movl UREGS_es+4(%esp),%eax
.Lft7: movl %eax,%gs:(%esi)
@@ -304,7 +303,7 @@ create_bounce_frame:
.Lft9: movl %eax,%gs:8(%esi)
movl UREGS_gs+4(%esp),%eax
.Lft10: movl %eax,%gs:12(%esi)
-.Lnvm86_1:
+UNLIKELY_END(bounce_vm86_1)
subl $8,%esi /* push SS/ESP (inter-priv iret) */
movl UREGS_esp+4(%esp),%eax
.Lft11: movl %eax,%gs:(%esi)
@@ -346,17 +345,10 @@ ring1: /* obtain ss/esp from oldss/olde
movl TRAPBOUNCE_error_code(%edx),%eax
.Lft17: movl %eax,%gs:(%esi)
1: testb $TBF_FAILSAFE,%cl
- jz 2f
+UNLIKELY_START(nz, bounce_failsafe)
subl $16,%esi # add DS/ES/FS/GS to failsafe stack frame
testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
- jz .Lnvm86_2
- xorl %eax,%eax # VM86: we write zero selector values
-.Lft18: movl %eax,%gs:(%esi)
-.Lft19: movl %eax,%gs:4(%esi)
-.Lft20: movl %eax,%gs:8(%esi)
-.Lft21: movl %eax,%gs:12(%esi)
- jmp 2f
-.Lnvm86_2:
+ jnz .Lvm86_2
movl UREGS_ds+4(%esp),%eax # non-VM86: write real selector values
.Lft22: movl %eax,%gs:(%esi)
movl UREGS_es+4(%esp),%eax
@@ -365,13 +357,22 @@ ring1: /* obtain ss/esp from oldss/olde
.Lft24: movl %eax,%gs:8(%esi)
movl UREGS_gs+4(%esp),%eax
.Lft25: movl %eax,%gs:12(%esi)
-2: testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
- jz .Lnvm86_3
+ jmp .Lnvm86_3
+.Lvm86_2:
+ xorl %eax,%eax # VM86: we write zero selector values
+.Lft18: movl %eax,%gs:(%esi)
+.Lft19: movl %eax,%gs:4(%esi)
+.Lft20: movl %eax,%gs:8(%esi)
+.Lft21: movl %eax,%gs:12(%esi)
+UNLIKELY_END(bounce_failsafe)
+ testl $X86_EFLAGS_VM,UREGS_eflags+4(%esp)
+UNLIKELY_START(nz, bounce_vm86_3)
xorl %eax,%eax /* zero DS-GS, just as a real CPU would */
movl %eax,UREGS_ds+4(%esp)
movl %eax,UREGS_es+4(%esp)
movl %eax,UREGS_fs+4(%esp)
movl %eax,UREGS_gs+4(%esp)
+UNLIKELY_END(bounce_vm86_3)
.Lnvm86_3:
/* Rewrite our stack frame and return to ring 1. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
@@ -564,6 +565,7 @@ ENTRY(spurious_interrupt_bug)
pushl $TRAP_spurious_int<<16
jmp handle_exception
+ .pushsection .init.text, "ax", @progbits
ENTRY(early_page_fault)
SAVE_ALL(1f,1f)
1: movl %esp,%eax
@@ -571,6 +573,7 @@ ENTRY(early_page_fault)
call do_early_page_fault
addl $4,%esp
jmp restore_all_xen
+ .popsection
handle_nmi_mce:
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -49,7 +49,7 @@ ENTRY(compat_hypercall)
#define SHADOW_BYTES 0 /* No on-stack shadow state */
#endif
cmpb $0,tb_init_done(%rip)
- je 1f
+UNLIKELY_START(ne, compat_trace)
call trace_hypercall
/* Now restore all the registers that trace_hypercall clobbered */
movl UREGS_rax+SHADOW_BYTES(%rsp),%eax /* Hypercall # */
@@ -60,7 +60,8 @@ ENTRY(compat_hypercall)
movl UREGS_rdi+SHADOW_BYTES(%rsp),%r8d /* Arg 5 */
movl UREGS_rbp+SHADOW_BYTES(%rsp),%r9d /* Arg 6 */
#undef SHADOW_BYTES
-1: leaq compat_hypercall_table(%rip),%r10
+UNLIKELY_END(compat_trace)
+ leaq compat_hypercall_table(%rip),%r10
PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
callq *(%r10,%rax,8)
#ifndef NDEBUG
@@ -295,7 +296,7 @@ compat_create_bounce_frame:
.Lft8: movl %eax,%fs:(%rsi) # ERROR CODE
1:
testb $TBF_FAILSAFE,%cl
- jz 2f
+UNLIKELY_START(nz, compat_bounce_failsafe)
subl $4*4,%esi
movl %gs,%eax
.Lft9: movl %eax,%fs:3*4(%rsi) # GS
@@ -304,7 +305,7 @@ compat_create_bounce_frame:
.Lft11: movl %eax,%fs:1*4(%rsi) # ES
movl %ds,%eax
.Lft12: movl %eax,%fs:0*4(%rsi) # DS
-2:
+UNLIKELY_END(compat_bounce_failsafe)
/* Rewrite our stack frame and return to guest-OS mode. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
andl $~(X86_EFLAGS_VM|X86_EFLAGS_RF|\
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -148,7 +148,7 @@ ENTRY(syscall_enter)
#define SHADOW_BYTES 0 /* No on-stack shadow state */
#endif
cmpb $0,tb_init_done(%rip)
- je 1f
+UNLIKELY_START(ne, trace)
call trace_hypercall
/* Now restore all the registers that trace_hypercall clobbered */
movq UREGS_rax+SHADOW_BYTES(%rsp),%rax /* Hypercall # */
@@ -159,7 +159,8 @@ ENTRY(syscall_enter)
movq UREGS_r8 +SHADOW_BYTES(%rsp),%r8 /* Arg 5 */
movq UREGS_r9 +SHADOW_BYTES(%rsp),%r9 /* Arg 6 */
#undef SHADOW_BYTES
-1: leaq hypercall_table(%rip),%r10
+UNLIKELY_END(trace)
+ leaq hypercall_table(%rip),%r10
PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
callq *(%r10,%rax,8)
#ifndef NDEBUG
@@ -341,11 +342,12 @@ create_bounce_frame:
2: andq $~0xf,%rsi # Stack frames are 16-byte aligned.
movq $HYPERVISOR_VIRT_START,%rax
cmpq %rax,%rsi
- jb 1f # In +ve address space? Then okay.
movq $HYPERVISOR_VIRT_END+60,%rax
+ sbb %ecx,%ecx # In +ve address space? Then okay.
cmpq %rax,%rsi
- jb domain_crash_synchronous # Above Xen private area? Then okay.
-1: movb TRAPBOUNCE_flags(%rdx),%cl
+ adc %ecx,%ecx # Above Xen private area? Then okay.
+ jg domain_crash_synchronous
+ movb TRAPBOUNCE_flags(%rdx),%cl
subq $40,%rsi
movq UREGS_ss+8(%rsp),%rax
.Lft2: movq %rax,32(%rsi) # SS
@@ -376,7 +378,7 @@ create_bounce_frame:
movl TRAPBOUNCE_error_code(%rdx),%eax
.Lft7: movq %rax,(%rsi) # ERROR CODE
1: testb $TBF_FAILSAFE,%cl
- jz 2f
+UNLIKELY_START(nz, bounce_failsafe)
subq $32,%rsi
movl %gs,%eax
.Lft8: movq %rax,24(%rsi) # GS
@@ -386,7 +388,8 @@ create_bounce_frame:
.Lft10: movq %rax,8(%rsi) # ES
movl %ds,%eax
.Lft11: movq %rax,(%rsi) # DS
-2: subq $16,%rsi
+UNLIKELY_END(bounce_failsafe)
+ subq $16,%rsi
movq UREGS_r11+8(%rsp),%rax
.Lft12: movq %rax,8(%rsi) # R11
movq UREGS_rcx+8(%rsp),%rax
@@ -601,11 +604,13 @@ ENTRY(double_fault)
call do_double_fault
ud2
+ .pushsection .init.text, "ax", @progbits
ENTRY(early_page_fault)
SAVE_ALL
movq %rsp,%rdi
call do_early_page_fault
jmp restore_all_xen
+ .popsection
handle_ist_exception:
SAVE_ALL
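
The sbb/adc pairing in create_bounce_frame above is worth spelling out: it
folds the two range checks on %rsi into a single conditional branch. A
worked trace of the three cases (annotation only, not part of the patch):

        # cmpq $VIRT_START:  CF = (rsi < HYPERVISOR_VIRT_START)
        # sbb  %ecx,%ecx:    ecx = -CF  (-1 if below the Xen range, else 0)
        # cmpq $VIRT_END+60: CF = (rsi < HYPERVISOR_VIRT_END+60)
        # adc  %ecx,%ecx:    ecx = ecx + ecx + CF
        #
        #   rsi <  VIRT_START            -> ecx = -1 - 1 + 1 = -1   okay
        #   VIRT_START <= rsi < END+60   -> ecx =  0 + 0 + 1 =  1   crash
        #   rsi >= VIRT_END+60           -> ecx =  0 + 0 + 0 =  0   okay
        #
        # jg tests the flags left by the adc, so only the middle case
        # (ecx == 1, i.e. %rsi inside the Xen private area) branches to
        # domain_crash_synchronous.
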
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -32,4 +32,18 @@
#define _ASM_EXTABLE(from, to) _ASM__EXTABLE(, from, to)
#define _ASM_PRE_EXTABLE(from, to) _ASM__EXTABLE(.pre, from, to)
+#ifdef __ASSEMBLY__
+
+#define UNLIKELY_START(cond, tag) \
+ j##cond .Lunlikely.tag; \
+ .subsection 1; \
+ .Lunlikely.tag:
+
+#define UNLIKELY_END(tag) \
+ jmp .Llikely.tag; \
+ .subsection 0; \
+ .Llikely.tag:
+
+#endif
+
#endif /* __X86_ASM_DEFNS_H__ */
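
The two macros rely on GNU as numbered subsections: everything placed in
subsection 1 is collected and emitted after all subsection-0 code of the
same section, so the unlikely blocks land out of the fall-through path
without needing a separately named section. A stand-alone illustration
(not from the patch):

        .text                   # .text starts in subsection 0
        nop                     # hot path
        .subsection 1
        ud2                     # emitted after ALL subsection-0 code of .text
        .subsection 0
        nop                     # contiguous with the first nop in the output
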
--- a/xen/include/asm-x86/x86_32/asm_defns.h
+++ b/xen/include/asm-x86/x86_32/asm_defns.h
@@ -1,6 +1,7 @@
#ifndef __X86_32_ASM_DEFNS_H__
#define __X86_32_ASM_DEFNS_H__
+#include <xen/stringify.h>
#include <asm/percpu.h>
#ifdef CONFIG_FRAME_POINTER
@@ -53,12 +54,14 @@
mov %es,%esi; \
mov $(__HYPERVISOR_DS),%ecx; \
jnz 86f; \
- .text 1; \
+ .subsection 1; \
86: call setup_vm86_frame; \
jmp vm86_lbl; \
.previous; \
+ .ifnes __stringify(xen_lbl), ""; \
testb $3,UREGS_cs(%esp); \
jz xen_lbl; \
+ .endif; \
/* \
* We are the outermost Xen context, but our \
* life is complicated by NMIs and MCEs. These \
--- /dev/null
+++ b/xen/include/xen/stringify.h
@@ -0,0 +1,12 @@
+#ifndef __XEN_STRINGIFY_H
+#define __XEN_STRINGIFY_H
+
+/* Indirect stringification. Doing two levels allows the parameter to be a
+ * macro itself. For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ */
+
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#endif /* !__XEN_STRINGIFY_H */
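
Besides its usual role, __stringify() is what lets the modified SAVE_ALL
test for an omitted macro argument: an empty argument stringifies to "",
which .ifnes can compare against, so SAVE_ALL(,1f) simply skips the
xen_lbl test. A hypothetical minimal example (MAYBE_BRANCH is illustrative
and not part of the patch):

#include <xen/stringify.h>

#define MAYBE_BRANCH(lbl)                \
        .ifnes __stringify(lbl), "";     \
        jmp lbl;                         \
        .endif

        MAYBE_BRANCH()          /* empty argument: "" == "", emits nothing */
        MAYBE_BRANCH(1f)        /* emits: jmp 1f */
1: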