# HG changeset patch
# User Wei Huang
# Date 1302811927 18000
# Branch lwp3
# Node ID 4ed4e3d9106e691dddf3bbf54afd935ef0a9000f
# Parent  476b4ba2c3b55f95b6e80bbe94a24bab3b3779d9
FPU: clean up the FPU code

This patch reorganizes and cleans up the FPU code. Main changes include:

1. Related functions are grouped together.
2. FPU save/restore code is extracted into independent functions
   (fsave/frstor, fxsave/fxrstor, xsave/xrstor).
3. Two FPU entry functions, fpu_save() and fpu_restore(), are defined.
   They use xsave/xrstor to save/restore FPU state if the CPU supports
   extended state; otherwise they fall back to fxsave/fxrstor or
   fsave/frstor.

Signed-off-by: Wei Huang

diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/acpi/suspend.c
--- a/xen/arch/x86/acpi/suspend.c Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/acpi/suspend.c Thu Apr 14 15:12:07 2011 -0500
@@ -24,7 +24,7 @@
 
 void save_rest_processor_state(void)
 {
-    save_init_fpu(current);
+    fpu_save(current);
 
 #if defined(CONFIG_X86_64)
     asm volatile (
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/domain.c Thu Apr 14 15:12:07 2011 -0500
@@ -1576,7 +1576,7 @@
     if ( !is_idle_vcpu(p) )
     {
         memcpy(&p->arch.user_regs, stack_regs, CTXT_SWITCH_STACK_BYTES);
-        save_init_fpu(p);
+        fpu_save(p);
         p->arch.ctxt_switch_from(p);
     }
 
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c Thu Apr 14 15:12:07 2011 -0500
@@ -348,7 +348,7 @@
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
-    setup_fpu(v);
+    fpu_restore(v);
     vmcb_set_exception_intercepts(
         vmcb, vmcb_get_exception_intercepts(vmcb) & ~(1U << TRAP_no_device));
 }
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Apr 14 15:12:07 2011 -0500
@@ -612,7 +612,7 @@
 
 static void vmx_fpu_enter(struct vcpu *v)
 {
-    setup_fpu(v);
+    fpu_restore(v);
     v->arch.hvm_vmx.exception_bitmap &= ~(1u << TRAP_no_device);
     vmx_update_exception_bitmap(v);
     v->arch.hvm_vmx.host_cr0 &= ~X86_CR0_TS;
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/i387.c
--- a/xen/arch/x86/i387.c Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/i387.c Thu Apr 14 15:12:07 2011 -0500
@@ -16,212 +16,12 @@
 #include
 #include
 
+/******************************/
+/*      x86 XSAVE/XRSTOR      */
+/******************************/
+#define XSTATE_CPUID 0x0000000d
+
 static bool_t __read_mostly cpu_has_xsaveopt;
-
-static void xsave(struct vcpu *v)
-{
-    struct xsave_struct *ptr = v->arch.xsave_area;
-
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x27"
-        :
-        : "a" (-1), "d" (-1), "D"(ptr)
-        : "memory" );
-}
-
-static void xsaveopt(struct vcpu *v)
-{
-    struct xsave_struct *ptr = v->arch.xsave_area;
-
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x37"
-        :
-        : "a" (-1), "d" (-1), "D"(ptr)
-        : "memory" );
-}
-
-static void xrstor(struct vcpu *v)
-{
-    struct xsave_struct *ptr = v->arch.xsave_area;
-
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-        :
-        : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr) );
-}
-
-static void load_mxcsr(unsigned long val)
-{
-    val &= 0xffbf;
-    asm volatile ( "ldmxcsr %0" : : "m" (val) );
-}
-
-static void init_fpu(void);
-static void restore_fpu(struct vcpu *v);
-
-void setup_fpu(struct vcpu *v)
-{
-    ASSERT(!is_idle_vcpu(v));
-
-    /* Avoid recursion. */
-    clts();
-
-    if ( v->fpu_dirtied )
-        return;
-
-    if ( xsave_enabled(v) )
-    {
-        /*
-         * XCR0 normally represents what guest OS set. In case of Xen itself,
-         * we set all supported feature mask before doing save/restore.
-         */
-        set_xcr0(v->arch.xcr0_accum);
-        xrstor(v);
-        set_xcr0(v->arch.xcr0);
-    }
-    else if ( v->fpu_initialised )
-    {
-        restore_fpu(v);
-    }
-    else
-    {
-        init_fpu();
-    }
-
-    v->fpu_initialised = 1;
-    v->fpu_dirtied = 1;
-}
-
-static void init_fpu(void)
-{
-    asm volatile ( "fninit" );
-    if ( cpu_has_xmm )
-        load_mxcsr(0x1f80);
-}
-
-void save_init_fpu(struct vcpu *v)
-{
-    unsigned long cr0;
-    char *fpu_ctxt;
-
-    if ( !v->fpu_dirtied )
-        return;
-
-    ASSERT(!is_idle_vcpu(v));
-
-    cr0 = read_cr0();
-    fpu_ctxt = v->arch.fpu_ctxt;
-
-    /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
-    if ( cr0 & X86_CR0_TS )
-        clts();
-
-    if ( xsave_enabled(v) )
-    {
-        /* XCR0 normally represents what guest OS set. In case of Xen itself,
-         * we set all accumulated feature mask before doing save/restore.
-         */
-        set_xcr0(v->arch.xcr0_accum);
-        if ( cpu_has_xsaveopt )
-            xsaveopt(v);
-        else
-            xsave(v);
-        set_xcr0(v->arch.xcr0);
-    }
-    else if ( cpu_has_fxsr )
-    {
-#ifdef __i386__
-        asm volatile (
-            "fxsave %0"
-            : "=m" (*fpu_ctxt) );
-#else /* __x86_64__ */
-        /*
-         * The only way to force fxsaveq on a wide range of gas versions. On
-         * older versions the rex64 prefix works only if we force an
-         * addressing mode that doesn't require extended registers.
-         */
-        asm volatile (
-            REX64_PREFIX "fxsave (%1)"
-            : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
-#endif
-
-        /* Clear exception flags if FSW.ES is set. */
-        if ( unlikely(fpu_ctxt[2] & 0x80) )
-            asm volatile ("fnclex");
-
-        /*
-         * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
-         * is pending. Clear the x87 state here by setting it to fixed
-         * values. The hypervisor data segment can be sometimes 0 and
-         * sometimes new user value. Both should be ok. Use the FPU saved
-         * data block as a safe address because it should be in L1.
-         */
-        if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
-        {
-            asm volatile (
-                "emms\n\t"        /* clear stack tags */
-                "fildl %0"        /* load to clear state */
-                : : "m" (*fpu_ctxt) );
-        }
-    }
-    else
-    {
-        /* FWAIT is required to make FNSAVE synchronous. */
-        asm volatile ( "fnsave %0 ; fwait" : "=m" (*fpu_ctxt) );
-    }
-
-    v->fpu_dirtied = 0;
-    write_cr0(cr0|X86_CR0_TS);
-}
-
-static void restore_fpu(struct vcpu *v)
-{
-    const char *fpu_ctxt = v->arch.fpu_ctxt;
-
-    /*
-     * FXRSTOR can fault if passed a corrupted data block. We handle this
-     * possibility, which may occur if the block was passed to us by control
-     * tools, by silently clearing the block.
-     */
-    if ( cpu_has_fxsr )
-    {
-        asm volatile (
-#ifdef __i386__
-            "1: fxrstor %0            \n"
-#else /* __x86_64__ */
-            /* See above for why the operands/constraints are this way. */
*/ - "1: " REX64_PREFIX "fxrstor (%2)\n" -#endif - ".section .fixup,\"ax\" \n" - "2: push %%"__OP"ax \n" - " push %%"__OP"cx \n" - " push %%"__OP"di \n" - " lea %0,%%"__OP"di \n" - " mov %1,%%ecx \n" - " xor %%eax,%%eax \n" - " rep ; stosl \n" - " pop %%"__OP"di \n" - " pop %%"__OP"cx \n" - " pop %%"__OP"ax \n" - " jmp 1b \n" - ".previous \n" - _ASM_EXTABLE(1b, 2b) - : - : "m" (*fpu_ctxt), - "i" (sizeof(v->arch.xsave_area->fpu_sse)/4) -#ifdef __x86_64__ - ,"cdaSDb" (fpu_ctxt) -#endif - ); - } - else - { - asm volatile ( "frstor %0" : : "m" (*fpu_ctxt) ); - } -} - -#define XSTATE_CPUID 0xd - /* * Maximum size (in byte) of the XSAVE/XRSTOR save area required by all * the supported and enabled features on the processor, including the @@ -327,6 +127,229 @@ return cpu_has_xsave; } +static void xsave(struct vcpu *v) +{ + struct xsave_struct *ptr = v->arch.xsave_area; + + asm volatile ( + ".byte " REX_PREFIX "0x0f,0xae,0x27" + : + : "a" (-1), "d" (-1), "D"(ptr) + : "memory" ); +} + +static void xsaveopt(struct vcpu *v) +{ + struct xsave_struct *ptr = v->arch.xsave_area; + + asm volatile ( + ".byte " REX_PREFIX "0x0f,0xae,0x37" + : + : "a" (-1), "d" (-1), "D"(ptr) + : "memory" ); +} + +static void xrstor(struct vcpu *v) +{ + struct xsave_struct *ptr = v->arch.xsave_area; + + asm volatile ( + ".byte " REX_PREFIX "0x0f,0xae,0x2f" + : + : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr) ); +} + +/******************************/ +/* x86 FPU */ +/******************************/ +#define MXCSR_DEFAULT 0x1f80 + +static void fpu_init(struct vcpu *v) +{ + unsigned long val; + + asm volatile ( "fninit" ); + if ( cpu_has_xmm ) + { + /* load default value into MXCSR control/status register */ + val = MXCSR_DEFAULT; + asm volatile ( "ldmxcsr %0" : : "m" (val) ); + } +} + +/* Restore extended state */ +static inline void fpu_xrstor(struct vcpu *v) +{ + /* + * XCR0 normally represents what guest OS set. In case of Xen itself, + * we set all supported feature mask before doing save/restore. + */ + set_xcr0(v->arch.xcr0_accum); + xrstor(v); + set_xcr0(v->arch.xcr0); +} + +/* Restore x87 FPU, MMX, XMM and MXCSR state */ +static inline void fpu_fxrstor(struct vcpu *v) +{ + const char *fpu_ctxt = v->arch.fpu_ctxt; + + asm volatile ( +#ifdef __i386__ + "1: fxrstor %0 \n" +#else /* __x86_64__ */ + /* See above for why the operands/constraints are this way. */ + "1: " REX64_PREFIX "fxrstor (%2)\n" +#endif + ".section .fixup,\"ax\" \n" + "2: push %%"__OP"ax \n" + " push %%"__OP"cx \n" + " push %%"__OP"di \n" + " lea %0,%%"__OP"di \n" + " mov %1,%%ecx \n" + " xor %%eax,%%eax \n" + " rep ; stosl \n" + " pop %%"__OP"di \n" + " pop %%"__OP"cx \n" + " pop %%"__OP"ax \n" + " jmp 1b \n" + ".previous \n" + _ASM_EXTABLE(1b, 2b) + : + : "m" (*fpu_ctxt), + "i" (sizeof(v->arch.xsave_area->fpu_sse)/4) +#ifdef __x86_64__ + ,"cdaSDb" (fpu_ctxt) +#endif + ); +} + +/* Restore x87 state */ +static inline void fpu_frstor(struct vcpu *v) +{ + const char *fpu_ctxt = v->arch.fpu_ctxt; + + asm volatile ( "frstor %0" : : "m" (*fpu_ctxt) ); +} + +void fpu_restore(struct vcpu *v) +{ + ASSERT(!is_idle_vcpu(v)); + + /* Avoid recursion. */ + clts(); + + if ( v->fpu_dirtied ) + return; + + if ( xsave_enabled(v) ) + { + fpu_xrstor(v); + } + else if ( v->fpu_initialised ) + { + if ( cpu_has_fxsr ) + fpu_fxrstor(v); + else + fpu_frstor(v); + } + else + { + fpu_init(v); + } + + v->fpu_initialised = 1; + v->fpu_dirtied = 1; +} + +/* Save x87 extended state */ +static inline void fpu_xsave(struct vcpu *v) +{ + /* XCR0 normally represents what guest OS set. 
+     * we set all accumulated feature mask before doing save/restore.
+     */
+    set_xcr0(v->arch.xcr0_accum);
+    if ( cpu_has_xsaveopt )
+        xsaveopt(v);
+    else
+        xsave(v);
+    set_xcr0(v->arch.xcr0);
+}
+
+/* Save x87 FPU, MMX, SSE and SSE2 state */
+static inline void fpu_fxsave(struct vcpu *v)
+{
+    char *fpu_ctxt = v->arch.fpu_ctxt;
+
+#ifdef __i386__
+    asm volatile (
+        "fxsave %0"
+        : "=m" (*fpu_ctxt) );
+#else /* __x86_64__ */
+    /*
+     * The only way to force fxsaveq on a wide range of gas versions. On
+     * older versions the rex64 prefix works only if we force an
+     * addressing mode that doesn't require extended registers.
+     */
+    asm volatile (
+        REX64_PREFIX "fxsave (%1)"
+        : "=m" (*fpu_ctxt) : "cdaSDb" (fpu_ctxt) );
+#endif
+
+    /* Clear exception flags if FSW.ES is set. */
+    if ( unlikely(fpu_ctxt[2] & 0x80) )
+        asm volatile ("fnclex");
+
+    /*
+     * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+     * is pending. Clear the x87 state here by setting it to fixed
+     * values. The hypervisor data segment can be sometimes 0 and
+     * sometimes new user value. Both should be ok. Use the FPU saved
+     * data block as a safe address because it should be in L1.
+     */
+    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+    {
+        asm volatile (
+            "emms\n\t"        /* clear stack tags */
+            "fildl %0"        /* load to clear state */
+            : : "m" (*fpu_ctxt) );
+    }
+}
+
+/* Save x87 FPU state */
+static inline void fpu_fsave(struct vcpu *v)
+{
+    char *fpu_ctxt = v->arch.fpu_ctxt;
+
+    /* FWAIT is required to make FNSAVE synchronous. */
+    asm volatile ( "fnsave %0 ; fwait" : "=m" (*fpu_ctxt) );
+}
+
+void fpu_save(struct vcpu *v)
+{
+    unsigned long cr0;
+
+    if ( !v->fpu_dirtied )
+        return;
+
+    ASSERT(!is_idle_vcpu(v));
+
+    /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
+    cr0 = read_cr0();
+    if ( cr0 & X86_CR0_TS )
+        clts();
+
+    if ( xsave_enabled(v) )
+        fpu_xsave(v);
+    else if ( cpu_has_fxsr )
+        fpu_fxsave(v);
+    else
+        fpu_fsave(v);
+
+    v->fpu_dirtied = 0;
+    write_cr0(cr0|X86_CR0_TS);
+}
+
 /*
  * Local variables:
  * mode: C
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/arch/x86/traps.c Thu Apr 14 15:12:07 2011 -0500
@@ -3167,7 +3167,7 @@
 
     BUG_ON(!guest_mode(regs));
 
-    setup_fpu(curr);
+    fpu_restore(curr);
 
     if ( curr->arch.pv_vcpu.ctrlreg[0] & X86_CR0_TS )
     {
diff -r 476b4ba2c3b5 -r 4ed4e3d9106e xen/include/asm-x86/i387.h
--- a/xen/include/asm-x86/i387.h Thu Apr 14 15:03:04 2011 -0500
+++ b/xen/include/asm-x86/i387.h Thu Apr 14 15:12:07 2011 -0500
@@ -14,15 +14,13 @@
 #include
 #include
 
+/******************************/
+/*      x86 XSAVE/XRSTOR      */
+/******************************/
 struct vcpu;
 
 extern unsigned int xsave_cntxt_size;
 extern u64 xfeature_mask;
-
-void xsave_init(void);
-int xsave_alloc_save_area(struct vcpu *v);
-void xsave_free_save_area(struct vcpu *v);
-bool_t xsave_enabled(const struct vcpu *v);
 
 #define XSAVE_AREA_MIN_SIZE (512 + 64) /* FP/SSE + XSAVE.HEADER */
 #define XSTATE_FP (1ULL << 0)
@@ -34,6 +32,16 @@
 #define XSTATE_YMM_OFFSET XSAVE_AREA_MIN_SIZE
 #define XSTATE_YMM_SIZE 256
 #define XSAVEOPT (1 << 0)
+
+#define XCR_XFEATURE_ENABLED_MASK 0
+
+#ifdef CONFIG_X86_64
+#define REX_PREFIX "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+DECLARE_PER_CPU(uint64_t, xcr0);
 
 struct xsave_struct
 {
@@ -48,15 +56,10 @@
     char data[]; /* Future new states */
 } __attribute__ ((packed, aligned (64)));
 
-#define XCR_XFEATURE_ENABLED_MASK 0
-
-#ifdef CONFIG_X86_64
-#define REX_PREFIX "0x48, "
-#else
-#define REX_PREFIX
-#endif
-
-DECLARE_PER_CPU(uint64_t, xcr0);
+void xsave_init(void);
+int xsave_alloc_save_area(struct vcpu *v);
+void xsave_free_save_area(struct vcpu *v);
+bool_t xsave_enabled(const struct vcpu *v);
 
 static inline void xsetbv(u32 index, u64 xfeatures)
 {
@@ -78,7 +81,10 @@
     return this_cpu(xcr0);
 }
 
-extern void setup_fpu(struct vcpu *v);
-extern void save_init_fpu(struct vcpu *v);
+/******************************/
+/*           x86 FPU          */
+/******************************/
+extern void fpu_save(struct vcpu *v);
+extern void fpu_restore(struct vcpu *v);
 
 #endif /* __ASM_I386_I387_H */
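
For illustration, the fallback order described in point 3 of the commit message (xsave/xrstor when the CPU supports extended state, otherwise fxsave/fxrstor, otherwise fsave/frstor) can be seen in a minimal standalone sketch. The sketch below is illustrative only and is not part of the patch: the toy_* names, the have_xsave/have_fxsr flags and the do_* stubs are hypothetical stand-ins for the real xsave_enabled(v), cpu_has_fxsr and fpu_* helpers added in xen/arch/x86/i387.c.

/* Illustrative sketch only -- not part of the patch.  Models the save-path
 * dispatch of fpu_save() with hypothetical stand-ins so it compiles on its
 * own. */
#include <stdbool.h>
#include <stdio.h>

struct toy_vcpu {
    bool fpu_dirtied;            /* has the vCPU used the FPU since the last save? */
};

static bool have_xsave = true;   /* stand-in for xsave_enabled(v) */
static bool have_fxsr  = true;   /* stand-in for cpu_has_fxsr */

static void do_xsave(struct toy_vcpu *v)  { (void)v; puts("xsave:  save full extended state"); }
static void do_fxsave(struct toy_vcpu *v) { (void)v; puts("fxsave: save x87/MMX/SSE state"); }
static void do_fsave(struct toy_vcpu *v)  { (void)v; puts("fnsave: save legacy x87 state"); }

/* Same shape as fpu_save(): newest save mechanism first, then fall back. */
static void toy_fpu_save(struct toy_vcpu *v)
{
    if ( !v->fpu_dirtied )
        return;                  /* nothing to save if the FPU was never used */

    if ( have_xsave )
        do_xsave(v);
    else if ( have_fxsr )
        do_fxsave(v);
    else
        do_fsave(v);

    v->fpu_dirtied = false;      /* the state now lives in memory */
}

int main(void)
{
    struct toy_vcpu v = { .fpu_dirtied = true };
    toy_fpu_save(&v);            /* prints which mechanism would be used */
    return 0;
}

The real fpu_save() in the patch additionally clears CR0.TS before saving and sets it again afterwards, and fpu_restore() applies the same ordering in the other direction (xrstor, then fxrstor, then frstor), initialising the FPU on first use.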