# HG changeset patch
# User Wei Huang
# Date 1304447651 18000
# Node ID a2f5b9631b2f61be37127ef5d5b810d6ce71e858
# Parent  10f27b8b3d63959c7a8e15299a7a398b7ff7f230
FPU: extract extended state code into xstate.h and xstate.c

Extended state code is currently mixed in with the FPU code in i387.c.
As part of the FPU code cleanup, this patch moves all extended state
code into independent files. The semantics are largely unchanged and
most function names are kept untouched, except for xsave() and
xsaveopt(): these two functions are combined into a single function.

Signed-off-by: Wei Huang

diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile	Mon May 02 12:00:40 2011 +0100
+++ b/xen/arch/x86/Makefile	Tue May 03 13:34:11 2011 -0500
@@ -56,6 +56,7 @@
 obj-y += crash.o
 obj-y += tboot.o
 obj-y += hpet.o
+obj-y += xstate.o
 
 obj-$(crash_debug) += gdbstub.o
diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c	Mon May 02 12:00:40 2011 +0100
+++ b/xen/arch/x86/cpu/common.c	Tue May 03 13:34:11 2011 -0500
@@ -5,7 +5,7 @@
 #include
 #include
 #include
-#include <asm/i387.h>
+#include <asm/xstate.h>
 #include
 #include
 #include
@@ -354,7 +354,7 @@
 		clear_bit(X86_FEATURE_XSAVE, boot_cpu_data.x86_capability);
 
 	if ( cpu_has_xsave )
-		xsave_init();
+		xstate_init();
 
 	/*
 	 * The vendor-specific functions might have changed features. Now
diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Mon May 02 12:00:40 2011 +0100
+++ b/xen/arch/x86/domain.c	Tue May 03 13:34:11 2011 -0500
@@ -42,6 +42,7 @@
 #include
 #include
 #include
+#include <asm/xstate.h>
 #include
 #include
 #include
@@ -419,7 +420,7 @@
 
         v->arch.perdomain_ptes = perdomain_ptes(d, v);
 
-    if ( (rc = xsave_alloc_save_area(v)) != 0 )
+    if ( (rc = xstate_alloc_save_area(v)) != 0 )
         return rc;
     if ( v->arch.xsave_area )
         v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
@@ -485,7 +486,7 @@
     if ( rc )
     {
         if ( v->arch.xsave_area )
-            xsave_free_save_area(v);
+            xstate_free_save_area(v);
         else
             xfree(v->arch.fpu_ctxt);
         if ( !is_hvm_domain(d) && standalone_trap_ctxt(v) )
@@ -501,7 +502,7 @@
         release_compat_l4(v);
 
     if ( v->arch.xsave_area )
-        xsave_free_save_area(v);
+        xstate_free_save_area(v);
     else
         xfree(v->arch.fpu_ctxt);
diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Mon May 02 12:00:40 2011 +0100
+++ b/xen/arch/x86/domctl.c	Tue May 03 13:34:11 2011 -0500
@@ -33,7 +33,7 @@
 #include
 #include
 #include
-#include <asm/i387.h>
+#include <asm/xstate.h>
 
 #ifdef XEN_KDB_CONFIG
 #include "../kdb/include/kdbdefs.h"
diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Mon May 02 12:00:40 2011 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Tue May 03 13:34:11 2011 -0500
@@ -46,6 +46,7 @@
 #include
 #include
 #include
+#include <asm/xstate.h>
 #include
 #include
 #include
@@ -2427,7 +2428,7 @@
         if ( count == 0 && v->arch.xcr0 )
         {
             /* reset EBX to default value first */
-            *ebx = XSAVE_AREA_MIN_SIZE;
+            *ebx = XSTATE_AREA_MIN_SIZE;
             for ( sub_leaf = 2; sub_leaf < 64; sub_leaf++ )
             {
                 if ( !(v->arch.xcr0 & (1ULL << sub_leaf)) )
diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c	Mon May 02 12:00:40 2011 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c	Tue May 03 13:34:11 2011 -0500
@@ -26,6 +26,7 @@
 #include
 #include
 #include
+#include <asm/xstate.h>
 #include
 #include
 #include
diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/arch/x86/i387.c
--- a/xen/arch/x86/i387.c	Mon May 02 12:00:40 2011 +0100
+++ b/xen/arch/x86/i387.c	Tue May 03 13:34:11 2011 -0500
@@ -14,41 +14,8 @@
 #include
 #include
 #include
+#include <asm/xstate.h>
 #include
-
-static bool_t __read_mostly cpu_has_xsaveopt;
-
-static void xsave(struct vcpu *v)
-{
-    struct xsave_struct *ptr = v->arch.xsave_area;
-
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x27"
-        :
-        : "a" (-1), "d" (-1), "D"(ptr)
-        : "memory" );
-}
-
-static void xsaveopt(struct vcpu *v)
-{
-    struct xsave_struct *ptr = v->arch.xsave_area;
-
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x37"
-        :
-        : "a" (-1), "d" (-1), "D"(ptr)
-        : "memory" );
-}
-
-static void xrstor(struct vcpu *v)
-{
-    struct xsave_struct *ptr = v->arch.xsave_area;
-
-    asm volatile (
-        ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-        :
-        : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr) );
-}
 
 static void load_mxcsr(unsigned long val)
 {
@@ -122,10 +89,7 @@
          * we set all accumulated feature mask before doing save/restore.
          */
         set_xcr0(v->arch.xcr0_accum);
-        if ( cpu_has_xsaveopt )
-            xsaveopt(v);
-        else
-            xsave(v);
+        xsave(v);
         set_xcr0(v->arch.xcr0);
     }
     else if ( cpu_has_fxsr )
@@ -220,113 +184,6 @@
     }
 }
 
-#define XSTATE_CPUID 0xd
-
-/*
- * Maximum size (in byte) of the XSAVE/XRSTOR save area required by all
- * the supported and enabled features on the processor, including the
- * XSAVE.HEADER. We only enable XCNTXT_MASK that we have known.
- */
-u32 xsave_cntxt_size;
-
-/* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
-u64 xfeature_mask;
-
-/* Cached xcr0 for fast read */
-DEFINE_PER_CPU(uint64_t, xcr0);
-
-void xsave_init(void)
-{
-    u32 eax, ebx, ecx, edx;
-    int cpu = smp_processor_id();
-    u32 min_size;
-
-    if ( boot_cpu_data.cpuid_level < XSTATE_CPUID )
-        return;
-
-    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-
-    BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE);
-    BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE));
-
-    /* FP/SSE, XSAVE.HEADER, YMM */
-    min_size = XSAVE_AREA_MIN_SIZE;
-    if ( eax & XSTATE_YMM )
-        min_size += XSTATE_YMM_SIZE;
-    BUG_ON(ecx < min_size);
-
-    /*
-     * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size.
-     */
-    set_in_cr4(X86_CR4_OSXSAVE);
-    set_xcr0((((u64)edx << 32) | eax) & XCNTXT_MASK);
-    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-
-    if ( cpu == 0 )
-    {
-        /*
-         * xsave_cntxt_size is the max size required by enabled features.
-         * We know FP/SSE and YMM about eax, and nothing about edx at present.
-         */
-        xsave_cntxt_size = ebx;
-        xfeature_mask = eax + ((u64)edx << 32);
-        xfeature_mask &= XCNTXT_MASK;
-        printk("%s: using cntxt_size: 0x%x and states: 0x%"PRIx64"\n",
-            __func__, xsave_cntxt_size, xfeature_mask);
-
-        /* Check XSAVEOPT feature. */
-        cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
-        cpu_has_xsaveopt = !!(eax & XSAVEOPT);
-    }
-    else
-    {
-        BUG_ON(xsave_cntxt_size != ebx);
-        BUG_ON(xfeature_mask != (xfeature_mask & XCNTXT_MASK));
-    }
-}
-
-int xsave_alloc_save_area(struct vcpu *v)
-{
-    void *save_area;
-
-    if ( !cpu_has_xsave || is_idle_vcpu(v) )
-        return 0;
-
-    BUG_ON(xsave_cntxt_size < XSAVE_AREA_MIN_SIZE);
-
-    /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
-    save_area = _xmalloc(xsave_cntxt_size, 64);
-    if ( save_area == NULL )
-        return -ENOMEM;
-
-    memset(save_area, 0, xsave_cntxt_size);
-    ((u32 *)save_area)[6] = 0x1f80;  /* MXCSR */
-    *(uint64_t *)(save_area + 512) = XSTATE_FP_SSE;  /* XSETBV */
-
-    v->arch.xsave_area = save_area;
-    v->arch.xcr0 = XSTATE_FP_SSE;
-    v->arch.xcr0_accum = XSTATE_FP_SSE;
-
-    return 0;
-}
-
-void xsave_free_save_area(struct vcpu *v)
-{
-    xfree(v->arch.xsave_area);
-    v->arch.xsave_area = NULL;
-}
-
-bool_t xsave_enabled(const struct vcpu *v)
-{
-    if ( cpu_has_xsave )
-    {
-        ASSERT(xsave_cntxt_size >= XSAVE_AREA_MIN_SIZE);
-        ASSERT(v->arch.xsave_area);
-    }
-
-    return cpu_has_xsave;
-}
-
 /*
  * Local variables:
  * mode: C
diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c	Mon May 02 12:00:40 2011 +0100
+++ b/xen/arch/x86/traps.c	Tue May 03 13:34:11 2011 -0500
@@ -58,6 +58,7 @@
 #include
 #include
 #include
+#include <asm/xstate.h>
 #include
 #include
 #include
diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/arch/x86/xstate.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/xstate.c	Tue May 03 13:34:11 2011 -0500
@@ -0,0 +1,183 @@
+/*
+ * arch/x86/xstate.c
+ *
+ * x86 extended state operations
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+bool_t __read_mostly cpu_has_xsaveopt;
+
+/*
+ * Maximum size (in bytes) of the XSAVE/XRSTOR save area required by all
+ * the supported and enabled features on the processor, including the
+ * XSAVE.HEADER. Only the features in XCNTXT_MASK are ever enabled.
+ */
+u32 xsave_cntxt_size;
+
+/* A 64-bit bitmask of the XSAVE/XRSTOR features supported by the processor. */
+u64 xfeature_mask;
+
+/* Cached xcr0 for fast read */
+DEFINE_PER_CPU(uint64_t, xcr0);
+
+/* Because XCR0 is cached for each CPU, xsetbv() is not exposed. Users should
+ * use set_xcr0() instead.
+ */
+static inline void xsetbv(u32 index, u64 xfeatures)
+{
+    u32 hi = xfeatures >> 32;
+    u32 lo = (u32)xfeatures;
+
+    asm volatile (".byte 0x0f,0x01,0xd1" :: "c" (index),
+                  "a" (lo), "d" (hi));
+}
+
+inline void set_xcr0(u64 xfeatures)
+{
+    this_cpu(xcr0) = xfeatures;
+    xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures);
+}
+
+inline uint64_t get_xcr0(void)
+{
+    return this_cpu(xcr0);
+}
+
+void xsave(struct vcpu *v)
+{
+    struct xsave_struct *ptr = v->arch.xsave_area;
+
+    if ( cpu_has_xsaveopt )
+        asm volatile (
+            ".byte " REX_PREFIX "0x0f,0xae,0x37"
+            :
+            : "a" (-1), "d" (-1), "D"(ptr)
+            : "memory" );
+    else
+        asm volatile (
+            ".byte " REX_PREFIX "0x0f,0xae,0x27"
+            :
+            : "a" (-1), "d" (-1), "D"(ptr)
+            : "memory" );
+}
+
+void xrstor(struct vcpu *v)
+{
+    struct xsave_struct *ptr = v->arch.xsave_area;
+
+    asm volatile (
+        ".byte " REX_PREFIX "0x0f,0xae,0x2f"
+        :
+        : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr) );
+}
+
+bool_t xsave_enabled(const struct vcpu *v)
+{
+    if ( cpu_has_xsave )
+    {
+        ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
+        ASSERT(v->arch.xsave_area);
+    }
+
+    return cpu_has_xsave;
+}
+
+int xstate_alloc_save_area(struct vcpu *v)
+{
+    void *save_area;
+
+    if ( !cpu_has_xsave || is_idle_vcpu(v) )
+        return 0;
+
+    BUG_ON(xsave_cntxt_size < XSTATE_AREA_MIN_SIZE);
+
+    /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
+    save_area = _xmalloc(xsave_cntxt_size, 64);
+    if ( save_area == NULL )
+        return -ENOMEM;
+
+    memset(save_area, 0, xsave_cntxt_size);
+    ((u32 *)save_area)[6] = 0x1f80;  /* MXCSR */
+    *(uint64_t *)(save_area + 512) = XSTATE_FP_SSE;  /* XSTATE_BV */
+
+    v->arch.xsave_area = save_area;
+    v->arch.xcr0 = XSTATE_FP_SSE;
+    v->arch.xcr0_accum = XSTATE_FP_SSE;
+
+    return 0;
+}
+
+void xstate_free_save_area(struct vcpu *v)
+{
+    xfree(v->arch.xsave_area);
+    v->arch.xsave_area = NULL;
+}
+
+/* Collect the processor's extended state information */
+void xstate_init(void)
+{
+    u32 eax, ebx, ecx, edx;
+    int cpu = smp_processor_id();
+    u32 min_size;
+
+    if ( boot_cpu_data.cpuid_level < XSTATE_CPUID )
+        return;
+
+    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+
+    BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE);
+    BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE));
+
+    /* FP/SSE, XSAVE.HEADER, YMM */
+    min_size = XSTATE_AREA_MIN_SIZE;
+    if ( eax & XSTATE_YMM )
+        min_size += XSTATE_YMM_SIZE;
+    BUG_ON(ecx < min_size);
+
+    /*
+     * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size.
+     */
+    set_in_cr4(X86_CR4_OSXSAVE);
+    set_xcr0((((u64)edx << 32) | eax) & XCNTXT_MASK);
+    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+
+    if ( cpu == 0 )
+    {
+        /*
+         * xsave_cntxt_size is the max size required by the enabled features.
+         * eax tells us about FP/SSE and YMM; edx reports nothing at present.
+         */
+        xsave_cntxt_size = ebx;
+        xfeature_mask = eax + ((u64)edx << 32);
+        xfeature_mask &= XCNTXT_MASK;
+        printk("%s: using cntxt_size: 0x%x and states: 0x%"PRIx64"\n",
+            __func__, xsave_cntxt_size, xfeature_mask);
+
+        /* Check for the XSAVEOPT feature. */
+        cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+        cpu_has_xsaveopt = !!(eax & XSTATE_FEATURE_XSAVEOPT);
+    }
+    else
+    {
+        BUG_ON(xsave_cntxt_size != ebx);
+        BUG_ON(xfeature_mask != (xfeature_mask & XCNTXT_MASK));
+    }
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/include/asm-x86/i387.h
--- a/xen/include/asm-x86/i387.h	Mon May 02 12:00:40 2011 +0100
+++ b/xen/include/asm-x86/i387.h	Tue May 03 13:34:11 2011 -0500
@@ -14,71 +14,7 @@
 #include
 #include
 
-struct vcpu;
-
-extern unsigned int xsave_cntxt_size;
-extern u64 xfeature_mask;
-
-void xsave_init(void);
-int xsave_alloc_save_area(struct vcpu *v);
-void xsave_free_save_area(struct vcpu *v);
-bool_t xsave_enabled(const struct vcpu *v);
-
-#define XSAVE_AREA_MIN_SIZE (512 + 64) /* FP/SSE + XSAVE.HEADER */
-#define XSTATE_FP       (1ULL << 0)
-#define XSTATE_SSE      (1ULL << 1)
-#define XSTATE_YMM      (1ULL << 2)
-#define XSTATE_LWP      (1ULL << 62) /* AMD lightweight profiling */
-#define XSTATE_FP_SSE   (XSTATE_FP | XSTATE_SSE)
-#define XCNTXT_MASK     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_LWP)
-#define XSTATE_YMM_OFFSET  XSAVE_AREA_MIN_SIZE
-#define XSTATE_YMM_SIZE    256
-#define XSAVEOPT        (1 << 0)
-
-struct xsave_struct
-{
-    struct { char x[512]; } fpu_sse;         /* FPU/MMX, SSE */
-
-    struct {
-        u64 xstate_bv;
-        u64 reserved[7];
-    } xsave_hdr;                             /* The 64-byte header */
-
-    struct { char x[XSTATE_YMM_SIZE]; } ymm; /* YMM */
-    char data[];                             /* Future new states */
-} __attribute__ ((packed, aligned (64)));
-
-#define XCR_XFEATURE_ENABLED_MASK   0
-
-#ifdef CONFIG_X86_64
-#define REX_PREFIX "0x48, "
-#else
-#define REX_PREFIX
-#endif
-
-DECLARE_PER_CPU(uint64_t, xcr0);
-
-static inline void xsetbv(u32 index, u64 xfeatures)
-{
-    u32 hi = xfeatures >> 32;
-    u32 lo = (u32)xfeatures;
-
-    asm volatile (".byte 0x0f,0x01,0xd1" :: "c" (index),
-                  "a" (lo), "d" (hi));
-}
-
-static inline void set_xcr0(u64 xfeatures)
-{
-    this_cpu(xcr0) = xfeatures;
-    xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures);
-}
-
-static inline uint64_t get_xcr0(void)
-{
-    return this_cpu(xcr0);
-}
-
-extern void setup_fpu(struct vcpu *v);
-extern void save_init_fpu(struct vcpu *v);
+void setup_fpu(struct vcpu *v);
+void save_init_fpu(struct vcpu *v);
 
 #endif /* __ASM_I386_I387_H */
diff -r 10f27b8b3d63 -r a2f5b9631b2f xen/include/asm-x86/xstate.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/xstate.h	Tue May 03 13:34:11 2011 -0500
@@ -0,0 +1,68 @@
+/*
+ * include/asm-x86/xstate.h
+ *
+ * x86 extended state (xsave/xrstor) related definitions
+ *
+ */
+
+#ifndef __ASM_XSTATE_H
+#define __ASM_XSTATE_H
+
+#include
+#include
+
+#define XSTATE_CPUID              0x0000000d
+#define XSTATE_FEATURE_XSAVEOPT   (1 << 0)    /* sub-leaf 1, eax[bit 0] */
+
+#define XCR_XFEATURE_ENABLED_MASK 0x00000000  /* index of XCR0 */
+
+#define XSTATE_YMM_SIZE           256
+#define XSTATE_YMM_OFFSET         XSTATE_AREA_MIN_SIZE
+#define XSTATE_AREA_MIN_SIZE      (512 + 64)  /* FP/SSE + XSAVE.HEADER */
+
+#define XSTATE_FP      (1ULL << 0)
+#define XSTATE_SSE     (1ULL << 1)
+#define XSTATE_YMM     (1ULL << 2)
+#define XSTATE_LWP     (1ULL << 62) /* AMD lightweight profiling */
+#define XSTATE_FP_SSE  (XSTATE_FP | XSTATE_SSE)
+#define XCNTXT_MASK    (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_LWP)
+
+#ifdef CONFIG_X86_64
+#define REX_PREFIX "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+/* extended state variables */
+DECLARE_PER_CPU(uint64_t, xcr0);
+
+extern u32 xsave_cntxt_size;
+extern u64 xfeature_mask;
+
+/* extended state save area */
+struct xsave_struct
+{
+    struct { char x[512]; } fpu_sse;         /* FPU/MMX, SSE */
+
+    struct {
+        u64 xstate_bv;
+        u64 reserved[7];
+    } xsave_hdr;                             /* The 64-byte header */
+
+    struct { char x[XSTATE_YMM_SIZE]; } ymm; /* YMM */
+    char data[];                             /* Future new states */
+} __attribute__ ((packed, aligned (64)));
+
+/* extended state operations */
+void set_xcr0(u64 xfeatures);
+uint64_t get_xcr0(void);
+void xsave(struct vcpu *v);
+void xrstor(struct vcpu *v);
+bool_t xsave_enabled(const struct vcpu *v);
+
+/* extended state init and cleanup functions */
+void xstate_free_save_area(struct vcpu *v);
+int xstate_alloc_save_area(struct vcpu *v);
+void xstate_init(void);
+
+#endif /* __ASM_XSTATE_H */
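
For illustration only (not part of the patch): a minimal sketch of how a
caller is expected to drive the consolidated interface, mirroring the
save_init_fpu() hunk above. The function name context_save_xstate() is
hypothetical; the vcpu fields and the xsave_enabled()/set_xcr0()/xsave()
calls are the ones this patch defines in xstate.h.

    /* Hypothetical caller of the consolidated API (sketch only). */
    static void context_save_xstate(struct vcpu *v)
    {
        if ( !xsave_enabled(v) )        /* no-op unless XSAVE is in use */
            return;

        /* Enable all accumulated features so every dirty state is saved. */
        set_xcr0(v->arch.xcr0_accum);
        xsave(v);                       /* uses XSAVEOPT internally if present */
        set_xcr0(v->arch.xcr0);         /* restore the guest-visible XCR0 */
    }

Folding xsaveopt() into xsave() keeps the cpu_has_xsaveopt test private to
xstate.c, so call sites like the one above no longer choose between the two
instructions themselves.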