# HG changeset patch
# User Wei Huang
# Date 1302812136 18000
# Branch lwp3
# Node ID bccbc5ecf62e49482c06149bb94dfc3dee8882f1
# Parent  f5c4d99cde695bd0342a96d3ea6d5b34bd6047af
FPU: define new FPU interface functions: fpu_save, fpu_restore, fpu_reload

The current Xen implementation only supports lazy FPU reload via CR0.TS,
but new FPU state isn't always tracked by the CR0.TS bit. One example is
AMD's lightweight profiling (LWP). With this patch, Xen calls fpu_save()
on a vcpu that is being scheduled out and fpu_restore() on a vcpu that is
being scheduled in; the non-lazy FPU state is saved and restored in these
two functions. This patch also defines fpu_reload() to handle the lazy
FPU state when #NM is triggered.

Signed-off-by: Wei Huang
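To make the lazy/non-lazy split concrete: every xsave/xrstor in this patch
reduces to the mask expression xfeature_mask & (lazy ? XSTATE_LAZY :
XSTATE_NONLAZY). The standalone sketch below is illustrative only, not part
of the patch; the FP/SSE/YMM bit positions follow the x86 XSAVE layout,
XSTATE_LWP matches the definition added to i387.h below, and the
xfeature_mask value is a made-up stand-in for the mask Xen derives from
CPUID.

/* Illustrative sketch: how the lazy flag selects xsave components. */
#include <stdio.h>
#include <stdint.h>

#define XSTATE_FP       (1ULL << 0)    /* x87 state */
#define XSTATE_SSE      (1ULL << 1)    /* SSE state */
#define XSTATE_YMM      (1ULL << 2)    /* AVX state */
#define XSTATE_LWP      (1ULL << 62)   /* AMD lightweight profiling */

#define XSTATE_LAZY     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
#define XSTATE_NONLAZY  (XSTATE_LWP)

int main(void)
{
    /* Stand-in for Xen's hardware-derived mask; assume all four
     * components are supported on this hypothetical CPU. */
    uint64_t xfeature_mask = XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_LWP;

    /* The expression used by fpu_xsave()/fpu_xrstor() in the patch. */
    printf("lazy (reloaded on #NM):          %#llx\n",
           (unsigned long long)(xfeature_mask & XSTATE_LAZY));
    printf("non-lazy (every context switch): %#llx\n",
           (unsigned long long)(xfeature_mask & XSTATE_NONLAZY));
    return 0;
}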
diff -r f5c4d99cde69 -r bccbc5ecf62e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Thu Apr 14 15:13:18 2011 -0500
+++ b/xen/arch/x86/domain.c	Thu Apr 14 15:15:36 2011 -0500
@@ -1594,6 +1594,7 @@
         memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES);
         if ( xsave_enabled(n) && n->arch.xcr0 != get_xcr0() )
             set_xcr0(n->arch.xcr0);
+        fpu_restore(n);
         n->arch.ctxt_switch_to(n);
     }

diff -r f5c4d99cde69 -r bccbc5ecf62e xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Thu Apr 14 15:13:18 2011 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c	Thu Apr 14 15:15:36 2011 -0500
@@ -348,7 +348,7 @@
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

-    fpu_restore(v);
+    fpu_reload(v);
     vmcb_set_exception_intercepts(
         vmcb, vmcb_get_exception_intercepts(vmcb) & ~(1U << TRAP_no_device));
 }

diff -r f5c4d99cde69 -r bccbc5ecf62e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Apr 14 15:13:18 2011 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Apr 14 15:15:36 2011 -0500
@@ -612,7 +612,7 @@
 static void vmx_fpu_enter(struct vcpu *v)
 {
-    fpu_restore(v);
+    fpu_reload(v);
     v->arch.hvm_vmx.exception_bitmap &= ~(1u << TRAP_no_device);
     vmx_update_exception_bitmap(v);
     v->arch.hvm_vmx.host_cr0 &= ~X86_CR0_TS;

diff -r f5c4d99cde69 -r bccbc5ecf62e xen/arch/x86/i387.c
--- a/xen/arch/x86/i387.c	Thu Apr 14 15:13:18 2011 -0500
+++ b/xen/arch/x86/i387.c	Thu Apr 14 15:15:36 2011 -0500
@@ -184,14 +184,14 @@
 }

 /* Restore extended state */
-static inline void fpu_xrstor(struct vcpu *v)
+static inline void fpu_xrstor(struct vcpu *v, bool_t lazy)
 {
     /*
      * XCR0 normally represents what guest OS set. In case of Xen itself,
      * we set all supported feature mask before doing save/restore.
      */
     set_xcr0(v->arch.xcr0_accum);
-    xrstor(v, XCNTXT_MASK);
+    xrstor(v, xfeature_mask & (lazy ? XSTATE_LAZY : XSTATE_NONLAZY));
     set_xcr0(v->arch.xcr0);
 }

@@ -238,47 +238,17 @@
     asm volatile ( "frstor %0" : : "m" (*fpu_ctxt) );
 }

-void fpu_restore(struct vcpu *v)
-{
-    ASSERT(!is_idle_vcpu(v));
-
-    /* Avoid recursion. */
-    clts();
-
-    if ( v->fpu_dirtied )
-        return;
-
-    if ( xsave_enabled(v) )
-    {
-        fpu_xrstor(v);
-    }
-    else if ( v->fpu_initialised )
-    {
-        if ( cpu_has_fxsr )
-            fpu_fxrstor(v);
-        else
-            fpu_frstor(v);
-    }
-    else
-    {
-        fpu_init(v);
-    }
-
-    v->fpu_initialised = 1;
-    v->fpu_dirtied = 1;
-}
-
 /* Save x87 extended state */
-static inline void fpu_xsave(struct vcpu *v)
+static inline void fpu_xsave(struct vcpu *v, bool_t lazy)
 {
     /* XCR0 normally represents what guest OS set. In case of Xen itself,
      * we set all accumulated feature mask before doing save/restore. */
     set_xcr0(v->arch.xcr0_accum);
     if ( cpu_has_xsaveopt )
-        xsaveopt(v, XCNTXT_MASK);
+        xsaveopt(v, xfeature_mask & (lazy ? XSTATE_LAZY : XSTATE_NONLAZY));
     else
-        xsave(v, XCNTXT_MASK);
+        xsave(v, xfeature_mask & (lazy ? XSTATE_LAZY : XSTATE_NONLAZY));
     set_xcr0(v->arch.xcr0);
 }

@@ -331,31 +301,101 @@
     asm volatile ( "fnsave %0 ; fwait" : "=m" (*fpu_ctxt) );
 }

-void fpu_save(struct vcpu *v)
+/* Unlazy FPU state goes here.
+ *
+ * Some FPU state isn't tracked by the CR0.TS bit. One example is AMD's LWP
+ * state in the xsave area. As a result, Xen has to save it whenever a VCPU
+ * is scheduled out. To avoid recursion, the caller has to make sure the TS
+ * bit is cleared.
+ */
+void __fpu_unlazy_save(struct vcpu *v)
 {
-    unsigned long cr0;
+    if ( xsave_enabled(v) )
+        fpu_xsave(v, 0);
+}

+/*
+ * Lazy FPU state goes here.
+ */
+void __fpu_lazy_save(struct vcpu *v)
+{
+    /* fpu_dirtied keeps track of lazy FPU state. Don't do anything if clean. */
     if ( !v->fpu_dirtied )
         return;

-    ASSERT(!is_idle_vcpu(v));
-
-    /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
-    cr0 = read_cr0();
-    if ( cr0 & X86_CR0_TS )
-        clts();
-
     if ( xsave_enabled(v) )
-        fpu_xsave(v);
+        fpu_xsave(v, 1);
     else if ( cpu_has_fxsr )
         fpu_fxsave(v);
     else
         fpu_fsave(v);
-
+
     v->fpu_dirtied = 0;
-    write_cr0(cr0|X86_CR0_TS);
 }
+
+void __fpu_unlazy_restore(struct vcpu *v)
+{
+    if ( xsave_enabled(v) )
+        fpu_xrstor(v, 0);
+}
+
+/* Save the VCPU's FPU state when the vcpu is scheduled out. */
+void fpu_save(struct vcpu *v)
+{
+    ASSERT(!is_idle_vcpu(v));
+
+    clts();
+    __fpu_unlazy_save(v);
+    __fpu_lazy_save(v);
+    stts();
+}
+
+/* Restore the VCPU's FPU state when the vcpu is scheduled in. */
+void fpu_restore(struct vcpu *v)
+{
+    ASSERT(!is_idle_vcpu(v));
+
+    /* Avoid recursion. */
+    clts();
+
+    /* Only non-lazy state is restored on every context switch. The lazy
+     * state, as its name suggests, will be reloaded when #NM is triggered. */
+    __fpu_unlazy_restore(v);
+
+    stts();
+}
+
+/* Reload the VCPU's FPU state when a #NM is triggered. */
+void fpu_reload(struct vcpu *v)
+{
+    ASSERT(!is_idle_vcpu(v));
+
+    /* Avoid recursion. */
+    clts();
+
+    if ( v->fpu_dirtied )
+        return;
+
+    /* Only lazy state is reloaded here. */
+    if ( xsave_enabled(v) )
+    {
+        fpu_xrstor(v, 1);
+    }
+    else if ( v->fpu_initialised )
+    {
+        if ( cpu_has_fxsr )
+            fpu_fxrstor(v);
+        else
+            fpu_frstor(v);
+    }
+    else
+    {
+        fpu_init(v);
+    }
+
+    v->fpu_initialised = 1;
+    v->fpu_dirtied = 1;
+}

 /*
  * Local variables:
  * mode: C

diff -r f5c4d99cde69 -r bccbc5ecf62e xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c	Thu Apr 14 15:13:18 2011 -0500
+++ b/xen/arch/x86/traps.c	Thu Apr 14 15:15:36 2011 -0500
@@ -3167,7 +3167,7 @@

     BUG_ON(!guest_mode(regs));

-    fpu_restore(curr);
+    fpu_reload(curr);

     if ( curr->arch.pv_vcpu.ctrlreg[0] & X86_CR0_TS )
     {

diff -r f5c4d99cde69 -r bccbc5ecf62e xen/include/asm-x86/i387.h
--- a/xen/include/asm-x86/i387.h	Thu Apr 14 15:13:18 2011 -0500
+++ b/xen/include/asm-x86/i387.h	Thu Apr 14 15:15:36 2011 -0500
@@ -32,6 +32,9 @@
 #define XSTATE_LWP     (1ULL << 62) /* AMD lightweight profiling */
 #define XSTATE_FP_SSE  (XSTATE_FP | XSTATE_SSE)
 #define XCNTXT_MASK    (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_LWP)
+
+#define XSTATE_LAZY    (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
+#define XSTATE_NONLAZY (XSTATE_LWP)

 #define XSAVEOPT       (1 << 0)

@@ -88,5 +91,5 @@
 /******************************/
 extern void fpu_save(struct vcpu *v);
 extern void fpu_restore(struct vcpu *v);
-
+extern void fpu_reload(struct vcpu *v);
 #endif /* __ASM_I386_I387_H */
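As a reading aid, here is a toy model (again illustrative only, not Xen
code) of the call protocol the patch establishes: fpu_save() when a vcpu is
scheduled out, fpu_restore() when one is scheduled in, and fpu_reload() from
the #NM paths patched above. The vcpu type and the driver in main() are
hypothetical stand-ins for Xen's real types and call sites.

#include <stdio.h>

struct vcpu {
    const char *name;
    int fpu_dirtied;                    /* tracks lazy state, as in the patch */
};

static void fpu_save(struct vcpu *v)    /* vcpu scheduled out */
{
    printf("%s: save non-lazy state\n", v->name);
    if ( v->fpu_dirtied )               /* mirrors __fpu_lazy_save() */
    {
        printf("%s: save dirty lazy state\n", v->name);
        v->fpu_dirtied = 0;
    }
}

static void fpu_restore(struct vcpu *v) /* vcpu scheduled in */
{
    printf("%s: restore non-lazy state only\n", v->name);
}

static void fpu_reload(struct vcpu *v)  /* first FPU use traps with #NM */
{
    printf("%s: reload lazy state\n", v->name);
    v->fpu_dirtied = 1;
}

int main(void)
{
    struct vcpu prev = { "vcpu0", 1 };
    struct vcpu next = { "vcpu1", 0 };

    fpu_save(&prev);    /* context switch out */
    fpu_restore(&next); /* context switch in; lazy state left stale */
    fpu_reload(&next);  /* #NM fires on the guest's first FPU use */
    return 0;
}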