--- 2006-04-21/xen/arch/x86/i387.c.0	2006-04-06 17:50:26.000000000 +0200
+++ 2006-04-21/xen/arch/x86/i387.c	2006-04-24 16:23:21.000000000 +0200
@@ -31,14 +31,55 @@ void save_init_fpu(struct vcpu *v)
     if ( cr0 & X86_CR0_TS )
         clts();
 
+#ifdef __i386__
     if ( cpu_has_fxsr )
         __asm__ __volatile__ (
-            "fxsave %0 ; fnclex"
+            "fxsave %0"
             : "=m" (v->arch.guest_context.fpu_ctxt) );
     else
         __asm__ __volatile__ (
             "fnsave %0 ; fwait"
             : "=m" (v->arch.guest_context.fpu_ctxt) );
+#elif 0
+    /* Using "fxsaveq %0" would be the ideal choice, but is only supported
+       starting with gas 2.16. */
+    __asm__ __volatile__ (
+        "fxsaveq %0"
+        : "=m" (v->arch.guest_context.fpu_ctxt) );
+#elif 0
+    /* Using, as a workaround, the properly prefixed form below isn't
+       accepted by any binutils version so far released, complaining that
+       the same type of prefix is used twice if an extended register is
+       needed for addressing (fix submitted to mainline 2005-11-21). */
+    __asm__ __volatile__ (
+        "rex64/fxsave %0"
+        : "=m" (v->arch.guest_context.fpu_ctxt) );
+#else
+    /* This, however, we can work around by forcing the compiler to select
+       an addressing mode that doesn't require extended registers. */
+    __asm__ __volatile__ (
+        "rex64/fxsave %P2(%1)"
+        : "=m" (v->arch.guest_context.fpu_ctxt)
+        : "cdaSDb" (v),
+          "i" (offsetof(__typeof__(*v), arch.guest_context.fpu_ctxt)) );
+#endif
+    if ( cpu_has_fxsr ) {
+        if ( unlikely(v->arch.guest_context.fpu_ctxt.x[2] & 0x80) )
+            __asm__ __volatile__ ("fnclex");
+        /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+           is pending. Clear the x87 state here by setting it to fixed
+           values. The hypervisor data segment can be sometimes 0 and
+           sometimes new user value. Both should be ok.
+           Use the boot CPU's capability field as safe address because it
+           (being read in the conditional above) should be already in L1. */
+        if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) {
+            __asm__ __volatile__ (
+                "emms\n\t"  /* clear stack tags */
+                "fildl %0"  /* load to clear state */
+                :
+                : "m" (boot_cpu_data.x86_capability[X86_FEATURE_FXSR / 32]));
+        }
+    }
 
     clear_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags);
     write_cr0(cr0|X86_CR0_TS);
@@ -51,6 +92,7 @@ void restore_fpu(struct vcpu *v)
      * possibility, which may occur if the block was passed to us by control
      * tools, by silently clearing the block.
      */
+#ifdef __i386__
     if ( cpu_has_fxsr )
         __asm__ __volatile__ (
             "1: fxrstor %0            \n"
@@ -78,6 +120,33 @@ void restore_fpu(struct vcpu *v)
         __asm__ __volatile__ (
             "frstor %0"
             : : "m" (v->arch.guest_context.fpu_ctxt) );
+#else
+    /* See save_init_fpu() for why the operands/constraints are this way. */
+    __asm__ __volatile__ (
+        "1: rex64/fxrstor %P3(%2) \n"
+        ".section .fixup,\"ax\"   \n"
+        "2: push %%"__OP"ax       \n"
+        "   push %%"__OP"cx       \n"
+        "   push %%"__OP"di       \n"
+        "   lea  %0,%%"__OP"di    \n"
+        "   mov  %1,%%ecx         \n"
+        "   xor  %%eax,%%eax      \n"
+        "   rep ; stosl           \n"
+        "   pop  %%"__OP"di       \n"
+        "   pop  %%"__OP"cx       \n"
+        "   pop  %%"__OP"ax       \n"
+        "   jmp  1b               \n"
+        ".previous                \n"
+        ".section __ex_table,\"a\"\n"
+        "   "__FIXUP_ALIGN"       \n"
+        "   "__FIXUP_WORD" 1b,2b  \n"
+        ".previous                \n"
+        :
+        : "m" (v->arch.guest_context.fpu_ctxt),
+          "i" (sizeof(v->arch.guest_context.fpu_ctxt)/4),
+          "cdaSDb" (v),
+          "i" (offsetof(__typeof__(*v), arch.guest_context.fpu_ctxt)) );
+#endif
 }
 
 /*
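
A minimal standalone sketch of the operand trick used in save_init_fpu() above, assuming an x86-64 gcc/gas toolchain; the struct and function names are invented for illustration and are not part of the patch. Constraining the base pointer to a legacy register and passing the offset as a bare immediate keeps the addressing mode free of REX-extended registers, so even pre-2.16 binutils assembles the rex64-prefixed fxsave:

#include <stddef.h>

/* Hypothetical stand-ins for the vcpu layout referenced by the patch. */
struct demo_fpu  { unsigned char x[512] __attribute__((aligned(16))); };
struct demo_vcpu { unsigned long pad; struct demo_fpu fpu_ctxt; };

static inline void demo_fxsave64(struct demo_vcpu *v)
{
    /* "cdaSDb" limits %1 to rcx/rdx/rax/rsi/rdi/rbx (never r8-r15), and
       "%P2" prints the constant offset without the '$' prefix, so the
       instruction assembles as, e.g., "rex64/fxsave 16(%rsi)", with no
       extended register and hence no second REX prefix for gas to reject. */
    __asm__ __volatile__ (
        "rex64/fxsave %P2(%1)"
        : "=m" (v->fpu_ctxt)
        : "cdaSDb" (v),
          "i" (offsetof(struct demo_vcpu, fpu_ctxt)) );
}

fxsave also requires its destination to be 16-byte aligned, hence the aligned attribute in the sketch (the real fpu_ctxt block already satisfies this).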
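
The new post-save fixup relies on two details that are easy to miss when reading the patch: bytes 2-3 of the FXSAVE image hold the x87 status word, whose bit 7 (0x80, the Error Summary flag) is set only while an unmasked exception is pending, which is the only case where fnclex is still needed; and on AMD CPUs a dummy emms/fildl sequence overwrites the stale FDP/FIP/FOP values that fxsave would otherwise carry over between contexts. A rough, hypothetical restatement (helper and parameter names invented here, not taken from the patch):

static inline void demo_post_fxsave_fixup(const unsigned char *fxsave_image,
                                          const unsigned int *safe_address,
                                          int vendor_is_amd)
{
    /* FSW.ES (bit 7 of the low status-word byte) flags a pending unmasked
       exception; only then does the exception state need clearing. */
    if ( fxsave_image[2] & 0x80 )
        __asm__ __volatile__ ( "fnclex" );

    /* AMD workaround: a dummy load from a known-good, cache-hot address
       gives FDP/FIP/FOP fixed values instead of leaking the previous ones. */
    if ( vendor_is_amd )
        __asm__ __volatile__ (
            "emms\n\t"   /* empty the x87/MMX register stack (tag word) */
            "fildl %0"   /* dummy load to overwrite FDP/FIP/FOP */
            :
            : "m" (*safe_address) );
}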