Implement rdtscp emulation and rdtscp_aux "support"
The rdtscp instruction (and the associated TSC_AUX
MSR) is present on most recent AMD processors,
and on the Nehalem and future Intel processors.
Cpuid has a bit to detect the presence of this feature.
Xen intentionally does not expose the cpuid rdtscp bit
to PV OSes or to HVM guests, but PV apps can see this
bit and, as a result, may choose to use the rdtscp
instruction. When a PV guest with such an app is migrated
to a machine that does not have rdtscp support, the
app will get killed due to an invalid op. Fix this
by emulating the rdtscp instruction. We also need
to emulate rdtscp in the case where the machine has
rdtscp support, but rdtsc emulation is enabled (which
is unfortunately a different path: a privileged op).
The rdtscp instruction reads the TSC_AUX register which
presumably is set by the OS (and, in the case of
tsc_mode==pvrdtscp, will be set by Xen). HVM Linux
and PV Linux will not set TSC_AUX because the
cpuid rdtscp bit is not propagated by Xen; I'm told that
Windows always sets TSC_AUX to zero. So for PV guests
running on rdtscp-capable hardware (that don't use
tsc_mode==pvrdtscp), always set TSC_AUX to zero.
Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
diff -r c0e32941ee69 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Wed Nov 25 14:19:50 2009 +0000
+++ b/xen/arch/x86/time.c Wed Nov 25 14:12:56 2009 -0700
@@ -851,9 +851,13 @@ static void __update_vcpu_system_time(st
else
tsc_stamp = t->local_tsc_stamp;
- if ( d->arch.tsc_mode == TSC_MODE_PVRDTSCP &&
- boot_cpu_has(X86_FEATURE_RDTSCP) )
- write_rdtscp_aux(d->arch.incarnation);
+ if ( boot_cpu_has(X86_FEATURE_RDTSCP) )
+ {
+ if ( d->arch.tsc_mode == TSC_MODE_PVRDTSCP )
+ write_rdtscp_aux(d->arch.incarnation);
+ else
+ write_rdtscp_aux(0);
+ }
/* Don't bother unless timestamps have changed or we are forced. */
if ( !force && (u->tsc_timestamp == tsc_stamp) )
@@ -1608,7 +1612,7 @@ void tsc_check_reliability(void)
* PV SoftTSC Emulation.
*/
-void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs)
+void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp)
{
s_time_t now = get_s_time();
struct domain *d = v->domain;
@@ -1633,6 +1637,10 @@ void pv_soft_rdtsc(struct vcpu *v, struc
regs->eax = (uint32_t)now;
regs->edx = (uint32_t)(now >> 32);
+
+ if ( rdtscp )
+ regs->ecx =
+ (d->arch.tsc_mode == TSC_MODE_PVRDTSCP) ? d->arch.incarnation :
0;
}
static int host_tsc_is_safe(void)
@@ -1826,7 +1834,7 @@ static void dump_softtsc(unsigned char k
printk(",khz=%"PRIu32"",d->arch.tsc_khz);
if ( d->arch.incarnation )
printk(",inc=%"PRIu32"",d->arch.incarnation);
- if ( !d->arch.vtsc )
+ if ( !(d->arch.vtsc_kerncount | d->arch.vtsc_usercount) )
{
printk("\n");
continue;
diff -r c0e32941ee69 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Wed Nov 25 14:19:50 2009 +0000
+++ b/xen/arch/x86/traps.c Wed Nov 25 14:12:56 2009 -0700
@@ -831,6 +831,26 @@ static void pv_cpuid(struct cpu_user_reg
regs->edx = d;
}
+static int emulate_invalid_rdtscp(struct cpu_user_regs *regs)
+{
+ char opcode[3];
+ unsigned long eip, rc;
+ struct vcpu *v = current;
+
+ eip = regs->eip;
+ if ( (rc = copy_from_user(opcode, (char *)eip, sizeof(opcode))) != 0 )
+ {
+ propagate_page_fault(eip + sizeof(opcode) - rc, 0);
+ return EXCRET_fault_fixed;
+ }
+ if ( memcmp(opcode, "\xf\x1\xf9", sizeof(opcode)) )
+ return 0;
+ eip += sizeof(opcode);
+ pv_soft_rdtsc(v, regs, 1);
+ instruction_done(regs, eip, 0);
+ return EXCRET_fault_fixed;
+}
+
static int emulate_forced_invalid_op(struct cpu_user_regs *regs)
{
char sig[5], instr[2];
@@ -879,7 +899,8 @@ asmlinkage void do_invalid_op(struct cpu
if ( likely(guest_mode(regs)) )
{
- if ( !emulate_forced_invalid_op(regs) )
+ if ( !emulate_invalid_rdtscp(regs) &&
+ !emulate_forced_invalid_op(regs) )
do_guest_trap(TRAP_invalid_op, regs, 0);
return;
}
@@ -2009,11 +2030,12 @@ static int emulate_privileged_op(struct
twobyte_opcode:
/*
- * All two-byte opcodes, except RDTSC (0x31) are executable only from
- * guest kernel mode (virtual ring 0).
+ * All 2 and 3 byte opcodes, except RDTSC (0x31) and RDTSCP (0x1,0xF9)
+ * are executable only from guest kernel mode (virtual ring 0).
*/
opcode = insn_fetch(u8, code_base, eip, code_limit);
if ( !guest_kernel_mode(v, regs) &&
+ (opcode != 0x1) && /* always emulate rdtscp */
!((opcode == 0x31) && v->domain->arch.vtsc) )
goto fail;
@@ -2021,6 +2043,12 @@ static int emulate_privileged_op(struct
goto fail;
switch ( opcode )
{
+ case 0x1: /* RDTSCP */
+ if ( insn_fetch(u8, code_base, eip, code_limit) != 0xf9 )
+ goto fail;
+ pv_soft_rdtsc(v, regs, 1);
+ break;
+
case 0x06: /* CLTS */
(void)do_fpu_taskswitch(0);
break;
@@ -2269,7 +2297,7 @@ static int emulate_privileged_op(struct
}
case 0x31: /* RDTSC */
- pv_soft_rdtsc(v, regs);
+ pv_soft_rdtsc(v, regs, 0);
break;
case 0x32: /* RDMSR */
diff -r c0e32941ee69 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h Wed Nov 25 14:19:50 2009 +0000
+++ b/xen/include/asm-x86/time.h Wed Nov 25 14:12:56 2009 -0700
@@ -59,7 +59,7 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic
uint64_t acpi_pm_tick_to_ns(uint64_t ticks);
uint64_t ns_to_acpi_pm_tick(uint64_t ns);
-void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs);
+void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp);
void tsc_set_info(struct domain *d, uint32_t tsc_mode, uint64_t elapsed_nsec,
uint32_t gtsc_khz, uint32_t incarnation);
rdtscp.patch
Description: Binary data
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|