A previous attempt was rejected as too intrusive, but
further app rdtsc optimization work depends heavily
on Xen being able to determine whether or not
the TSC is reliable.
This patch starts to introduce the concept of
X86_FEATURE_TSC_RELIABLE as it is defined and
used by Linux, but for now sets and tests it only
via a debug key, so that a wide variety of
hardware can be measured by the broader Xen
community to confirm or deny TSC assumptions.
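With the patch applied, the debug key handler (dump_softtsc)
prints one line summarizing the verdict and the largest warp
observed so far, e.g. (illustrative values only):

  (XEN) TSC marked as reliable, warp = 0 (count=1)

or "TSC marked as constant but not reliable" / "TSC not marked
as either constant or reliable" on hardware lacking the
corresponding feature bits.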
The eventual goal is for the evaluation of
TSC reliability to be exported to userland
so that apps can use rdtsc natively if and when
it is safe to do so.
(See http://lists.xensource.com/archives/html/xen-devel/2009-10/msg00056.html)
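As a rough sketch of that end state (and not part of this
patch), an application that has been told the TSC is reliable
could time things with rdtsc directly, along the lines below;
nothing here is an existing Xen or libxc interface:

  /* Hypothetical userland sketch: time an operation with rdtsc
   * once the hypervisor reports the TSC as safe to use directly. */
  #include <stdint.h>
  #include <stdio.h>

  static inline uint64_t rdtsc(void)
  {
      uint32_t lo, hi;
      asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
      return ((uint64_t)hi << 32) | lo;
  }

  int main(void)
  {
      uint64_t t0 = rdtsc();
      /* ... work being timed ... */
      uint64_t t1 = rdtsc();
      printf("elapsed cycles: %llu\n", (unsigned long long)(t1 - t0));
      return 0;
  }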
Note that the original Linux code for tsc_sync.c
uses a raw spinlock to ensure the "fastest, inlined,
non-debug version of a critical section". Xen
doesn't provide a _raw_spin_lock(), so I used
regular spinlocks, but I would prefer the code
to use something stricter, as Linux does.
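For illustration only, something along the following lines
would give the kind of uninstrumented critical section Linux
relies on; raw_lock_t, raw_lock() and raw_unlock() are made-up
names, not existing Xen interfaces:

  /* Illustrative sketch only: a minimal raw test-and-set lock.
   * These names are hypothetical, not existing Xen APIs. */
  typedef volatile int raw_lock_t;

  static inline void raw_lock(raw_lock_t *l)
  {
      while ( __sync_lock_test_and_set(l, 1) ) /* GCC atomic test-and-set */
          while ( *l )
              cpu_relax();                     /* spin on a plain read */
  }

  static inline void raw_unlock(raw_lock_t *l)
  {
      __sync_lock_release(l);                  /* releasing store of 0 */
  }

Something like this could stand in for the spin_lock()/
spin_unlock() pairs in check_tsc_warp() without the bookkeeping
of the regular lock, but this patch keeps the regular spinlocks
for now.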
(Also includes a minor nit: "NOSTOP" was used in
an early version of a Linux patch, but mainline
now uses "NONSTOP"... correct this for consistency.)
Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
diff -r 1e33261a814f xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/Makefile Tue Oct 06 14:07:32 2009 -0600
@@ -45,6 +45,7 @@ obj-y += string.o
obj-y += string.o
obj-y += sysctl.o
obj-y += time.o
+obj-y += tsc_sync.o
obj-y += trace.o
obj-y += traps.o
obj-y += usercopy.o
diff -r 1e33261a814f xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/cpu/amd.c Tue Oct 06 14:07:32 2009 -0600
@@ -463,7 +463,7 @@ static void __devinit init_amd(struct cp
c->x86_power = cpuid_edx(0x80000007);
if (c->x86_power & (1<<8)) {
set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
- set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+ set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
}
}
diff -r 1e33261a814f xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/cpu/intel.c Tue Oct 06 14:07:32 2009 -0600
@@ -226,7 +226,8 @@ static void __devinit init_intel(struct
set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
if (cpuid_edx(0x80000007) & (1u<<8)) {
set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
- set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+ set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
+ set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
}
if ((c->cpuid_level >= 0x00000006) &&
(cpuid_eax(0x00000006) & (1u<<2)))
diff -r 1e33261a814f xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/arch/x86/time.c Tue Oct 06 14:07:32 2009 -0600
@@ -698,7 +698,7 @@ void cstate_restore_tsc(void)
s_time_t stime_delta;
u64 new_tsc;
- if ( boot_cpu_has(X86_FEATURE_NOSTOP_TSC) )
+ if ( boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
return;
stime_delta = read_platform_stime() - t->stime_master_stamp;
@@ -1428,6 +1428,44 @@ struct tm wallclock_time(void)
return gmtime(seconds);
}
+/*
+ * TSC Reliability check
+ */
+
+static unsigned long tsc_max_warp = 0;
+static cpumask_t tsc_check_cpumask = CPU_MASK_NONE;
+static unsigned long tsc_check_count = 0;
+
+static void tsc_check_slave(void *unused)
+{
+ unsigned int cpu = smp_processor_id();
+ local_irq_disable();
+ while ( !cpu_isset(cpu, tsc_check_cpumask) )
+ mb();
+ check_tsc_warp(cpu_khz, &tsc_max_warp);
+ cpu_clear(cpu, tsc_check_cpumask);
+ local_irq_enable();
+}
+
+static void tsc_check_reliability(void)
+{
+ unsigned int cpu = smp_processor_id();
+ static DEFINE_SPINLOCK(lock);
+
+ spin_lock(&lock);
+
+ tsc_check_count++;
+ smp_call_function(tsc_check_slave, NULL, 0);
+ tsc_check_cpumask = cpu_online_map;
+ local_irq_disable();
+ check_tsc_warp(cpu_khz, &tsc_max_warp);
+ cpu_clear(cpu, tsc_check_cpumask);
+ local_irq_enable();
+ while ( !cpus_empty(tsc_check_cpumask) )
+ cpu_relax();
+
+ spin_unlock(&lock);
+}
/*
* PV SoftTSC Emulation.
@@ -1463,6 +1501,16 @@ static void dump_softtsc(unsigned char k
struct domain *d;
int domcnt = 0;
+ tsc_check_reliability();
+ if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
+ printk("TSC marked as reliable, "
+ "warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
+ else if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC ) )
+ printk("TSC marked as constant but not reliable, "
+ "warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
+ else
+ printk("TSC not marked as either constant or reliable, "
+ "warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
for_each_domain ( d )
{
if ( !d->arch.vtsc )
diff -r 1e33261a814f xen/arch/x86/tsc_sync.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/tsc_sync.c Tue Oct 06 14:07:32 2009 -0600
@@ -0,0 +1,88 @@
+/*
+ * check TSC synchronization.
+ *
+ * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
+ * Modified for Xen by Dan Magenheimer, Oracle Corp.
+ *
+ * We check whether all boot CPUs have their TSC's synchronized,
+ * print a warning if not and turn off the TSC clock-source.
+ *
+ * The warp-check is point-to-point between two CPUs, the CPU
+ * initiating the bootup is the 'source CPU', the freshly booting
+ * CPU is the 'target CPU'.
+ *
+ * Only two CPUs may participate - they can enter in any order.
+ * ( The serial nature of the boot logic and the CPU hotplug lock
+ * protects against more than 2 CPUs entering this code. )
+ */
+#include <xen/config.h>
+#include <xen/spinlock.h>
+#include <asm/processor.h>
+#include <asm/time.h>
+
+#define rdtsc_barrier mb
+#define __RAW_SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED
+
+/*
+ * We use a raw spinlock in this exceptional case, because
+ * we want to have the fastest, inlined, non-debug version
+ * of a critical section, to be able to prove TSC time-warps:
+ */
+static spinlock_t sync_lock = SPIN_LOCK_UNLOCKED;
+static cycles_t last_tsc;
+
+/*
+ * TSC-warp measurement loop running on both CPUs:
+ */
+void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
+{
+ cycles_t start, now, prev, end;
+ int i;
+
+ rdtsc_barrier();
+ start = get_cycles();
+ rdtsc_barrier();
+ /*
+ * The measurement runs for 20 msecs:
+ */
+ end = start + tsc_khz * 20ULL;
+ now = start;
+
+ for (i = 0; ; i++) {
+ /*
+ * We take the global lock, measure TSC, save the
+ * previous TSC that was measured (possibly on
+ * another CPU) and update the previous TSC timestamp.
+ */
+ spin_lock(&sync_lock);
+ prev = last_tsc;
+ rdtsc_barrier();
+ now = get_cycles();
+ rdtsc_barrier();
+ last_tsc = now;
+ spin_unlock(&sync_lock);
+
+ /*
+ * Be nice every now and then (and also check whether
+ * measurement is done [we also insert a 10 million
+ * loops safety exit, so we dont lock up in case the
+ * TSC readout is totally broken]):
+ */
+ if (unlikely(!(i & 7))) {
+ if (now > end || i > 10000000)
+ break;
+ cpu_relax();
+ /*touch_nmi_watchdog();*/
+ }
+ /*
+ * Outside the critical section we can now see whether
+ * we saw a time-warp of the TSC going backwards:
+ */
+ if (unlikely(prev > now)) {
+ spin_lock(&sync_lock);
+ if ( prev - now > *max_warp )
+ *max_warp = prev - now;
+ spin_unlock(&sync_lock);
+ }
+ }
+}
diff -r 1e33261a814f xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/include/asm-x86/cpufeature.h Tue Oct 06 14:07:32 2009 -0600
@@ -74,9 +74,10 @@
#define X86_FEATURE_P3 (3*32+ 6) /* P3 */
#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_NOSTOP_TSC (3*32+ 9) /* TSC does not stop in C states */
+#define X86_FEATURE_NONSTOP_TSC (3*32+ 9) /* TSC does not stop in C states */
#define X86_FEATURE_ARAT (3*32+ 10) /* Always running APIC timer */
#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_TSC_RELIABLE (3*32+12) /* TSC is known to be reliable */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
diff -r 1e33261a814f xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h Mon Sep 28 13:59:35 2009 +0100
+++ b/xen/include/asm-x86/time.h Tue Oct 06 14:07:32 2009 -0600
@@ -43,4 +43,6 @@ uint64_t ns_to_acpi_pm_tick(uint64_t ns)
void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs);
+void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp);
+
#endif /* __X86_TIME_H__ */