Here is the updated patch for the constant-tsc case. -Jimmy
CPUIDLE: revise tsc-restore to avoid increasing tsc skew between cpus
Originally, the sequence on each cpu was [tsc-save, enter deepC, break-evt,
exit deepC, tsc-restore], so systematic error accumulated quite easily.
Once the workloads across cpus are unbalanced, the tsc skew between cpus
eventually grows bigger and bigger - more than 10 seconds can be observed.
We then removed the tsc-save step and did the tsc-restore after exiting deepC
based only on the per-cpu t->stime_master_stamp, t->tsc_scale and
t->local_tsc_stamp. That slows the accumulation down, but cannot eliminate it.
Now, for the constant-tsc case, we just keep an initial stamp, taken via
cstate_init_stamp during boot/S3 resume and based on the platform stime. After
exiting deepC, each cpu only needs to do a tsc-restore relative to that initial
stamp. Since the base and the tsc->ns scale are fixed and identical for all
cpus, no tsc skew can accumulate. In addition, the per-cpu tsc scale
calibration is bypassed in the constant-tsc case.
Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>
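For reference, below is a minimal, standalone sketch (not part of the patch) of
the fixed-base restore that __restore_tsc performs in the constant-tsc case.
The ns->TSC conversion here is simplified to a plain Hz ratio rather than Xen's
struct time_scale / scale_delta(), and all names other than the two initial_*
stamps are invented for the example:

#include <stdint.h>
#include <stdio.h>

/* Fixed base taken once at boot/S3 resume (cstate_init_stamp in the patch). */
static uint64_t initial_tsc_stamp;            /* TSC value at the stamp       */
static int64_t  initial_stime_platform_stamp; /* platform stime (ns) at stamp */

/* Simplified stand-in for scale_delta(): ns -> TSC ticks at tsc_hz.
 * Overflow handling is ignored for the sake of the example. */
static uint64_t ns_to_tsc(int64_t ns, uint64_t tsc_hz)
{
    return (uint64_t)ns * tsc_hz / 1000000000ULL;
}

/* What each cpu would write to MSR_IA32_TSC after leaving a deep C state. */
static uint64_t restored_tsc(int64_t now_stime_ns, uint64_t tsc_hz)
{
    int64_t stime_delta = now_stime_ns - initial_stime_platform_stamp;

    if ( stime_delta < 0 )
        stime_delta = 0;

    /* Same fixed base and same scale for every cpu, so no skew accumulates. */
    return initial_tsc_stamp + ns_to_tsc(stime_delta, tsc_hz);
}

int main(void)
{
    uint64_t tsc_hz = 2400000000ULL;          /* 2.4 GHz, example value only */

    initial_tsc_stamp = 1000000;
    initial_stime_platform_stamp = 0;

    /* Two cpus waking at different times still land on the same timeline. */
    printf("%llu\n", (unsigned long long)restored_tsc(1000000, tsc_hz)); /* 1ms */
    printf("%llu\n", (unsigned long long)restored_tsc(5000000, tsc_hz)); /* 5ms */
    return 0;
}

The key property is that every cpu computes its restored TSC from the same
fixed base and the same scale, so any conversion error is identical on all
cpus and cannot show up as inter-cpu skew.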
diff -r 045f70d1acdb xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Sat Dec 13 17:44:20 2008 +0000
+++ b/xen/arch/x86/time.c Mon Dec 15 10:35:11 2008 +0800
@@ -69,8 +69,11 @@ static DEFINE_PER_CPU(struct cpu_time, c
#define EPOCH MILLISECS(1000)
static struct timer calibration_timer;
-/* TSC is invariant on C state entry? */
-static bool_t tsc_invariant;
+/* Does the TSC keep counting (not stop) during deep C states? */
+static bool_t tsc_nostop;
+/* Does the TSC run at a constant rate, independent of P/T states? */
+static int constant_tsc = 0;
+boolean_param("const_tsc", constant_tsc);
/*
* We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
@@ -551,6 +554,10 @@ static u64 plt_stamp; /* hard
static u64 plt_stamp; /* hardware-width platform counter stamp */
static struct timer plt_overflow_timer;
+/* The following two variables are used for deep C state TSC restore */
+static u64 initial_tsc_stamp; /* initial tsc stamp while plt starting */
+static s_time_t initial_stime_platform_stamp; /* initial stime stamp */
+
static void plt_overflow(void *unused)
{
u64 count;
@@ -664,25 +671,41 @@ static void init_platform_timer(void)
freq_string(pts->frequency), pts->name);
}
-void cstate_restore_tsc(void)
+static void cstate_init_stamp(void)
+{
+ if ( tsc_nostop || !constant_tsc )
+ return;
+
+ initial_stime_platform_stamp = read_platform_stime();
+ rdtscll(initial_tsc_stamp);
+}
+
+static inline void __restore_tsc(s_time_t plt_stime)
{
struct cpu_time *t = &this_cpu(cpu_time);
struct time_scale sys_to_tsc = scale_reciprocal(t->tsc_scale);
s_time_t stime_delta;
u64 tsc_delta;
- if ( tsc_invariant )
+ if ( tsc_nostop )
return;
- stime_delta = read_platform_stime() - t->stime_master_stamp;
+ stime_delta = plt_stime -
+ (constant_tsc ? initial_stime_platform_stamp : t->stime_master_stamp);
+
if ( stime_delta < 0 )
stime_delta = 0;
tsc_delta = scale_delta(stime_delta, &sys_to_tsc);
- wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta);
+ wrmsrl(MSR_IA32_TSC,
+ (constant_tsc ? initial_tsc_stamp : t->local_tsc_stamp) + tsc_delta);
}
+void cstate_restore_tsc(void)
+{
+ __restore_tsc(read_platform_stime());
+}
/***************************************************************************
* CMOS Timer functions
***************************************************************************/
@@ -960,6 +983,18 @@ static void local_time_calibration(void)
curr_master_stime - curr_local_stime);
#endif
+ if ( constant_tsc )
+ {
+ local_irq_disable();
+ t->local_tsc_stamp = curr_tsc;
+ t->stime_local_stamp = curr_master_stime;
+ t->stime_master_stamp = curr_master_stime;
+ local_irq_enable();
+
+ update_vcpu_system_time(current);
+ goto out;
+ }
+
/* Local time warps forward if it lags behind master time. */
if ( curr_local_stime < curr_master_stime )
curr_local_stime = curr_master_stime;
@@ -1082,6 +1117,8 @@ static void time_calibration_rendezvous(
mb(); /* receive signal /then/ read r->master_stime */
}
+ __restore_tsc(r->master_stime);
+
rdtscll(c->local_tsc_stamp);
c->stime_local_stamp = get_s_time();
c->stime_master_stamp = r->master_stime;
@@ -1125,9 +1162,23 @@ void init_percpu_time(void)
/* Late init function (after all CPUs are booted). */
int __init init_xen_time(void)
{
- /* Is TSC invariant during deep C state? */
+ /* On recent Intel x86 models, the TSC increments at a constant rate */
+ if ( (current_cpu_data.x86 == 0xf && current_cpu_data.x86_model >= 0x03) ||
+ (current_cpu_data.x86 == 0x6 && current_cpu_data.x86_model >= 0x0e) )
+ {
+ int cpu;
+
+ constant_tsc = 1;
+
+ for_each_cpu(cpu)
+ {
+ per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+ }
+ }
+
+ /* Does the TSC keep counting during deep C states? */
if ( cpuid_edx(0x80000007) & (1u<<8) )
- tsc_invariant = 1;
+ tsc_nostop = 1;
open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
@@ -1139,6 +1190,8 @@ int __init init_xen_time(void)
stime_platform_stamp = NOW();
init_platform_timer();
+
+ cstate_init_stamp();
init_percpu_time();
@@ -1260,6 +1313,8 @@ int time_resume(void)
disable_pit_irq();
init_percpu_time();
+
+ cstate_init_stamp();
do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());