Recently we found a performance bug when doing network tests with VT-d
assigned devices - in some extreme cases, the network performance in an
HVM guest running a new Linux kernel could be 1/20 of native. The root
cause is that one of our sync-tsc-under-deep-C-state patches introduces
an extra drift of thousands of TSC ticks between pCPUs, which makes the
check-tsc-sync logic in the HVM guest fail. As a result, the kernel
falls back to using a platform timer (HPET, PM timer) for gettimeofday
instead of the TSC, which causes very frequent and costly I/O port
access VMExits - three per call.
We provide the two patches below to address the issue:
tsc1.patch: Minimize the TSC drift between pCPUs by letting the BSP and
the APs set the TSC at the same time in time_calibration_rendezvous().
Looping a few times before writing the TSC sounds better, but it may be
too costly.
Signed-off-by: Xiaowei Yang <xiaowei.yang@xxxxxxxxx>
tsc2.patch: Only do the TSC sync when it is really necessary, which
greatly narrows its impact.
Signed-off-by: Wei Gang <wei.gang@xxxxxxxxx>
Thanks,
Xiaowei
diff -r 0b0e7c2b4eef xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Tue Jan 20 21:21:16 2009 +0800
+++ b/xen/arch/x86/time.c Mon Feb 09 02:21:50 2009 +0800
@@ -1095,22 +1095,21 @@ static void time_calibration_rendezvous(
while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
cpu_relax();
r->master_stime = read_platform_stime();
- rdtscll(r->master_tsc_stamp);
+ if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+ rdtscll(r->master_tsc_stamp);
mb(); /* write r->master_* /then/ signal */
atomic_inc(&r->nr_cpus);
- c->local_tsc_stamp = r->master_tsc_stamp;
}
else
{
atomic_inc(&r->nr_cpus);
while ( atomic_read(&r->nr_cpus) != total_cpus )
- cpu_relax();
- mb(); /* receive signal /then/ read r->master_* */
- if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
- wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
- rdtscll(c->local_tsc_stamp);
- }
-
+ mb(); /* receive signal /then/ read r->master_* */
+ }
+
+ if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+ wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
+ rdtscll(c->local_tsc_stamp);
c->stime_local_stamp = get_s_time();
c->stime_master_stamp = r->master_stime;
diff -r 246ecf354c85 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/arch/x86/acpi/cpu_idle.c Mon Feb 16 12:57:08 2009 +0800
@@ -737,6 +737,15 @@ long set_cx_pminfo(uint32_t cpu, struct
if ( cpu_id == 0 && pm_idle_save == NULL )
{
+ int deepest_cx = acpi_power->states[acpi_power->count - 1].type;
+ if ( max_cstate >= 3 && deepest_cx >= ACPI_STATE_C3 )
+ tsc_may_stop = 1;
+ else if ( max_cstate >= 2 && deepest_cx >= ACPI_STATE_C2
+ && !local_apic_timer_c2_ok )
+ tsc_may_stop = 1;
+ else
+ tsc_may_stop = 0;
+
pm_idle_save = pm_idle;
pm_idle = acpi_processor_idle;
}
diff -r 246ecf354c85 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/arch/x86/time.c Mon Feb 16 13:10:24 2009 +0800
@@ -1091,6 +1091,8 @@ struct calibration_rendezvous {
u64 master_tsc_stamp;
};
+int tsc_may_stop __read_mostly = 0;
+
static void time_calibration_rendezvous(void *_r)
{
struct cpu_calibration *c = &this_cpu(cpu_calibration);
@@ -1102,7 +1104,9 @@ static void time_calibration_rendezvous(
while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
cpu_relax();
r->master_stime = read_platform_stime();
- if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+ if ( !boot_cpu_has(X86_FEATURE_NOSTOP_TSC)
+ && boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
+ && tsc_may_stop )
rdtscll(r->master_tsc_stamp);
mb(); /* write r->master_* /then/ signal */
atomic_inc(&r->nr_cpus);
@@ -1114,7 +1118,7 @@ static void time_calibration_rendezvous(
mb(); /* receive signal /then/ read r->master_* */
}
- if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+ if ( r->master_tsc_stamp )
wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
rdtscll(c->local_tsc_stamp);
c->stime_local_stamp = get_s_time();
@@ -1127,7 +1131,8 @@ static void time_calibration(void *unuse
{
struct calibration_rendezvous r = {
.cpu_calibration_map = cpu_online_map,
- .nr_cpus = ATOMIC_INIT(0)
+ .nr_cpus = ATOMIC_INIT(0),
+ .master_tsc_stamp = 0
};
/* @wait=1 because we must wait for all cpus before freeing @r. */
diff -r 246ecf354c85 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/include/asm-x86/time.h Mon Feb 16 12:57:08 2009 +0800
@@ -41,4 +41,6 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic
uint64_t acpi_pm_tick_to_ns(uint64_t ticks);
uint64_t ns_to_acpi_pm_tick(uint64_t ns);
+extern int tsc_may_stop;
+
#endif /* __X86_TIME_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|