WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] Fix performance issue brought by TSC-sync logic

To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] Fix performance issue brought by TSC-sync logic
From: "Yang, Xiaowei" <xiaowei.yang@xxxxxxxxx>
Date: Mon, 23 Feb 2009 16:21:07 +0800
Delivery-date: Mon, 23 Feb 2009 00:21:58 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Thunderbird 2.0.0.19 (X11/20090105)
Recently we found a performance bug when running network tests with VT-d
assigned devices - in some extreme cases, the network performance in an HVM
guest using a recent Linux kernel could be 1/20 of native. The root cause is
that one of our sync-tsc-under-deep-C-state patches introduces extra drift (on
the order of thousands of TSC ticks) between pCPUs, which makes the
check-tsc-sync logic in the HVM guest fail. As a result, the kernel falls back
to a platform timer (HPET, PMtimer) for gettimeofday instead of the TSC, which
causes very frequent, costly I/O port access VMExits - three per call.

We provide the following 2 patches to address the issue:

tsc1.patch: Minimize the TSC drift between pCPUs by having the BSP and the APs
write the TSC at the same time in time_calibration_rendezvous(). Looping a few times before writing the TSC might give better results, but it may be too costly.
Signed-off-by: Xiaowei Yang <xiaowei.yang@xxxxxxxxx>

tsc2.patch: Only do TSC sync when it is really necessary, which greatly narrows its impact.
Signed-off-by: Wei Gang <wei.gang@xxxxxxxxx>


Thanks,
Xiaowei

diff -r 0b0e7c2b4eef xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Tue Jan 20 21:21:16 2009 +0800
+++ b/xen/arch/x86/time.c       Mon Feb 09 02:21:50 2009 +0800
@@ -1095,22 +1095,21 @@ static void time_calibration_rendezvous(
         while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
-        rdtscll(r->master_tsc_stamp);
+        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+            rdtscll(r->master_tsc_stamp);
         mb(); /* write r->master_* /then/ signal */
         atomic_inc(&r->nr_cpus);
-        c->local_tsc_stamp = r->master_tsc_stamp;
     }
     else
     {
         atomic_inc(&r->nr_cpus);
         while ( atomic_read(&r->nr_cpus) != total_cpus )
-            cpu_relax();
-        mb(); /* receive signal /then/ read r->master_* */
-        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
-            wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
-        rdtscll(c->local_tsc_stamp);
-    }
-
+            mb(); /* receive signal /then/ read r->master_* */
+    }
+
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+        wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
+    rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
 
diff -r 246ecf354c85 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/arch/x86/acpi/cpu_idle.c      Mon Feb 16 12:57:08 2009 +0800
@@ -737,6 +737,15 @@ long set_cx_pminfo(uint32_t cpu, struct 
 
     if ( cpu_id == 0 && pm_idle_save == NULL )
     {
+        int deepest_cx = acpi_power->states[acpi_power->count - 1].type;
+        if ( max_cstate >= 3 && deepest_cx >= ACPI_STATE_C3 )
+            tsc_may_stop = 1;
+        else if ( max_cstate >= 2 && deepest_cx >= ACPI_STATE_C2
+                  && !local_apic_timer_c2_ok )
+            tsc_may_stop = 1;
+        else
+            tsc_may_stop = 0;
+
         pm_idle_save = pm_idle;
         pm_idle = acpi_processor_idle;
     }
diff -r 246ecf354c85 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/arch/x86/time.c       Mon Feb 16 13:10:24 2009 +0800
@@ -1091,6 +1091,8 @@ struct calibration_rendezvous {
     u64 master_tsc_stamp;
 };
 
+int tsc_may_stop __read_mostly = 0;
+
 static void time_calibration_rendezvous(void *_r)
 {
     struct cpu_calibration *c = &this_cpu(cpu_calibration);
@@ -1102,7 +1104,9 @@ static void time_calibration_rendezvous(
         while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
-        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+        if ( !boot_cpu_has(X86_FEATURE_NOSTOP_TSC)
+             && boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
+             && tsc_may_stop )
             rdtscll(r->master_tsc_stamp);
         mb(); /* write r->master_* /then/ signal */
         atomic_inc(&r->nr_cpus);
@@ -1114,7 +1118,7 @@ static void time_calibration_rendezvous(
             mb(); /* receive signal /then/ read r->master_* */
     }
 
-    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    if ( r->master_tsc_stamp )
         wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
     rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
@@ -1127,7 +1131,8 @@ static void time_calibration(void *unuse
 {
     struct calibration_rendezvous r = {
         .cpu_calibration_map = cpu_online_map,
-        .nr_cpus = ATOMIC_INIT(0)
+        .nr_cpus = ATOMIC_INIT(0),
+        .master_tsc_stamp = 0
     };
 
     /* @wait=1 because we must wait for all cpus before freeing @r. */
diff -r 246ecf354c85 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h        Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/include/asm-x86/time.h        Mon Feb 16 12:57:08 2009 +0800
@@ -41,4 +41,6 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic
 uint64_t acpi_pm_tick_to_ns(uint64_t ticks);
 uint64_t ns_to_acpi_pm_tick(uint64_t ns);
 
+extern int tsc_may_stop;
+
 #endif /* __X86_TIME_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel