WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH] Fix performance issue brought by TSC-sync logic

To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH] Fix performance issue brought by TSC-sync logic
From: "Yang, Xiaowei" <xiaowei.yang@xxxxxxxxx>
Date: Wed, 25 Feb 2009 18:26:38 +0800
Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Wed, 25 Feb 2009 02:27:23 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <C5C923B7.32EA%keir.fraser@xxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <C5C923B7.32EA%keir.fraser@xxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Thunderbird 2.0.0.19 (X11/20090105)
Keir Fraser wrote:
On 23/02/2009 22:33, "Yang, Xiaowei" <xiaowei.yang@xxxxxxxxx> wrote:

For further improvement to reach the effect of 3), e.g. by taking care
of cache consistency among CPUs, there will be more overhead. And
considering the function is called per second, we are hesitating to do
this. What's your idea?:)

Maybe we should see what that overhead is... Also we may not really need to
run that function every second.

 -- Keir



I measured time_calibration_rendezvous()'s overhead on my machine. It's around 5-6k TSC cycles. And I made another patch to add a loop. The cycle skew introduced by TSC-sync is gone with it. And a bit surprisingly, the expected extra overhead is not even noticeable:)

The new patch is attached.

Thanks,
Xiaowei
diff -r 0b0e7c2b4eef xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Tue Jan 20 21:21:16 2009 +0800
+++ b/xen/arch/x86/time.c       Wed Feb 25 03:11:50 2009 +0800
@@ -1079,38 +1079,69 @@ static void local_time_calibration(void)
  */
 struct calibration_rendezvous {
     cpumask_t cpu_calibration_map;
-    atomic_t nr_cpus;
+    atomic_t count_start;
+    atomic_t count_end;
     s_time_t master_stime;
     u64 master_tsc_stamp;
 };
 
+#define NR_LOOPS 5
+
 static void time_calibration_rendezvous(void *_r)
 {
+    int i;
     struct cpu_calibration *c = &this_cpu(cpu_calibration);
     struct calibration_rendezvous *r = _r;
     unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
 
-    if ( smp_processor_id() == 0 )
-    {
-        while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
-            cpu_relax();
-        r->master_stime = read_platform_stime();
-        rdtscll(r->master_tsc_stamp);
-        mb(); /* write r->master_* /then/ signal */
-        atomic_inc(&r->nr_cpus);
-        c->local_tsc_stamp = r->master_tsc_stamp;
-    }
-    else
-    {
-        atomic_inc(&r->nr_cpus);
-        while ( atomic_read(&r->nr_cpus) != total_cpus )
-            cpu_relax();
-        mb(); /* receive signal /then/ read r->master_* */
-        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
-            wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
-        rdtscll(c->local_tsc_stamp);
-    }
-
+    /* 
+     * Loop is used here to get rid of the cache's side effect to enlarge
+     * the TSC difference among CPUs.
+     */
+    for ( i = 0; i < NR_LOOPS; i++ )
+    {
+        if ( smp_processor_id() == 0 )
+        {
+            while ( atomic_read(&r->count_start) != (total_cpus - 1) )
+                mb();
+   
+            if ( r->master_stime == 0 )
+            {
+                r->master_stime = read_platform_stime();
+                if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+                    rdtscll(r->master_tsc_stamp);
+            }
+            atomic_set(&r->count_end, 0);
+            wmb();
+            atomic_inc(&r->count_start);
+    
+            if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && 
+                 i == NR_LOOPS - 1 )
+                write_tsc((u32)r->master_tsc_stamp, (u32)(r->master_tsc_stamp 
>> 32));
+    
+            while (atomic_read(&r->count_end) != total_cpus - 1)
+                mb();
+            atomic_set(&r->count_start, 0);
+            wmb();
+            atomic_inc(&r->count_end);
+        }
+        else
+        {
+            atomic_inc(&r->count_start);
+            while ( atomic_read(&r->count_start) != total_cpus )
+                mb();
+    
+            if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && 
+                 i == NR_LOOPS - 1 )
+                write_tsc((u32)r->master_tsc_stamp, (u32)(r->master_tsc_stamp 
>> 32));
+    
+            atomic_inc(&r->count_end);
+            while (atomic_read(&r->count_end) != total_cpus)
+                mb();
+        }
+    }
+
+    rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
 
@@ -1121,7 +1152,9 @@ static void time_calibration(void *unuse
 {
     struct calibration_rendezvous r = {
         .cpu_calibration_map = cpu_online_map,
-        .nr_cpus = ATOMIC_INIT(0)
+        .count_start = ATOMIC_INIT(0),
+        .count_end = ATOMIC_INIT(0),
+        .master_stime = 0
     };
 
     /* @wait=1 because we must wait for all cpus before freeing @r. */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel