WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH] x86: don't write_tsc() non-zero values on CPUs u

To: Jan Beulich <JBeulich@xxxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: Re: [Xen-devel] [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
From: Keir Fraser <keir.xen@xxxxxxxxx>
Date: Thu, 14 Apr 2011 08:25:56 +0100
Cc: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>, "winston.l.wang" <winston.l.wang@xxxxxxxxx>
Delivery-date: Thu, 14 Apr 2011 00:26:42 -0700
Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:user-agent:date:subject:from:to:cc:message-id :thread-topic:thread-index:in-reply-to:mime-version:content-type :content-transfer-encoding; bh=TkbUw0zykYOiGTBvx15AePzYumsdaIjk4yLaeszsua8=; b=vuKkn6qZp/VwYvKE4oLxXPQK7kwy5qwQszB6MAQQqbMRI2L0UJdPY/F4PhVTqFgfTC mpsWr7BdHmAvB2JKvI5krPk3mG0GJLLYfb7JYul/KxpPbDnLzNZhnhrMUBnvqJSYe8O/ XIjitvD/xS5UEH7PxyrWq5cUXmn1TaNlm9ScY=
Domainkey-signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=user-agent:date:subject:from:to:cc:message-id:thread-topic :thread-index:in-reply-to:mime-version:content-type :content-transfer-encoding; b=pByHMryhwp7MuqTzRxi27zzEQQ2mwi3DT8vNXQi/K1xWR8QK53PHodOzVE8JcvwM4Y FUqCszbQerWzj146QvyJvmrPqXvoziYWKGTiFFBT+rIPYrYkv4QMMrZqj+9rzzwy10z9 VczvW4zeJnttNtWdRz0pfsDZ6FZlqzV95BjOw=
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <4DA6BBD1020000780003B865@xxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: Acv6dTGS2wvFMNpbxUKojLmnUFUxIw==
Thread-topic: [Xen-devel] [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits
User-agent: Microsoft-Entourage/12.28.0.101117
On 14/04/2011 08:18, "Jan Beulich" <JBeulich@xxxxxxxxxx> wrote:

> This means suppressing the uses in time_calibration_tsc_rendezvous(),
> cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
> hang of Linux Dom0 when loading processor.ko on such systems that
> have support for C states above C1.

Should your new test be gated on !X86_FEATURE_TSC_RELIABLE? We already
*never* write the TSC when boot_cpu_has(TSC_RELIABLE) -- Dan Magenheimer
made that change on the assumption that TSCs were globally synced by
firmware in this case, and us writing one or more TSCs could only ever make
things worse.

 -- Keir

> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
> 
> --- a/xen/arch/x86/acpi/cpu_idle.c
> +++ b/xen/arch/x86/acpi/cpu_idle.c
> @@ -1098,3 +1098,7 @@ void cpuidle_disable_deep_cstate(void)
>      hpet_disable_legacy_broadcast();
>  }
>  
> +bool_t cpuidle_using_deep_cstate(void)
> +{
> +    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
> +}
> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -41,6 +41,7 @@
>  #include <asm/flushtlb.h>
>  #include <asm/msr.h>
>  #include <asm/mtrr.h>
> +#include <asm/time.h>
>  #include <mach_apic.h>
>  #include <mach_wakecpu.h>
>  #include <smpboot_hooks.h>
> @@ -124,6 +125,12 @@ static void smp_store_cpu_info(int id)
>      ;
>  }
>  
> +/*
> + * TSC's upper 32 bits can't be written in earlier CPUs (before
> + * Prescott), there is no way to resync one AP against BP.
> + */
> +bool_t disable_tsc_sync;
> +
>  static atomic_t tsc_count;
>  static uint64_t tsc_value;
>  static cpumask_t tsc_sync_cpu_mask;
> @@ -132,6 +139,9 @@ static void synchronize_tsc_master(unsig
>  {
>      unsigned int i;
>  
> +    if ( disable_tsc_sync )
> +        return;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
>           !cpu_isset(slave, tsc_sync_cpu_mask) )
>          return;
> @@ -153,6 +163,9 @@ static void synchronize_tsc_slave(unsign
>  {
>      unsigned int i;
>  
> +    if ( disable_tsc_sync )
> +        return;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
>           !cpu_isset(slave, tsc_sync_cpu_mask) )
>          return;
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -21,6 +21,7 @@
>  #include <xen/smp.h>
>  #include <xen/irq.h>
>  #include <xen/softirq.h>
> +#include <xen/cpuidle.h>
>  #include <xen/symbols.h>
>  #include <xen/keyhandler.h>
>  #include <xen/guest_access.h>
> @@ -1385,6 +1386,9 @@ void init_percpu_time(void)
>  /* Late init function (after all CPUs are booted). */
>  int __init init_xen_time(void)
>  {
> +    u64 tsc, tmp;
> +    const char *what = NULL;
> +
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
>      {
>          /*
> @@ -1398,6 +1402,45 @@ int __init init_xen_time(void)
>              setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
>      }
>  
> +    /*
> +     * On certain older Intel CPUs writing the TSC MSR clears the upper
> +     * 32 bits. Obviously we must not use write_tsc() on such CPUs.
> +     *
> +     * Additionally, AMD specifies that being able to write the TSC MSR
> +     * is not an architectural feature (but, other than their manual says,
> +     * also cannot be determined from CPUID bits).
> +     */
> +    rdtscll(tsc);
> +    if ( wrmsr_safe(MSR_IA32_TSC, (u32)tsc) == 0 )
> +    {
> +        u64 tmp2;
> +
> +        rdtscll(tmp2);
> +        write_tsc(tsc | (1ULL << 32));
> +        rdtscll(tmp);
> +        if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) )
> +            what = "only partially";
> +    }
> +    else
> +        what = "not";
> +    if ( what )
> +    {
> +        printk(XENLOG_WARNING "TSC %s writable\n", what);
> +
> +        /* time_calibration_tsc_rendezvous() must not be used */
> +        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
> +            setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
> +
> +        /* cstate_restore_tsc() must not be used (or do nothing) */
> +        if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
> +            cpuidle_disable_deep_cstate();
> +
> +        /* synchronize_tsc_slave() must do nothing */
> +        disable_tsc_sync = 1;
> +    }
> +    else
> +        write_tsc(tsc + 4 * (s32)(tmp - tsc));
> +
>      /* If we have constant-rate TSCs then scale factor can be shared. */
>      if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
>      {
> @@ -1451,7 +1494,7 @@ static int _disable_pit_irq(void(*hpet_b
>       * XXX dom0 may rely on RTC interrupt delivery, so only enable
>       * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
>       */
> -    if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) )
> +    if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) )
>      {
>          hpet_broadcast_setup();
>          if ( !hpet_broadcast_is_available() )
> --- a/xen/include/asm-x86/setup.h
> +++ b/xen/include/asm-x86/setup.h
> @@ -4,7 +4,6 @@
>  #include <xen/multiboot.h>
>  
>  extern bool_t early_boot;
> -extern s8 xen_cpuidle;
>  extern unsigned long xenheap_initial_phys_start;
>  
>  void init_done(void);
> --- a/xen/include/asm-x86/time.h
> +++ b/xen/include/asm-x86/time.h
> @@ -24,6 +24,8 @@
>  
>  typedef u64 cycles_t;
>  
> +extern bool_t disable_tsc_sync;
> +
>  static inline cycles_t get_cycles(void)
>  {
>      cycles_t c;
> --- a/xen/include/xen/cpuidle.h
> +++ b/xen/include/xen/cpuidle.h
> @@ -85,7 +85,10 @@ struct cpuidle_governor
>      void (*reflect)         (struct acpi_processor_power *dev);
>  };
>  
> +extern s8 xen_cpuidle;
>  extern struct cpuidle_governor *cpuidle_current_governor;
> +
> +bool_t cpuidle_using_deep_cstate(void);
>  void cpuidle_disable_deep_cstate(void);
>  
>  extern void cpuidle_wakeup_mwait(cpumask_t *mask);
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel