>>> On 14.04.11 at 09:18, "Jan Beulich" <JBeulich@xxxxxxxxxx> wrote:
> This means suppressing the uses in time_calibration_tsc_rendezvous(),
> cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot
> hang of Linux Dom0 when loading processor.ko on such systems that
> have support for C states above C1.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
A note on backporting to 4.1 and 4.0 (the problem was really found on
4.0.x): An additional adjustment to hpet_disable_legacy_broadcast()
is necessary here, as, unlike in -unstable, it isn't prepared to be
called before hpet_broadcast_init(), e.g.:
--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -632,6 +632,9 @@ void hpet_disable_legacy_broadcast(void)
u32 cfg;
unsigned long flags;
+ if ( !legacy_hpet_event.shift )
+ return;
+
spin_lock_irqsave(&legacy_hpet_event.lock, flags);
legacy_hpet_event.flags |= HPET_EVT_DISABLE;
Additionally, the disabling of TSC sync isn't necessary on 4.0 - zero gets
written into the TSC MSR there.
Jan
> --- a/xen/arch/x86/acpi/cpu_idle.c
> +++ b/xen/arch/x86/acpi/cpu_idle.c
> @@ -1098,3 +1098,7 @@ void cpuidle_disable_deep_cstate(void)
> hpet_disable_legacy_broadcast();
> }
>
> +bool_t cpuidle_using_deep_cstate(void)
> +{
> + return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
> +}
> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -41,6 +41,7 @@
> #include <asm/flushtlb.h>
> #include <asm/msr.h>
> #include <asm/mtrr.h>
> +#include <asm/time.h>
> #include <mach_apic.h>
> #include <mach_wakecpu.h>
> #include <smpboot_hooks.h>
> @@ -124,6 +125,12 @@ static void smp_store_cpu_info(int id)
> ;
> }
>
> +/*
> + * The TSC's upper 32 bits can't be written on earlier CPUs (before
> + * Prescott), so there is no way to resync an AP against the BP.
> + */
> +bool_t disable_tsc_sync;
> +
> static atomic_t tsc_count;
> static uint64_t tsc_value;
> static cpumask_t tsc_sync_cpu_mask;
> @@ -132,6 +139,9 @@ static void synchronize_tsc_master(unsig
> {
> unsigned int i;
>
> + if ( disable_tsc_sync )
> + return;
> +
> if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
> !cpu_isset(slave, tsc_sync_cpu_mask) )
> return;
> @@ -153,6 +163,9 @@ static void synchronize_tsc_slave(unsign
> {
> unsigned int i;
>
> + if ( disable_tsc_sync )
> + return;
> +
> if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) &&
> !cpu_isset(slave, tsc_sync_cpu_mask) )
> return;
> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -21,6 +21,7 @@
> #include <xen/smp.h>
> #include <xen/irq.h>
> #include <xen/softirq.h>
> +#include <xen/cpuidle.h>
> #include <xen/symbols.h>
> #include <xen/keyhandler.h>
> #include <xen/guest_access.h>
> @@ -1385,6 +1386,9 @@ void init_percpu_time(void)
> /* Late init function (after all CPUs are booted). */
> int __init init_xen_time(void)
> {
> + u64 tsc, tmp;
> + const char *what = NULL;
> +
> if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
> {
> /*
> @@ -1398,6 +1402,45 @@ int __init init_xen_time(void)
> setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
> }
>
> +    /*
> +     * On certain older Intel CPUs writing the TSC MSR clears the upper
> +     * 32 bits. Obviously we must not use write_tsc() on such CPUs.
> +     *
> +     * Additionally, AMD specifies that being able to write the TSC MSR
> +     * is not an architectural feature (and, contrary to what their
> +     * manual says, it also cannot be determined from CPUID bits).
> +     */
> + rdtscll(tsc);
> + if ( wrmsr_safe(MSR_IA32_TSC, (u32)tsc) == 0 )
> + {
> + u64 tmp2;
> +
> + rdtscll(tmp2);
> + write_tsc(tsc | (1ULL << 32));
> + rdtscll(tmp);
> + if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) )
> + what = "only partially";
> + }
> + else
> + what = "not";
> + if ( what )
> + {
> + printk(XENLOG_WARNING "TSC %s writable\n", what);
> +
> + /* time_calibration_tsc_rendezvous() must not be used */
> + if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
> + setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
> +
> + /* cstate_restore_tsc() must not be used (or do nothing) */
> + if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) )
> + cpuidle_disable_deep_cstate();
> +
> + /* synchronize_tsc_slave() must do nothing */
> + disable_tsc_sync = 1;
> + }
> + else
> + write_tsc(tsc + 4 * (s32)(tmp - tsc));
> +
> /* If we have constant-rate TSCs then scale factor can be shared. */
> if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
> {
> @@ -1451,7 +1494,7 @@ static int _disable_pit_irq(void(*hpet_b
> * XXX dom0 may rely on RTC interrupt delivery, so only enable
> * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
> */
> - if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) )
> + if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) )
> {
> hpet_broadcast_setup();
> if ( !hpet_broadcast_is_available() )
> --- a/xen/include/asm-x86/setup.h
> +++ b/xen/include/asm-x86/setup.h
> @@ -4,7 +4,6 @@
> #include <xen/multiboot.h>
>
> extern bool_t early_boot;
> -extern s8 xen_cpuidle;
> extern unsigned long xenheap_initial_phys_start;
>
> void init_done(void);
> --- a/xen/include/asm-x86/time.h
> +++ b/xen/include/asm-x86/time.h
> @@ -24,6 +24,8 @@
>
> typedef u64 cycles_t;
>
> +extern bool_t disable_tsc_sync;
> +
> static inline cycles_t get_cycles(void)
> {
> cycles_t c;
> --- a/xen/include/xen/cpuidle.h
> +++ b/xen/include/xen/cpuidle.h
> @@ -85,7 +85,10 @@ struct cpuidle_governor
> void (*reflect) (struct acpi_processor_power *dev);
> };
>
> +extern s8 xen_cpuidle;
> extern struct cpuidle_governor *cpuidle_current_governor;
> +
> +bool_t cpuidle_using_deep_cstate(void);
> void cpuidle_disable_deep_cstate(void);
>
> extern void cpuidle_wakeup_mwait(cpumask_t *mask);
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|