# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1211362737 -3600
# Node ID d795e15b85a788d0389d24963897cf480dcab0e9
# Parent 672c09aad49df0b8056c795bd6c351746d037975
x86: Fix lapic timer stop issue in deep C state
Local APIC timer may stop at deep C state (C3/C4...) entry/exit. This
patch adds the logic that uses the platform timer (HPET) to re-enable
the local APIC timer at C state entry/exit.
Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>
Signed-off-by: Yu Ke <ke.yu@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
xen/arch/x86/Makefile | 1
xen/arch/x86/acpi/cpu_idle.c | 11 -
xen/arch/x86/hpet.c | 291 +++++++++++++++++++++++++++++++++++++++++++
xen/arch/x86/time.c | 59 ++------
xen/common/timer.c | 8 -
xen/include/asm-x86/hpet.h | 20 ++
xen/include/xen/timer.h | 9 +
7 files changed, 348 insertions(+), 51 deletions(-)
diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Tue May 20 14:50:45 2008 +0100
+++ b/xen/arch/x86/Makefile Wed May 21 10:38:57 2008 +0100
@@ -50,6 +50,7 @@ obj-y += machine_kexec.o
obj-y += machine_kexec.o
obj-y += crash.o
obj-y += tboot.o
+obj-y += hpet.o
obj-$(crash_debug) += gdbstub.o
diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c Tue May 20 14:50:45 2008 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c Wed May 21 10:38:57 2008 +0100
@@ -39,6 +39,7 @@
#include <xen/smp.h>
#include <asm/cache.h>
#include <asm/io.h>
+#include <asm/hpet.h>
#include <xen/guest_access.h>
#include <public/platform.h>
#include <asm/processor.h>
@@ -438,19 +439,19 @@ static void acpi_processor_idle(void)
t1 = inl(pmtmr_ioport);
/*
- * FIXME: Before invoking C3, be aware that TSC/APIC timer may be
+ * Before invoking C3, be aware that TSC/APIC timer may be
* stopped by H/W. Without carefully handling of TSC/APIC stop issues,
* deep C state can't work correctly.
*/
/* preparing TSC stop */
cstate_save_tsc();
- /* placeholder for preparing APIC stop */
-
+ /* preparing APIC stop */
+ hpet_broadcast_enter();
/* Invoke C3 */
acpi_idle_do_entry(cx);
- /* placeholder for recovering APIC */
-
+ /* recovering APIC */
+ hpet_broadcast_exit();
/* recovering TSC */
cstate_restore_tsc();
diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/hpet.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hpet.c Wed May 21 10:38:57 2008 +0100
@@ -0,0 +1,291 @@
+/******************************************************************************
+ * arch/x86/hpet.c
+ *
+ * HPET management.
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/time.h>
+#include <xen/timer.h>
+#include <xen/smp.h>
+#include <xen/softirq.h>
+#include <asm/fixmap.h>
+#include <asm/div64.h>
+#include <asm/hpet.h>
+
+#define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1))
+
+#define MAX_DELTA_NS MILLISECS(10*1000)
+#define MIN_DELTA_NS MICROSECS(1)
+
+struct hpet_event_channel
+{
+ unsigned long mult;
+ int shift;
+ s_time_t next_event;
+ cpumask_t cpumask;
+ spinlock_t lock;
+ void (*event_handler)(struct hpet_event_channel *);
+};
+static struct hpet_event_channel hpet_event;
+
+unsigned long hpet_address;
+
+/*
+ * Calculate a multiplication factor for scaled math, which is used to convert
+ * nanoseconds based values to clock ticks:
+ *
+ * clock_ticks = (nanoseconds * factor) >> shift.
+ *
+ * div_sc is the rearranged equation to calculate a factor from a given clock
+ * ticks / nanoseconds ratio:
+ *
+ * factor = (clock_ticks << shift) / nanoseconds
+ */
+static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec,
+ int shift)
+{
+ uint64_t tmp = ((uint64_t)ticks) << shift;
+
+ do_div(tmp, nsec);
+ return (unsigned long) tmp;
+}
+
+/*
+ * Convert nanoseconds based values to clock ticks:
+ *
+ * clock_ticks = (nanoseconds * factor) >> shift.
+ */
+static inline unsigned long ns2ticks(unsigned long nsec, int shift,
+ unsigned long factor)
+{
+ uint64_t tmp = ((uint64_t)nsec * factor) >> shift;
+
+ return (unsigned long) tmp;
+}
+
+static int hpet_legacy_next_event(unsigned long delta)
+{
+ unsigned long cnt;
+
+ cnt = hpet_read32(HPET_COUNTER);
+ cnt += delta;
+ hpet_write32(cnt, HPET_T0_CMP);
+
+ return ((long)(hpet_read32(HPET_COUNTER) - cnt) > 0) ? -ETIME : 0;
+}
+
+static int reprogram_hpet_evt_channel(
+ struct hpet_event_channel *ch,
+ s_time_t expire, s_time_t now, int force)
+{
+ int64_t delta;
+ int ret;
+
+ if ( unlikely(expire < 0) )
+ {
+ printk(KERN_DEBUG "reprogram: expire < 0\n");
+ return -ETIME;
+ }
+
+ delta = expire - now;
+ if ( delta <= 0 )
+ {
+ printk(KERN_DEBUG "reprogram: expire(%"PRIx64") < "
+ "now(%"PRIx64")\n", expire, now);
+ if ( !force )
+ return -ETIME;
+ }
+
+ ch->next_event = expire;
+
+ delta = min_t(int64_t, delta, MAX_DELTA_NS);
+ delta = max_t(int64_t, delta, MIN_DELTA_NS);
+ delta = ns2ticks(delta, ch->shift, ch->mult);
+
+ ret = hpet_legacy_next_event(delta);
+ while ( ret && force )
+ {
+ delta += delta;
+ ret = hpet_legacy_next_event(delta);
+ }
+
+ return ret;
+}
+
+static int evt_do_broadcast(cpumask_t mask)
+{
+ int ret = 0, cpu = smp_processor_id();
+
+ if ( cpu_isset(cpu, mask) )
+ {
+ cpu_clear(cpu, mask);
+ raise_softirq(TIMER_SOFTIRQ);
+ ret = 1;
+ }
+
+ if ( !cpus_empty(mask) )
+ {
+ cpumask_raise_softirq(mask, TIMER_SOFTIRQ);
+ ret = 1;
+ }
+ return ret;
+}
+
+static void handle_hpet_broadcast(struct hpet_event_channel *ch)
+{
+ cpumask_t mask;
+ s_time_t now, next_event;
+ int cpu, current_cpu = smp_processor_id();
+
+ spin_lock(&ch->lock);
+
+ if ( cpu_isset(current_cpu, ch->cpumask) )
+ printk(KERN_DEBUG "WARNING: current cpu%d in bc_mask\n", current_cpu);
+again:
+ ch->next_event = STIME_MAX;
+ next_event = STIME_MAX;
+ mask = (cpumask_t)CPU_MASK_NONE;
+ now = NOW();
+
+ /* find all expired events */
+ for_each_cpu_mask(cpu, ch->cpumask)
+ {
+ if ( per_cpu(timer_deadline, cpu) <= now )
+ cpu_set(cpu, mask);
+ else if ( per_cpu(timer_deadline, cpu) < next_event )
+ next_event = per_cpu(timer_deadline, cpu);
+ }
+ if ( per_cpu(timer_deadline, current_cpu) <= now )
+ cpu_set(current_cpu, mask);
+
+ /* wakeup the cpus which have an expired event. */
+ evt_do_broadcast(mask);
+
+ if ( next_event != STIME_MAX )
+ {
+ if ( reprogram_hpet_evt_channel(ch, next_event, now, 0) )
+ goto again;
+ }
+ spin_unlock(&ch->lock);
+}
+
+void hpet_broadcast_init(void)
+{
+ u64 hpet_rate;
+ u32 hpet_id, cfg;
+
+ hpet_rate = hpet_setup();
+ if ( hpet_rate == 0 )
+ return;
+
+ hpet_id = hpet_read32(HPET_ID);
+ if ( !(hpet_id & HPET_ID_LEGSUP) )
+ return;
+
+ /* Start HPET legacy interrupts */
+ cfg = hpet_read32(HPET_CFG);
+ cfg |= HPET_CFG_LEGACY;
+ hpet_write32(cfg, HPET_CFG);
+
+ /* set HPET T0 as oneshot */
+ cfg = hpet_read32(HPET_T0_CFG);
+ cfg &= ~HPET_TN_PERIODIC;
+ cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+ hpet_write32(cfg, HPET_T0_CFG);
+
+ /*
+ * The period is a femto seconds value. We need to calculate the scaled
+ * math multiplication factor for nanosecond to hpet tick conversion.
+ */
+ hpet_event.mult = div_sc((unsigned long)hpet_rate, 1000000000ul, 32);
+ hpet_event.shift = 32;
+ hpet_event.next_event = STIME_MAX;
+ hpet_event.event_handler = handle_hpet_broadcast;
+ spin_lock_init(&hpet_event.lock);
+}
+
+void hpet_broadcast_enter(void)
+{
+ struct hpet_event_channel *ch = &hpet_event;
+
+ cpu_set(smp_processor_id(), ch->cpumask);
+
+ spin_lock(&ch->lock);
+
+ /* reprogram if current cpu expire time is nearer */
+ if ( this_cpu(timer_deadline) < ch->next_event )
+ reprogram_hpet_evt_channel(ch, this_cpu(timer_deadline), NOW(), 1);
+
+ spin_unlock(&ch->lock);
+}
+
+void hpet_broadcast_exit(void)
+{
+ struct hpet_event_channel *ch = &hpet_event;
+ int cpu = smp_processor_id();
+
+ if ( cpu_test_and_clear(cpu, ch->cpumask) )
+ reprogram_timer(per_cpu(timer_deadline, cpu));
+}
+
+int hpet_legacy_irq_tick(void)
+{
+ if ( !hpet_event.event_handler )
+ return 0;
+ hpet_event.event_handler(&hpet_event);
+ return 1;
+}
+
+u64 hpet_setup(void)
+{
+ static u64 hpet_rate;
+ static int initialised;
+ u32 hpet_id, hpet_period, cfg;
+ int i;
+
+ if ( initialised )
+ return hpet_rate;
+ initialised = 1;
+
+ if ( hpet_address == 0 )
+ return 0;
+
+ set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
+
+ hpet_id = hpet_read32(HPET_ID);
+ if ( hpet_id == 0 )
+ {
+ printk("BAD HPET vendor id.\n");
+ return 0;
+ }
+
+ /* Check for sane period (100ps <= period <= 100ns). */
+ hpet_period = hpet_read32(HPET_PERIOD);
+ if ( (hpet_period > 100000000) || (hpet_period < 100000) )
+ {
+ printk("BAD HPET period %u.\n", hpet_period);
+ return 0;
+ }
+
+ cfg = hpet_read32(HPET_CFG);
+ cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+ hpet_write32(cfg, HPET_CFG);
+
+ for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ )
+ {
+ cfg = hpet_read32(HPET_T0_CFG + i*0x20);
+ cfg &= ~HPET_TN_ENABLE;
+ hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG);
+ }
+
+ cfg = hpet_read32(HPET_CFG);
+ cfg |= HPET_CFG_ENABLE;
+ hpet_write32(cfg, HPET_CFG);
+
+ hpet_rate = 1000000000000000ULL; /* 10^15 */
+ (void)do_div(hpet_rate, hpet_period);
+
+ return hpet_rate;
+}
diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Tue May 20 14:50:45 2008 +0100
+++ b/xen/arch/x86/time.c Wed May 21 10:38:57 2008 +0100
@@ -38,7 +38,6 @@ string_param("clocksource", opt_clocksou
#define EPOCH MILLISECS(1000)
unsigned long cpu_khz; /* CPU clock frequency in kHz. */
-unsigned long hpet_address;
DEFINE_SPINLOCK(rtc_lock);
unsigned long pit0_ticks;
static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
@@ -68,7 +67,8 @@ struct platform_timesource {
static DEFINE_PER_CPU(struct cpu_time, cpu_time);
-static u8 tsc_invariant=0; /* TSC is invariant upon C state entry */
+/* TSC is invariant on C state entry? */
+static bool_t tsc_invariant;
/*
* We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
@@ -151,6 +151,9 @@ static void timer_interrupt(int irq, voi
{
ASSERT(local_irq_is_enabled());
+ if ( hpet_legacy_irq_tick() )
+ return;
+
/* Only for start-of-day interruopt tests in io_apic.c. */
(*(volatile unsigned long *)&pit0_ticks)++;
@@ -347,47 +350,10 @@ static u32 read_hpet_count(void)
static int init_hpet(struct platform_timesource *pts)
{
- u64 hpet_rate;
- u32 hpet_id, hpet_period, cfg;
- int i;
-
- if ( hpet_address == 0 )
+ u64 hpet_rate = hpet_setup();
+
+ if ( hpet_rate == 0 )
return 0;
-
- set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
-
- hpet_id = hpet_read32(HPET_ID);
- if ( hpet_id == 0 )
- {
- printk("BAD HPET vendor id.\n");
- return 0;
- }
-
- /* Check for sane period (100ps <= period <= 100ns). */
- hpet_period = hpet_read32(HPET_PERIOD);
- if ( (hpet_period > 100000000) || (hpet_period < 100000) )
- {
- printk("BAD HPET period %u.\n", hpet_period);
- return 0;
- }
-
- cfg = hpet_read32(HPET_CFG);
- cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
- hpet_write32(cfg, HPET_CFG);
-
- for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ )
- {
- cfg = hpet_read32(HPET_T0_CFG + i*0x20);
- cfg &= ~HPET_TN_ENABLE;
- hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG);
- }
-
- cfg = hpet_read32(HPET_CFG);
- cfg |= HPET_CFG_ENABLE;
- hpet_write32(cfg, HPET_CFG);
-
- hpet_rate = 1000000000000000ULL; /* 10^15 */
- (void)do_div(hpet_rate, hpet_period);
pts->name = "HPET";
pts->frequency = hpet_rate;
@@ -1041,7 +1007,14 @@ static int __init disable_pit_irq(void)
outb_p(0x30, PIT_MODE);
outb_p(0, PIT_CH0);
outb_p(0, PIT_CH0);
- }
+
+ /*
+ * If we do not rely on PIT CH0 then we can use HPET for one-shot
+ * timer emulation when entering deep C states.
+ */
+ hpet_broadcast_init();
+ }
+
return 0;
}
__initcall(disable_pit_irq);
diff -r 672c09aad49d -r d795e15b85a7 xen/common/timer.c
--- a/xen/common/timer.c Tue May 20 14:50:45 2008 +0100
+++ b/xen/common/timer.c Wed May 21 10:38:57 2008 +0100
@@ -35,7 +35,7 @@ struct timers {
static DEFINE_PER_CPU(struct timers, timers);
-extern int reprogram_timer(s_time_t timeout);
+DEFINE_PER_CPU(s_time_t, timer_deadline);
/****************************************************************************
* HEAP OPERATIONS.
@@ -323,8 +323,10 @@ static void timer_softirq_action(void)
}
ts->running = NULL;
- }
- while ( !reprogram_timer(GET_HEAP_SIZE(heap) ? heap[1]->expires : 0) );
+
+ this_cpu(timer_deadline) = GET_HEAP_SIZE(heap) ? heap[1]->expires : 0;
+ }
+ while ( !reprogram_timer(this_cpu(timer_deadline)) );
spin_unlock_irq(&ts->lock);
}
diff -r 672c09aad49d -r d795e15b85a7 xen/include/asm-x86/hpet.h
--- a/xen/include/asm-x86/hpet.h Tue May 20 14:50:45 2008 +0100
+++ b/xen/include/asm-x86/hpet.h Wed May 21 10:38:57 2008 +0100
@@ -49,4 +49,24 @@
#define hpet_write32(y,x) \
(*(volatile u32 *)(fix_to_virt(FIX_HPET_BASE) + (x)) = (y))
+/*
+ * Detect and initialise HPET hardware: return counter update frequency.
+ * Return value is zero if HPET is unavailable.
+ */
+u64 hpet_setup(void);
+
+/*
+ * Callback from legacy timer (PIT channel 0) IRQ handler.
+ * Returns 1 if tick originated from HPET; else 0.
+ */
+int hpet_legacy_irq_tick(void);
+
+/*
+ * Temporarily use an HPET event counter for timer interrupt handling,
+ * rather than using the LAPIC timer. Used for Cx state entry.
+ */
+void hpet_broadcast_init(void);
+void hpet_broadcast_enter(void);
+void hpet_broadcast_exit(void);
+
#endif /* __X86_HPET_H__ */
diff -r 672c09aad49d -r d795e15b85a7 xen/include/xen/timer.h
--- a/xen/include/xen/timer.h Tue May 20 14:50:45 2008 +0100
+++ b/xen/include/xen/timer.h Wed May 21 10:38:57 2008 +0100
@@ -99,6 +99,15 @@ extern void process_pending_timers(void)
*/
extern void timer_init(void);
+/*
+ * Next timer deadline for each CPU.
+ * Modified only by the local CPU and never in interrupt context.
+ */
+DECLARE_PER_CPU(s_time_t, timer_deadline);
+
+/* Arch-defined function to reprogram timer hardware for new deadline. */
+extern int reprogram_timer(s_time_t timeout);
+
#endif /* _TIMER_H_ */
/*
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|