# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 43564304cf9448ad8978df6d2d0d6721b4615143
# Parent 9697bc63d4039196b15378f3b3fe406c6a445ea2
First cut of new time interfaces and synchronisation mechanisms.
Based on an initial patch from Don Fry at IBM.
Still TODO:
1. Testing
2. NTP synchronisation
3. Fix wallclock interface a bit
4. Support for platform timers other than PIT (e.g., HPET, IBM Cyclone)
5. Scale 64-bit TSC diffs instead of 32-bit, just for sanity
6. Error-correcting scale factor is still slightly wrong
7. More testing
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
diff -r 9697bc63d403 -r 43564304cf94 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c Sun Jul 17 14:16:21 2005
+++ b/xen/arch/x86/apic.c Mon Jul 18 20:22:11 2005
@@ -723,16 +723,8 @@
static void __init setup_APIC_timer(unsigned int clocks)
{
unsigned long flags;
-
local_irq_save(flags);
-
- /*
- * Wait for IRQ0's slice:
- */
- wait_timer_tick();
-
__setup_APIC_LVTT(clocks);
-
local_irq_restore(flags);
}
diff -r 9697bc63d403 -r 43564304cf94 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Sun Jul 17 14:16:21 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Mon Jul 18 20:22:11 2005
@@ -19,7 +19,7 @@
s-obj-y :=
obj-y += cpu/
-obj-y += timers/
+#obj-y += timers/
obj-$(CONFIG_ACPI_BOOT) += acpi/
#c-obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
c-obj-$(CONFIG_MCA) += mca.o
diff -r 9697bc63d403 -r 43564304cf94 xen/common/domain.c
--- a/xen/common/domain.c Sun Jul 17 14:16:21 2005
+++ b/xen/common/domain.c Mon Jul 18 20:22:11 2005
@@ -42,8 +42,6 @@
d->domain_id = dom_id;
v->processor = cpu;
- spin_lock_init(&d->time_lock);
-
spin_lock_init(&d->big_lock);
spin_lock_init(&d->page_alloc_lock);
diff -r 9697bc63d403 -r 43564304cf94 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c Sun Jul 17 14:16:21 2005
+++ b/xen/arch/x86/vmx_intercept.c Mon Jul 18 20:22:11 2005
@@ -24,10 +24,10 @@
#include <asm/vmx_virpit.h>
#include <asm/vmx_intercept.h>
#include <public/io/ioreq.h>
-
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
+#include <io_ports.h>
#ifdef CONFIG_VMX
@@ -175,7 +175,7 @@
p->port_mm)
return 0;
- if (p->addr == 0x43 &&
+ if (p->addr == PIT_MODE &&
p->dir == 0 && /* write */
((p->u.data >> 4) & 0x3) == 0 && /* latch command */
((p->u.data >> 6) & 0x3) == (vpit->channel)) {/* right channel */
@@ -183,7 +183,7 @@
return 1;
}
- if (p->addr == (0x40 + vpit->channel) &&
+ if (p->addr == (PIT_CH0 + vpit->channel) &&
p->dir == 1) { /* read */
p->u.data = pit_read_io(vpit);
resume_pit_io(p);
diff -r 9697bc63d403 -r 43564304cf94 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c Sun Jul 17 14:16:21 2005
+++ b/xen/arch/x86/i8259.c Mon Jul 18 20:22:11 2005
@@ -19,7 +19,7 @@
#include <asm/bitops.h>
#include <xen/delay.h>
#include <asm/apic.h>
-
+#include <io_ports.h>
/*
* Common place to define all x86 IRQ vectors
@@ -395,9 +395,9 @@
/* Set the clock to HZ Hz */
#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
- outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
- outb_p(LATCH & 0xff , 0x40); /* LSB */
- outb(LATCH >> 8 , 0x40); /* MSB */
+ outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
+ outb(LATCH >> 8, PIT_CH0); /* MSB */
setup_irq(2, &cascade);
}
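
The LATCH value written to PIT_CH0 above is just CLOCK_TICK_RATE/HZ rounded
to nearest. A standalone sketch of the arithmetic, assuming HZ=100 purely
for illustration:

/* Sketch: the PIT reload count for a HZ-rate tick (HZ=100 assumed). */
#include <stdio.h>

#define CLOCK_TICK_RATE 1193180                /* PIT crystal freq (Hz)  */
#define HZ              100                    /* assumed for the example */
#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)  /* round to nearest        */

int main(void)
{
    /* (1193180 + 50) / 100 = 11932 = 0x2e9c */
    printf("LATCH = %d (LSB 0x%02x, MSB 0x%02x)\n",
           LATCH, LATCH & 0xff, LATCH >> 8);
    return 0;
}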
diff -r 9697bc63d403 -r 43564304cf94 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Sun Jul 17 14:16:21 2005
+++ b/xen/common/page_alloc.c Mon Jul 18 20:22:11 2005
@@ -351,10 +351,10 @@
void scrub_heap_pages(void)
{
void *p;
- unsigned long pfn, flags;
+ unsigned long pfn;
+ int cpu = smp_processor_id();
printk("Scrubbing Free RAM: ");
- watchdog_disable();
for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
{
@@ -362,12 +362,15 @@
if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
printk(".");
+ if ( unlikely(softirq_pending(cpu)) )
+ do_softirq();
+
/* Quick lock-free check. */
if ( allocated_in_map(pfn) )
continue;
-
- spin_lock_irqsave(&heap_lock, flags);
-
+
+ spin_lock_irq(&heap_lock);
+
/* Re-check page status with lock held. */
if ( !allocated_in_map(pfn) )
{
@@ -385,11 +388,10 @@
unmap_domain_page(p);
}
}
-
- spin_unlock_irqrestore(&heap_lock, flags);
- }
-
- watchdog_enable();
+
+ spin_unlock_irq(&heap_lock);
+ }
+
printk("done.\n");
}
diff -r 9697bc63d403 -r 43564304cf94 xen/common/ac_timer.c
--- a/xen/common/ac_timer.c Sun Jul 17 14:16:21 2005
+++ b/xen/common/ac_timer.c Mon Jul 18 20:22:11 2005
@@ -202,7 +202,7 @@
do {
heap = ac_timers[cpu].heap;
now = NOW();
-
+
while ( (GET_HEAP_SIZE(heap) != 0) &&
((t = heap[1])->expires < (now + TIMER_SLOP)) )
{
diff -r 9697bc63d403 -r 43564304cf94 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Sun Jul 17 14:16:21 2005
+++ b/xen/arch/x86/smpboot.c Mon Jul 18 20:22:11 2005
@@ -40,6 +40,7 @@
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/delay.h>
+#include <xen/softirq.h>
#include <asm/current.h>
#include <asm/mc146818rtc.h>
#include <asm/desc.h>
@@ -406,6 +407,7 @@
*/
if (cpu_has_tsc && cpu_khz)
synchronize_tsc_ap();
+ calibrate_tsc_ap();
}
int cpucount;
@@ -464,6 +466,8 @@
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
+
+ init_percpu_time();
wmb();
startup_cpu_idle_loop();
@@ -1149,6 +1153,7 @@
*/
if (cpu_has_tsc && cpucount && cpu_khz)
synchronize_tsc_bp();
+ calibrate_tsc_bp();
}
/* These are wrappers to interface to the new boot process. Someone
@@ -1167,22 +1172,21 @@
int __devinit __cpu_up(unsigned int cpu)
{
/* This only works at boot for x86. See "rewrite" above. */
- if (cpu_isset(cpu, smp_commenced_mask)) {
- local_irq_enable();
+ if (cpu_isset(cpu, smp_commenced_mask))
return -ENOSYS;
- }
/* In case one didn't come up */
- if (!cpu_isset(cpu, cpu_callin_map)) {
- local_irq_enable();
+ if (!cpu_isset(cpu, cpu_callin_map))
return -EIO;
- }
-
- local_irq_enable();
+
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
- while (!cpu_isset(cpu, cpu_online_map))
+ while (!cpu_isset(cpu, cpu_online_map)) {
mb();
+ if (softirq_pending(0))
+ do_softirq();
+ }
+
return 0;
}
diff -r 9697bc63d403 -r 43564304cf94 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Sun Jul 17 14:16:21 2005
+++ b/xen/include/xen/sched.h Mon Jul 18 20:22:11 2005
@@ -92,7 +92,6 @@
domid_t domain_id;
shared_info_t *shared_info; /* shared data area */
- spinlock_t time_lock;
spinlock_t big_lock;
diff -r 9697bc63d403 -r 43564304cf94 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Sun Jul 17 14:16:21 2005
+++ b/xen/drivers/char/console.c Mon Jul 18 20:22:11 2005
@@ -635,8 +635,6 @@
debugtrace_bytes = bytes;
- memset(debugtrace_buf, '\0', debugtrace_bytes);
-
return 0;
}
__initcall(debugtrace_init);
diff -r 9697bc63d403 -r 43564304cf94 xen/include/xen/time.h
--- a/xen/include/xen/time.h Sun Jul 17 14:16:21 2005
+++ b/xen/include/xen/time.h Mon Jul 18 20:22:11 2005
@@ -30,7 +30,8 @@
#include <public/xen.h>
#include <asm/time.h>
-extern int init_xen_time();
+extern int init_xen_time(void);
+extern void init_percpu_time(void);
extern unsigned long cpu_khz;
diff -r 9697bc63d403 -r 43564304cf94 xen/include/public/xen.h
--- a/xen/include/public/xen.h Sun Jul 17 14:16:21 2005
+++ b/xen/include/public/xen.h Mon Jul 18 20:22:11 2005
@@ -329,12 +329,36 @@
#endif
} vcpu_info_t;
+typedef struct vcpu_time_info {
+ /*
+ * The following values are updated periodically (and not necessarily
+ * atomically!). The guest OS detects this because 'time_version1' is
+ * incremented just before updating these values, and 'time_version2' is
+ * incremented immediately after. See the Xen-specific Linux code for an
+ * example of how to read these values safely (arch/xen/kernel/time.c).
+ */
+ u32 time_version1;
+ u32 time_version2;
+ u64 tsc_timestamp; /* TSC at last update of time vals. */
+ u64 system_time; /* Time, in nanosecs, since boot. */
+ /*
+ * Current system time:
+ * system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul
+ * CPU frequency (Hz):
+ * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
+ */
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+} vcpu_time_info_t;
+
/*
* Xen/kernel shared data -- pointer provided in start_info.
* NB. We expect that this struct is smaller than a page.
*/
typedef struct shared_info {
vcpu_info_t vcpu_data[MAX_VIRT_CPUS];
+
+ vcpu_time_info_t vcpu_time[MAX_VIRT_CPUS];
u32 n_vcpu;
@@ -373,33 +397,11 @@
u32 evtchn_mask[32];
/*
- * Time: The following abstractions are exposed: System Time, Clock Time,
- * Domain Virtual Time. Domains can access Cycle counter time directly.
+ * Wallclock time: updated only by control software. Guests should base
+ * their gettimeofday() syscall on this wallclock-base value.
*/
- u64 cpu_freq; /* CPU frequency (Hz). */
-
- /*
- * The following values are updated periodically (and not necessarily
- * atomically!). The guest OS detects this because 'time_version1' is
- * incremented just before updating these values, and 'time_version2' is
- * incremented immediately after. See the Xen-specific Linux code for an
- * example of how to read these values safely (arch/xen/kernel/time.c).
- */
- u32 time_version1;
- u32 time_version2;
- tsc_timestamp_t tsc_timestamp; /* TSC at last update of time vals. */
- u64 system_time; /* Time, in nanosecs, since boot. */
u32 wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
u32 wc_usec; /* Usecs 00:00:00 UTC, Jan 1, 1970. */
- u64 domain_time; /* Domain virtual time, in nanosecs. */
-
- /*
- * Timeout values:
- * Allow a domain to specify a timeout value in system time and
- * domain virtual time.
- */
- u64 wall_timeout;
- u64 domain_timeout;
arch_shared_info_t arch;
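
The version pair above replaces the old shared-info time fields removed in
this hunk. A minimal guest-side sketch of the lockless read plus the
extrapolation formula, with mul_frac() written portably rather than with
the inline asm used elsewhere in this patch; rmb(), rdtscll() and
HYPERVISOR_shared_info are assumed as in the Linux sparse tree:

static inline u32 mul_frac(u32 x, u32 frac)
{
    /* x * (frac / 2^32): portable form of the 'mul' asm in x86/time.c. */
    return (u32)(((u64)x * frac) >> 32);
}

static u64 read_system_time(int vcpu)
{
    vcpu_time_info_t *t = &HYPERVISOR_shared_info->vcpu_time[vcpu];
    u32 ver, mul, delta;
    u64 tsc, stamp, stime;
    s8 shift;

    do {
        ver = t->time_version2;            /* snapshot post-update counter */
        rmb();
        stamp = t->tsc_timestamp;
        stime = t->system_time;
        mul   = t->tsc_to_system_mul;
        shift = t->tsc_shift;
        rmb();
    } while (ver != t->time_version1);     /* retry if an update raced us */

    rdtscll(tsc);
    delta = (shift < 0) ? (u32)((tsc - stamp) >> -shift)
                        : (u32)(tsc - stamp) << shift;
    return stime + mul_frac(delta, mul);   /* ns since boot */
}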
diff -r 9697bc63d403 -r 43564304cf94 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Sun Jul 17 14:16:21 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Mon Jul 18 20:22:11 2005
@@ -15,7 +15,7 @@
ptrace.o quirks.o syscall.o bootflag.o
i386-obj-y := time.o
-obj-y += ../../i386/kernel/timers/
+#obj-y += ../../i386/kernel/timers/
s-obj-y :=
diff -r 9697bc63d403 -r 43564304cf94 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Sun Jul 17 14:16:21 2005
+++ b/xen/arch/x86/time.c Mon Jul 18 20:22:11 2005
@@ -1,16 +1,12 @@
-/****************************************************************************
- * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
- * (C) 2002-2003 University of Cambridge
- ****************************************************************************
- *
- * File: i386/time.c
- * Author: Rolf Neugebar & Keir Fraser
- */
-
-/*
- * linux/arch/i386/kernel/time.c
- *
- * Copyright (C) 1991, 1992, 1995 Linus Torvalds
+/******************************************************************************
+ * arch/x86/time.c
+ *
+ * Per-CPU time calibration and management.
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ *
+ * Portions from Linux are:
+ * Copyright (c) 1991, 1992, 1995 Linus Torvalds
*/
#include <xen/config.h>
@@ -31,29 +27,74 @@
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <asm/mc146818rtc.h>
-
-/* GLOBAL */
+#include <asm/div64.h>
+#include <io_ports.h>
+
unsigned long cpu_khz; /* CPU clock frequency in kHz. */
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
int timer_ack = 0;
unsigned long volatile jiffies;
-
-/* PRIVATE */
-static unsigned int rdtsc_bitshift; /* Which 32 bits of TSC do we use? */
-static u64 cpu_freq; /* CPU frequency (Hz) */
-static u32 st_scale_f; /* Cycles -> ns, fractional part */
-static u32 st_scale_i; /* Cycles -> ns, integer part */
-static u32 shifted_tsc_irq; /* CPU0's TSC at last 'time update' */
-static u64 full_tsc_irq; /* ...ditto, but all 64 bits */
-static s_time_t stime_irq; /* System time at last 'time update' */
-static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
-static rwlock_t time_lock = RW_LOCK_UNLOCKED;
+static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'. */
+
+struct time_scale {
+ int shift;
+ u32 mul_frac;
+};
+
+struct cpu_time {
+ u64 local_tsc_stamp;
+ s_time_t stime_local_stamp;
+ s_time_t stime_master_stamp;
+ struct time_scale tsc_scale;
+ struct ac_timer calibration_timer;
+} __cacheline_aligned;
+
+static struct cpu_time cpu_time[NR_CPUS];
+
+/* Protected by platform_timer_lock. */
+static s_time_t stime_platform_stamp;
+static u64 platform_timer_stamp;
+static struct time_scale platform_timer_scale;
+static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED;
+
+static inline u32 down_shift(u64 time, int shift)
+{
+ if ( shift < 0 )
+ return (u32)(time >> -shift);
+ return (u32)((u32)time << shift);
+}
+
+/*
+ * 32-bit division of integer dividend and integer divisor yielding
+ * 32-bit fractional quotient.
+ */
+static inline u32 div_frac(u32 dividend, u32 divisor)
+{
+ u32 quotient, remainder;
+ ASSERT(dividend < divisor);
+ __asm__ (
+ "div %4"
+ : "=a" (quotient), "=d" (remainder)
+ : "0" (0), "1" (dividend), "r" (divisor) );
+ return quotient;
+}
+
+/*
+ * 32-bit multiplication of integer multiplicand and fractional multiplier
+ * yielding 32-bit integer product.
+ */
+static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
+{
+ u32 product_int, product_frac;
+ __asm__ (
+ "mul %3"
+ : "=a" (product_frac), "=d" (product_int)
+ : "0" (multiplicand), "r" (multiplier) );
+ return product_int;
+}
void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
{
- write_lock_irq(&time_lock);
-
-#ifdef CONFIG_X86_IO_APIC
if ( timer_ack )
{
extern spinlock_t i8259A_lock;
@@ -63,30 +104,9 @@
inb(0x20);
spin_unlock(&i8259A_lock);
}
-#endif
- /*
- * Updates TSC timestamp (used to interpolate passage of time between
- * interrupts).
- */
- rdtscll(full_tsc_irq);
- shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift);
-
/* Update jiffies counter. */
(*(unsigned long *)&jiffies)++;
-
- /* Update wall time. */
- wc_usec += 1000000/HZ;
- if ( wc_usec >= 1000000 )
- {
- wc_usec -= 1000000;
- wc_sec++;
- }
-
- /* Updates system time (nanoseconds since boot). */
- stime_irq += MILLISECS(1000/HZ);
-
- write_unlock_irq(&time_lock);
/* Rough hack to allow accurate timers to sort-of-work with no APIC. */
if ( !cpu_has_apic )
@@ -103,9 +123,9 @@
#define CALIBRATE_FRAC 20 /* calibrate over 50ms */
#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
-static unsigned long __init calibrate_tsc(void)
-{
- u64 start, end, diff;
+static u64 calibrate_boot_tsc(void)
+{
+ u64 start, end;
unsigned long count;
/* Set the Gate high, disable speaker */
@@ -118,9 +138,9 @@
* terminal count mode), binary count, load 5 * LATCH count, (LSB and MSB)
* to begin countdown.
*/
- outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
- outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
- outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */
+ outb(0xb0, PIT_MODE); /* binary, mode 0, LSB/MSB, Ch 2 */
+ outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
+ outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */
rdtscll(start);
for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
@@ -131,15 +151,147 @@
if ( count == 0 )
return 0;
- diff = end - start;
-
-#if defined(__i386__)
- /* If quotient doesn't fit in 32 bits then we return error (zero). */
- if ( diff & ~0xffffffffULL )
- return 0;
-#endif
-
- return (unsigned long)diff;
+ return ((end - start) * (u64)CALIBRATE_FRAC);
+}
+
+static void set_time_scale(struct time_scale *ts, u64 ticks_per_sec)
+{
+ u64 tps64 = ticks_per_sec;
+ u32 tps32;
+ int shift = 0;
+
+ while ( tps64 > (MILLISECS(1000)*2) )
+ {
+ tps64 >>= 1;
+ shift--;
+ }
+
+ tps32 = (u32)tps64;
+ while ( tps32 < (u32)MILLISECS(1000) )
+ {
+ tps32 <<= 1;
+ shift++;
+ }
+
+ ts->mul_frac = div_frac(MILLISECS(1000), tps32);
+ ts->shift = shift;
+}
+
+static atomic_t tsc_calibrate_gang = ATOMIC_INIT(0);
+static unsigned int tsc_calibrate_status = 0;
+
+void calibrate_tsc_bp(void)
+{
+ while ( atomic_read(&tsc_calibrate_gang) != (num_booting_cpus() - 1) )
+ mb();
+
+ outb(CALIBRATE_LATCH & 0xff, PIT_CH2);
+ outb(CALIBRATE_LATCH >> 8, PIT_CH2);
+
+ tsc_calibrate_status = 1;
+ wmb();
+
+ while ( (inb(0x61) & 0x20) == 0 )
+ continue;
+
+ tsc_calibrate_status = 2;
+ wmb();
+
+ while ( atomic_read(&tsc_calibrate_gang) != 0 )
+ mb();
+}
+
+void calibrate_tsc_ap(void)
+{
+ u64 t1, t2, ticks_per_sec;
+
+ atomic_inc(&tsc_calibrate_gang);
+
+ while ( tsc_calibrate_status < 1 )
+ mb();
+
+ rdtscll(t1);
+
+ while ( tsc_calibrate_status < 2 )
+ mb();
+
+ rdtscll(t2);
+
+ ticks_per_sec = (t2 - t1) * (u64)CALIBRATE_FRAC;
+ set_time_scale(&cpu_time[smp_processor_id()].tsc_scale, ticks_per_sec);
+
+ atomic_dec(&tsc_calibrate_gang);
+}
+
+/* Protected by platform_timer_lock. */
+static u64 platform_pit_counter;
+static u16 pit_stamp;
+static struct ac_timer pit_overflow_timer;
+
+static u16 pit_read_counter(void)
+{
+ u16 count;
+ ASSERT(spin_is_locked(&platform_timer_lock));
+ outb(0x80, PIT_MODE);
+ count = inb(PIT_CH2);
+ count |= inb(PIT_CH2) << 8;
+ return count;
+}
+
+static void pit_overflow(void *unused)
+{
+ u16 counter;
+
+ spin_lock(&platform_timer_lock);
+ counter = pit_read_counter();
+ platform_pit_counter += (u16)(pit_stamp - counter);
+ pit_stamp = counter;
+ spin_unlock(&platform_timer_lock);
+
+ set_ac_timer(&pit_overflow_timer, NOW() + MILLISECS(20));
+}
+
+static void init_platform_timer(void)
+{
+ init_ac_timer(&pit_overflow_timer, pit_overflow, NULL, 0);
+ pit_overflow(NULL);
+ platform_timer_stamp = platform_pit_counter;
+ set_time_scale(&platform_timer_scale, CLOCK_TICK_RATE);
+}
+
+static s_time_t __read_platform_stime(u64 platform_time)
+{
+ u64 diff64 = platform_time - platform_timer_stamp;
+ u32 diff = down_shift(diff64, platform_timer_scale.shift);
+ ASSERT(spin_is_locked(&platform_timer_lock));
+ return (stime_platform_stamp +
+ (u64)mul_frac(diff, platform_timer_scale.mul_frac));
+}
+
+static s_time_t read_platform_stime(void)
+{
+ u64 counter;
+ s_time_t stime;
+
+ spin_lock(&platform_timer_lock);
+ counter = platform_pit_counter + (u16)(pit_stamp - pit_read_counter());
+ stime = __read_platform_stime(counter);
+ spin_unlock(&platform_timer_lock);
+
+ return stime;
+}
+
+static void platform_time_calibration(void)
+{
+ u64 counter;
+ s_time_t stamp;
+
+ spin_lock(&platform_timer_lock);
+ counter = platform_pit_counter + (u16)(pit_stamp - pit_read_counter());
+ stamp = __read_platform_stime(counter);
+ stime_platform_stamp = stamp;
+ platform_timer_stamp = counter;
+ spin_unlock(&platform_timer_lock);
}
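
The (u16)(pit_stamp - counter) arithmetic above works because the PIT counts
down and 16-bit modular subtraction recovers the elapsed ticks even across a
counter wrap, provided pit_overflow() samples more often than the ~55ms wrap
period (hence the 20ms timer). A self-contained illustration with
hypothetical readings:

/* Sketch: elapsed PIT ticks via 16-bit modular subtraction (values are
 * hypothetical). prev and now are successive down-counter readings. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint16_t prev = 0x0100;                   /* earlier reading           */
    uint16_t now  = 0xFF00;                   /* counter wrapped meanwhile */
    uint16_t elapsed = (uint16_t)(prev - now);
    printf("elapsed = %u ticks\n", elapsed);  /* 0x0200 = 512 */
    return 0;
}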
@@ -233,140 +385,214 @@
* System Time
***************************************************************************/
-static inline u64 get_time_delta(void)
-{
- s32 delta_tsc;
- u32 low;
- u64 delta, tsc;
-
- ASSERT(st_scale_f || st_scale_i);
+s_time_t get_s_time(void)
+{
+ struct cpu_time *t = &cpu_time[smp_processor_id()];
+ u64 tsc;
+ u32 delta;
+ s_time_t now;
rdtscll(tsc);
- low = (u32)(tsc >> rdtsc_bitshift);
- delta_tsc = (s32)(low - shifted_tsc_irq);
- if ( unlikely(delta_tsc < 0) ) delta_tsc = 0;
- delta = ((u64)delta_tsc * st_scale_f);
- delta >>= 32;
- delta += ((u64)delta_tsc * st_scale_i);
-
- return delta;
-}
-
-s_time_t get_s_time(void)
-{
- s_time_t now;
- unsigned long flags;
-
- read_lock_irqsave(&time_lock, flags);
-
- now = stime_irq + get_time_delta();
-
- /* Ensure that the returned system time is monotonically increasing. */
- {
- static s_time_t prev_now = 0;
- if ( unlikely(now < prev_now) )
- now = prev_now;
- prev_now = now;
- }
-
- read_unlock_irqrestore(&time_lock, flags);
-
- return now;
+ delta = down_shift(tsc - t->local_tsc_stamp, t->tsc_scale.shift);
+ now = t->stime_local_stamp + (u64)mul_frac(delta, t->tsc_scale.mul_frac);
+
+ return now;
}
static inline void __update_dom_time(struct vcpu *v)
{
- struct domain *d = v->domain;
- shared_info_t *si = d->shared_info;
-
- spin_lock(&d->time_lock);
-
- si->time_version1++;
+ struct cpu_time *t = &cpu_time[smp_processor_id()];
+ struct vcpu_time_info *u = &v->domain->shared_info->vcpu_time[v->vcpu_id];
+
+ u->time_version1++;
wmb();
- si->cpu_freq = cpu_freq;
- si->tsc_timestamp = full_tsc_irq;
- si->system_time = stime_irq;
- si->wc_sec = wc_sec;
- si->wc_usec = wc_usec;
+ u->tsc_timestamp = t->local_tsc_stamp;
+ u->system_time = t->stime_local_stamp;
+ u->tsc_to_system_mul = t->tsc_scale.mul_frac;
+ u->tsc_shift = (s8)t->tsc_scale.shift;
wmb();
- si->time_version2++;
-
- spin_unlock(&d->time_lock);
+ u->time_version2++;
+
+ /* Should only do this during do_settime(). */
+ v->domain->shared_info->wc_sec = wc_sec;
+ v->domain->shared_info->wc_usec = wc_usec;
}
void update_dom_time(struct vcpu *v)
{
- unsigned long flags;
-
- if ( v->domain->shared_info->tsc_timestamp != full_tsc_irq )
- {
- read_lock_irqsave(&time_lock, flags);
+ if ( v->domain->shared_info->vcpu_time[v->vcpu_id].tsc_timestamp !=
+ cpu_time[smp_processor_id()].local_tsc_stamp )
__update_dom_time(v);
- read_unlock_irqrestore(&time_lock, flags);
- }
}
/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
{
- s64 delta;
- long _usecs = (long)usecs;
-
- write_lock_irq(&time_lock);
-
- delta = (s64)(stime_irq - system_time_base);
-
- _usecs += (long)(delta/1000);
- while ( _usecs >= 1000000 )
- {
- _usecs -= 1000000;
- secs++;
- }
-
- wc_sec = secs;
- wc_usec = _usecs;
-
- /* Others will pick up the change at the next tick. */
+ u64 x, base_usecs;
+ u32 y;
+
+ base_usecs = system_time_base;
+ do_div(base_usecs, 1000);
+
+ x = (secs * 1000000ULL) + (u64)usecs + base_usecs;
+ y = do_div(x, 1000000);
+
+ wc_sec = (unsigned long)x;
+ wc_usec = (unsigned long)y;
+
__update_dom_time(current);
- send_guest_virq(current, VIRQ_TIMER);
-
- write_unlock_irq(&time_lock);
-}
-
+}
+
+static void local_time_calibration(void *unused)
+{
+ unsigned int cpu = smp_processor_id();
+
+ /*
+ * System timestamps, extrapolated from local and master oscillators,
+ * taken during this calibration and the previous calibration.
+ */
+ s_time_t prev_local_stime, curr_local_stime;
+ s_time_t prev_master_stime, curr_master_stime;
+
+ /* TSC timestamps taken during this calibration and prev calibration. */
+ u64 prev_tsc, curr_tsc;
+
+ /*
+ * System time and TSC ticks elapsed during the previous calibration
+ * 'epoch'. Also the accumulated error in the local estimate. All these
+ * values end up down-shifted to fit in 32 bits.
+ */
+ u64 stime_elapsed64, tsc_elapsed64, local_stime_error64;
+ u32 stime_elapsed32, tsc_elapsed32, local_stime_error32;
+
+ /* Calculated TSC shift to ensure 32-bit scale multiplier. */
+ int tsc_shift = 0;
+
+ prev_tsc = cpu_time[cpu].local_tsc_stamp;
+ prev_local_stime = cpu_time[cpu].stime_local_stamp;
+ prev_master_stime = cpu_time[cpu].stime_master_stamp;
+
+ /* Disable IRQs to get 'instantaneous' current timestamps. */
+ local_irq_disable();
+ rdtscll(curr_tsc);
+ curr_local_stime = get_s_time();
+ curr_master_stime = read_platform_stime();
+ local_irq_enable();
+
+#if 0
+ printk("PRE%d: tsc=%lld stime=%lld master=%lld\n",
+ cpu, prev_tsc, prev_local_stime, prev_master_stime);
+ printk("CUR%d: tsc=%lld stime=%lld master=%lld %lld\n",
+ cpu, curr_tsc, curr_local_stime, curr_master_stime,
+ platform_pit_counter);
+#endif
+
+ /* Local time warps forward if it lags behind master time. */
+ if ( curr_local_stime < curr_master_stime )
+ curr_local_stime = curr_master_stime;
+
+ stime_elapsed64 = curr_master_stime - prev_master_stime;
+ tsc_elapsed64 = curr_tsc - prev_tsc;
+
+ /*
+ * Error in the local system time estimate. Clamp to epoch time period, or
+ * we could end up with a negative scale factor (time going backwards!).
+ * This effectively clamps the scale factor to >= 0.
+ */
+ local_stime_error64 = curr_local_stime - curr_master_stime;
+ if ( local_stime_error64 > stime_elapsed64 )
+ local_stime_error64 = stime_elapsed64;
+
+ /*
+ * We require 0 < stime_elapsed < 2^31.
+ * This allows us to binary shift a 32-bit tsc_elapsed such that:
+ * stime_elapsed < tsc_elapsed <= 2*stime_elapsed
+ */
+ while ( ((u32)stime_elapsed64 != stime_elapsed64) ||
+ ((s32)stime_elapsed64 < 0) )
+ {
+ stime_elapsed64 >>= 1;
+ tsc_elapsed64 >>= 1;
+ local_stime_error64 >>= 1;
+ }
+
+ /* stime_master_diff (and hence stime_error) now fit in a 32-bit word. */
+ stime_elapsed32 = (u32)stime_elapsed64;
+ local_stime_error32 = (u32)local_stime_error64;
+
+ /* tsc_elapsed <= 2*stime_elapsed */
+ while ( tsc_elapsed64 > (stime_elapsed32 * 2) )
+ {
+ tsc_elapsed64 >>= 1;
+ tsc_shift--;
+ }
+
+ /* Local difference must now fit in 32 bits. */
+ ASSERT((u32)tsc_elapsed64 == tsc_elapsed64);
+ tsc_elapsed32 = (u32)tsc_elapsed64;
+
+ /* tsc_elapsed > stime_elapsed */
+ ASSERT(tsc_elapsed32 != 0);
+ while ( tsc_elapsed32 <= stime_elapsed32 )
+ {
+ tsc_elapsed32 <<= 1;
+ tsc_shift++;
+ }
+
+#if 0
+ printk("---%d: %08x %d\n", cpu,
+ div_frac(stime_elapsed32 - local_stime_error32, tsc_elapsed32),
+ tsc_shift);
+#endif
+
+ /* Record new timestamp information. */
+ cpu_time[cpu].tsc_scale.mul_frac =
+ div_frac(stime_elapsed32 - local_stime_error32, tsc_elapsed32);
+ cpu_time[cpu].tsc_scale.shift = tsc_shift;
+ cpu_time[cpu].local_tsc_stamp = curr_tsc;
+ cpu_time[cpu].stime_local_stamp = curr_local_stime;
+ cpu_time[cpu].stime_master_stamp = curr_master_stime;
+
+ set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + MILLISECS(1000));
+
+ if ( cpu == 0 )
+ platform_time_calibration();
+}
+
+void init_percpu_time(void)
+{
+ unsigned int cpu = smp_processor_id();
+ unsigned long flags;
+ s_time_t now;
+
+ local_irq_save(flags);
+ rdtscll(cpu_time[cpu].local_tsc_stamp);
+ now = (cpu == 0) ? 0 : read_platform_stime();
+ local_irq_restore(flags);
+
+ cpu_time[cpu].stime_master_stamp = now;
+ cpu_time[cpu].stime_local_stamp = now;
+
+ init_ac_timer(&cpu_time[cpu].calibration_timer,
+ local_time_calibration, NULL, cpu);
+ set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + MILLISECS(1000));
+}
/* Late init function (after all CPUs are booted). */
-int __init init_xen_time()
-{
- u64 scale;
- unsigned int cpu_ghz;
-
- cpu_ghz = (unsigned int)(cpu_freq / 1000000000ULL);
- for ( rdtsc_bitshift = 0; cpu_ghz != 0; rdtsc_bitshift++, cpu_ghz >>= 1 )
- continue;
-
- scale = 1000000000LL << (32 + rdtsc_bitshift);
- scale /= cpu_freq;
- st_scale_f = scale & 0xffffffff;
- st_scale_i = scale >> 32;
+int __init init_xen_time(void)
+{
+ wc_sec = get_cmos_time();
local_irq_disable();
- /* System time ticks from zero. */
- rdtscll(full_tsc_irq);
- stime_irq = (s_time_t)0;
- shifted_tsc_irq = (u32)(full_tsc_irq >> rdtsc_bitshift);
-
- /* Wallclock time starts as the initial RTC time. */
- wc_sec = get_cmos_time();
+ init_percpu_time();
+
+ stime_platform_stamp = 0;
+ init_platform_timer();
local_irq_enable();
-
- printk("Time init:\n");
- printk(".... cpu_freq: %08X:%08X\n", (u32)(cpu_freq>>32),(u32)cpu_freq);
- printk(".... scale: %08X:%08X\n", (u32)(scale>>32),(u32)scale);
- printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_usec);
return 0;
}
@@ -375,15 +601,12 @@
/* Early init function. */
void __init early_time_init(void)
{
- unsigned long ticks_per_frac = calibrate_tsc();
-
- if ( !ticks_per_frac )
- panic("Error calibrating TSC\n");
-
- cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
-
- cpu_freq = (u64)ticks_per_frac * (u64)CALIBRATE_FRAC;
-
+ u64 tmp = calibrate_boot_tsc();
+
+ set_time_scale(&cpu_time[0].tsc_scale, tmp);
+
+ do_div(tmp, 1000);
+ cpu_khz = (unsigned long)tmp;
printk("Detected %lu.%03lu MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
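
For sanity-checking set_time_scale() and the conversion path above, here is
a portable restatement without the inline asm, exercised with a hypothetical
2 GHz TSC. Note the down-shifted delta must fit in 32 bits, which is roughly
why each CPU is re-stamped by the one-second calibration timer:

/* Portable sketch of set_time_scale()/down_shift()/mul_frac() (no asm).
 * The 2 GHz rate is hypothetical, for the check only. */
#include <stdio.h>
#include <stdint.h>

struct time_scale { int shift; uint32_t mul_frac; };

static void set_time_scale(struct time_scale *ts, uint64_t ticks_per_sec)
{
    uint64_t tps = ticks_per_sec;
    int shift = 0;

    while (tps > 2000000000ULL) { tps >>= 1; shift--; }  /* > 2e9: halve  */
    while (tps < 1000000000ULL) { tps <<= 1; shift++; }  /* < 1e9: double */

    /* 1e9/tps is now in (0.5, 1], stored as 0.32 fixed point. An exact
     * power-of-two multiple of 1 GHz would overflow the fraction, a case
     * div_frac()'s ASSERT also forbids. */
    ts->mul_frac = (uint32_t)((1000000000ULL << 32) / tps);
    ts->shift = shift;
}

static uint64_t ticks_to_ns(uint64_t ticks, const struct time_scale *ts)
{
    uint32_t t32 = (ts->shift < 0) ? (uint32_t)(ticks >> -ts->shift)
                                   : (uint32_t)ticks << ts->shift;
    return ((uint64_t)t32 * ts->mul_frac) >> 32;
}

int main(void)
{
    struct time_scale ts;
    set_time_scale(&ts, 2000000000ULL);     /* hypothetical 2 GHz TSC */
    printf("1s of ticks -> %llu ns\n",      /* expect 1000000000      */
           (unsigned long long)ticks_to_ns(2000000000ULL, &ts));
    return 0;
}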
diff -r 9697bc63d403 -r 43564304cf94 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h Sun Jul 17 14:16:21 2005
+++ b/xen/include/asm-x86/time.h Mon Jul 18 20:22:11 2005
@@ -4,4 +4,7 @@
extern int timer_ack;
+extern void calibrate_tsc_bp(void);
+extern void calibrate_tsc_ap(void);
+
#endif /* __X86_TIME_H__ */
diff -r 9697bc63d403 -r 43564304cf94 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Sun Jul 17 14:16:21 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Mon Jul 18 20:22:11 2005
@@ -104,24 +104,16 @@
struct timer_opts *cur_timer = &timer_tsc;
/* These are periodically updated in shared_info, and then copied here. */
-u32 shadow_tsc_stamp;
-u64 shadow_system_time;
-static u32 shadow_time_version;
+struct shadow_time_info {
+ u64 tsc_timestamp; /* TSC at last update of time vals. */
+ u64 system_timestamp; /* Time, in nanosecs, since boot. */
+ u32 tsc_to_nsec_mul;
+ u32 tsc_to_usec_mul;
+ int tsc_shift;
+ u32 version;
+};
+static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
static struct timeval shadow_tv;
-
-/*
- * We use this to ensure that gettimeofday() is monotonically increasing. We
- * only break this guarantee if the wall clock jumps backwards "a long way".
- */
-static struct timeval last_seen_tv = {0,0};
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-/* Periodically propagate synchronised time base to the RTC and to Xen. */
-static long last_rtc_update, last_update_to_xen;
-#endif
-
-/* Periodically take synchronised time base from Xen, if we need it. */
-static long last_update_from_xen; /* UTC seconds when last read Xen clock. */
/* Keep track of last time we did processing/updating of jiffies and xtime. */
static u64 processed_system_time; /* System time (ns) at last processing. */
@@ -164,26 +156,147 @@
#define INDEPENDENT_WALLCLOCK() \
(independent_wallclock || (xen_start_info.flags & SIF_INITDOMAIN))
+int tsc_disable __initdata = 0;
+
+static void delay_tsc(unsigned long loops)
+{
+ unsigned long bclock, now;
+
+ rdtscl(bclock);
+ do
+ {
+ rep_nop();
+ rdtscl(now);
+ } while ((now-bclock) < loops);
+}
+
+struct timer_opts timer_tsc = {
+ .name = "tsc",
+ .delay = delay_tsc,
+};
+
+static inline u32 down_shift(u64 time, int shift)
+{
+ if ( shift < 0 )
+ return (u32)(time >> -shift);
+ return (u32)((u32)time << shift);
+}
+
+/*
+ * 32-bit multiplication of integer multiplicand and fractional multiplier
+ * yielding 32-bit integer product.
+ */
+static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
+{
+ u32 product_int, product_frac;
+ __asm__ (
+ "mul %3"
+ : "=a" (product_frac), "=d" (product_int)
+ : "0" (multiplicand), "r" (multiplier) );
+ return product_int;
+}
+
+void init_cpu_khz(void)
+{
+ u64 __cpu_khz = 1000000ULL << 32;
+ struct vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_time[0];
+ do_div(__cpu_khz, info->tsc_to_system_mul);
+ cpu_khz = down_shift(__cpu_khz, -info->tsc_shift);
+ printk(KERN_INFO "Xen reported: %lu.%03lu MHz processor.\n",
+ cpu_khz / 1000, cpu_khz % 1000);
+}
+
+static u64 get_nsec_offset(struct shadow_time_info *shadow)
+{
+ u64 now;
+ u32 delta;
+ rdtscll(now);
+ delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
+ return mul_frac(delta, shadow->tsc_to_nsec_mul);
+}
+
+static unsigned long get_usec_offset(struct shadow_time_info *shadow)
+{
+ u64 now;
+ u32 delta;
+ rdtscll(now);
+ delta = down_shift(now - shadow->tsc_timestamp, shadow->tsc_shift);
+ return mul_frac(delta, shadow->tsc_to_usec_mul);
+}
+
+static void update_wallclock(void)
+{
+ shared_info_t *s = HYPERVISOR_shared_info;
+ long wtm_nsec;
+ time_t wtm_sec, sec;
+ s64 nsec;
+
+ shadow_tv.tv_sec = s->wc_sec;
+ shadow_tv.tv_usec = s->wc_usec;
+
+ if (INDEPENDENT_WALLCLOCK())
+ return;
+
+ if ((time_status & STA_UNSYNC) != 0)
+ return;
+
+ /* Adjust shadow for jiffies that haven't updated xtime yet. */
+ shadow_tv.tv_usec -=
+ (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
+ HANDLE_USEC_UNDERFLOW(shadow_tv);
+
+ /* Update our unsynchronised xtime appropriately. */
+ sec = shadow_tv.tv_sec;
+ nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
+
+ __normalize_time(&sec, &nsec);
+ wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
+ wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
+
+ set_normalized_timespec(&xtime, sec, nsec);
+ set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+}
+
/*
* Reads a consistent set of time-base values from Xen, into a shadow data
* area. Must be called with the xtime_lock held for writing.
*/
static void __get_time_values_from_xen(void)
{
- shared_info_t *s = HYPERVISOR_shared_info;
+ shared_info_t *s = HYPERVISOR_shared_info;
+ struct vcpu_time_info *src;
+ struct shadow_time_info *dst;
+
+ src = &s->vcpu_time[smp_processor_id()];
+ dst = &per_cpu(shadow_time, smp_processor_id());
do {
- shadow_time_version = s->time_version2;
+ dst->version = src->time_version2;
rmb();
- shadow_tv.tv_sec = s->wc_sec;
- shadow_tv.tv_usec = s->wc_usec;
- shadow_tsc_stamp = (u32)s->tsc_timestamp;
- shadow_system_time = s->system_time;
+ dst->tsc_timestamp = src->tsc_timestamp;
+ dst->system_timestamp = src->system_time;
+ dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
+ dst->tsc_shift = src->tsc_shift;
rmb();
}
- while (shadow_time_version != s->time_version1);
-
- cur_timer->mark_offset();
+ while (dst->version != src->time_version1);
+
+ dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
+
+ if ((shadow_tv.tv_sec != s->wc_sec) ||
+ (shadow_tv.tv_usec != s->wc_usec))
+ update_wallclock();
+}
+
+static inline int time_values_up_to_date(int cpu)
+{
+ struct vcpu_time_info *src;
+ struct shadow_time_info *dst;
+
+ src = &HYPERVISOR_shared_info->vcpu_time[smp_processor_id()];
+ dst = &per_cpu(shadow_time, smp_processor_id());
+
+ return (dst->version == src->time_version2);
}
#define TIME_VALUES_UP_TO_DATE \
@@ -229,13 +342,18 @@
unsigned long max_ntp_tick;
unsigned long flags;
s64 nsec;
+ unsigned int cpu;
+ struct shadow_time_info *shadow;
+
+ cpu = get_cpu();
+ shadow = &per_cpu(shadow_time, cpu);
do {
unsigned long lost;
seq = read_seqbegin(&xtime_lock);
- usec = cur_timer->get_offset();
+ usec = get_usec_offset(shadow);
lost = jiffies - wall_jiffies;
/*
@@ -256,11 +374,11 @@
sec = xtime.tv_sec;
usec += (xtime.tv_nsec / NSEC_PER_USEC);
- nsec = shadow_system_time - processed_system_time;
+ nsec = shadow->system_timestamp - processed_system_time;
__normalize_time(&sec, &nsec);
usec += (long)nsec / NSEC_PER_USEC;
- if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
+ if (unlikely(!time_values_up_to_date(cpu))) {
/*
* We may have blocked for a long time,
* rendering our calculations invalid
@@ -275,19 +393,11 @@
}
} while (read_seqretry(&xtime_lock, seq));
+ put_cpu();
+
while (usec >= USEC_PER_SEC) {
usec -= USEC_PER_SEC;
sec++;
- }
-
- /* Ensure that time-of-day is monotonically increasing. */
- if ((sec < last_seen_tv.tv_sec) ||
- ((sec == last_seen_tv.tv_sec) && (usec < last_seen_tv.tv_usec))) {
- sec = last_seen_tv.tv_sec;
- usec = last_seen_tv.tv_usec;
- } else {
- last_seen_tv.tv_sec = sec;
- last_seen_tv.tv_usec = usec;
}
tv->tv_sec = sec;
@@ -302,12 +412,17 @@
long wtm_nsec;
s64 nsec;
struct timespec xentime;
+ unsigned int cpu;
+ struct shadow_time_info *shadow;
if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
if (!INDEPENDENT_WALLCLOCK())
return 0; /* Silent failure? */
+
+ cpu = get_cpu();
+ shadow = &per_cpu(shadow_time, cpu);
write_seqlock_irq(&xtime_lock);
@@ -317,9 +432,8 @@
* be stale, so we can retry with fresh ones.
*/
again:
- nsec = (s64)tv->tv_nsec -
- ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
- if (unlikely(!TIME_VALUES_UP_TO_DATE)) {
+ nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
+ if (unlikely(!time_values_up_to_date(cpu))) {
__get_time_values_from_xen();
goto again;
}
@@ -335,7 +449,7 @@
*/
nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
- nsec -= (shadow_system_time - processed_system_time);
+ nsec -= (shadow->system_timestamp - processed_system_time);
__normalize_time(&sec, &nsec);
wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
@@ -349,23 +463,20 @@
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
- /* Reset all our running time counts. They make no sense now. */
- last_seen_tv.tv_sec = 0;
- last_update_from_xen = 0;
-
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
if (xen_start_info.flags & SIF_INITDOMAIN) {
dom0_op_t op;
- last_rtc_update = last_update_to_xen = 0;
op.cmd = DOM0_SETTIME;
op.u.settime.secs = xentime.tv_sec;
op.u.settime.usecs = xentime.tv_nsec / NSEC_PER_USEC;
- op.u.settime.system_time = shadow_system_time;
+ op.u.settime.system_time = shadow->system_timestamp;
write_sequnlock_irq(&xtime_lock);
HYPERVISOR_dom0_op(&op);
} else
#endif
write_sequnlock_irq(&xtime_lock);
+
+ put_cpu();
clock_was_set();
return 0;
@@ -403,9 +514,30 @@
*/
unsigned long long monotonic_clock(void)
{
- return cur_timer->monotonic_clock();
+ int cpu = get_cpu();
+ struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
+ s64 off;
+ unsigned long flags;
+
+ for ( ; ; ) {
+ off = get_nsec_offset(shadow);
+ if (time_values_up_to_date(cpu))
+ break;
+ write_seqlock_irqsave(&xtime_lock, flags);
+ __get_time_values_from_xen();
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ }
+
+ put_cpu();
+
+ return shadow->system_timestamp + off;
}
EXPORT_SYMBOL(monotonic_clock);
+
+unsigned long long sched_clock(void)
+{
+ return monotonic_clock();
+}
#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
unsigned long profile_pc(struct pt_regs *regs)
@@ -427,27 +559,26 @@
static inline void do_timer_interrupt(int irq, void *dev_id,
struct pt_regs *regs)
{
- time_t wtm_sec, sec;
- s64 delta, delta_cpu, nsec;
- long sec_diff, wtm_nsec;
+ s64 delta, delta_cpu;
int cpu = smp_processor_id();
+ struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
do {
__get_time_values_from_xen();
- delta = delta_cpu = (s64)shadow_system_time +
- ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC);
+ delta = delta_cpu =
+ shadow->system_timestamp + get_nsec_offset(shadow);
delta -= processed_system_time;
delta_cpu -= per_cpu(processed_system_time, cpu);
}
- while (!TIME_VALUES_UP_TO_DATE);
+ while (!time_values_up_to_date(cpu));
if (unlikely(delta < 0) || unlikely(delta_cpu < 0)) {
printk("Timer ISR/%d: Time went backwards: "
"delta=%lld cpu_delta=%lld shadow=%lld "
"off=%lld processed=%lld cpu_processed=%lld\n",
- cpu, delta, delta_cpu, shadow_system_time,
- ((s64)cur_timer->get_offset() * (s64)NSEC_PER_USEC),
+ cpu, delta, delta_cpu, shadow->system_timestamp,
+ (s64)get_nsec_offset(shadow),
processed_system_time,
per_cpu(processed_system_time, cpu));
for (cpu = 0; cpu < num_online_cpus(); cpu++)
@@ -470,76 +601,6 @@
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING, regs);
}
-
- if (cpu != 0)
- return;
-
- /*
- * Take synchronised time from Xen once a minute if we're not
- * synchronised ourselves, and we haven't chosen to keep an independent
- * time base.
- */
- if (!INDEPENDENT_WALLCLOCK() &&
- ((time_status & STA_UNSYNC) != 0) &&
- (xtime.tv_sec > (last_update_from_xen + 60))) {
- /* Adjust shadow for jiffies that haven't updated xtime yet. */
- shadow_tv.tv_usec -=
- (jiffies - wall_jiffies) * (USEC_PER_SEC / HZ);
- HANDLE_USEC_UNDERFLOW(shadow_tv);
-
- /*
- * Reset our running time counts if they are invalidated by
- * a warp backwards of more than 500ms.
- */
- sec_diff = xtime.tv_sec - shadow_tv.tv_sec;
- if (unlikely(abs(sec_diff) > 1) ||
- unlikely(((sec_diff * USEC_PER_SEC) +
- (xtime.tv_nsec / NSEC_PER_USEC) -
- shadow_tv.tv_usec) > 500000)) {
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- last_rtc_update = last_update_to_xen = 0;
-#endif
- last_seen_tv.tv_sec = 0;
- }
-
- /* Update our unsynchronised xtime appropriately. */
- sec = shadow_tv.tv_sec;
- nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
-
- __normalize_time(&sec, &nsec);
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- last_update_from_xen = sec;
- }
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
- if (!(xen_start_info.flags & SIF_INITDOMAIN))
- return;
-
- /* Send synchronised time to Xen approximately every minute. */
- if (((time_status & STA_UNSYNC) == 0) &&
- (xtime.tv_sec > (last_update_to_xen + 60))) {
- dom0_op_t op;
- struct timeval tv;
-
- tv.tv_sec = xtime.tv_sec;
- tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
- tv.tv_usec += (jiffies - wall_jiffies) * (USEC_PER_SEC/HZ);
- HANDLE_USEC_OVERFLOW(tv);
-
- op.cmd = DOM0_SETTIME;
- op.u.settime.secs = tv.tv_sec;
- op.u.settime.usecs = tv.tv_usec;
- op.u.settime.system_time = shadow_system_time;
- HYPERVISOR_dom0_op(&op);
-
- last_update_to_xen = xtime.tv_sec;
- }
-#endif
}
/*
@@ -731,12 +792,10 @@
xtime.tv_nsec = shadow_tv.tv_usec * NSEC_PER_USEC;
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
- processed_system_time = shadow_system_time;
+ processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
per_cpu(processed_system_time, 0) = processed_system_time;
- if (timer_tsc_init.init(NULL) != 0)
- BUG();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
+ init_cpu_khz();
#if defined(__x86_64__)
vxtime.mode = VXTIME_TSC;
@@ -807,21 +866,15 @@
/* No locking required. We are only CPU running, and interrupts are off. */
void time_resume(void)
{
- if (timer_tsc_init.init(NULL) != 0)
- BUG();
+ init_cpu_khz();
/* Get timebases for new environment. */
__get_time_values_from_xen();
/* Reset our own concept of passage of system time. */
- processed_system_time = shadow_system_time;
+ processed_system_time =
+ per_cpu(shadow_time, smp_processor_id()).system_timestamp;
per_cpu(processed_system_time, 0) = processed_system_time;
-
- /* Accept a warp in UTC (wall-clock) time. */
- last_seen_tv.tv_sec = 0;
-
- /* Make sure we resync UTC time with Xen on next timer interrupt. */
- last_update_from_xen = 0;
}
#ifdef CONFIG_SMP
@@ -832,7 +885,8 @@
do {
seq = read_seqbegin(&xtime_lock);
- per_cpu(processed_system_time, cpu) = shadow_system_time;
+ per_cpu(processed_system_time, cpu) =
+ per_cpu(shadow_time, cpu).system_timestamp;
} while (read_seqretry(&xtime_lock, seq));
per_cpu(timer_irq, cpu) = bind_virq_to_irq(VIRQ_TIMER);
@@ -861,3 +915,13 @@
return 0;
}
__initcall(xen_sysctl_init);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */